| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- /*
- FastLZ - Byte-aligned LZ77 compression library
- Copyright (C) 2005-2020 Ariya Hidayat <ariya.hidayat@gmail.com>
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #include "fastlz.h"
- #include "PikaObj.h"
- #include <stdint.h>
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
- /*
- * Give hints to the compiler for branch prediction optimization.
- */
- #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))
- #define FASTLZ_LIKELY(c) (__builtin_expect(!!(c), 1))
- #define FASTLZ_UNLIKELY(c) (__builtin_expect(!!(c), 0))
- #else
- #define FASTLZ_LIKELY(c) (c)
- #define FASTLZ_UNLIKELY(c) (c)
- #endif
- /*
- * Specialize custom 64-bit implementation for speed improvements.
- */
- #if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)
- #define FLZ_ARCH64
- #endif
- /*
- * Workaround for DJGPP to find uint8_t, uint16_t, etc.
- */
- #if defined(__MSDOS__) && defined(__GNUC__)
- #include <stdint-gcc.h>
- #endif
- #if defined(FASTLZ_USE_MEMMOVE) && (FASTLZ_USE_MEMMOVE == 0)
/*
 * Forward byte-by-byte copy of `count` bytes from `src` to `dest`.
 *
 * The copy always runs from low to high addresses, so when `dest` overlaps
 * the tail of `src` the already-written bytes are re-read and the pattern
 * is replicated. A `count` of zero copies nothing.
 */
static void fastlz_memmove(uint8_t* dest, const uint8_t* src, uint32_t count) {
    while (count > 0) {
        *dest++ = *src++;
        --count;
    }
}

/*
 * Copy `count` bytes from `src` to `dest`; in this configuration it simply
 * shares the byte loop of fastlz_memmove().
 */
static void fastlz_memcpy(uint8_t* dest, const uint8_t* src, uint32_t count) {
    fastlz_memmove(dest, src, count);
}
- #else
- #include <string.h>
/*
 * Copy `count` bytes from `src` to `dest`.
 *
 * Runs longer than 4 bytes whose destination starts at or beyond the end of
 * the source range are handed to memmove(). Everything else is copied one
 * byte at a time from low to high addresses, so a destination overlapping
 * the tail of the source replicates the bytes already written.
 */
static void fastlz_memmove(uint8_t* dest, const uint8_t* src, uint32_t count) {
    uint32_t i;

    if ((count > 4) && (dest >= src + count)) {
        memmove(dest, src, count);
        return;
    }

    for (i = 0; i < count; ++i) {
        dest[i] = src[i];
    }
}
/*
 * Copy `count` bytes from `src` to `dest` using the C library memcpy().
 * As with memcpy(), the two ranges must not overlap.
 */
static void fastlz_memcpy(uint8_t* dest, const uint8_t* src, uint32_t count) {
    const size_t nbytes = count;

    (void)memcpy(dest, src, nbytes);
}
- #endif
- #if defined(FLZ_ARCH64)
/*
 * Read four bytes starting at `ptr` as a little-endian 32-bit value.
 *
 * The bytes are assembled individually, so `ptr` may have any alignment and
 * the result does not depend on the host byte order (callers mask the result
 * with 0xffffff and rely on that selecting the first three bytes).
 */
static uint32_t flz_readu32(const void* ptr) {
    const uint8_t* bytes = (const uint8_t*)ptr;

    return (uint32_t)bytes[0] | ((uint32_t)bytes[1] << 8) |
           ((uint32_t)bytes[2] << 16) | ((uint32_t)bytes[3] << 24);
}
/*
 * Count how far the byte runs at `p` and `q` agree, with `q` bounded by `r`.
 *
 * If the first four bytes are equal they are skipped in one step. After
 * that, bytes are compared one at a time while the position in `q` is below
 * `r`. The returned count includes the first mismatching byte when one is
 * found before `r` is reached.
 */
static uint32_t flz_cmp(const uint8_t* p, const uint8_t* q, const uint8_t* r) {
    uint32_t consumed = 0;

    if (flz_readu32(p) == flz_readu32(q)) {
        consumed = 4;
    }

    while (q + consumed < r) {
        const uint8_t lhs = p[consumed];
        const uint8_t rhs = q[consumed];

        ++consumed;
        if (lhs != rhs) {
            break;
        }
    }
    return consumed;
}
- #endif /* FLZ_ARCH64 */
- #if !defined(FLZ_ARCH64)
/*
 * Read four bytes starting at `ptr` as a little-endian 32-bit value.
 *
 * Each byte is widened to uint32_t before shifting so that a top byte of
 * 0x80 or above is never shifted into the sign bit of a promoted int.
 */
static uint32_t flz_readu32(const void* ptr) {
    const uint8_t* p = (const uint8_t*)ptr;

    return ((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16) |
           ((uint32_t)p[1] << 8) | (uint32_t)p[0];
}
/*
 * Count how far the byte runs at `p` and `q` agree, with `q` bounded by `r`.
 *
 * Bytes are compared one at a time while the position in `q` is below `r`.
 * The returned count includes the first mismatching byte when one is found
 * before `r` is reached.
 */
static uint32_t flz_cmp(const uint8_t* p, const uint8_t* q, const uint8_t* r) {
    uint32_t consumed = 0;

    while (q + consumed < r) {
        const uint8_t lhs = p[consumed];
        const uint8_t rhs = q[consumed];

        ++consumed;
        if (lhs != rhs) {
            break;
        }
    }
    return consumed;
}
- #endif /* !FLZ_ARCH64 */
- #define MAX_COPY 32
- #define MAX_LEN 264 /* 256 + 8 */
- #define MAX_L1_DISTANCE 8192
- #define MAX_L2_DISTANCE 8191
- #define MAX_FARDISTANCE (65535 + MAX_L2_DISTANCE - 1)
/* The match hash table has 2^HASH_LOG slots; HASH_MASK selects a slot. */
#define HASH_LOG 13
#define HASH_SIZE (1 << HASH_LOG)
#define HASH_MASK (HASH_SIZE - 1)

/*
 * Map a 32-bit value to a hash-table slot in [0, HASH_SIZE).
 *
 * Multiplicative hash: `v` is multiplied by the constant 2654435769 and the
 * HASH_LOG bits just below bit 32 of the product are returned. The multiply
 * is done in unsigned 64-bit arithmetic so it is well defined for every
 * possible `v`.
 */
static uint16_t flz_hash(uint32_t v) {
    const uint64_t product = (uint64_t)v * UINT64_C(2654435769);
    const uint32_t h = (uint32_t)(product >> (32 - HASH_LOG));

    return (uint16_t)(h & HASH_MASK);
}
- /* special case of memcpy: at most MAX_COPY bytes */
/*
 * Special case of memcpy: copies `count` bytes (at most MAX_COPY in the
 * literal-run caller) from `src` to `dest`.
 *
 * The copy is delegated to fastlz_memcpy() on every architecture. Literal
 * runs start at arbitrary byte offsets, so copying them through uint32_t
 * pointers would perform misaligned, type-punned accesses.
 */
static void flz_smallcopy(uint8_t* dest, const uint8_t* src, uint32_t count) {
    fastlz_memcpy(dest, src, count);
}
- /* special case of memcpy: exactly MAX_COPY bytes */
- static void flz_maxcopy(void* dest, const void* src) {
- #if defined(FLZ_ARCH64)
- const uint32_t* p = (const uint32_t*)src;
- uint32_t* q = (uint32_t*)dest;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- #else
- fastlz_memcpy(dest, src, MAX_COPY);
- #endif
- }
- static uint8_t* flz_literals(uint32_t runs, const uint8_t* src, uint8_t* dest) {
- while (runs >= MAX_COPY) {
- *dest++ = MAX_COPY - 1;
- flz_maxcopy(dest, src);
- src += MAX_COPY;
- dest += MAX_COPY;
- runs -= MAX_COPY;
- }
- if (runs > 0) {
- *dest++ = runs - 1;
- flz_smallcopy(dest, src, runs);
- dest += runs;
- }
- return dest;
- }
- static uint8_t* flz1_match(uint32_t len, uint32_t distance, uint8_t* op) {
- --distance;
- if (FASTLZ_UNLIKELY(len > MAX_LEN - 2))
- while (len > MAX_LEN - 2) {
- *op++ = (7 << 5) + (distance >> 8);
- *op++ = MAX_LEN - 2 - 7 - 2;
- *op++ = (distance & 255);
- len -= MAX_LEN - 2;
- }
- if (len < 7) {
- *op++ = (len << 5) + (distance >> 8);
- *op++ = (distance & 255);
- } else {
- *op++ = (7 << 5) + (distance >> 8);
- *op++ = len - 7;
- *op++ = (distance & 255);
- }
- return op;
- }
/*
 * Make the enclosing function return 0 when `cond` does not hold.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * it must be followed by a semicolon and is safe inside an unbraced
 * if/else.
 */
#define FASTLZ_BOUND_CHECK(cond)      \
    do {                              \
        if (FASTLZ_UNLIKELY(!(cond))) \
            return 0;                 \
    } while (0)
- int fastlz1_compress(const void* input, int length, void* output) {
- const uint8_t* ip = (const uint8_t*)input;
- const uint8_t* ip_start = ip;
- const uint8_t* ip_bound = ip + length - 4; /* because readU32 */
- const uint8_t* ip_limit = ip + length - 12 - 1;
- uint8_t* op = (uint8_t*)output;
- // uint32_t htab[HASH_SIZE];
- uint32_t* htab = (uint32_t*)pikaMalloc(sizeof(uint32_t) * HASH_SIZE);
- uint32_t seq, hash;
- /* initializes hash table */
- for (hash = 0; hash < HASH_SIZE; ++hash)
- htab[hash] = 0;
- /* we start with literal copy */
- const uint8_t* anchor = ip;
- ip += 2;
- /* main loop */
- while (FASTLZ_LIKELY(ip < ip_limit)) {
- const uint8_t* ref;
- uint32_t distance, cmp;
- /* find potential match */
- do {
- seq = flz_readu32(ip) & 0xffffff;
- hash = flz_hash(seq);
- ref = ip_start + htab[hash];
- htab[hash] = ip - ip_start;
- distance = ip - ref;
- cmp = FASTLZ_LIKELY(distance < MAX_L1_DISTANCE)
- ? flz_readu32(ref) & 0xffffff
- : 0x1000000;
- if (FASTLZ_UNLIKELY(ip >= ip_limit))
- break;
- ++ip;
- } while (seq != cmp);
- if (FASTLZ_UNLIKELY(ip >= ip_limit))
- break;
- --ip;
- if (FASTLZ_LIKELY(ip > anchor)) {
- op = flz_literals(ip - anchor, anchor, op);
- }
- uint32_t len = flz_cmp(ref + 3, ip + 3, ip_bound);
- op = flz1_match(len, distance, op);
- /* update the hash at match boundary */
- ip += len;
- seq = flz_readu32(ip);
- hash = flz_hash(seq & 0xffffff);
- htab[hash] = ip++ - ip_start;
- seq >>= 8;
- hash = flz_hash(seq);
- htab[hash] = ip++ - ip_start;
- anchor = ip;
- }
- uint32_t copy = (uint8_t*)input + length - anchor;
- op = flz_literals(copy, anchor, op);
- pikaFree(htab, sizeof(uint32_t) * HASH_SIZE);
- return op - (uint8_t*)output;
- }
/*
 * Decompress a level-1 block of `length` bytes at `input` into `output`,
 * writing at most `maxout` bytes.
 *
 * Returns the number of bytes written, or 0 when the input is empty, the
 * output capacity is not positive, or a bounds check fails (truncated
 * input, output too small, or a back-reference pointing before the start
 * of the output).
 *
 * All bounds checks compare lengths against the remaining space instead of
 * forming pointers outside the buffers, so they cannot wrap around.
 */
int fastlz1_decompress(const void* input,
                       int length,
                       void* output,
                       int maxout) {
    /* the first instruction is always a literal run needing >= 1 byte */
    if (length <= 0 || maxout <= 0)
        return 0;

    const uint8_t* ip = (const uint8_t*)input;
    const uint8_t* ip_limit = ip + length;
    uint8_t* const out_start = (uint8_t*)output;
    uint8_t* op = out_start;
    uint8_t* const op_limit = op + maxout;

    /* the top bits of the first byte carry the level tag, not a match */
    uint32_t ctrl = (*ip++) & 31;

    while (1) {
        if (ctrl >= 32) {
            /* match: copy `len` bytes from `dist` bytes back in the output */
            uint32_t len = (ctrl >> 5) - 1;
            uint32_t dist = ((ctrl & 31) << 8) + 1;

            if (len == 7 - 1) {
                /* long form: extra length byte plus the offset byte */
                FASTLZ_BOUND_CHECK((uint32_t)(ip_limit - ip) >= 2);
                len += *ip++;
            }
            FASTLZ_BOUND_CHECK(ip < ip_limit);
            dist += *ip++;
            len += 3;

            FASTLZ_BOUND_CHECK(len <= (uint32_t)(op_limit - op));
            FASTLZ_BOUND_CHECK(dist <= (uint32_t)(op - out_start));
            fastlz_memmove(op, op - dist, len);
            op += len;
        } else {
            /* literal run of ctrl + 1 bytes */
            ctrl++;
            FASTLZ_BOUND_CHECK(ctrl <= (uint32_t)(op_limit - op));
            FASTLZ_BOUND_CHECK(ctrl <= (uint32_t)(ip_limit - ip));
            fastlz_memcpy(op, ip, ctrl);
            ip += ctrl;
            op += ctrl;
        }

        /* stop once fewer than two input bytes remain */
        if (FASTLZ_UNLIKELY((uint32_t)(ip_limit - ip) < 2))
            break;
        ctrl = *ip++;
    }

    return op - out_start;
}
- static uint8_t* flz2_match(uint32_t len, uint32_t distance, uint8_t* op) {
- --distance;
- if (distance < MAX_L2_DISTANCE) {
- if (len < 7) {
- *op++ = (len << 5) + (distance >> 8);
- *op++ = (distance & 255);
- } else {
- *op++ = (7 << 5) + (distance >> 8);
- for (len -= 7; len >= 255; len -= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = (distance & 255);
- }
- } else {
- /* far away, but not yet in the another galaxy... */
- if (len < 7) {
- distance -= MAX_L2_DISTANCE;
- *op++ = (len << 5) + 31;
- *op++ = 255;
- *op++ = distance >> 8;
- *op++ = distance & 255;
- } else {
- distance -= MAX_L2_DISTANCE;
- *op++ = (7 << 5) + 31;
- for (len -= 7; len >= 255; len -= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = 255;
- *op++ = distance >> 8;
- *op++ = distance & 255;
- }
- }
- return op;
- }
- int fastlz2_compress(const void* input, int length, void* output) {
- const uint8_t* ip = (const uint8_t*)input;
- const uint8_t* ip_start = ip;
- const uint8_t* ip_bound = ip + length - 4; /* because readU32 */
- const uint8_t* ip_limit = ip + length - 12 - 1;
- uint8_t* op = (uint8_t*)output;
- // uint32_t htab[HASH_SIZE];
- uint32_t* htab = (uint32_t*)pikaMalloc(sizeof(uint32_t) * HASH_SIZE);
- uint32_t seq, hash;
- /* initializes hash table */
- for (hash = 0; hash < HASH_SIZE; ++hash)
- htab[hash] = 0;
- /* we start with literal copy */
- const uint8_t* anchor = ip;
- ip += 2;
- /* main loop */
- while (FASTLZ_LIKELY(ip < ip_limit)) {
- const uint8_t* ref;
- uint32_t distance, cmp;
- /* find potential match */
- do {
- seq = flz_readu32(ip) & 0xffffff;
- hash = flz_hash(seq);
- ref = ip_start + htab[hash];
- htab[hash] = ip - ip_start;
- distance = ip - ref;
- cmp = FASTLZ_LIKELY(distance < MAX_FARDISTANCE)
- ? flz_readu32(ref) & 0xffffff
- : 0x1000000;
- if (FASTLZ_UNLIKELY(ip >= ip_limit))
- break;
- ++ip;
- } while (seq != cmp);
- if (FASTLZ_UNLIKELY(ip >= ip_limit))
- break;
- --ip;
- /* far, needs at least 5-byte match */
- if (distance >= MAX_L2_DISTANCE) {
- if (ref[3] != ip[3] || ref[4] != ip[4]) {
- ++ip;
- continue;
- }
- }
- if (FASTLZ_LIKELY(ip > anchor)) {
- op = flz_literals(ip - anchor, anchor, op);
- }
- uint32_t len = flz_cmp(ref + 3, ip + 3, ip_bound);
- op = flz2_match(len, distance, op);
- /* update the hash at match boundary */
- ip += len;
- seq = flz_readu32(ip);
- hash = flz_hash(seq & 0xffffff);
- htab[hash] = ip++ - ip_start;
- seq >>= 8;
- hash = flz_hash(seq);
- htab[hash] = ip++ - ip_start;
- anchor = ip;
- }
- uint32_t copy = (uint8_t*)input + length - anchor;
- op = flz_literals(copy, anchor, op);
- /* marker for fastlz2 */
- *(uint8_t*)output |= (1 << 5);
- pikaFree(htab, sizeof(uint32_t) * HASH_SIZE);
- return op - (uint8_t*)output;
- }
- int fastlz2_decompress(const void* input,
- int length,
- void* output,
- int maxout) {
- const uint8_t* ip = (const uint8_t*)input;
- const uint8_t* ip_limit = ip + length;
- const uint8_t* ip_bound = ip_limit - 2;
- uint8_t* op = (uint8_t*)output;
- uint8_t* op_limit = op + maxout;
- uint32_t ctrl = (*ip++) & 31;
- while (1) {
- if (ctrl >= 32) {
- uint32_t len = (ctrl >> 5) - 1;
- uint32_t ofs = (ctrl & 31) << 8;
- const uint8_t* ref = op - ofs - 1;
- uint8_t code;
- if (len == 7 - 1)
- do {
- FASTLZ_BOUND_CHECK(ip <= ip_bound);
- code = *ip++;
- len += code;
- } while (code == 255);
- code = *ip++;
- ref -= code;
- len += 3;
- /* match from 16-bit distance */
- if (FASTLZ_UNLIKELY(code == 255))
- if (FASTLZ_LIKELY(ofs == (31 << 8))) {
- FASTLZ_BOUND_CHECK(ip < ip_bound);
- ofs = (*ip++) << 8;
- ofs += *ip++;
- ref = op - ofs - MAX_L2_DISTANCE - 1;
- }
- FASTLZ_BOUND_CHECK(op + len <= op_limit);
- FASTLZ_BOUND_CHECK(ref >= (uint8_t*)output);
- fastlz_memmove(op, ref, len);
- op += len;
- } else {
- ctrl++;
- FASTLZ_BOUND_CHECK(op + ctrl <= op_limit);
- FASTLZ_BOUND_CHECK(ip + ctrl <= ip_limit);
- fastlz_memcpy(op, ip, ctrl);
- ip += ctrl;
- op += ctrl;
- }
- if (FASTLZ_UNLIKELY(ip >= ip_limit))
- break;
- ctrl = *ip++;
- }
- return op - (uint8_t*)output;
- }
/*
 * Compress `length` bytes from `input` into `output`, picking the level
 * from the input size: level 1 for blocks shorter than 65536 bytes,
 * level 2 otherwise. Returns whatever the selected compressor returns.
 */
int fastlz_compress(const void* input, int length, void* output) {
    const int short_block = (length < 65536);

    return short_block ? fastlz1_compress(input, length, output)
                       : fastlz2_compress(input, length, output);
}
/*
 * Decompress a block of `length` bytes at `input` into `output`, writing at
 * most `maxout` bytes.
 *
 * The compression level is read from the top three bits of the first input
 * byte (0 selects level 1, 1 selects level 2). Returns the value of the
 * selected decompressor, or 0 when the input is empty or the level is
 * unknown.
 */
int fastlz_decompress(const void* input, int length, void* output, int maxout) {
    /* nothing to read the level tag from */
    if (length <= 0)
        return 0;

    /* magic identifier for compression level */
    int level = ((*(const uint8_t*)input) >> 5) + 1;

    switch (level) {
        case 1:
            return fastlz1_decompress(input, length, output, maxout);
        case 2:
            return fastlz2_decompress(input, length, output, maxout);
        default:
            /* unknown level, trigger error */
            return 0;
    }
}
/*
 * Compress `length` bytes from `input` into `output` with an explicitly
 * chosen compression level. Level 1 and level 2 are forwarded to the
 * matching compressor; any other level returns 0.
 */
int fastlz_compress_level(int level,
                          const void* input,
                          int length,
                          void* output) {
    switch (level) {
        case 1:
            return fastlz1_compress(input, length, output);
        case 2:
            return fastlz2_compress(input, length, output);
        default:
            return 0;
    }
}
- #pragma GCC diagnostic pop
|