You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
91 lines
3.5 KiB
91 lines
3.5 KiB
/*
|
|
* Simple LZ77 streaming compressor.
|
|
*
|
|
* The scheme implemented here doesn't use a hash table and instead does a brute
|
|
* force search in the history for a previous string. It is relatively slow
|
|
* (but still O(N)) but gives good compression and minimal memory usage. For a
|
|
* small history window (eg 256 bytes) it's not too slow and compresses well.
|
|
*
|
|
* MIT license; Copyright (c) 2021 Damien P. George
|
|
*/
|
|
|
|
#include "uzlib.h"
|
|
|
|
#include "defl_static.c"
|
|
|
|
#define MATCH_LEN_MIN (3)
|
|
#define MATCH_LEN_MAX (258)
|
|
|
|
// hist should be a preallocated buffer of hist_max size bytes.
|
|
// hist_max should be greater than 0 a power of 2 (ie 1, 2, 4, 8, ...).
|
|
// It's possible to pass in hist=NULL, and then the history window will be taken from the
|
|
// src passed in to uzlib_lz77_compress (this is useful when not doing streaming compression).
|
|
void uzlib_lz77_init(uzlib_lz77_state_t *state, uint8_t *hist, size_t hist_max) {
|
|
memset(state, 0, sizeof(uzlib_lz77_state_t));
|
|
state->hist_buf = hist;
|
|
state->hist_max = hist_max;
|
|
state->hist_start = 0;
|
|
state->hist_len = 0;
|
|
}
|
|
|
|
// Search back in the history for the maximum match of the given src data,
|
|
// with support for searching beyond the end of the history and into the src buffer
|
|
// (effectively the history and src buffer are concatenated).
|
|
static size_t uzlib_lz77_search_max_match(uzlib_lz77_state_t *state, const uint8_t *src, size_t len, size_t *longest_offset) {
|
|
size_t longest_len = 0;
|
|
for (size_t hist_search = 0; hist_search < state->hist_len; ++hist_search) {
|
|
// Search for a match.
|
|
size_t match_len;
|
|
for (match_len = 0; match_len <= MATCH_LEN_MAX && match_len < len; ++match_len) {
|
|
uint8_t hist;
|
|
if (hist_search + match_len < state->hist_len) {
|
|
hist = state->hist_buf[(state->hist_start + hist_search + match_len) & (state->hist_max - 1)];
|
|
} else {
|
|
hist = src[hist_search + match_len - state->hist_len];
|
|
}
|
|
if (src[match_len] != hist) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Take this match if its length is at least the minimum, and larger than previous matches.
|
|
// If the length is the same as the previous longest then take this match as well, because
|
|
// this match will be closer (more recent in the history) and take less bits to encode.
|
|
if (match_len >= MATCH_LEN_MIN && match_len >= longest_len) {
|
|
longest_len = match_len;
|
|
*longest_offset = state->hist_len - hist_search;
|
|
}
|
|
}
|
|
|
|
return longest_len;
|
|
}
|
|
|
|
// Compress the given chunk of data.
|
|
void uzlib_lz77_compress(uzlib_lz77_state_t *state, const uint8_t *src, unsigned len) {
|
|
const uint8_t *top = src + len;
|
|
while (src < top) {
|
|
// Look for a match in the history window.
|
|
size_t match_offset = 0;
|
|
size_t match_len = uzlib_lz77_search_max_match(state, src, top - src, &match_offset);
|
|
|
|
// Encode the literal byte or the match.
|
|
if (match_len == 0) {
|
|
uzlib_literal(state, *src);
|
|
match_len = 1;
|
|
} else {
|
|
uzlib_match(state, match_offset, match_len);
|
|
}
|
|
|
|
// Push the bytes into the history buffer.
|
|
size_t mask = state->hist_max - 1;
|
|
while (match_len--) {
|
|
uint8_t b = *src++;
|
|
state->hist_buf[(state->hist_start + state->hist_len) & mask] = b;
|
|
if (state->hist_len == state->hist_max) {
|
|
state->hist_start = (state->hist_start + 1) & mask;
|
|
} else {
|
|
++state->hist_len;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|