some experimental space-optimization
This commit is contained in:
parent
ab1823487b
commit
1062bda5c3
3 changed files with 109 additions and 58 deletions
138
compress.c
138
compress.c
|
@ -50,13 +50,13 @@ typedef enum {
|
||||||
|
|
||||||
// used to store and compare backref candidates
|
// used to store and compare backref candidates
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint16_t offset, size;
|
uint16_t offset, size, ahead;
|
||||||
method_e method;
|
method_e method;
|
||||||
} backref_t;
|
} backref_t;
|
||||||
|
|
||||||
// used to store RLE candidates
|
// used to store RLE candidates
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint16_t size, data;
|
uint16_t size, data, ahead;
|
||||||
method_e method;
|
method_e method;
|
||||||
} rle_t;
|
} rle_t;
|
||||||
|
|
||||||
|
@ -81,7 +81,11 @@ void free_offsets(tuple_t*);
|
||||||
// unpacked/packed are 65536-byte buffers to read from/write to,
|
// unpacked/packed are 65536-byte buffers to read from/write to,
|
||||||
// inputsize is the length of the uncompressed data.
|
// inputsize is the length of the uncompressed data.
|
||||||
// Returns the size of the compressed data in bytes, or 0 if compression failed.
|
// Returns the size of the compressed data in bytes, or 0 if compression failed.
|
||||||
size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
|
size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
|
||||||
|
return pack_la(unpacked, inputsize, packed, fast, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t pack_la(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast, uint16_t lookahead) {
|
||||||
if (inputsize > DATA_SIZE) return 0;
|
if (inputsize > DATA_SIZE) return 0;
|
||||||
|
|
||||||
// current input/output positions
|
// current input/output positions
|
||||||
|
@ -89,8 +93,8 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
|
||||||
uint32_t outpos = 0;
|
uint32_t outpos = 0;
|
||||||
|
|
||||||
// backref and RLE compression candidates
|
// backref and RLE compression candidates
|
||||||
backref_t backref;
|
backref_t backref, backref_ahead;
|
||||||
rle_t rle;
|
rle_t rle, rle_ahead;
|
||||||
|
|
||||||
// used to collect data which should be written uncompressed
|
// used to collect data which should be written uncompressed
|
||||||
uint8_t dontpack[LONG_RUN_SIZE];
|
uint8_t dontpack[LONG_RUN_SIZE];
|
||||||
|
@ -116,21 +120,61 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
|
||||||
}
|
}
|
||||||
|
|
||||||
while (inpos < inputsize) {
|
while (inpos < inputsize) {
|
||||||
|
if (lookahead > (inputsize - inpos))
|
||||||
|
lookahead = inputsize - inpos;
|
||||||
|
|
||||||
// check for a potential RLE
|
// check for a potential RLE
|
||||||
rle = rle_check(unpacked, unpacked + inpos, inputsize, fast);
|
rle = rle_check(unpacked, unpacked + inpos, inputsize, fast);
|
||||||
|
for (unsigned la = 1; la <= lookahead; la++) {
|
||||||
|
rle_ahead = rle_check(unpacked, unpacked + inpos + la, inputsize, fast);
|
||||||
|
rle_ahead.ahead = la;
|
||||||
|
|
||||||
|
if (rle_ahead.size - rle_ahead.ahead > rle.size - rle.ahead) {
|
||||||
|
debug("lookahead found new best RLE (%u bytes at %x -> %u bytes at %x)\n",
|
||||||
|
rle.size, inpos + rle.ahead, rle_ahead.size, inpos + la);
|
||||||
|
rle = rle_ahead;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// check for a potential back reference
|
// check for a potential back reference
|
||||||
if (rle.size < LONG_RUN_SIZE && inpos < inputsize - 3)
|
if (rle.size < LONG_RUN_SIZE && inpos < inputsize - 3) {
|
||||||
backref = ref_search(unpacked, unpacked + inpos, inputsize, offsets, fast);
|
backref = ref_search(unpacked, unpacked + inpos, inputsize, offsets, fast);
|
||||||
else backref.size = 0;
|
|
||||||
|
for (unsigned la = 1; la <= lookahead; la++) {
|
||||||
|
backref_ahead = ref_search(unpacked, unpacked + inpos + la, inputsize, offsets, fast);
|
||||||
|
backref_ahead.ahead = la;
|
||||||
|
|
||||||
|
if (backref_ahead.size - backref_ahead.ahead > backref.size - backref.ahead) {
|
||||||
|
debug("lookahead found new best LZ (%u bytes at %x -> %u bytes at %x)\n",
|
||||||
|
backref.size, inpos + backref.ahead, backref_ahead.size, inpos + la);
|
||||||
|
backref = backref_ahead;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
backref.size = 0;
|
||||||
|
backref.ahead = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// if the backref is a better candidate, use it
|
// if the backref is a better candidate, use it
|
||||||
if (backref.size > 3 && backref.size > rle.size) {
|
if (backref.size - backref.ahead > 3 && (backref.size - backref.ahead) > (rle.size - rle.ahead)) {
|
||||||
if (outpos + dontpacksize + backref.size >= DATA_SIZE) {
|
if (outpos + dontpacksize + backref.size + backref.ahead >= DATA_SIZE) {
|
||||||
free_offsets(offsets);
|
free_offsets(offsets);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// write any bytes that were skipped by lookahead
|
||||||
|
for (unsigned la = 0; la < backref.ahead; la++) {
|
||||||
|
dontpack[dontpacksize++] = unpacked[inpos++];
|
||||||
|
|
||||||
|
// if the raw data buffer is full, flush it
|
||||||
|
if (dontpacksize == LONG_RUN_SIZE) {
|
||||||
|
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
||||||
|
dontpacksize = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// flush the raw data buffer first
|
// flush the raw data buffer
|
||||||
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
||||||
dontpacksize = 0;
|
dontpacksize = 0;
|
||||||
|
|
||||||
|
@ -138,13 +182,24 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
|
||||||
inpos += backref.size;
|
inpos += backref.size;
|
||||||
}
|
}
|
||||||
// or if the RLE is a better candidate, use it instead
|
// or if the RLE is a better candidate, use it instead
|
||||||
else if (rle.size >= 2) {
|
else if (rle.size - rle.ahead >= 2) {
|
||||||
if (outpos + dontpacksize + rle.size >= DATA_SIZE) {
|
if (outpos + dontpacksize + rle.size + rle.ahead >= DATA_SIZE) {
|
||||||
free_offsets(offsets);
|
free_offsets(offsets);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// write any bytes that were skipped by lookahead
|
||||||
|
for (unsigned la = 0; la < rle.ahead; la++) {
|
||||||
|
dontpack[dontpacksize++] = unpacked[inpos++];
|
||||||
|
|
||||||
|
// if the raw data buffer is full, flush it
|
||||||
|
if (dontpacksize == LONG_RUN_SIZE) {
|
||||||
|
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
||||||
|
dontpacksize = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// flush the raw data buffer first
|
// flush the raw data buffer
|
||||||
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
||||||
dontpacksize = 0;
|
dontpacksize = 0;
|
||||||
|
|
||||||
|
@ -152,19 +207,24 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
|
||||||
inpos += rle.size;
|
inpos += rle.size;
|
||||||
|
|
||||||
}
|
}
|
||||||
// otherwise, write this byte uncompressed
|
// otherwise, write uncompressed the bytes that precede the nearest candidate (at least one byte)
|
||||||
else {
|
else {
|
||||||
dontpack[dontpacksize++] = unpacked[inpos++];
|
unsigned dp = (rle.ahead < backref.ahead) ? rle.ahead : backref.ahead;
|
||||||
|
if (!dp) dp++;
|
||||||
if (outpos + dontpacksize >= DATA_SIZE) {
|
|
||||||
free_offsets(offsets);
|
for (unsigned i = 0; i < dp; i++) {
|
||||||
return 0;
|
dontpack[dontpacksize++] = unpacked[inpos++];
|
||||||
}
|
|
||||||
|
if (outpos + dontpacksize >= DATA_SIZE) {
|
||||||
// if the raw data buffer is full, flush it
|
free_offsets(offsets);
|
||||||
if (dontpacksize == LONG_RUN_SIZE) {
|
return 0;
|
||||||
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
}
|
||||||
dontpacksize = 0;
|
|
||||||
|
// if the raw data buffer is full, flush it
|
||||||
|
if (dontpacksize == LONG_RUN_SIZE) {
|
||||||
|
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
|
||||||
|
dontpacksize = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -354,7 +414,7 @@ uint8_t rotate (uint8_t i) {
|
||||||
// start and current are positions within the uncompressed input stream.
|
// start and current are positions within the uncompressed input stream.
|
||||||
// fast enables faster compression by ignoring sequence RLE.
|
// fast enables faster compression by ignoring sequence RLE.
|
||||||
rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
|
rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
|
||||||
rle_t candidate = { 0, 0, 0 };
|
rle_t candidate = { 0, 0, 0, 0 };
|
||||||
size_t size;
|
size_t size;
|
||||||
|
|
||||||
// check for possible 8-bit RLE
|
// check for possible 8-bit RLE
|
||||||
|
@ -367,8 +427,6 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
|
||||||
candidate.size = size;
|
candidate.size = size;
|
||||||
candidate.data = current[0];
|
candidate.data = current[0];
|
||||||
candidate.method = rle_8;
|
candidate.method = rle_8;
|
||||||
|
|
||||||
debug("\trle_check: found new candidate (size = %d, method = %d)\n", candidate.size, candidate.method);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check for possible 16-bit RLE
|
// check for possible 16-bit RLE
|
||||||
|
@ -384,8 +442,6 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
|
||||||
candidate.size = size;
|
candidate.size = size;
|
||||||
candidate.data = first;
|
candidate.data = first;
|
||||||
candidate.method = rle_16;
|
candidate.method = rle_16;
|
||||||
|
|
||||||
debug("\trle_check: found new candidate (size = %d, method = %d)\n", candidate.size, candidate.method);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// fast mode: don't use sequence RLE
|
// fast mode: don't use sequence RLE
|
||||||
|
@ -401,8 +457,6 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
|
||||||
candidate.size = size;
|
candidate.size = size;
|
||||||
candidate.data = current[0];
|
candidate.data = current[0];
|
||||||
candidate.method = rle_seq;
|
candidate.method = rle_seq;
|
||||||
|
|
||||||
debug("\trle_check: found new candidate (size = %d, method = %d)\n", candidate.size, candidate.method);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return candidate;
|
return candidate;
|
||||||
|
@ -412,7 +466,7 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
|
||||||
// start and current are positions within the uncompressed input stream.
|
// start and current are positions within the uncompressed input stream.
|
||||||
// fast enables fast mode which only uses regular forward references
|
// fast enables fast mode which only uses regular forward references
|
||||||
backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t *offsets, int fast) {
|
backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t *offsets, int fast) {
|
||||||
backref_t candidate = { 0, 0, 0 };
|
backref_t candidate = { 0, 0, 0, 0 };
|
||||||
uint16_t size;
|
uint16_t size;
|
||||||
int currbytes;
|
int currbytes;
|
||||||
tuple_t *tuple;
|
tuple_t *tuple;
|
||||||
|
@ -433,8 +487,6 @@ backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t
|
||||||
candidate.size = size;
|
candidate.size = size;
|
||||||
candidate.offset = pos - start;
|
candidate.offset = pos - start;
|
||||||
candidate.method = lz_norm;
|
candidate.method = lz_norm;
|
||||||
|
|
||||||
debug("\tref_search: found new candidate (offset: %4x, size: %d, method = %d)\n", candidate.offset, candidate.size, candidate.method);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -456,8 +508,6 @@ backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t
|
||||||
candidate.size = size;
|
candidate.size = size;
|
||||||
candidate.offset = pos - start;
|
candidate.offset = pos - start;
|
||||||
candidate.method = lz_rot;
|
candidate.method = lz_rot;
|
||||||
|
|
||||||
debug("\tref_search: found new candidate (offset: %4x, size: %d, method = %d)\n", candidate.offset, candidate.size, candidate.method);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -477,8 +527,6 @@ backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t
|
||||||
candidate.size = size;
|
candidate.size = size;
|
||||||
candidate.offset = pos - start;
|
candidate.offset = pos - start;
|
||||||
candidate.method = lz_rev;
|
candidate.method = lz_rev;
|
||||||
|
|
||||||
debug("\tref_search: found new candidate (offset: %4x, size: %d, method = %d)\n", candidate.offset, candidate.size, candidate.method);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -491,8 +539,6 @@ uint16_t write_backref (uint8_t *out, uint16_t outpos, backref_t backref) {
|
||||||
uint16_t size = backref.size - 1;
|
uint16_t size = backref.size - 1;
|
||||||
int outsize;
|
int outsize;
|
||||||
|
|
||||||
debug("write_backref: writing backref to %4x, size %d (method %d)\n", backref.offset, backref.size, backref.method);
|
|
||||||
|
|
||||||
// long run
|
// long run
|
||||||
if (size >= RUN_SIZE) {
|
if (size >= RUN_SIZE) {
|
||||||
// write command byte / MSB of size
|
// write command byte / MSB of size
|
||||||
|
@ -528,8 +574,6 @@ uint16_t write_rle (uint8_t *out, uint16_t outpos, rle_t rle) {
|
||||||
else
|
else
|
||||||
size = rle.size - 1;
|
size = rle.size - 1;
|
||||||
|
|
||||||
debug("write_rle: writing %d bytes of data 0x%02x (method %d)\n", rle.size, rle.data, rle.method);
|
|
||||||
|
|
||||||
// long run
|
// long run
|
||||||
if (size >= RUN_SIZE) {
|
if (size >= RUN_SIZE) {
|
||||||
// write command byte / MSB of size
|
// write command byte / MSB of size
|
||||||
|
@ -562,14 +606,6 @@ uint16_t write_rle (uint8_t *out, uint16_t outpos, rle_t rle) {
|
||||||
uint16_t write_raw (uint8_t *out, uint16_t outpos, uint8_t *in, uint16_t insize) {
|
uint16_t write_raw (uint8_t *out, uint16_t outpos, uint8_t *in, uint16_t insize) {
|
||||||
if (!insize) return 0;
|
if (!insize) return 0;
|
||||||
|
|
||||||
#ifdef DEBUG_OUT
|
|
||||||
printf("write_raw: writing %d bytes unpacked data: ", insize);
|
|
||||||
for (int i = 0; i < insize; i++)
|
|
||||||
printf("%02x ", in[i]);
|
|
||||||
|
|
||||||
printf("\n");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
uint16_t size = insize - 1;
|
uint16_t size = insize - 1;
|
||||||
int outsize;
|
int outsize;
|
||||||
|
|
||||||
|
|
|
@ -38,6 +38,7 @@ extern "C" {
|
||||||
#define RUN_SIZE 32
|
#define RUN_SIZE 32
|
||||||
#define LONG_RUN_SIZE 1024
|
#define LONG_RUN_SIZE 1024
|
||||||
|
|
||||||
|
size_t pack_la(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast, uint16_t lookahead);
|
||||||
size_t pack (uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast);
|
size_t pack (uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast);
|
||||||
size_t unpack (uint8_t *packed, uint8_t *unpacked);
|
size_t unpack (uint8_t *packed, uint8_t *unpacked);
|
||||||
|
|
||||||
|
|
28
inhal.c
28
inhal.c
|
@ -37,12 +37,16 @@ int main (int argc, char **argv) {
|
||||||
|
|
||||||
if (argc < 4) {
|
if (argc < 4) {
|
||||||
fprintf(stderr, "To insert compressed data into a ROM:\n"
|
fprintf(stderr, "To insert compressed data into a ROM:\n"
|
||||||
"%s [-fast] infile romfile offset\n"
|
"%s [-fast] [-la n] infile romfile offset\n"
|
||||||
|
|
||||||
"To write compressed data to a new file:\n"
|
"To write compressed data to a new file:\n"
|
||||||
"%s [-fast] -n infile outfile\n\n"
|
"%s [-fast] [-la n] -n infile outfile\n\n"
|
||||||
|
|
||||||
"Running with the -fast switch increases compression speed at the expense of size.\n"
|
"Running with the -fast switch speeds up compression at the expense of size.\n"
|
||||||
|
"Running with the -la switch (where 'n' is an integer) will \"look ahead\" up to\n"
|
||||||
|
"n bytes into the input stream, for cases in which compressing less aggressively\n"
|
||||||
|
"might yield smaller output. This can improve compression at the expense of\n"
|
||||||
|
"speed, and is not usually necessary.\n"
|
||||||
|
|
||||||
"\nExample:\n%s -fast test.chr kirbybowl.sfc 0x70000\n"
|
"\nExample:\n%s -fast test.chr kirbybowl.sfc 0x70000\n"
|
||||||
"%s -n test.chr test-packed.bin\n\n"
|
"%s -n test.chr test-packed.bin\n\n"
|
||||||
|
@ -52,19 +56,29 @@ int main (int argc, char **argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE *infile, *outfile;
|
FILE *infile, *outfile;
|
||||||
int fileoffset;
|
int fileoffset;
|
||||||
int newfile = 0;
|
int newfile = 0;
|
||||||
int fast = 0;
|
int fast = 0;
|
||||||
|
uint16_t lookahead = 0;
|
||||||
|
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
if (!strcmp(argv[i], "-n"))
|
if (!strcmp(argv[i], "-n"))
|
||||||
newfile = 1;
|
newfile = 1;
|
||||||
else if (!strcmp(argv[i], "-fast"))
|
else if (!strcmp(argv[i], "-fast"))
|
||||||
fast = 1;
|
fast = 1;
|
||||||
|
else if (!strcmp(argv[i], "-la"))
|
||||||
|
lookahead = (uint16_t)atoi(argv[++i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fast)
|
if (fast)
|
||||||
printf("Fast compression enabled.\n");
|
printf("Fast compression enabled.\n");
|
||||||
|
|
||||||
|
// limit lookahead to 3 bytes since anything more doesn't really have useful results
|
||||||
|
// (due to some properties of the compression format itself)
|
||||||
|
if (lookahead > 3)
|
||||||
|
lookahead = 3;
|
||||||
|
if (lookahead)
|
||||||
|
printf("%u-byte look-ahead enabled.\n", lookahead);
|
||||||
|
|
||||||
// check for -n switch
|
// check for -n switch
|
||||||
if (newfile) {
|
if (newfile) {
|
||||||
|
@ -114,7 +128,7 @@ int main (int argc, char **argv) {
|
||||||
|
|
||||||
// compress the file
|
// compress the file
|
||||||
clock_t time = clock();
|
clock_t time = clock();
|
||||||
outputsize = pack(unpacked, inputsize, packed, fast);
|
outputsize = pack_la(unpacked, inputsize, packed, fast, lookahead);
|
||||||
time = clock() - time;
|
time = clock() - time;
|
||||||
|
|
||||||
if (outputsize) {
|
if (outputsize) {
|
||||||
|
|
Loading…
Reference in a new issue