some experimental space-optimization

This commit is contained in:
devinacker 2014-12-20 06:27:35 -05:00
parent ab1823487b
commit 1062bda5c3
3 changed files with 109 additions and 58 deletions

View file

@ -50,13 +50,13 @@ typedef enum {
// used to store and compare backref candidates
typedef struct {
uint16_t offset, size;
uint16_t offset, size, ahead;
method_e method;
} backref_t;
// used to store RLE candidates
typedef struct {
uint16_t size, data;
uint16_t size, data, ahead;
method_e method;
} rle_t;
@ -81,7 +81,11 @@ void free_offsets(tuple_t*);
// unpacked/packed are 65536-byte buffers to read from/write to,
// inputsize is the length of the uncompressed data.
// Returns the size of the compressed data in bytes, or 0 if compression failed.
size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
// Convenience wrapper around pack_la(): forwards all arguments unchanged and
// passes 0 for the lookahead parameter.
size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
return pack_la(unpacked, inputsize, packed, fast, 0);
}
size_t pack_la(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast, uint16_t lookahead) {
if (inputsize > DATA_SIZE) return 0;
// current input/output positions
@ -89,8 +93,8 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
uint32_t outpos = 0;
// backref and RLE compression candidates
backref_t backref;
rle_t rle;
backref_t backref, backref_ahead;
rle_t rle, rle_ahead;
// used to collect data which should be written uncompressed
uint8_t dontpack[LONG_RUN_SIZE];
@ -116,21 +120,61 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
}
while (inpos < inputsize) {
if (lookahead > (inputsize - inpos))
lookahead = inputsize - inpos;
// check for a potential RLE
rle = rle_check(unpacked, unpacked + inpos, inputsize, fast);
for (unsigned la = 1; la <= lookahead; la++) {
rle_ahead = rle_check(unpacked, unpacked + inpos + la, inputsize, fast);
rle_ahead.ahead = la;
if (rle_ahead.size - rle_ahead.ahead > rle.size - rle.ahead) {
debug("lookahead found new best RLE (%u bytes at %x -> %u bytes at %x)\n",
rle.size, inpos + rle.ahead, rle_ahead.size, inpos + la);
rle = rle_ahead;
}
}
// check for a potential back reference
if (rle.size < LONG_RUN_SIZE && inpos < inputsize - 3)
if (rle.size < LONG_RUN_SIZE && inpos < inputsize - 3) {
backref = ref_search(unpacked, unpacked + inpos, inputsize, offsets, fast);
else backref.size = 0;
for (unsigned la = 1; la <= lookahead; la++) {
backref_ahead = ref_search(unpacked, unpacked + inpos + la, inputsize, offsets, fast);
backref_ahead.ahead = la;
if (backref_ahead.size - backref_ahead.ahead > backref.size - backref.ahead) {
debug("lookahead found new best LZ (%u bytes at %x -> %u bytes at %x)\n",
backref.size, inpos + backref.ahead, backref_ahead.size, inpos + la);
backref = backref_ahead;
}
}
}
else {
backref.size = 0;
backref.ahead = 0;
}
// if the backref is a better candidate, use it
if (backref.size > 3 && backref.size > rle.size) {
if (outpos + dontpacksize + backref.size >= DATA_SIZE) {
if (backref.size - backref.ahead > 3 && (backref.size - backref.ahead) > (rle.size - rle.ahead)) {
if (outpos + dontpacksize + backref.size + backref.ahead >= DATA_SIZE) {
free_offsets(offsets);
return 0;
}
// write any bytes that were skipped by lookahead
for (unsigned la = 0; la < backref.ahead; la++) {
dontpack[dontpacksize++] = unpacked[inpos++];
// if the raw data buffer is full, flush it
if (dontpacksize == LONG_RUN_SIZE) {
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
dontpacksize = 0;
}
}
// flush the raw data buffer first
// flush the raw data buffer
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
dontpacksize = 0;
@ -138,13 +182,24 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
inpos += backref.size;
}
// or if the RLE is a better candidate, use it instead
else if (rle.size >= 2) {
if (outpos + dontpacksize + rle.size >= DATA_SIZE) {
else if (rle.size - rle.ahead >= 2) {
if (outpos + dontpacksize + rle.size + rle.ahead >= DATA_SIZE) {
free_offsets(offsets);
return 0;
}
// write any bytes that were skipped by lookahead
for (unsigned la = 0; la < rle.ahead; la++) {
dontpack[dontpacksize++] = unpacked[inpos++];
// if the raw data buffer is full, flush it
if (dontpacksize == LONG_RUN_SIZE) {
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
dontpacksize = 0;
}
}
// flush the raw data buffer first
// flush the raw data buffer
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
dontpacksize = 0;
@ -152,19 +207,24 @@ size_t pack(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) {
inpos += rle.size;
}
// otherwise, write this byte uncompressed
// otherwise, write any bytes uncompressed that we know aren't usable
else {
dontpack[dontpacksize++] = unpacked[inpos++];
if (outpos + dontpacksize >= DATA_SIZE) {
free_offsets(offsets);
return 0;
}
// if the raw data buffer is full, flush it
if (dontpacksize == LONG_RUN_SIZE) {
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
dontpacksize = 0;
unsigned dp = (rle.ahead < backref.ahead) ? rle.ahead : backref.ahead;
if (!dp) dp++;
for (unsigned i = 0; i < dp; i++) {
dontpack[dontpacksize++] = unpacked[inpos++];
if (outpos + dontpacksize >= DATA_SIZE) {
free_offsets(offsets);
return 0;
}
// if the raw data buffer is full, flush it
if (dontpacksize == LONG_RUN_SIZE) {
outpos += write_raw(packed, outpos, dontpack, dontpacksize);
dontpacksize = 0;
}
}
}
}
@ -354,7 +414,7 @@ uint8_t rotate (uint8_t i) {
// start and current are positions within the uncompressed input stream.
// fast enables faster compression by ignoring sequence RLE.
rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
rle_t candidate = { 0, 0, 0 };
rle_t candidate = { 0, 0, 0, 0 };
size_t size;
// check for possible 8-bit RLE
@ -367,8 +427,6 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
candidate.size = size;
candidate.data = current[0];
candidate.method = rle_8;
debug("\trle_check: found new candidate (size = %d, method = %d)\n", candidate.size, candidate.method);
}
// check for possible 16-bit RLE
@ -384,8 +442,6 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
candidate.size = size;
candidate.data = first;
candidate.method = rle_16;
debug("\trle_check: found new candidate (size = %d, method = %d)\n", candidate.size, candidate.method);
}
// fast mode: don't use sequence RLE
@ -401,8 +457,6 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
candidate.size = size;
candidate.data = current[0];
candidate.method = rle_seq;
debug("\trle_check: found new candidate (size = %d, method = %d)\n", candidate.size, candidate.method);
}
return candidate;
@ -412,7 +466,7 @@ rle_t rle_check (uint8_t *start, uint8_t *current, uint32_t insize, int fast) {
// start and current are positions within the uncompressed input stream.
// fast enables fast mode which only uses regular forward references
backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t *offsets, int fast) {
backref_t candidate = { 0, 0, 0 };
backref_t candidate = { 0, 0, 0, 0 };
uint16_t size;
int currbytes;
tuple_t *tuple;
@ -433,8 +487,6 @@ backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t
candidate.size = size;
candidate.offset = pos - start;
candidate.method = lz_norm;
debug("\tref_search: found new candidate (offset: %4x, size: %d, method = %d)\n", candidate.offset, candidate.size, candidate.method);
}
}
@ -456,8 +508,6 @@ backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t
candidate.size = size;
candidate.offset = pos - start;
candidate.method = lz_rot;
debug("\tref_search: found new candidate (offset: %4x, size: %d, method = %d)\n", candidate.offset, candidate.size, candidate.method);
}
}
@ -477,8 +527,6 @@ backref_t ref_search (uint8_t *start, uint8_t *current, uint32_t insize, tuple_t
candidate.size = size;
candidate.offset = pos - start;
candidate.method = lz_rev;
debug("\tref_search: found new candidate (offset: %4x, size: %d, method = %d)\n", candidate.offset, candidate.size, candidate.method);
}
}
@ -491,8 +539,6 @@ uint16_t write_backref (uint8_t *out, uint16_t outpos, backref_t backref) {
uint16_t size = backref.size - 1;
int outsize;
debug("write_backref: writing backref to %4x, size %d (method %d)\n", backref.offset, backref.size, backref.method);
// long run
if (size >= RUN_SIZE) {
// write command byte / MSB of size
@ -528,8 +574,6 @@ uint16_t write_rle (uint8_t *out, uint16_t outpos, rle_t rle) {
else
size = rle.size - 1;
debug("write_rle: writing %d bytes of data 0x%02x (method %d)\n", rle.size, rle.data, rle.method);
// long run
if (size >= RUN_SIZE) {
// write command byte / MSB of size
@ -562,14 +606,6 @@ uint16_t write_rle (uint8_t *out, uint16_t outpos, rle_t rle) {
uint16_t write_raw (uint8_t *out, uint16_t outpos, uint8_t *in, uint16_t insize) {
if (!insize) return 0;
#ifdef DEBUG_OUT
printf("write_raw: writing %d bytes unpacked data: ", insize);
for (int i = 0; i < insize; i++)
printf("%02x ", in[i]);
printf("\n");
#endif
uint16_t size = insize - 1;
int outsize;

View file

@ -38,6 +38,7 @@ extern "C" {
#define RUN_SIZE 32
#define LONG_RUN_SIZE 1024
size_t pack_la(uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast, uint16_t lookahead);
size_t pack (uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast);
size_t unpack (uint8_t *packed, uint8_t *unpacked);

28
inhal.c
View file

@ -37,12 +37,16 @@ int main (int argc, char **argv) {
if (argc < 4) {
fprintf(stderr, "To insert compressed data into a ROM:\n"
"%s [-fast] infile romfile offset\n"
"%s [-fast] [-la n] infile romfile offset\n"
"To write compressed data to a new file:\n"
"%s [-fast] -n infile outfile\n\n"
"%s [-fast] [-la n] -n infile outfile\n\n"
"Running with the -fast switch increases compression speed at the expense of size.\n"
"Running with the -fast switch speeds up compression at the expense of size.\n"
"Running with the -la switch (where 'n' is an integer) will \"look ahead\" up to\n"
"n bytes into the input stream, for cases in which compressing less aggressively\n"
"might yield smaller output. This can improve compression at the expense of\n"
"speed, and is not usually necessary.\n"
"\nExample:\n%s -fast test.chr kirbybowl.sfc 0x70000\n"
"%s -n test.chr test-packed.bin\n\n"
@ -52,19 +56,29 @@ int main (int argc, char **argv) {
}
FILE *infile, *outfile;
int fileoffset;
int newfile = 0;
int fast = 0;
int fileoffset;
int newfile = 0;
int fast = 0;
uint16_t lookahead = 0;
for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-n"))
newfile = 1;
else if (!strcmp(argv[i], "-fast"))
fast = 1;
else if (!strcmp(argv[i], "-la"))
lookahead = (uint16_t)atoi(argv[++i]);
}
if (fast)
printf("Fast compression enabled.\n");
// limit lookahead to 3 bytes since anything more doesn't really have useful results
// (due to some properties of the compression format itself)
if (lookahead > 3)
lookahead = 3;
if (lookahead)
printf("%u-byte look-ahead enabled.\n", lookahead);
// check for -n switch
if (newfile) {
@ -114,7 +128,7 @@ int main (int argc, char **argv) {
// compress the file
clock_t time = clock();
outputsize = pack(unpacked, inputsize, packed, fast);
outputsize = pack_la(unpacked, inputsize, packed, fast, lookahead);
time = clock() - time;
if (outputsize) {