From ac8f12c97d1afd9bafa9c710f827d40a407d3266 Mon Sep 17 00:00:00 2001 From: Mark Adler Date: Mon, 18 Sep 2023 21:17:00 -0700 Subject: [PATCH] Add LIT_MEM define to use more memory for a small deflate speedup. A bug fix in zlib 1.2.12 resulted in a slight slowdown (1-2%) of deflate. This commit provides the option to #define LIT_MEM, which uses more memory to reverse most of that slowdown. The memory for the pending buffer and symbol buffers is increased by 25%, which increases the total memory usage with the default parameters by about 6%. --- deflate.c | 21 +++++++++++++++++++++ deflate.h | 31 ++++++++++++++++++++++++++++++- trees.c | 18 ++++++++++++++++-- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/deflate.c b/deflate.c index 5e72cd926..263bbc8d4 100644 --- a/deflate.c +++ b/deflate.c @@ -493,7 +493,11 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, * symbols from which it is being constructed. */ +#ifdef LIT_MEM + s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 5); +#else s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); +#endif s->pending_buf_size = (ulg)s->lit_bufsize * 4; if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || @@ -503,8 +507,14 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, deflateEnd (strm); return Z_MEM_ERROR; } +#ifdef LIT_MEM + s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1)); + s->l_buf = s->pending_buf + (s->lit_bufsize << 2); + s->sym_end = s->lit_bufsize - 1; +#else s->sym_buf = s->pending_buf + s->lit_bufsize; s->sym_end = (s->lit_bufsize - 1) * 3; +#endif /* We avoid equality with lit_bufsize*3 because of wraparound at 64K * on 16 bit machines and because stored blocks are restricted to * 64K-1 bytes. @@ -720,9 +730,15 @@ int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) { if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; +#ifdef LIT_MEM + if (bits < 0 || bits > 16 || + (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; +#else if (bits < 0 || bits > 16 || s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) return Z_BUF_ERROR; +#endif do { put = Buf_size - s->bi_valid; if (put > bits) @@ -1308,7 +1324,12 @@ int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) { zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); +#ifdef LIT_MEM + ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1)); + ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); +#else ds->sym_buf = ds->pending_buf + ds->lit_bufsize; +#endif ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; diff --git a/deflate.h b/deflate.h index 869679142..69fe3a934 100644 --- a/deflate.h +++ b/deflate.h @@ -23,6 +23,10 @@ # define GZIP #endif +/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at + the cost of a larger memory footprint */ +/* #define LIT_MEM */ + /* =========================================================================== * Internal compression state. */ @@ -217,7 +221,12 @@ typedef struct internal_state { /* Depth of each subtree used as tie breaker for trees of equal frequency */ +#ifdef LIT_MEM + ushf *d_buf; /* buffer for distances */ + uchf *l_buf; /* buffer for literals/lengths */ +#else uchf *sym_buf; /* buffer for distances and literals/lengths */ +#endif uInt lit_bufsize; /* Size of match buffer for literals/lengths. There are 4 reasons for @@ -239,7 +248,7 @@ typedef struct internal_state { * - I can't count above 4 */ - uInt sym_next; /* running index in sym_buf */ + uInt sym_next; /* running index in symbol buffer */ uInt sym_end; /* symbol table full when sym_next reaches this */ ulg opt_len; /* bit length of current block with optimal trees */ @@ -318,6 +327,25 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, extern const uch ZLIB_INTERNAL _dist_code[]; #endif +#ifdef LIT_MEM +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->sym_next] = 0; \ + s->l_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->d_buf[s->sym_next] = dist; \ + s->l_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +#else # define _tr_tally_lit(s, c, flush) \ { uch cc = (c); \ s->sym_buf[s->sym_next++] = 0; \ @@ -337,6 +365,7 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, s->dyn_dtree[d_code(dist)].Freq++; \ flush = (s->sym_next == s->sym_end); \ } +#endif #else # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) # define _tr_tally_dist(s, distance, length, flush) \ diff --git a/trees.c b/trees.c index 8dbdc40ba..5ca23e938 100644 --- a/trees.c +++ b/trees.c @@ -899,14 +899,19 @@ local void compress_block(deflate_state *s, const ct_data *ltree, const ct_data *dtree) { unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ - unsigned sx = 0; /* running index in sym_buf */ + unsigned sx = 0; /* running index in symbol buffers */ unsigned code; /* the code to send */ int extra; /* number of extra bits to send */ if (s->sym_next != 0) do { +#ifdef LIT_MEM + dist = s->d_buf[sx]; + lc = s->l_buf[sx++]; +#else dist = s->sym_buf[sx++] & 0xff; dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; lc = s->sym_buf[sx++]; +#endif if (dist == 0) { send_code(s, lc, ltree); /* send a literal byte */ Tracecv(isgraph(lc), (stderr," '%c' ", lc)); @@ -931,8 +936,12 @@ local void compress_block(deflate_state *s, const ct_data *ltree, } } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and sym_buf is ok: */ + /* Check for no overlay of pending_buf on needed symbols */ +#ifdef LIT_MEM + Assert(s->pending < (s->lit_bufsize << 1) + sx, "pendingBuf overflow"); +#else Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); +#endif } while (sx < s->sym_next); @@ -1082,9 +1091,14 @@ void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, * the current block must be flushed. */ int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { +#ifdef LIT_MEM + s->d_buf[s->sym_next] = (ush)dist; + s->l_buf[s->sym_next++] = (uch)lc; +#else s->sym_buf[s->sym_next++] = (uch)dist; s->sym_buf[s->sym_next++] = (uch)(dist >> 8); s->sym_buf[s->sym_next++] = (uch)lc; +#endif if (dist == 0) { /* lc is the unmatched char */ s->dyn_ltree[lc].Freq++;