alpar@9: /* inffast.c -- fast decoding alpar@9: * Copyright (C) 1995-2008, 2010 Mark Adler alpar@9: * For conditions of distribution and use, see copyright notice in zlib.h alpar@9: */ alpar@9: alpar@9: #include "zutil.h" alpar@9: #include "inftrees.h" alpar@9: #include "inflate.h" alpar@9: #include "inffast.h" alpar@9: alpar@9: #ifndef ASMINF alpar@9: alpar@9: /* Allow machine dependent optimization for post-increment or pre-increment. alpar@9: Based on testing to date, alpar@9: Pre-increment preferred for: alpar@9: - PowerPC G3 (Adler) alpar@9: - MIPS R5000 (Randers-Pehrson) alpar@9: Post-increment preferred for: alpar@9: - none alpar@9: No measurable difference: alpar@9: - Pentium III (Anderson) alpar@9: - M68060 (Nikl) alpar@9: */ alpar@9: #ifdef POSTINC alpar@9: # define OFF 0 alpar@9: # define PUP(a) *(a)++ alpar@9: #else alpar@9: # define OFF 1 alpar@9: # define PUP(a) *++(a) alpar@9: #endif alpar@9: alpar@9: /* alpar@9: Decode literal, length, and distance codes and write out the resulting alpar@9: literal and match bytes until either not enough input or output is alpar@9: available, an end-of-block is encountered, or a data error is encountered. alpar@9: When large enough input and output buffers are supplied to inflate(), for alpar@9: example, a 16K input buffer and a 64K output buffer, more than 95% of the alpar@9: inflate execution time is spent in this routine. alpar@9: alpar@9: Entry assumptions: alpar@9: alpar@9: state->mode == LEN alpar@9: strm->avail_in >= 6 alpar@9: strm->avail_out >= 258 alpar@9: start >= strm->avail_out alpar@9: state->bits < 8 alpar@9: alpar@9: On return, state->mode is one of: alpar@9: alpar@9: LEN -- ran out of enough output space or enough available input alpar@9: TYPE -- reached end of block code, inflate() to interpret next block alpar@9: BAD -- error in block data alpar@9: alpar@9: Notes: alpar@9: alpar@9: - The maximum input bits used by a length/distance pair is 15 bits for the alpar@9: length code, 5 bits for the length extra, 15 bits for the distance code, alpar@9: and 13 bits for the distance extra. This totals 48 bits, or six bytes. alpar@9: Therefore if strm->avail_in >= 6, then there is enough input to avoid alpar@9: checking for available input while decoding. alpar@9: alpar@9: - The maximum bytes that a single length/distance pair can output is 258 alpar@9: bytes, which is the maximum length that can be coded. inflate_fast() alpar@9: requires strm->avail_out >= 258 for each loop to avoid checking for alpar@9: output space. alpar@9: */ alpar@9: void ZLIB_INTERNAL inflate_fast(strm, start) alpar@9: z_streamp strm; alpar@9: unsigned start; /* inflate()'s starting value for strm->avail_out */ alpar@9: { alpar@9: struct inflate_state FAR *state; alpar@9: unsigned char FAR *in; /* local strm->next_in */ alpar@9: unsigned char FAR *last; /* while in < last, enough input available */ alpar@9: unsigned char FAR *out; /* local strm->next_out */ alpar@9: unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ alpar@9: unsigned char FAR *end; /* while out < end, enough space available */ alpar@9: #ifdef INFLATE_STRICT alpar@9: unsigned dmax; /* maximum distance from zlib header */ alpar@9: #endif alpar@9: unsigned wsize; /* window size or zero if not using window */ alpar@9: unsigned whave; /* valid bytes in the window */ alpar@9: unsigned wnext; /* window write index */ alpar@9: unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ alpar@9: unsigned long hold; /* local strm->hold */ alpar@9: unsigned bits; /* local strm->bits */ alpar@9: code const FAR *lcode; /* local strm->lencode */ alpar@9: code const FAR *dcode; /* local strm->distcode */ alpar@9: unsigned lmask; /* mask for first level of length codes */ alpar@9: unsigned dmask; /* mask for first level of distance codes */ alpar@9: code here; /* retrieved table entry */ alpar@9: unsigned op; /* code bits, operation, extra bits, or */ alpar@9: /* window position, window bytes to copy */ alpar@9: unsigned len; /* match length, unused bytes */ alpar@9: unsigned dist; /* match distance */ alpar@9: unsigned char FAR *from; /* where to copy match from */ alpar@9: alpar@9: /* copy state to local variables */ alpar@9: state = (struct inflate_state FAR *)strm->state; alpar@9: in = strm->next_in - OFF; alpar@9: last = in + (strm->avail_in - 5); alpar@9: out = strm->next_out - OFF; alpar@9: beg = out - (start - strm->avail_out); alpar@9: end = out + (strm->avail_out - 257); alpar@9: #ifdef INFLATE_STRICT alpar@9: dmax = state->dmax; alpar@9: #endif alpar@9: wsize = state->wsize; alpar@9: whave = state->whave; alpar@9: wnext = state->wnext; alpar@9: window = state->window; alpar@9: hold = state->hold; alpar@9: bits = state->bits; alpar@9: lcode = state->lencode; alpar@9: dcode = state->distcode; alpar@9: lmask = (1U << state->lenbits) - 1; alpar@9: dmask = (1U << state->distbits) - 1; alpar@9: alpar@9: /* decode literals and length/distances until end-of-block or not enough alpar@9: input data or output space */ alpar@9: do { alpar@9: if (bits < 15) { alpar@9: hold += (unsigned long)(PUP(in)) << bits; alpar@9: bits += 8; alpar@9: hold += (unsigned long)(PUP(in)) << bits; alpar@9: bits += 8; alpar@9: } alpar@9: here = lcode[hold & lmask]; alpar@9: dolen: alpar@9: op = (unsigned)(here.bits); alpar@9: hold >>= op; alpar@9: bits -= op; alpar@9: op = (unsigned)(here.op); alpar@9: if (op == 0) { /* literal */ alpar@9: Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? alpar@9: "inflate: literal '%c'\n" : alpar@9: "inflate: literal 0x%02x\n", here.val)); alpar@9: PUP(out) = (unsigned char)(here.val); alpar@9: } alpar@9: else if (op & 16) { /* length base */ alpar@9: len = (unsigned)(here.val); alpar@9: op &= 15; /* number of extra bits */ alpar@9: if (op) { alpar@9: if (bits < op) { alpar@9: hold += (unsigned long)(PUP(in)) << bits; alpar@9: bits += 8; alpar@9: } alpar@9: len += (unsigned)hold & ((1U << op) - 1); alpar@9: hold >>= op; alpar@9: bits -= op; alpar@9: } alpar@9: Tracevv((stderr, "inflate: length %u\n", len)); alpar@9: if (bits < 15) { alpar@9: hold += (unsigned long)(PUP(in)) << bits; alpar@9: bits += 8; alpar@9: hold += (unsigned long)(PUP(in)) << bits; alpar@9: bits += 8; alpar@9: } alpar@9: here = dcode[hold & dmask]; alpar@9: dodist: alpar@9: op = (unsigned)(here.bits); alpar@9: hold >>= op; alpar@9: bits -= op; alpar@9: op = (unsigned)(here.op); alpar@9: if (op & 16) { /* distance base */ alpar@9: dist = (unsigned)(here.val); alpar@9: op &= 15; /* number of extra bits */ alpar@9: if (bits < op) { alpar@9: hold += (unsigned long)(PUP(in)) << bits; alpar@9: bits += 8; alpar@9: if (bits < op) { alpar@9: hold += (unsigned long)(PUP(in)) << bits; alpar@9: bits += 8; alpar@9: } alpar@9: } alpar@9: dist += (unsigned)hold & ((1U << op) - 1); alpar@9: #ifdef INFLATE_STRICT alpar@9: if (dist > dmax) { alpar@9: strm->msg = (char *)"invalid distance too far back"; alpar@9: state->mode = BAD; alpar@9: break; alpar@9: } alpar@9: #endif alpar@9: hold >>= op; alpar@9: bits -= op; alpar@9: Tracevv((stderr, "inflate: distance %u\n", dist)); alpar@9: op = (unsigned)(out - beg); /* max distance in output */ alpar@9: if (dist > op) { /* see if copy from window */ alpar@9: op = dist - op; /* distance back in window */ alpar@9: if (op > whave) { alpar@9: if (state->sane) { alpar@9: strm->msg = alpar@9: (char *)"invalid distance too far back"; alpar@9: state->mode = BAD; alpar@9: break; alpar@9: } alpar@9: #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR alpar@9: if (len <= op - whave) { alpar@9: do { alpar@9: PUP(out) = 0; alpar@9: } while (--len); alpar@9: continue; alpar@9: } alpar@9: len -= op - whave; alpar@9: do { alpar@9: PUP(out) = 0; alpar@9: } while (--op > whave); alpar@9: if (op == 0) { alpar@9: from = out - dist; alpar@9: do { alpar@9: PUP(out) = PUP(from); alpar@9: } while (--len); alpar@9: continue; alpar@9: } alpar@9: #endif alpar@9: } alpar@9: from = window - OFF; alpar@9: if (wnext == 0) { /* very common case */ alpar@9: from += wsize - op; alpar@9: if (op < len) { /* some from window */ alpar@9: len -= op; alpar@9: do { alpar@9: PUP(out) = PUP(from); alpar@9: } while (--op); alpar@9: from = out - dist; /* rest from output */ alpar@9: } alpar@9: } alpar@9: else if (wnext < op) { /* wrap around window */ alpar@9: from += wsize + wnext - op; alpar@9: op -= wnext; alpar@9: if (op < len) { /* some from end of window */ alpar@9: len -= op; alpar@9: do { alpar@9: PUP(out) = PUP(from); alpar@9: } while (--op); alpar@9: from = window - OFF; alpar@9: if (wnext < len) { /* some from start of window */ alpar@9: op = wnext; alpar@9: len -= op; alpar@9: do { alpar@9: PUP(out) = PUP(from); alpar@9: } while (--op); alpar@9: from = out - dist; /* rest from output */ alpar@9: } alpar@9: } alpar@9: } alpar@9: else { /* contiguous in window */ alpar@9: from += wnext - op; alpar@9: if (op < len) { /* some from window */ alpar@9: len -= op; alpar@9: do { alpar@9: PUP(out) = PUP(from); alpar@9: } while (--op); alpar@9: from = out - dist; /* rest from output */ alpar@9: } alpar@9: } alpar@9: while (len > 2) { alpar@9: PUP(out) = PUP(from); alpar@9: PUP(out) = PUP(from); alpar@9: PUP(out) = PUP(from); alpar@9: len -= 3; alpar@9: } alpar@9: if (len) { alpar@9: PUP(out) = PUP(from); alpar@9: if (len > 1) alpar@9: PUP(out) = PUP(from); alpar@9: } alpar@9: } alpar@9: else { alpar@9: from = out - dist; /* copy direct from output */ alpar@9: do { /* minimum length is three */ alpar@9: PUP(out) = PUP(from); alpar@9: PUP(out) = PUP(from); alpar@9: PUP(out) = PUP(from); alpar@9: len -= 3; alpar@9: } while (len > 2); alpar@9: if (len) { alpar@9: PUP(out) = PUP(from); alpar@9: if (len > 1) alpar@9: PUP(out) = PUP(from); alpar@9: } alpar@9: } alpar@9: } alpar@9: else if ((op & 64) == 0) { /* 2nd level distance code */ alpar@9: here = dcode[here.val + (hold & ((1U << op) - 1))]; alpar@9: goto dodist; alpar@9: } alpar@9: else { alpar@9: strm->msg = (char *)"invalid distance code"; alpar@9: state->mode = BAD; alpar@9: break; alpar@9: } alpar@9: } alpar@9: else if ((op & 64) == 0) { /* 2nd level length code */ alpar@9: here = lcode[here.val + (hold & ((1U << op) - 1))]; alpar@9: goto dolen; alpar@9: } alpar@9: else if (op & 32) { /* end-of-block */ alpar@9: Tracevv((stderr, "inflate: end of block\n")); alpar@9: state->mode = TYPE; alpar@9: break; alpar@9: } alpar@9: else { alpar@9: strm->msg = (char *)"invalid literal/length code"; alpar@9: state->mode = BAD; alpar@9: break; alpar@9: } alpar@9: } while (in < last && out < end); alpar@9: alpar@9: /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ alpar@9: len = bits >> 3; alpar@9: in -= len; alpar@9: bits -= len << 3; alpar@9: hold &= (1U << bits) - 1; alpar@9: alpar@9: /* update state and return */ alpar@9: strm->next_in = in + OFF; alpar@9: strm->next_out = out + OFF; alpar@9: strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); alpar@9: strm->avail_out = (unsigned)(out < end ? alpar@9: 257 + (end - out) : 257 - (out - end)); alpar@9: state->hold = hold; alpar@9: state->bits = bits; alpar@9: return; alpar@9: } alpar@9: alpar@9: /* alpar@9: inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): alpar@9: - Using bit fields for code structure alpar@9: - Different op definition to avoid & for extra bits (do & for table bits) alpar@9: - Three separate decoding do-loops for direct, window, and wnext == 0 alpar@9: - Special case for distance > 1 copies to do overlapped load and store copy alpar@9: - Explicit branch predictions (based on measured branch probabilities) alpar@9: - Deferring match copy and interspersed it with decoding subsequent codes alpar@9: - Swapping literal/length else alpar@9: - Swapping window/direct else alpar@9: - Larger unrolled copy loops (three is about right) alpar@9: - Moving len -= 3 statement into middle of loop alpar@9: */ alpar@9: alpar@9: #endif /* !ASMINF */