Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(211)

Side by Side Diff: third_party/zlib/contrib/arm/inffast.c

Issue 2722063002: zlib: inflate using wider loads and stores
Patch Set: zlib: inflate using wider loads and stores Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/zlib/contrib/arm/chunkcopy.h ('k') | third_party/zlib/contrib/arm/inflate.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* inffast.c -- fast decoding
2 * Copyright (C) 1995-2017 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 #include "zutil.h"
7 #include "inftrees.h"
8 #include "inflate.h"
9 #include "inffast.h"
10 #include "chunkcopy.h"
11
12 #ifdef ASMINF
13 # pragma message("Assembler code may have bugs -- use at your own risk")
14 #else
15
16 /*
17 Decode literal, length, and distance codes and write out the resulting
18 literal and match bytes until either not enough input or output is
19 available, an end-of-block is encountered, or a data error is encountered.
20 When large enough input and output buffers are supplied to inflate(), for
21 example, a 16K input buffer and a 64K output buffer, more than 95% of the
22 inflate execution time is spent in this routine.
23
24 Entry assumptions:
25
26 state->mode == LEN
27 strm->avail_in >= 6
28 strm->avail_out >= 258
29 start >= strm->avail_out
30 state->bits < 8
31
32 On return, state->mode is one of:
33
34 LEN -- ran out of enough output space or enough available input
35 TYPE -- reached end of block code, inflate() to interpret next block
36 BAD -- error in block data
37
38 Notes:
39
40 - The maximum input bits used by a length/distance pair is 15 bits for the
41 length code, 5 bits for the length extra, 15 bits for the distance code,
42 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
43 Therefore if strm->avail_in >= 6, then there is enough input to avoid
44 checking for available input while decoding.
45
46 - The maximum bytes that a single length/distance pair can output is 258
47 bytes, which is the maximum length that can be coded. inflate_fast()
48 requires strm->avail_out >= 258 for each loop to avoid checking for
49 output space.
50 */
51 void ZLIB_INTERNAL inflate_fast(strm, start)
52 z_streamp strm;
53 unsigned start; /* inflate()'s starting value for strm->avail_out */
54 {
55 struct inflate_state FAR *state;
56 z_const unsigned char FAR *in; /* local strm->next_in */
57 z_const unsigned char FAR *last; /* have enough input while in < last */
58 unsigned char FAR *out; /* local strm->next_out */
59 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
60 unsigned char FAR *end; /* while out < end, enough space available */
61 unsigned char FAR *limit; /* safety limit for chunky copies */
62 #ifdef INFLATE_STRICT
63 unsigned dmax; /* maximum distance from zlib header */
64 #endif
65 unsigned wsize; /* window size or zero if not using window */
66 unsigned whave; /* valid bytes in the window */
67 unsigned wnext; /* window write index */
68 unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
69 unsigned long hold; /* local strm->hold */
70 unsigned bits; /* local strm->bits */
71 code const FAR *lcode; /* local strm->lencode */
72 code const FAR *dcode; /* local strm->distcode */
73 unsigned lmask; /* mask for first level of length codes */
74 unsigned dmask; /* mask for first level of distance codes */
75 code here; /* retrieved table entry */
76 unsigned op; /* code bits, operation, extra bits, or */
77 /* window position, window bytes to copy */
78 unsigned len; /* match length, unused bytes */
79 unsigned dist; /* match distance */
80 unsigned char FAR *from; /* where to copy match from */
81
82 /* copy state to local variables */
83 state = (struct inflate_state FAR *)strm->state;
84 in = strm->next_in;
85 last = in + (strm->avail_in - 5);
86 out = strm->next_out;
87 beg = out - (start - strm->avail_out);
88 end = out + (strm->avail_out - 257);
89 limit = out + strm->avail_out;
90 #ifdef INFLATE_STRICT
91 dmax = state->dmax;
92 #endif
93 wsize = state->wsize;
94 whave = state->whave;
95 wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
96 window = state->window;
97 hold = state->hold;
98 bits = state->bits;
99 lcode = state->lencode;
100 dcode = state->distcode;
101 lmask = (1U << state->lenbits) - 1;
102 dmask = (1U << state->distbits) - 1;
103
104 /* decode literals and length/distances until end-of-block or not enough
105 input data or output space */
106 do {
107 if (bits < 15) {
108 hold += (unsigned long)(*in++) << bits;
109 bits += 8;
110 hold += (unsigned long)(*in++) << bits;
111 bits += 8;
112 }
113 here = lcode[hold & lmask];
114 dolen:
115 op = (unsigned)(here.bits);
116 hold >>= op;
117 bits -= op;
118 op = (unsigned)(here.op);
119 if (op == 0) { /* literal */
120 Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
121 "inflate: literal '%c'\n" :
122 "inflate: literal 0x%02x\n", here.val));
123 *out++ = (unsigned char)(here.val);
124 }
125 else if (op & 16) { /* length base */
126 len = (unsigned)(here.val);
127 op &= 15; /* number of extra bits */
128 if (op) {
129 if (bits < op) {
130 hold += (unsigned long)(*in++) << bits;
131 bits += 8;
132 }
133 len += (unsigned)hold & ((1U << op) - 1);
134 hold >>= op;
135 bits -= op;
136 }
137 Tracevv((stderr, "inflate: length %u\n", len));
138 if (bits < 15) {
139 hold += (unsigned long)(*in++) << bits;
140 bits += 8;
141 hold += (unsigned long)(*in++) << bits;
142 bits += 8;
143 }
144 here = dcode[hold & dmask];
145 dodist:
146 op = (unsigned)(here.bits);
147 hold >>= op;
148 bits -= op;
149 op = (unsigned)(here.op);
150 if (op & 16) { /* distance base */
151 dist = (unsigned)(here.val);
152 op &= 15; /* number of extra bits */
153 if (bits < op) {
154 hold += (unsigned long)(*in++) << bits;
155 bits += 8;
156 if (bits < op) {
157 hold += (unsigned long)(*in++) << bits;
158 bits += 8;
159 }
160 }
161 dist += (unsigned)hold & ((1U << op) - 1);
162 #ifdef INFLATE_STRICT
163 if (dist > dmax) {
164 strm->msg = (char *)"invalid distance too far back";
165 state->mode = BAD;
166 break;
167 }
168 #endif
169 hold >>= op;
170 bits -= op;
171 Tracevv((stderr, "inflate: distance %u\n", dist));
172 op = (unsigned)(out - beg); /* max distance in output */
173 if (dist > op) { /* see if copy from window */
174 op = dist - op; /* distance back in window */
175 if (op > whave) {
176 if (state->sane) {
177 strm->msg =
178 (char *)"invalid distance too far back";
179 state->mode = BAD;
180 break;
181 }
182 #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
183 if (len <= op - whave) {
184 do {
185 *out++ = 0;
186 } while (--len);
187 continue;
188 }
189 len -= op - whave;
190 do {
191 *out++ = 0;
192 } while (--op > whave);
193 if (op == 0) {
194 from = out - dist;
195 do {
196 *out++ = *from++;
197 } while (--len);
198 continue;
199 }
200 #endif
201 }
202 from = window;
203 if (wnext >= op) { /* contiguous in window */
204 from += wnext - op;
205 }
206 else { /* wrap around window */
207 op -= wnext;
208 from += wsize - op;
209 if (op < len) { /* some from end of window */
210 len -= op;
211 out = chunkcopy_safe(out, from, op, limit);
212 from = window; /* more from start of window */
213 op = wnext;
214 /* This (rare) case can create a situation where
215 the first chunkcopy below must be checked.
216 */
217 }
218 }
219 if (op < len) { /* still need some from output * /
220 out = chunkcopy_safe(out, from, op, limit);
221 len -= op;
222 /* When dist is small the amount of data that can be
223 copied from the window is also small, and progress
224 towards the dangerous end of the output buffer is
225 also small. This means that for trivial memsets and
226 for chunkunroll_relaxed() a safety check is
227 unnecessary. However, these conditions may not be
228 entered at all, and in that case it's possible that
229 the main copy is near the end.
230 */
231 out = chunkunroll_relaxed(out, &dist, &len);
232 out = chunkcopy_safe(out, out - dist, len, limit);
233 } else {
234 /* from points to window, so there is no risk of
235 overlapping pointers requiring memset-like behaviour
236 */
237 out = chunkcopy_safe(out, from, len, limit);
238 }
239 }
240 else {
241 /* Whole reference is in range of current output. No
242 range checks are necessary because we start with room
243 for at least 258 bytes of output, so unroll and roundoff
244 operations can write beyond `out+len` so long as they
245 stay within 258 bytes of `out`.
246 */
247 out = chunkcopy_lapped_relaxed(out, dist, len);
248 }
249 }
250 else if ((op & 64) == 0) { /* 2nd level distance code */
251 here = dcode[here.val + (hold & ((1U << op) - 1))];
252 goto dodist;
253 }
254 else {
255 strm->msg = (char *)"invalid distance code";
256 state->mode = BAD;
257 break;
258 }
259 }
260 else if ((op & 64) == 0) { /* 2nd level length code */
261 here = lcode[here.val + (hold & ((1U << op) - 1))];
262 goto dolen;
263 }
264 else if (op & 32) { /* end-of-block */
265 Tracevv((stderr, "inflate: end of block\n"));
266 state->mode = TYPE;
267 break;
268 }
269 else {
270 strm->msg = (char *)"invalid literal/length code";
271 state->mode = BAD;
272 break;
273 }
274 } while (in < last && out < end);
275
276 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
277 len = bits >> 3;
278 in -= len;
279 bits -= len << 3;
280 hold &= (1U << bits) - 1;
281
282 /* update state and return */
283 strm->next_in = in;
284 strm->next_out = out;
285 strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
286 strm->avail_out = (unsigned)(out < end ?
287 257 + (end - out) : 257 - (out - end));
288 state->hold = hold;
289 state->bits = bits;
290 return;
291 }
292
293 /*
294 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
295 - Using bit fields for code structure
296 - Different op definition to avoid & for extra bits (do & for table bits)
297 - Three separate decoding do-loops for direct, window, and wnext == 0
298 - Special case for distance > 1 copies to do overlapped load and store copy
299 - Explicit branch predictions (based on measured branch probabilities)
300 - Deferring match copy and interspersed it with decoding subsequent codes
301 - Swapping literal/length else
302 - Swapping window/direct else
303 - Larger unrolled copy loops (three is about right)
304 - Moving len -= 3 statement into middle of loop
305 */
306
307 #endif /* !ASMINF */
OLDNEW
« no previous file with comments | « third_party/zlib/contrib/arm/chunkcopy.h ('k') | third_party/zlib/contrib/arm/inflate.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698