inffas86.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. /* inffas86.c is a hand tuned assembler version of
  2. *
  3. * inffast.c -- fast decoding
  4. * Copyright (C) 1995-2003 Mark Adler
  5. * For conditions of distribution and use, see copyright notice in zlib.h
  6. *
  7. * Copyright (C) 2003 Chris Anderson <christop@charm.net>
  8. * Please use the copyright conditions above.
  9. *
  10. * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
  11. * slightly quicker on x86 systems because, instead of using rep movsb to copy
  12. * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
  13. * bytes. I've tested the AMD64 code on Fedora Core 1 plus the x86_64 updates
  14. * from http://fedora.linux.duke.edu/fc1_x86_64
  15. * running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
  16. * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
  17. * when decompressing mozilla-source-1.3.tar.gz.
  18. *
  19. * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
  20. * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
  21. * the moment. I have successfully compiled and tested this code with gcc2.96,
  22. * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
  23. * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
  24. * enabled. I will attempt to merge the MMX code into this version. Newer
  25. * versions of this and inffast.S can be found at
  26. * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
  27. */
  28. #include "zutil.h"
  29. #include "inftrees.h"
  30. #include "inflate.h"
  31. #include "inffast.h"
  32. /* Mark Adler's comments from inffast.c: */
  33. /*
  34. Decode literal, length, and distance codes and write out the resulting
  35. literal and match bytes until either not enough input or output is
  36. available, an end-of-block is encountered, or a data error is encountered.
  37. When large enough input and output buffers are supplied to inflate(), for
  38. example, a 16K input buffer and a 64K output buffer, more than 95% of the
  39. inflate execution time is spent in this routine.
  40. Entry assumptions:
  41. state->mode == LEN
  42. strm->avail_in >= 6
  43. strm->avail_out >= 258
  44. start >= strm->avail_out
  45. state->bits < 8
  46. On return, state->mode is one of:
  47. LEN -- not enough output space or not enough available input remains
  48. TYPE -- reached end of block code, inflate() to interpret next block
  49. BAD -- error in block data
  50. Notes:
  51. - The maximum input bits used by a length/distance pair is 15 bits for the
  52. length code, 5 bits for the length extra, 15 bits for the distance code,
  53. and 13 bits for the distance extra. This totals 48 bits, or six bytes.
  54. Therefore if strm->avail_in >= 6, then there is enough input to avoid
  55. checking for available input while decoding.
  56. - The maximum bytes that a single length/distance pair can output is 258
  57. bytes, which is the maximum length that can be coded. inflate_fast()
  58. requires strm->avail_out >= 258 for each loop to avoid checking for
  59. output space.
  60. */
  61. void inflate_fast(strm, start)
  62. z_streamp strm;
  63. unsigned start; /* inflate()'s starting value for strm->avail_out */
  64. {
  65. struct inflate_state FAR *state;
  66. struct inffast_ar {
  67. /* 64 32 x86 x86_64 */
  68. /* ar offset register */
  69. /* 0 0 */ void *esp; /* esp save */
  70. /* 8 4 */ void *ebp; /* ebp save */
  71. /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
  72. /* 24 12 */ unsigned char FAR *last; /* r9 while in < last */
  73. /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
  74. /* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */
  75. /* 48 24 */ unsigned char FAR *end; /* r10 while out < end */
  76. /* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */
  77. /* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */
  78. /* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */
  79. /* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */
  80. /* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */
  81. /* 92 48 */ unsigned wsize; /* window size */
  82. /* 96 52 */ unsigned write; /* window write index */
  83. /*100 56 */ unsigned lmask; /* r12 mask for lcode */
  84. /*104 60 */ unsigned dmask; /* r13 mask for dcode */
  85. /*108 64 */ unsigned len; /* r14 match length */
  86. /*112 68 */ unsigned dist; /* r15 match distance */
  87. /*116 72 */ unsigned status; /* set when state chng*/
  88. } ar;
  89. #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
  90. #define PAD_AVAIL_IN 6
  91. #define PAD_AVAIL_OUT 258
  92. #else
  93. #define PAD_AVAIL_IN 5
  94. #define PAD_AVAIL_OUT 257
  95. #endif
  96. /* copy state to local variables */
  97. state = (struct inflate_state FAR *)strm->state;
  98. ar.in = strm->next_in;
  99. ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
  100. ar.out = strm->next_out;
  101. ar.beg = ar.out - (start - strm->avail_out);
  102. ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
  103. ar.wsize = state->wsize;
  104. ar.write = state->wnext;
  105. ar.window = state->window;
  106. ar.hold = state->hold;
  107. ar.bits = state->bits;
  108. ar.lcode = state->lencode;
  109. ar.dcode = state->distcode;
  110. ar.lmask = (1U << state->lenbits) - 1;
  111. ar.dmask = (1U << state->distbits) - 1;
  112. /* decode literals and length/distances until end-of-block or not enough
  113. input data or output space */
  114. /* align in on 1/2 hold size boundary */
  115. while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
  116. ar.hold += (unsigned long)*ar.in++ << ar.bits;
  117. ar.bits += 8;
  118. }
  119. #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
  120. __asm__ __volatile__ (
  121. " leaq %0, %%rax\n"
  122. " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
  123. " movq %%rsp, (%%rax)\n"
  124. " movq %%rax, %%rsp\n" /* make rsp point to &ar */
  125. " movq 16(%%rsp), %%rsi\n" /* rsi = in */
  126. " movq 32(%%rsp), %%rdi\n" /* rdi = out */
  127. " movq 24(%%rsp), %%r9\n" /* r9 = last */
  128. " movq 48(%%rsp), %%r10\n" /* r10 = end */
  129. " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
  130. " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
  131. " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
  132. " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
  133. " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
  134. " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
  135. /* r14d = len */
  136. /* r15d = dist */
  137. " cld\n"
  138. " cmpq %%rdi, %%r10\n"
  139. " je .L_one_time\n" /* if only one decode left */
  140. " cmpq %%rsi, %%r9\n"
  141. " je .L_one_time\n"
  142. " jmp .L_do_loop\n"
  143. ".L_one_time:\n"
  144. " movq %%r12, %%r8\n" /* r8 = lmask */
  145. " cmpb $32, %%bl\n"
  146. " ja .L_get_length_code_one_time\n"
  147. " lodsl\n" /* eax = *(uint *)in++ */
  148. " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
  149. " addb $32, %%bl\n" /* bits += 32 */
  150. " shlq %%cl, %%rax\n"
  151. " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
  152. " jmp .L_get_length_code_one_time\n"
  153. ".align 32,0x90\n"
  154. ".L_while_test:\n"
  155. " cmpq %%rdi, %%r10\n"
  156. " jbe .L_break_loop\n"
  157. " cmpq %%rsi, %%r9\n"
  158. " jbe .L_break_loop\n"
  159. ".L_do_loop:\n"
  160. " movq %%r12, %%r8\n" /* r8 = lmask */
  161. " cmpb $32, %%bl\n"
  162. " ja .L_get_length_code\n" /* if (32 < bits) */
  163. " lodsl\n" /* eax = *(uint *)in++ */
  164. " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
  165. " addb $32, %%bl\n" /* bits += 32 */
  166. " shlq %%cl, %%rax\n"
  167. " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
  168. ".L_get_length_code:\n"
  169. " andq %%rdx, %%r8\n" /* r8 &= hold */
  170. " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
  171. " movb %%ah, %%cl\n" /* cl = this.bits */
  172. " subb %%ah, %%bl\n" /* bits -= this.bits */
  173. " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
  174. " testb %%al, %%al\n"
  175. " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
  176. " movq %%r12, %%r8\n" /* r8 = lmask */
  177. " shrl $16, %%eax\n" /* output this.val char */
  178. " stosb\n"
  179. ".L_get_length_code_one_time:\n"
  180. " andq %%rdx, %%r8\n" /* r8 &= hold */
  181. " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
  182. ".L_dolen:\n"
  183. " movb %%ah, %%cl\n" /* cl = this.bits */
  184. " subb %%ah, %%bl\n" /* bits -= this.bits */
  185. " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
  186. " testb %%al, %%al\n"
  187. " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
  188. " shrl $16, %%eax\n" /* output this.val char */
  189. " stosb\n"
  190. " jmp .L_while_test\n"
  191. ".align 32,0x90\n"
  192. ".L_test_for_length_base:\n"
  193. " movl %%eax, %%r14d\n" /* len = this */
  194. " shrl $16, %%r14d\n" /* len = this.val */
  195. " movb %%al, %%cl\n"
  196. " testb $16, %%al\n"
  197. " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
  198. " andb $15, %%cl\n" /* op &= 15 */
  199. " jz .L_decode_distance\n" /* if (!op) */
  200. ".L_add_bits_to_len:\n"
  201. " subb %%cl, %%bl\n"
  202. " xorl %%eax, %%eax\n"
  203. " incl %%eax\n"
  204. " shll %%cl, %%eax\n"
  205. " decl %%eax\n"
  206. " andl %%edx, %%eax\n" /* eax &= hold */
  207. " shrq %%cl, %%rdx\n"
  208. " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
  209. ".L_decode_distance:\n"
  210. " movq %%r13, %%r8\n" /* r8 = dmask */
  211. " cmpb $32, %%bl\n"
  212. " ja .L_get_distance_code\n" /* if (32 < bits) */
  213. " lodsl\n" /* eax = *(uint *)in++ */
  214. " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
  215. " addb $32, %%bl\n" /* bits += 32 */
  216. " shlq %%cl, %%rax\n"
  217. " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
  218. ".L_get_distance_code:\n"
  219. " andq %%rdx, %%r8\n" /* r8 &= hold */
  220. " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
  221. ".L_dodist:\n"
  222. " movl %%eax, %%r15d\n" /* dist = this */
  223. " shrl $16, %%r15d\n" /* dist = this.val */
  224. " movb %%ah, %%cl\n"
  225. " subb %%ah, %%bl\n" /* bits -= this.bits */
  226. " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
  227. " movb %%al, %%cl\n" /* cl = this.op */
  228. " testb $16, %%al\n" /* if ((op & 16) == 0) */
  229. " jz .L_test_for_second_level_dist\n"
  230. " andb $15, %%cl\n" /* op &= 15 */
  231. " jz .L_check_dist_one\n"
  232. ".L_add_bits_to_dist:\n"
  233. " subb %%cl, %%bl\n"
  234. " xorl %%eax, %%eax\n"
  235. " incl %%eax\n"
  236. " shll %%cl, %%eax\n"
  237. " decl %%eax\n" /* (1 << op) - 1 */
  238. " andl %%edx, %%eax\n" /* eax &= hold */
  239. " shrq %%cl, %%rdx\n"
  240. " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
  241. ".L_check_window:\n"
  242. " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
  243. " movq %%rdi, %%rax\n"
  244. " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
  245. " cmpl %%r15d, %%eax\n"
  246. " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
  247. " movl %%r14d, %%ecx\n" /* ecx = len */
  248. " movq %%rdi, %%rsi\n"
  249. " subq %%r15, %%rsi\n" /* from = out - dist */
  250. " sarl %%ecx\n"
  251. " jnc .L_copy_two\n" /* if len % 2 == 0 */
  252. " rep movsw\n"
  253. " movb (%%rsi), %%al\n"
  254. " movb %%al, (%%rdi)\n"
  255. " incq %%rdi\n"
  256. " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
  257. " jmp .L_while_test\n"
  258. ".L_copy_two:\n"
  259. " rep movsw\n"
  260. " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
  261. " jmp .L_while_test\n"
  262. ".align 32,0x90\n"
  263. ".L_check_dist_one:\n"
  264. " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
  265. " jne .L_check_window\n"
  266. " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
  267. " je .L_check_window\n"
  268. " movl %%r14d, %%ecx\n" /* ecx = len */
  269. " movb -1(%%rdi), %%al\n"
  270. " movb %%al, %%ah\n"
  271. " sarl %%ecx\n"
  272. " jnc .L_set_two\n"
  273. " movb %%al, (%%rdi)\n"
  274. " incq %%rdi\n"
  275. ".L_set_two:\n"
  276. " rep stosw\n"
  277. " jmp .L_while_test\n"
  278. ".align 32,0x90\n"
  279. ".L_test_for_second_level_length:\n"
  280. " testb $64, %%al\n"
  281. " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
  282. " xorl %%eax, %%eax\n"
  283. " incl %%eax\n"
  284. " shll %%cl, %%eax\n"
  285. " decl %%eax\n"
  286. " andl %%edx, %%eax\n" /* eax &= hold */
  287. " addl %%r14d, %%eax\n" /* eax += len */
  288. " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
  289. " jmp .L_dolen\n"
  290. ".align 32,0x90\n"
  291. ".L_test_for_second_level_dist:\n"
  292. " testb $64, %%al\n"
  293. " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
  294. " xorl %%eax, %%eax\n"
  295. " incl %%eax\n"
  296. " shll %%cl, %%eax\n"
  297. " decl %%eax\n"
  298. " andl %%edx, %%eax\n" /* eax &= hold */
  299. " addl %%r15d, %%eax\n" /* eax += dist */
  300. " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
  301. " jmp .L_dodist\n"
  302. ".align 32,0x90\n"
  303. ".L_clip_window:\n"
  304. " movl %%eax, %%ecx\n" /* ecx = nbytes */
  305. " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
  306. " negl %%ecx\n" /* nbytes = -nbytes */
  307. " cmpl %%r15d, %%eax\n"
  308. " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
  309. " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
  310. " cmpl $0, 96(%%rsp)\n"
  311. " jne .L_wrap_around_window\n" /* if (write != 0) */
  312. " movq 56(%%rsp), %%rsi\n" /* from = window */
  313. " subl %%ecx, %%eax\n" /* eax -= nbytes */
  314. " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
  315. " movl %%r14d, %%eax\n" /* eax = len */
  316. " cmpl %%ecx, %%r14d\n"
  317. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  318. " subl %%ecx, %%eax\n" /* eax -= nbytes */
  319. " rep movsb\n"
  320. " movq %%rdi, %%rsi\n"
  321. " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
  322. " jmp .L_do_copy\n"
  323. ".align 32,0x90\n"
  324. ".L_wrap_around_window:\n"
  325. " movl 96(%%rsp), %%eax\n" /* eax = write */
  326. " cmpl %%eax, %%ecx\n"
  327. " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
  328. " movl 92(%%rsp), %%esi\n" /* from = wsize */
  329. " addq 56(%%rsp), %%rsi\n" /* from += window */
  330. " addq %%rax, %%rsi\n" /* from += write */
  331. " subq %%rcx, %%rsi\n" /* from -= nbytes */
  332. " subl %%eax, %%ecx\n" /* nbytes -= write */
  333. " movl %%r14d, %%eax\n" /* eax = len */
  334. " cmpl %%ecx, %%eax\n"
  335. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  336. " subl %%ecx, %%eax\n" /* len -= nbytes */
  337. " rep movsb\n"
  338. " movq 56(%%rsp), %%rsi\n" /* from = window */
  339. " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
  340. " cmpl %%ecx, %%eax\n"
  341. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  342. " subl %%ecx, %%eax\n" /* len -= nbytes */
  343. " rep movsb\n"
  344. " movq %%rdi, %%rsi\n"
  345. " subq %%r15, %%rsi\n" /* from = out - dist */
  346. " jmp .L_do_copy\n"
  347. ".align 32,0x90\n"
  348. ".L_contiguous_in_window:\n"
  349. " movq 56(%%rsp), %%rsi\n" /* rsi = window */
  350. " addq %%rax, %%rsi\n"
  351. " subq %%rcx, %%rsi\n" /* from += write - nbytes */
  352. " movl %%r14d, %%eax\n" /* eax = len */
  353. " cmpl %%ecx, %%eax\n"
  354. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  355. " subl %%ecx, %%eax\n" /* len -= nbytes */
  356. " rep movsb\n"
  357. " movq %%rdi, %%rsi\n"
  358. " subq %%r15, %%rsi\n" /* from = out - dist */
  359. " jmp .L_do_copy\n" /* if (nbytes >= len) */
  360. ".align 32,0x90\n"
  361. ".L_do_copy:\n"
  362. " movl %%eax, %%ecx\n" /* ecx = len */
  363. " rep movsb\n"
  364. " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
  365. " jmp .L_while_test\n"
  366. ".L_test_for_end_of_block:\n"
  367. " testb $32, %%al\n"
  368. " jz .L_invalid_literal_length_code\n"
  369. " movl $1, 116(%%rsp)\n"
  370. " jmp .L_break_loop_with_status\n"
  371. ".L_invalid_literal_length_code:\n"
  372. " movl $2, 116(%%rsp)\n"
  373. " jmp .L_break_loop_with_status\n"
  374. ".L_invalid_distance_code:\n"
  375. " movl $3, 116(%%rsp)\n"
  376. " jmp .L_break_loop_with_status\n"
  377. ".L_invalid_distance_too_far:\n"
  378. " movl $4, 116(%%rsp)\n"
  379. " jmp .L_break_loop_with_status\n"
  380. ".L_break_loop:\n"
  381. " movl $0, 116(%%rsp)\n"
  382. ".L_break_loop_with_status:\n"
  383. /* put in, out, bits, and hold back into ar and pop esp */
  384. " movq %%rsi, 16(%%rsp)\n" /* in */
  385. " movq %%rdi, 32(%%rsp)\n" /* out */
  386. " movl %%ebx, 88(%%rsp)\n" /* bits */
  387. " movq %%rdx, 80(%%rsp)\n" /* hold */
  388. " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
  389. " movq 8(%%rsp), %%rbp\n"
  390. " movq %%rax, %%rsp\n"
  391. :
  392. : "m" (ar)
  393. : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
  394. "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
  395. );
  396. #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
  397. __asm__ __volatile__ (
  398. " leal %0, %%eax\n"
  399. " movl %%esp, (%%eax)\n" /* save esp, ebp */
  400. " movl %%ebp, 4(%%eax)\n"
  401. " movl %%eax, %%esp\n"
  402. " movl 8(%%esp), %%esi\n" /* esi = in */
  403. " movl 16(%%esp), %%edi\n" /* edi = out */
  404. " movl 40(%%esp), %%edx\n" /* edx = hold */
  405. " movl 44(%%esp), %%ebx\n" /* ebx = bits */
  406. " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
  407. " cld\n"
  408. " jmp .L_do_loop\n"
  409. ".align 32,0x90\n"
  410. ".L_while_test:\n"
  411. " cmpl %%edi, 24(%%esp)\n" /* out < end */
  412. " jbe .L_break_loop\n"
  413. " cmpl %%esi, 12(%%esp)\n" /* in < last */
  414. " jbe .L_break_loop\n"
  415. ".L_do_loop:\n"
  416. " cmpb $15, %%bl\n"
  417. " ja .L_get_length_code\n" /* if (15 < bits) */
  418. " xorl %%eax, %%eax\n"
  419. " lodsw\n" /* al = *(ushort *)in++ */
  420. " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
  421. " addb $16, %%bl\n" /* bits += 16 */
  422. " shll %%cl, %%eax\n"
  423. " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
  424. ".L_get_length_code:\n"
  425. " movl 56(%%esp), %%eax\n" /* eax = lmask */
  426. " andl %%edx, %%eax\n" /* eax &= hold */
  427. " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
  428. ".L_dolen:\n"
  429. " movb %%ah, %%cl\n" /* cl = this.bits */
  430. " subb %%ah, %%bl\n" /* bits -= this.bits */
  431. " shrl %%cl, %%edx\n" /* hold >>= this.bits */
  432. " testb %%al, %%al\n"
  433. " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
  434. " shrl $16, %%eax\n" /* output this.val char */
  435. " stosb\n"
  436. " jmp .L_while_test\n"
  437. ".align 32,0x90\n"
  438. ".L_test_for_length_base:\n"
  439. " movl %%eax, %%ecx\n" /* len = this */
  440. " shrl $16, %%ecx\n" /* len = this.val */
  441. " movl %%ecx, 64(%%esp)\n" /* save len */
  442. " movb %%al, %%cl\n"
  443. " testb $16, %%al\n"
  444. " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
  445. " andb $15, %%cl\n" /* op &= 15 */
  446. " jz .L_decode_distance\n" /* if (!op) */
  447. " cmpb %%cl, %%bl\n"
  448. " jae .L_add_bits_to_len\n" /* if (op <= bits) */
  449. " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
  450. " xorl %%eax, %%eax\n"
  451. " lodsw\n" /* al = *(ushort *)in++ */
  452. " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
  453. " addb $16, %%bl\n" /* bits += 16 */
  454. " shll %%cl, %%eax\n"
  455. " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
  456. " movb %%ch, %%cl\n" /* move op back to ecx */
  457. ".L_add_bits_to_len:\n"
  458. " subb %%cl, %%bl\n"
  459. " xorl %%eax, %%eax\n"
  460. " incl %%eax\n"
  461. " shll %%cl, %%eax\n"
  462. " decl %%eax\n"
  463. " andl %%edx, %%eax\n" /* eax &= hold */
  464. " shrl %%cl, %%edx\n"
  465. " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
  466. ".L_decode_distance:\n"
  467. " cmpb $15, %%bl\n"
  468. " ja .L_get_distance_code\n" /* if (15 < bits) */
  469. " xorl %%eax, %%eax\n"
  470. " lodsw\n" /* al = *(ushort *)in++ */
  471. " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
  472. " addb $16, %%bl\n" /* bits += 16 */
  473. " shll %%cl, %%eax\n"
  474. " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
  475. ".L_get_distance_code:\n"
  476. " movl 60(%%esp), %%eax\n" /* eax = dmask */
  477. " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
  478. " andl %%edx, %%eax\n" /* eax &= hold */
  479. " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
  480. ".L_dodist:\n"
  481. " movl %%eax, %%ebp\n" /* dist = this */
  482. " shrl $16, %%ebp\n" /* dist = this.val */
  483. " movb %%ah, %%cl\n"
  484. " subb %%ah, %%bl\n" /* bits -= this.bits */
  485. " shrl %%cl, %%edx\n" /* hold >>= this.bits */
  486. " movb %%al, %%cl\n" /* cl = this.op */
  487. " testb $16, %%al\n" /* if ((op & 16) == 0) */
  488. " jz .L_test_for_second_level_dist\n"
  489. " andb $15, %%cl\n" /* op &= 15 */
  490. " jz .L_check_dist_one\n"
  491. " cmpb %%cl, %%bl\n"
  492. " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
  493. " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
  494. " xorl %%eax, %%eax\n"
  495. " lodsw\n" /* al = *(ushort *)in++ */
  496. " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
  497. " addb $16, %%bl\n" /* bits += 16 */
  498. " shll %%cl, %%eax\n"
  499. " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
  500. " movb %%ch, %%cl\n" /* move op back to ecx */
  501. ".L_add_bits_to_dist:\n"
  502. " subb %%cl, %%bl\n"
  503. " xorl %%eax, %%eax\n"
  504. " incl %%eax\n"
  505. " shll %%cl, %%eax\n"
  506. " decl %%eax\n" /* (1 << op) - 1 */
  507. " andl %%edx, %%eax\n" /* eax &= hold */
  508. " shrl %%cl, %%edx\n"
  509. " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
  510. ".L_check_window:\n"
  511. " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
  512. " movl %%edi, %%eax\n"
  513. " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
  514. " cmpl %%ebp, %%eax\n"
  515. " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
  516. " movl 64(%%esp), %%ecx\n" /* ecx = len */
  517. " movl %%edi, %%esi\n"
  518. " subl %%ebp, %%esi\n" /* from = out - dist */
  519. " sarl %%ecx\n"
  520. " jnc .L_copy_two\n" /* if len % 2 == 0 */
  521. " rep movsw\n"
  522. " movb (%%esi), %%al\n"
  523. " movb %%al, (%%edi)\n"
  524. " incl %%edi\n"
  525. " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
  526. " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
  527. " jmp .L_while_test\n"
  528. ".L_copy_two:\n"
  529. " rep movsw\n"
  530. " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
  531. " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
  532. " jmp .L_while_test\n"
  533. ".align 32,0x90\n"
  534. ".L_check_dist_one:\n"
  535. " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
  536. " jne .L_check_window\n"
  537. " cmpl %%edi, 20(%%esp)\n"
  538. " je .L_check_window\n" /* out == beg, if outside window */
  539. " movl 64(%%esp), %%ecx\n" /* ecx = len */
  540. " movb -1(%%edi), %%al\n"
  541. " movb %%al, %%ah\n"
  542. " sarl %%ecx\n"
  543. " jnc .L_set_two\n"
  544. " movb %%al, (%%edi)\n"
  545. " incl %%edi\n"
  546. ".L_set_two:\n"
  547. " rep stosw\n"
  548. " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
  549. " jmp .L_while_test\n"
  550. ".align 32,0x90\n"
  551. ".L_test_for_second_level_length:\n"
  552. " testb $64, %%al\n"
  553. " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
  554. " xorl %%eax, %%eax\n"
  555. " incl %%eax\n"
  556. " shll %%cl, %%eax\n"
  557. " decl %%eax\n"
  558. " andl %%edx, %%eax\n" /* eax &= hold */
  559. " addl 64(%%esp), %%eax\n" /* eax += len */
  560. " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
  561. " jmp .L_dolen\n"
  562. ".align 32,0x90\n"
  563. ".L_test_for_second_level_dist:\n"
  564. " testb $64, %%al\n"
  565. " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
  566. " xorl %%eax, %%eax\n"
  567. " incl %%eax\n"
  568. " shll %%cl, %%eax\n"
  569. " decl %%eax\n"
  570. " andl %%edx, %%eax\n" /* eax &= hold */
  571. " addl %%ebp, %%eax\n" /* eax += dist */
  572. " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
  573. " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
  574. " jmp .L_dodist\n"
  575. ".align 32,0x90\n"
  576. ".L_clip_window:\n"
  577. " movl %%eax, %%ecx\n"
  578. " movl 48(%%esp), %%eax\n" /* eax = wsize */
  579. " negl %%ecx\n" /* nbytes = -nbytes */
  580. " movl 28(%%esp), %%esi\n" /* from = window */
  581. " cmpl %%ebp, %%eax\n"
  582. " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
  583. " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
  584. " cmpl $0, 52(%%esp)\n"
  585. " jne .L_wrap_around_window\n" /* if (write != 0) */
  586. " subl %%ecx, %%eax\n"
  587. " addl %%eax, %%esi\n" /* from += wsize - nbytes */
  588. " movl 64(%%esp), %%eax\n" /* eax = len */
  589. " cmpl %%ecx, %%eax\n"
  590. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  591. " subl %%ecx, %%eax\n" /* len -= nbytes */
  592. " rep movsb\n"
  593. " movl %%edi, %%esi\n"
  594. " subl %%ebp, %%esi\n" /* from = out - dist */
  595. " jmp .L_do_copy\n"
  596. ".align 32,0x90\n"
  597. ".L_wrap_around_window:\n"
  598. " movl 52(%%esp), %%eax\n" /* eax = write */
  599. " cmpl %%eax, %%ecx\n"
  600. " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
  601. " addl 48(%%esp), %%esi\n" /* from += wsize */
  602. " addl %%eax, %%esi\n" /* from += write */
  603. " subl %%ecx, %%esi\n" /* from -= nbytes */
  604. " subl %%eax, %%ecx\n" /* nbytes -= write */
  605. " movl 64(%%esp), %%eax\n" /* eax = len */
  606. " cmpl %%ecx, %%eax\n"
  607. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  608. " subl %%ecx, %%eax\n" /* len -= nbytes */
  609. " rep movsb\n"
  610. " movl 28(%%esp), %%esi\n" /* from = window */
  611. " movl 52(%%esp), %%ecx\n" /* nbytes = write */
  612. " cmpl %%ecx, %%eax\n"
  613. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  614. " subl %%ecx, %%eax\n" /* len -= nbytes */
  615. " rep movsb\n"
  616. " movl %%edi, %%esi\n"
  617. " subl %%ebp, %%esi\n" /* from = out - dist */
  618. " jmp .L_do_copy\n"
  619. ".align 32,0x90\n"
  620. ".L_contiguous_in_window:\n"
  621. " addl %%eax, %%esi\n"
  622. " subl %%ecx, %%esi\n" /* from += write - nbytes */
  623. " movl 64(%%esp), %%eax\n" /* eax = len */
  624. " cmpl %%ecx, %%eax\n"
  625. " jbe .L_do_copy\n" /* if (nbytes >= len) */
  626. " subl %%ecx, %%eax\n" /* len -= nbytes */
  627. " rep movsb\n"
  628. " movl %%edi, %%esi\n"
  629. " subl %%ebp, %%esi\n" /* from = out - dist */
  630. " jmp .L_do_copy\n" /* if (nbytes >= len) */
  631. ".align 32,0x90\n"
  632. ".L_do_copy:\n"
  633. " movl %%eax, %%ecx\n"
  634. " rep movsb\n"
  635. " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
  636. " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
  637. " jmp .L_while_test\n"
  638. ".L_test_for_end_of_block:\n"
  639. " testb $32, %%al\n"
  640. " jz .L_invalid_literal_length_code\n"
  641. " movl $1, 72(%%esp)\n"
  642. " jmp .L_break_loop_with_status\n"
  643. ".L_invalid_literal_length_code:\n"
  644. " movl $2, 72(%%esp)\n"
  645. " jmp .L_break_loop_with_status\n"
  646. ".L_invalid_distance_code:\n"
  647. " movl $3, 72(%%esp)\n"
  648. " jmp .L_break_loop_with_status\n"
  649. ".L_invalid_distance_too_far:\n"
  650. " movl 8(%%esp), %%esi\n"
  651. " movl $4, 72(%%esp)\n"
  652. " jmp .L_break_loop_with_status\n"
  653. ".L_break_loop:\n"
  654. " movl $0, 72(%%esp)\n"
  655. ".L_break_loop_with_status:\n"
  656. /* put in, out, bits, and hold back into ar and pop esp */
  657. " movl %%esi, 8(%%esp)\n" /* save in */
  658. " movl %%edi, 16(%%esp)\n" /* save out */
  659. " movl %%ebx, 44(%%esp)\n" /* save bits */
  660. " movl %%edx, 40(%%esp)\n" /* save hold */
  661. " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
  662. " movl (%%esp), %%esp\n"
  663. :
  664. : "m" (ar)
  665. : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
  666. );
  667. #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
  668. __asm {
  669. lea eax, ar
  670. mov [eax], esp /* save esp, ebp */
  671. mov [eax+4], ebp
  672. mov esp, eax
  673. mov esi, [esp+8] /* esi = in */
  674. mov edi, [esp+16] /* edi = out */
  675. mov edx, [esp+40] /* edx = hold */
  676. mov ebx, [esp+44] /* ebx = bits */
  677. mov ebp, [esp+32] /* ebp = lcode */
  678. cld
  679. jmp L_do_loop
  680. ALIGN 4
  681. L_while_test:
  682. cmp [esp+24], edi
  683. jbe L_break_loop
  684. cmp [esp+12], esi
  685. jbe L_break_loop
  686. L_do_loop:
  687. cmp bl, 15
  688. ja L_get_length_code /* if (15 < bits) */
  689. xor eax, eax
  690. lodsw /* al = *(ushort *)in++ */
  691. mov cl, bl /* cl = bits, needs it for shifting */
  692. add bl, 16 /* bits += 16 */
  693. shl eax, cl
  694. or edx, eax /* hold |= *((ushort *)in)++ << bits */
  695. L_get_length_code:
  696. mov eax, [esp+56] /* eax = lmask */
  697. and eax, edx /* eax &= hold */
  698. mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
  699. L_dolen:
  700. mov cl, ah /* cl = this.bits */
  701. sub bl, ah /* bits -= this.bits */
  702. shr edx, cl /* hold >>= this.bits */
  703. test al, al
  704. jnz L_test_for_length_base /* if (op != 0) 45.7% */
  705. shr eax, 16 /* output this.val char */
  706. stosb
  707. jmp L_while_test
  708. ALIGN 4
  709. L_test_for_length_base:
  710. mov ecx, eax /* len = this */
  711. shr ecx, 16 /* len = this.val */
  712. mov [esp+64], ecx /* save len */
  713. mov cl, al
  714. test al, 16
  715. jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
  716. and cl, 15 /* op &= 15 */
  717. jz L_decode_distance /* if (!op) */
  718. cmp bl, cl
  719. jae L_add_bits_to_len /* if (op <= bits) */
  720. mov ch, cl /* stash op in ch, freeing cl */
  721. xor eax, eax
  722. lodsw /* al = *(ushort *)in++ */
  723. mov cl, bl /* cl = bits, needs it for shifting */
  724. add bl, 16 /* bits += 16 */
  725. shl eax, cl
  726. or edx, eax /* hold |= *((ushort *)in)++ << bits */
  727. mov cl, ch /* move op back to ecx */
  728. L_add_bits_to_len:
  729. sub bl, cl
  730. xor eax, eax
  731. inc eax
  732. shl eax, cl
  733. dec eax
  734. and eax, edx /* eax &= hold */
  735. shr edx, cl
  736. add [esp+64], eax /* len += hold & mask[op] */
  737. L_decode_distance:
  738. cmp bl, 15
  739. ja L_get_distance_code /* if (15 < bits) */
  740. xor eax, eax
  741. lodsw /* al = *(ushort *)in++ */
  742. mov cl, bl /* cl = bits, needs it for shifting */
  743. add bl, 16 /* bits += 16 */
  744. shl eax, cl
  745. or edx, eax /* hold |= *((ushort *)in)++ << bits */
  746. L_get_distance_code:
  747. mov eax, [esp+60] /* eax = dmask */
  748. mov ecx, [esp+36] /* ecx = dcode */
  749. and eax, edx /* eax &= hold */
  750. mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
  751. L_dodist:
  752. mov ebp, eax /* dist = this */
  753. shr ebp, 16 /* dist = this.val */
  754. mov cl, ah
  755. sub bl, ah /* bits -= this.bits */
  756. shr edx, cl /* hold >>= this.bits */
  757. mov cl, al /* cl = this.op */
  758. test al, 16 /* if ((op & 16) == 0) */
  759. jz L_test_for_second_level_dist
  760. and cl, 15 /* op &= 15 */
  761. jz L_check_dist_one
  762. cmp bl, cl
  763. jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
  764. mov ch, cl /* stash op in ch, freeing cl */
  765. xor eax, eax
  766. lodsw /* al = *(ushort *)in++ */
  767. mov cl, bl /* cl = bits, needs it for shifting */
  768. add bl, 16 /* bits += 16 */
  769. shl eax, cl
  770. or edx, eax /* hold |= *((ushort *)in)++ << bits */
  771. mov cl, ch /* move op back to ecx */
  772. L_add_bits_to_dist:
  773. sub bl, cl
  774. xor eax, eax
  775. inc eax
  776. shl eax, cl
  777. dec eax /* (1 << op) - 1 */
  778. and eax, edx /* eax &= hold */
  779. shr edx, cl
  780. add ebp, eax /* dist += hold & ((1 << op) - 1) */
  781. L_check_window:
  782. mov [esp+8], esi /* save in so from can use its reg */
  783. mov eax, edi
  784. sub eax, [esp+20] /* nbytes = out - beg */
  785. cmp eax, ebp
  786. jb L_clip_window /* if (dist > nbytes) 4.2% */
  787. mov ecx, [esp+64] /* ecx = len */
  788. mov esi, edi
  789. sub esi, ebp /* from = out - dist */
  790. sar ecx, 1 /* ecx = len / 2 word moves, CF = low bit of len */
  791. jnc L_copy_two /* CF clear: len is even, words alone cover it */
  792. rep movsw /* odd len: copy len / 2 words ... */
  793. mov al, [esi]
  794. mov [edi], al /* ... then the one leftover byte */
  795. inc edi /* advance out past that byte */
  796. mov esi, [esp+8] /* move in back to %esi, toss from */
  797. mov ebp, [esp+32] /* ebp = lcode */
  798. jmp L_while_test
  799. L_copy_two:
  800. rep movsw /* even len: copy len / 2 words, nothing left over */
  801. mov esi, [esp+8] /* move in back to %esi, toss from */
  802. mov ebp, [esp+32] /* ebp = lcode */
  803. jmp L_while_test
  804. ALIGN 4
  805. L_check_dist_one:
  806. cmp ebp, 1 /* if dist 1, is a memset */
  807. jne L_check_window
  808. cmp [esp+20], edi
  809. je L_check_window /* out == beg, if outside window */
  810. mov ecx, [esp+64] /* ecx = len */
  811. mov al, [edi-1]
  812. mov ah, al
  813. sar ecx, 1
  814. jnc L_set_two
  815. mov [edi], al /* memset out with from[-1] */
  816. inc edi
  817. L_set_two:
  818. rep stosw
  819. mov ebp, [esp+32] /* ebp = lcode */
  820. jmp L_while_test
  821. ALIGN 4
  822. L_test_for_second_level_length:
  823. test al, 64
  824. jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
  825. xor eax, eax
  826. inc eax
  827. shl eax, cl
  828. dec eax
  829. and eax, edx /* eax &= hold */
  830. add eax, [esp+64] /* eax += len */
  831. mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
  832. jmp L_dolen
  833. ALIGN 4
  834. L_test_for_second_level_dist:
  835. test al, 64
  836. jnz L_invalid_distance_code /* if ((op & 64) != 0) */
  837. xor eax, eax
  838. inc eax
  839. shl eax, cl
  840. dec eax
  841. and eax, edx /* eax &= hold */
  842. add eax, ebp /* eax += dist */
  843. mov ecx, [esp+36] /* ecx = dcode */
  844. mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
  845. jmp L_dodist
  846. ALIGN 4
  847. L_clip_window:
  848. mov ecx, eax
  849. mov eax, [esp+48] /* eax = wsize */
  850. neg ecx /* nbytes = -nbytes */
  851. mov esi, [esp+28] /* from = window */
  852. cmp eax, ebp
  853. jb L_invalid_distance_too_far /* if (dist > wsize) */
  854. add ecx, ebp /* nbytes = dist - nbytes */
  855. cmp dword ptr [esp+52], 0
  856. jne L_wrap_around_window /* if (write != 0) */
  857. sub eax, ecx
  858. add esi, eax /* from += wsize - nbytes */
  859. mov eax, [esp+64] /* eax = len */
  860. cmp eax, ecx
  861. jbe L_do_copy /* if (nbytes >= len) */
  862. sub eax, ecx /* len -= nbytes */
  863. rep movsb
  864. mov esi, edi
  865. sub esi, ebp /* from = out - dist */
  866. jmp L_do_copy
  867. ALIGN 4
  868. L_wrap_around_window:
  869. mov eax, [esp+52] /* eax = write */
  870. cmp ecx, eax
  871. jbe L_contiguous_in_window /* if (write >= nbytes) */
  872. add esi, [esp+48] /* from += wsize */
  873. add esi, eax /* from += write */
  874. sub esi, ecx /* from -= nbytes */
  875. sub ecx, eax /* nbytes -= write */
  876. mov eax, [esp+64] /* eax = len */
  877. cmp eax, ecx
  878. jbe L_do_copy /* if (nbytes >= len) */
  879. sub eax, ecx /* len -= nbytes */
  880. rep movsb
  881. mov esi, [esp+28] /* from = window */
  882. mov ecx, [esp+52] /* nbytes = write */
  883. cmp eax, ecx
  884. jbe L_do_copy /* if (nbytes >= len) */
  885. sub eax, ecx /* len -= nbytes */
  886. rep movsb
  887. mov esi, edi
  888. sub esi, ebp /* from = out - dist */
  889. jmp L_do_copy
  890. ALIGN 4
  891. L_contiguous_in_window:
  892. add esi, eax
  893. sub esi, ecx /* from += write - nbytes */
  894. mov eax, [esp+64] /* eax = len */
  895. cmp eax, ecx
  896. jbe L_do_copy /* if (nbytes >= len) */
  897. sub eax, ecx /* len -= nbytes */
  898. rep movsb
  899. mov esi, edi
  900. sub esi, ebp /* from = out - dist */
  901. jmp L_do_copy
  902. ALIGN 4
  903. L_do_copy:
  904. mov ecx, eax
  905. rep movsb
  906. mov esi, [esp+8] /* move in back to %esi, toss from */
  907. mov ebp, [esp+32] /* ebp = lcode */
  908. jmp L_while_test
  909. L_test_for_end_of_block:
  910. test al, 32
  911. jz L_invalid_literal_length_code
  912. mov dword ptr [esp+72], 1
  913. jmp L_break_loop_with_status
  914. L_invalid_literal_length_code:
  915. mov dword ptr [esp+72], 2
  916. jmp L_break_loop_with_status
  917. L_invalid_distance_code:
  918. mov dword ptr [esp+72], 3
  919. jmp L_break_loop_with_status
  920. L_invalid_distance_too_far: /* reached from L_clip_window when dist > wsize */
  921. mov esi, [esp+8] /* reload in: esi was reused as from; [esp+4] is the saved ebp, not in */
  922. mov dword ptr [esp+72], 4 /* status 4, reported as "invalid distance too far back" */
  923. jmp L_break_loop_with_status /* epilogue stores esi back as in, so it must be valid here */
  924. L_break_loop:
  925. mov dword ptr [esp+72], 0
  926. L_break_loop_with_status:
  927. /* put in, out, bits, and hold back into ar and pop esp */
  928. mov [esp+8], esi /* save in */
  929. mov [esp+16], edi /* save out */
  930. mov [esp+44], ebx /* save bits */
  931. mov [esp+40], edx /* save hold */
  932. mov ebp, [esp+4] /* restore esp, ebp */
  933. mov esp, [esp]
  934. }
  935. #else
  936. #error "x86 architecture not defined"
  937. #endif
  938. if (ar.status > 1) { /* values above 1 are errors; presumably this is the [esp+72] slot the asm writes (struct layout not in view) */
  939. if (ar.status == 2)
  940. strm->msg = "invalid literal/length code";
  941. else if (ar.status == 3)
  942. strm->msg = "invalid distance code";
  943. else /* any other error value; 4 is the only other one the asm stores */
  944. strm->msg = "invalid distance too far back";
  945. state->mode = BAD;
  946. }
  947. else if ( ar.status == 1 ) { /* 1 is stored on the end-of-block path (op & 32 set) */
  948. state->mode = TYPE;
  949. }
  950. /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
  951. ar.len = ar.bits >> 3; /* whole bytes still sitting in the bit buffer */
  952. ar.in -= ar.len; /* step in back over those bytes */
  953. ar.bits -= ar.len << 3; /* bit count left once those bytes are removed */
  954. ar.hold &= (1U << ar.bits) - 1; /* keep only that many low bits of hold */
  955. /* update state and return */
  956. strm->next_in = ar.in;
  957. strm->next_out = ar.out;
  958. strm->avail_in = (unsigned)(ar.in < ar.last ? /* in may be on either side of last, so */
  959. PAD_AVAIL_IN + (ar.last - ar.in) : /* subtract in whichever order keeps the */
  960. PAD_AVAIL_IN - (ar.in - ar.last)); /* pointer difference non-negative */
  961. strm->avail_out = (unsigned)(ar.out < ar.end ? /* same scheme for out versus end */
  962. PAD_AVAIL_OUT + (ar.end - ar.out) :
  963. PAD_AVAIL_OUT - (ar.out - ar.end));
  964. state->hold = ar.hold; /* hand the leftover bit buffer back to the inflate state */
  965. state->bits = ar.bits;
  966. return;
  967. }