/* inffast.S */
  1. /*
  2. * inffast.S is a hand tuned assembler version of:
  3. *
  4. * inffast.c -- fast decoding
  5. * Copyright (C) 1995-2003 Mark Adler
  6. * For conditions of distribution and use, see copyright notice in zlib.h
  7. *
  8. * Copyright (C) 2003 Chris Anderson <christop@charm.net>
  9. * Please use the copyright conditions above.
  10. *
  11. * This version (Jan-23-2003) of inflate_fast was coded and tested under
  12. * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that
  13. * machine, I found that gzip style archives decompressed about 20% faster than
  14. * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will
  15. * depend on how large of a buffer is used for z_stream.next_in & next_out
  16. * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
  17. * stream processing I/O and crc32/adler32. In my case, this routine used
  18. * 70% of the cpu time and crc32 used 20%.
  19. *
  20. * I am confident that this version will work in the general case, but I have
  21. * not tested a wide variety of datasets or a wide variety of platforms.
  22. *
  23. * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
  24. * It should be a runtime flag instead of compile time flag...
  25. *
  26. * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
  27. * With -DUSE_MMX, only MMX code is compiled. With -DNO_MMX, only non-MMX code
  28. * is compiled. Without either option, runtime detection is enabled. Runtime
  29. * detection should work on all modern cpus and the recommended algorithm (flip
  30. * ID bit on eflags and then use the cpuid instruction) is used in many
  31. * multimedia applications. Tested under win2k with gcc-2.95 and gas-2.12
  32. * distributed with cygwin3. Compiling with gcc-2.95 -c inffast.S -o
  33. * inffast.obj generates a COFF object which can then be linked with MSVC++
  34. * compiled code. Tested under FreeBSD 4.7 with gcc-2.95.
  35. *
  36. * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
  37. * slower than compiler generated code). Adjusted cpuid check to use the MMX
  38. * code only for Pentiums < P4 until I have more data on the P4. Speed
  39. * improvement is only about 15% on the Athlon when compared with code generated
  40. * with MSVC++. Not sure yet, but I think the P4 will also be slower using the
  41. * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
  42. * have less latency than MMX ops. Added code to buffer the last 11 bytes of
  43. * the input stream since the MMX code grabs bits in chunks of 32, which
  44. * differs from the inffast.c algorithm. I don't think there would have been
  45. * read overruns where a page boundary was crossed (a segfault), but there
  46. * could have been overruns when next_in ends on unaligned memory (uninitialized
  47. * memory read).
  48. *
  49. * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX. I created a C
  50. * version of the non-MMX code so that it doesn't depend on zstrm and zstate
  51. * structure offsets which are hard coded in this file. This was last tested
  52. * with zlib-1.2.0 which is currently in beta testing, newer versions of this
  53. * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
  54. * http://www.charm.net/~christop/zlib/
  55. */
  56. /*
  57. * if you have underscore linking problems (_inflate_fast undefined), try
  58. * using -DGAS_COFF
  59. */
  60. #if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
  61. #if defined( WIN32 ) || defined( __CYGWIN__ )
  62. #define GAS_COFF /* windows object format */
  63. #else
  64. #define GAS_ELF
  65. #endif
  66. #endif /* ! GAS_COFF && ! GAS_ELF */
  67. #if defined( GAS_COFF )
  68. /* coff externals have underscores */
  69. #define inflate_fast _inflate_fast
  70. #define inflate_fast_use_mmx _inflate_fast_use_mmx
  71. #endif /* GAS_COFF */
  72. .file "inffast.S"
  73. .globl inflate_fast
  74. .text
  75. .align 4,0
  76. .L_invalid_literal_length_code_msg:
  77. .string "invalid literal/length code"
  78. .align 4,0
  79. .L_invalid_distance_code_msg:
  80. .string "invalid distance code"
  81. .align 4,0
  82. .L_invalid_distance_too_far_msg:
  83. .string "invalid distance too far back"
  84. #if ! defined( NO_MMX )
  85. .align 4,0
  86. .L_mask: /* mask[N] = ( 1 << N ) - 1 */
  87. .long 0
  88. .long 1
  89. .long 3
  90. .long 7
  91. .long 15
  92. .long 31
  93. .long 63
  94. .long 127
  95. .long 255
  96. .long 511
  97. .long 1023
  98. .long 2047
  99. .long 4095
  100. .long 8191
  101. .long 16383
  102. .long 32767
  103. .long 65535
  104. .long 131071
  105. .long 262143
  106. .long 524287
  107. .long 1048575
  108. .long 2097151
  109. .long 4194303
  110. .long 8388607
  111. .long 16777215
  112. .long 33554431
  113. .long 67108863
  114. .long 134217727
  115. .long 268435455
  116. .long 536870911
  117. .long 1073741823
  118. .long 2147483647
  119. .long 4294967295
  120. #endif /* NO_MMX */
  121. .text
  122. /*
  123. * struct z_stream offsets, in zlib.h
  124. */
  125. #define next_in_strm 0 /* strm->next_in */
  126. #define avail_in_strm 4 /* strm->avail_in */
  127. #define next_out_strm 12 /* strm->next_out */
  128. #define avail_out_strm 16 /* strm->avail_out */
  129. #define msg_strm 24 /* strm->msg */
  130. #define state_strm 28 /* strm->state */
  131. /*
  132. * struct inflate_state offsets, in inflate.h
  133. */
  134. #define mode_state 0 /* state->mode */
  135. #define wsize_state 32 /* state->wsize */
  136. #define write_state 40 /* state->write */
  137. #define window_state 44 /* state->window */
  138. #define hold_state 48 /* state->hold */
  139. #define bits_state 52 /* state->bits */
  140. #define lencode_state 68 /* state->lencode */
  141. #define distcode_state 72 /* state->distcode */
  142. #define lenbits_state 76 /* state->lenbits */
  143. #define distbits_state 80 /* state->distbits */
  144. /*
  145. * inflate_fast's activation record
  146. */
  147. #define local_var_size 64 /* how much local space for vars */
  148. #define strm_sp 88 /* first arg: z_stream * (local_var_size + 24) */
  149. #define start_sp 92 /* second arg: unsigned int (local_var_size + 28) */
  150. /*
  151. * offsets for local vars on stack
  152. */
  153. #define out 60 /* unsigned char* */
  154. #define window 56 /* unsigned char* */
  155. #define wsize 52 /* unsigned int */
  156. #define write 48 /* unsigned int */
  157. #define in 44 /* unsigned char* */
  158. #define beg 40 /* unsigned char* */
  159. #define buf 28 /* char[ 12 ] */
  160. #define len 24 /* unsigned int */
  161. #define last 20 /* unsigned char* */
  162. #define end 16 /* unsigned char* */
  163. #define dcode 12 /* code* */
  164. #define lcode 8 /* code* */
  165. #define dmask 4 /* unsigned int */
  166. #define lmask 0 /* unsigned int */
  167. /*
  168. * typedef enum inflate_mode consts, in inflate.h
  169. */
  170. #define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */
  171. #define INFLATE_MODE_BAD 26
  172. #if ! defined( USE_MMX ) && ! defined( NO_MMX )
  173. #define RUN_TIME_MMX
  174. #define CHECK_MMX 1
  175. #define DO_USE_MMX 2
  176. #define DONT_USE_MMX 3
  177. .globl inflate_fast_use_mmx
  178. .data
  179. .align 4,0
  180. inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
  181. .long CHECK_MMX
  182. #if defined( GAS_ELF )
  183. /* elf info */
  184. .type inflate_fast_use_mmx,@object
  185. .size inflate_fast_use_mmx,4
  186. #endif
  187. #endif /* RUN_TIME_MMX */
  188. #if defined( GAS_COFF )
  189. /* coff info: scl 2 = extern, type 32 = function */
  190. .def inflate_fast; .scl 2; .type 32; .endef
  191. #endif
  192. .text
  193. .align 32,0x90
  194. inflate_fast:
  195. pushl %edi
  196. pushl %esi
  197. pushl %ebp
  198. pushl %ebx
  199. pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
  200. subl $local_var_size, %esp
  201. cld
  202. #define strm_r %esi
  203. #define state_r %edi
  204. movl strm_sp(%esp), strm_r
  205. movl state_strm(strm_r), state_r
  206. /* in = strm->next_in;
  207. * out = strm->next_out;
  208. * last = in + strm->avail_in - 11;
  209. * beg = out - (start - strm->avail_out);
  210. * end = out + (strm->avail_out - 257);
  211. */
  212. movl avail_in_strm(strm_r), %edx
  213. movl next_in_strm(strm_r), %eax
  214. addl %eax, %edx /* avail_in += next_in */
  215. subl $11, %edx /* avail_in -= 11 */
  216. movl %eax, in(%esp)
  217. movl %edx, last(%esp)
  218. movl start_sp(%esp), %ebp
  219. movl avail_out_strm(strm_r), %ecx
  220. movl next_out_strm(strm_r), %ebx
  221. subl %ecx, %ebp /* start -= avail_out */
  222. negl %ebp /* start = -start */
  223. addl %ebx, %ebp /* start += next_out */
  224. subl $257, %ecx /* avail_out -= 257 */
  225. addl %ebx, %ecx /* avail_out += out */
  226. movl %ebx, out(%esp)
  227. movl %ebp, beg(%esp)
  228. movl %ecx, end(%esp)
  229. /* wsize = state->wsize;
  230. * write = state->write;
  231. * window = state->window;
  232. * hold = state->hold;
  233. * bits = state->bits;
  234. * lcode = state->lencode;
  235. * dcode = state->distcode;
  236. * lmask = ( 1 << state->lenbits ) - 1;
  237. * dmask = ( 1 << state->distbits ) - 1;
  238. */
  239. movl lencode_state(state_r), %eax
  240. movl distcode_state(state_r), %ecx
  241. movl %eax, lcode(%esp)
  242. movl %ecx, dcode(%esp)
  243. movl $1, %eax
  244. movl lenbits_state(state_r), %ecx
  245. shll %cl, %eax
  246. decl %eax
  247. movl %eax, lmask(%esp)
  248. movl $1, %eax
  249. movl distbits_state(state_r), %ecx
  250. shll %cl, %eax
  251. decl %eax
  252. movl %eax, dmask(%esp)
  253. movl wsize_state(state_r), %eax
  254. movl write_state(state_r), %ecx
  255. movl window_state(state_r), %edx
  256. movl %eax, wsize(%esp)
  257. movl %ecx, write(%esp)
  258. movl %edx, window(%esp)
  259. movl hold_state(state_r), %ebp
  260. movl bits_state(state_r), %ebx
  261. #undef strm_r
  262. #undef state_r
  263. #define in_r %esi
  264. #define from_r %esi
  265. #define out_r %edi
  266. movl in(%esp), in_r
  267. movl last(%esp), %ecx
  268. cmpl in_r, %ecx
  269. ja .L_align_long /* if in < last */
  270. addl $11, %ecx /* ecx = &in[ avail_in ] */
  271. subl in_r, %ecx /* ecx = avail_in */
  272. movl $12, %eax
  273. subl %ecx, %eax /* eax = 12 - avail_in */
  274. leal buf(%esp), %edi
  275. rep movsb /* memcpy( buf, in, avail_in ) */
  276. movl %eax, %ecx
  277. xorl %eax, %eax
  278. rep stosb /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
  279. leal buf(%esp), in_r /* in = buf */
  280. movl in_r, last(%esp) /* last = in, do just one iteration */
  281. jmp .L_is_aligned
  282. /* align in_r on long boundary */
  283. .L_align_long:
  284. testl $3, in_r
  285. jz .L_is_aligned
  286. xorl %eax, %eax
  287. movb (in_r), %al
  288. incl in_r
  289. movl %ebx, %ecx
  290. addl $8, %ebx
  291. shll %cl, %eax
  292. orl %eax, %ebp
  293. jmp .L_align_long
  294. .L_is_aligned:
  295. movl out(%esp), out_r
  296. #if defined( NO_MMX )
  297. jmp .L_do_loop
  298. #endif
  299. #if defined( USE_MMX )
  300. jmp .L_init_mmx
  301. #endif
  302. /*** Runtime MMX check ***/
  303. #if defined( RUN_TIME_MMX )
  304. .L_check_mmx:
  305. cmpl $DO_USE_MMX, inflate_fast_use_mmx
  306. je .L_init_mmx
  307. ja .L_do_loop /* > 2 */
  308. pushl %eax
  309. pushl %ebx
  310. pushl %ecx
  311. pushl %edx
  312. pushf
  313. movl (%esp), %eax /* copy eflags to eax */
  314. xorl $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
  315. * to see if cpu supports cpuid...
  316. * ID bit method not supported by NexGen but
  317. * bios may load a cpuid instruction and
  318. * cpuid may be disabled on Cyrix 5-6x86 */
  319. popf
  320. pushf
  321. popl %edx /* copy new eflags to edx */
  322. xorl %eax, %edx /* test if ID bit is flipped */
  323. jz .L_dont_use_mmx /* not flipped if zero */
  324. xorl %eax, %eax
  325. cpuid
  326. cmpl $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
  327. jne .L_dont_use_mmx
  328. cmpl $0x6c65746e, %ecx
  329. jne .L_dont_use_mmx
  330. cmpl $0x49656e69, %edx
  331. jne .L_dont_use_mmx
  332. movl $1, %eax
  333. cpuid /* get cpu features */
  334. shrl $8, %eax
  335. andl $15, %eax
  336. cmpl $6, %eax /* check for Pentium family, is 0xf for P4 */
  337. jne .L_dont_use_mmx
  338. testl $0x800000, %edx /* test if MMX feature is set (bit 23) */
  339. jnz .L_use_mmx
  340. jmp .L_dont_use_mmx
  341. .L_use_mmx:
  342. movl $DO_USE_MMX, inflate_fast_use_mmx
  343. jmp .L_check_mmx_pop
  344. .L_dont_use_mmx:
  345. movl $DONT_USE_MMX, inflate_fast_use_mmx
  346. .L_check_mmx_pop:
  347. popl %edx
  348. popl %ecx
  349. popl %ebx
  350. popl %eax
  351. jmp .L_check_mmx
  352. #endif
  353. /*** Non-MMX code ***/
  354. #if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
  355. #define hold_r %ebp
  356. #define bits_r %bl
  357. #define bitslong_r %ebx
  358. .align 32,0x90
  359. .L_while_test:
  360. /* while (in < last && out < end)
  361. */
  362. cmpl out_r, end(%esp)
  363. jbe .L_break_loop /* if (out >= end) */
  364. cmpl in_r, last(%esp)
  365. jbe .L_break_loop
  366. .L_do_loop:
  367. /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
  368. *
  369. * do {
  370. * if (bits < 15) {
  371. * hold |= *((unsigned short *)in)++ << bits;
  372. * bits += 16
  373. * }
  374. * this = lcode[hold & lmask]
  375. */
  376. cmpb $15, bits_r
  377. ja .L_get_length_code /* if (15 < bits) */
  378. xorl %eax, %eax
  379. lodsw /* al = *(ushort *)in++ */
  380. movb bits_r, %cl /* cl = bits, needs it for shifting */
  381. addb $16, bits_r /* bits += 16 */
  382. shll %cl, %eax
  383. orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
  384. .L_get_length_code:
  385. movl lmask(%esp), %edx /* edx = lmask */
  386. movl lcode(%esp), %ecx /* ecx = lcode */
  387. andl hold_r, %edx /* edx &= hold */
  388. movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */
  389. .L_dolen:
  390. /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
  391. *
  392. * dolen:
  393. * bits -= this.bits;
  394. * hold >>= this.bits
  395. */
  396. movb %ah, %cl /* cl = this.bits */
  397. subb %ah, bits_r /* bits -= this.bits */
  398. shrl %cl, hold_r /* hold >>= this.bits */
  399. /* check if op is a literal
  400. * if (op == 0) {
  401. * PUP(out) = this.val;
  402. * }
  403. */
  404. testb %al, %al
  405. jnz .L_test_for_length_base /* if (op != 0) 45.7% */
  406. shrl $16, %eax /* output this.val char */
  407. stosb
  408. jmp .L_while_test
  409. .L_test_for_length_base:
  410. /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
  411. *
  412. * else if (op & 16) {
  413. * len = this.val
  414. * op &= 15
  415. * if (op) {
  416. * if (op > bits) {
  417. * hold |= *((unsigned short *)in)++ << bits;
  418. * bits += 16
  419. * }
  420. * len += hold & mask[op];
  421. * bits -= op;
  422. * hold >>= op;
  423. * }
  424. */
  425. #define len_r %edx
  426. movl %eax, len_r /* len = this */
  427. shrl $16, len_r /* len = this.val */
  428. movb %al, %cl
  429. testb $16, %al
  430. jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
  431. andb $15, %cl /* op &= 15 */
  432. jz .L_save_len /* if (!op) */
  433. cmpb %cl, bits_r
  434. jae .L_add_bits_to_len /* if (op <= bits) */
  435. movb %cl, %ch /* stash op in ch, freeing cl */
  436. xorl %eax, %eax
  437. lodsw /* al = *(ushort *)in++ */
  438. movb bits_r, %cl /* cl = bits, needs it for shifting */
  439. addb $16, bits_r /* bits += 16 */
  440. shll %cl, %eax
  441. orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
  442. movb %ch, %cl /* move op back to ecx */
  443. .L_add_bits_to_len:
  444. movl $1, %eax
  445. shll %cl, %eax
  446. decl %eax
  447. subb %cl, bits_r
  448. andl hold_r, %eax /* eax &= hold */
  449. shrl %cl, hold_r
  450. addl %eax, len_r /* len += hold & mask[op] */
  451. .L_save_len:
  452. movl len_r, len(%esp) /* save len */
  453. #undef len_r
  454. .L_decode_distance:
  455. /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
  456. *
  457. * if (bits < 15) {
  458. * hold |= *((unsigned short *)in)++ << bits;
  459. * bits += 16
  460. * }
  461. * this = dcode[hold & dmask];
  462. * dodist:
  463. * bits -= this.bits;
  464. * hold >>= this.bits;
  465. * op = this.op;
  466. */
  467. cmpb $15, bits_r
  468. ja .L_get_distance_code /* if (15 < bits) */
  469. xorl %eax, %eax
  470. lodsw /* al = *(ushort *)in++ */
  471. movb bits_r, %cl /* cl = bits, needs it for shifting */
  472. addb $16, bits_r /* bits += 16 */
  473. shll %cl, %eax
  474. orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
  475. .L_get_distance_code:
  476. movl dmask(%esp), %edx /* edx = dmask */
  477. movl dcode(%esp), %ecx /* ecx = dcode */
  478. andl hold_r, %edx /* edx &= hold */
  479. movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */
  480. #define dist_r %edx
  481. .L_dodist:
  482. movl %eax, dist_r /* dist = this */
  483. shrl $16, dist_r /* dist = this.val */
  484. movb %ah, %cl
  485. subb %ah, bits_r /* bits -= this.bits */
  486. shrl %cl, hold_r /* hold >>= this.bits */
  487. /* if (op & 16) {
  488. * dist = this.val
  489. * op &= 15
  490. * if (op > bits) {
  491. * hold |= *((unsigned short *)in)++ << bits;
  492. * bits += 16
  493. * }
  494. * dist += hold & mask[op];
  495. * bits -= op;
  496. * hold >>= op;
  497. */
  498. movb %al, %cl /* cl = this.op */
  499. testb $16, %al /* if ((op & 16) == 0) */
  500. jz .L_test_for_second_level_dist
  501. andb $15, %cl /* op &= 15 */
  502. jz .L_check_dist_one
  503. cmpb %cl, bits_r
  504. jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */
  505. movb %cl, %ch /* stash op in ch, freeing cl */
  506. xorl %eax, %eax
  507. lodsw /* al = *(ushort *)in++ */
  508. movb bits_r, %cl /* cl = bits, needs it for shifting */
  509. addb $16, bits_r /* bits += 16 */
  510. shll %cl, %eax
  511. orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
  512. movb %ch, %cl /* move op back to ecx */
  513. .L_add_bits_to_dist:
  514. movl $1, %eax
  515. shll %cl, %eax
  516. decl %eax /* (1 << op) - 1 */
  517. subb %cl, bits_r
  518. andl hold_r, %eax /* eax &= hold */
  519. shrl %cl, hold_r
  520. addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */
  521. jmp .L_check_window
  522. .L_check_window:
  523. /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
  524. * %ecx = nbytes
  525. *
  526. * nbytes = out - beg;
  527. * if (dist <= nbytes) {
  528. * from = out - dist;
  529. * do {
  530. * PUP(out) = PUP(from);
  531. * } while (--len > 0) {
  532. * }
  533. */
  534. movl in_r, in(%esp) /* save in so from can use it's reg */
  535. movl out_r, %eax
  536. subl beg(%esp), %eax /* nbytes = out - beg */
  537. cmpl dist_r, %eax
  538. jb .L_clip_window /* if (dist > nbytes) 4.2% */
  539. movl len(%esp), %ecx
  540. movl out_r, from_r
  541. subl dist_r, from_r /* from = out - dist */
  542. subl $3, %ecx
  543. movb (from_r), %al
  544. movb %al, (out_r)
  545. movb 1(from_r), %al
  546. movb 2(from_r), %dl
  547. addl $3, from_r
  548. movb %al, 1(out_r)
  549. movb %dl, 2(out_r)
  550. addl $3, out_r
  551. rep movsb
  552. movl in(%esp), in_r /* move in back to %esi, toss from */
  553. jmp .L_while_test
  554. .align 16,0x90
  555. .L_check_dist_one:
  556. cmpl $1, dist_r
  557. jne .L_check_window
  558. cmpl out_r, beg(%esp)
  559. je .L_check_window
  560. decl out_r
  561. movl len(%esp), %ecx
  562. movb (out_r), %al
  563. subl $3, %ecx
  564. movb %al, 1(out_r)
  565. movb %al, 2(out_r)
  566. movb %al, 3(out_r)
  567. addl $4, out_r
  568. rep stosb
  569. jmp .L_while_test
  570. .align 16,0x90
  571. .L_test_for_second_level_length:
  572. /* else if ((op & 64) == 0) {
  573. * this = lcode[this.val + (hold & mask[op])];
  574. * }
  575. */
  576. testb $64, %al
  577. jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
  578. movl $1, %eax
  579. shll %cl, %eax
  580. decl %eax
  581. andl hold_r, %eax /* eax &= hold */
  582. addl %edx, %eax /* eax += this.val */
  583. movl lcode(%esp), %edx /* edx = lcode */
  584. movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */
  585. jmp .L_dolen
  586. .align 16,0x90
  587. .L_test_for_second_level_dist:
  588. /* else if ((op & 64) == 0) {
  589. * this = dcode[this.val + (hold & mask[op])];
  590. * }
  591. */
  592. testb $64, %al
  593. jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
  594. movl $1, %eax
  595. shll %cl, %eax
  596. decl %eax
  597. andl hold_r, %eax /* eax &= hold */
  598. addl %edx, %eax /* eax += this.val */
  599. movl dcode(%esp), %edx /* edx = dcode */
  600. movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */
  601. jmp .L_dodist
  602. .align 16,0x90
  603. .L_clip_window:
  604. /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
  605. * %ecx = nbytes
  606. *
  607. * else {
  608. * if (dist > wsize) {
  609. * invalid distance
  610. * }
  611. * from = window;
  612. * nbytes = dist - nbytes;
  613. * if (write == 0) {
  614. * from += wsize - nbytes;
  615. */
  616. #define nbytes_r %ecx
  617. movl %eax, nbytes_r
  618. movl wsize(%esp), %eax /* prepare for dist compare */
  619. negl nbytes_r /* nbytes = -nbytes */
  620. movl window(%esp), from_r /* from = window */
  621. cmpl dist_r, %eax
  622. jb .L_invalid_distance_too_far /* if (dist > wsize) */
  623. addl dist_r, nbytes_r /* nbytes = dist - nbytes */
  624. cmpl $0, write(%esp)
  625. jne .L_wrap_around_window /* if (write != 0) */
  626. subl nbytes_r, %eax
  627. addl %eax, from_r /* from += wsize - nbytes */
  628. /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
  629. * %ecx = nbytes, %eax = len
  630. *
  631. * if (nbytes < len) {
  632. * len -= nbytes;
  633. * do {
  634. * PUP(out) = PUP(from);
  635. * } while (--nbytes);
  636. * from = out - dist;
  637. * }
  638. * }
  639. */
  640. #define len_r %eax
  641. movl len(%esp), len_r
  642. cmpl nbytes_r, len_r
  643. jbe .L_do_copy1 /* if (nbytes >= len) */
  644. subl nbytes_r, len_r /* len -= nbytes */
  645. rep movsb
  646. movl out_r, from_r
  647. subl dist_r, from_r /* from = out - dist */
  648. jmp .L_do_copy1
  649. cmpl nbytes_r, len_r
  650. jbe .L_do_copy1 /* if (nbytes >= len) */
  651. subl nbytes_r, len_r /* len -= nbytes */
  652. rep movsb
  653. movl out_r, from_r
  654. subl dist_r, from_r /* from = out - dist */
  655. jmp .L_do_copy1
  656. .L_wrap_around_window:
  657. /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
  658. * %ecx = nbytes, %eax = write, %eax = len
  659. *
  660. * else if (write < nbytes) {
  661. * from += wsize + write - nbytes;
  662. * nbytes -= write;
  663. * if (nbytes < len) {
  664. * len -= nbytes;
  665. * do {
  666. * PUP(out) = PUP(from);
  667. * } while (--nbytes);
  668. * from = window;
  669. * nbytes = write;
  670. * if (nbytes < len) {
  671. * len -= nbytes;
  672. * do {
  673. * PUP(out) = PUP(from);
  674. * } while(--nbytes);
  675. * from = out - dist;
  676. * }
  677. * }
  678. * }
  679. */
  680. #define write_r %eax
  681. movl write(%esp), write_r
  682. cmpl write_r, nbytes_r
  683. jbe .L_contiguous_in_window /* if (write >= nbytes) */
  684. addl wsize(%esp), from_r
  685. addl write_r, from_r
  686. subl nbytes_r, from_r /* from += wsize + write - nbytes */
  687. subl write_r, nbytes_r /* nbytes -= write */
  688. #undef write_r
  689. movl len(%esp), len_r
  690. cmpl nbytes_r, len_r
  691. jbe .L_do_copy1 /* if (nbytes >= len) */
  692. subl nbytes_r, len_r /* len -= nbytes */
  693. rep movsb
  694. movl window(%esp), from_r /* from = window */
  695. movl write(%esp), nbytes_r /* nbytes = write */
  696. cmpl nbytes_r, len_r
  697. jbe .L_do_copy1 /* if (nbytes >= len) */
  698. subl nbytes_r, len_r /* len -= nbytes */
  699. rep movsb
  700. movl out_r, from_r
  701. subl dist_r, from_r /* from = out - dist */
  702. jmp .L_do_copy1
  703. .L_contiguous_in_window:
  704. /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
  705. * %ecx = nbytes, %eax = write, %eax = len
  706. *
  707. * else {
  708. * from += write - nbytes;
  709. * if (nbytes < len) {
  710. * len -= nbytes;
  711. * do {
  712. * PUP(out) = PUP(from);
  713. * } while (--nbytes);
  714. * from = out - dist;
  715. * }
  716. * }
  717. */
  718. #define write_r %eax
  719. addl write_r, from_r
  720. subl nbytes_r, from_r /* from += write - nbytes */
  721. #undef write_r
  722. movl len(%esp), len_r
  723. cmpl nbytes_r, len_r
  724. jbe .L_do_copy1 /* if (nbytes >= len) */
  725. subl nbytes_r, len_r /* len -= nbytes */
  726. rep movsb
  727. movl out_r, from_r
  728. subl dist_r, from_r /* from = out - dist */
  729. .L_do_copy1:
  730. /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
  731. * %eax = len
  732. *
  733. * while (len > 0) {
  734. * PUP(out) = PUP(from);
  735. * len--;
  736. * }
  737. * }
  738. * } while (in < last && out < end);
  739. */
  740. #undef nbytes_r
  741. #define in_r %esi
  742. movl len_r, %ecx
  743. rep movsb
  744. movl in(%esp), in_r /* move in back to %esi, toss from */
  745. jmp .L_while_test
  746. #undef len_r
  747. #undef dist_r
  748. #endif /* NO_MMX || RUN_TIME_MMX */
  749. /*** MMX code ***/
  750. #if defined( USE_MMX ) || defined( RUN_TIME_MMX )
  751. .align 32,0x90
  752. .L_init_mmx:
  753. emms
  754. #undef bits_r
  755. #undef bitslong_r
  756. #define bitslong_r %ebp
  757. #define hold_mm %mm0
  758. movd %ebp, hold_mm
  759. movl %ebx, bitslong_r
  760. #define used_mm %mm1
  761. #define dmask2_mm %mm2
  762. #define lmask2_mm %mm3
  763. #define lmask_mm %mm4
  764. #define dmask_mm %mm5
  765. #define tmp_mm %mm6
  766. movd lmask(%esp), lmask_mm
  767. movq lmask_mm, lmask2_mm
  768. movd dmask(%esp), dmask_mm
  769. movq dmask_mm, dmask2_mm
  770. pxor used_mm, used_mm
  771. movl lcode(%esp), %ebx /* ebx = lcode */
  772. jmp .L_do_loop_mmx
  773. .align 32,0x90
  774. .L_while_test_mmx:
  775. /* while (in < last && out < end)
  776. */
  777. cmpl out_r, end(%esp)
  778. jbe .L_break_loop /* if (out >= end) */
  779. cmpl in_r, last(%esp)
  780. jbe .L_break_loop
  781. .L_do_loop_mmx:
  782. psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
  783. cmpl $32, bitslong_r
  784. ja .L_get_length_code_mmx /* if (32 < bits) */
  785. movd bitslong_r, tmp_mm
  786. movd (in_r), %mm7
  787. addl $4, in_r
  788. psllq tmp_mm, %mm7
  789. addl $32, bitslong_r
  790. por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
  791. .L_get_length_code_mmx:
  792. pand hold_mm, lmask_mm
  793. movd lmask_mm, %eax
  794. movq lmask2_mm, lmask_mm
  795. movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */
  796. .L_dolen_mmx:
  797. movzbl %ah, %ecx /* ecx = this.bits */
  798. movd %ecx, used_mm
  799. subl %ecx, bitslong_r /* bits -= this.bits */
  800. testb %al, %al
  801. jnz .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
  802. shrl $16, %eax /* output this.val char */
  803. stosb
  804. jmp .L_while_test_mmx
.L_test_for_length_base_mmx:
/* A length base was decoded; fetch any extra length bits. */
#define len_r %edx
        movl    %eax, len_r             /* len = this */
        shrl    $16, len_r              /* len = this.val */

        testb   $16, %al
        jz      .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
        andl    $15, %eax               /* op &= 15: count of extra bits */
        jz      .L_decode_distance_mmx  /* if (!op), no extra bits */

        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    %eax, used_mm           /* the extra bits are consumed next */
        movd    hold_mm, %ecx
        subl    %eax, bitslong_r        /* bits -= op */
        andl    .L_mask(,%eax,4), %ecx
        addl    %ecx, len_r             /* len += hold & mask[op] */
.L_decode_distance_mmx:
/* Refill hold if needed, then look up the distance code in dcode. */
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */

        cmpl    $32, bitslong_r
        ja      .L_get_dist_code_mmx    /* if (32 < bits), no refill needed */
        movd    bitslong_r, tmp_mm
        movd    (in_r), %mm7            /* fetch 32 more input bits */
        addl    $4, in_r
        psllq   tmp_mm, %mm7
        addl    $32, bitslong_r
        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */

.L_get_dist_code_mmx:
        movl    dcode(%esp), %ebx       /* ebx = dcode */
        pand    hold_mm, dmask_mm       /* index = hold & dmask */
        movd    dmask_mm, %eax
        movq    dmask2_mm, dmask_mm     /* restore dmask from the spare copy */
        movl    (%ebx,%eax,4), %eax     /* eax = dcode[hold & dmask] */
.L_dodist_mmx:
/* eax is a distance code entry: al = op, ah = bits, high word = val */
#define dist_r %ebx
        movzbl  %ah, %ecx               /* ecx = this.bits */
        movl    %eax, dist_r
        shrl    $16, dist_r             /* dist = this.val */
        subl    %ecx, bitslong_r        /* bits -= this.bits */
        movd    %ecx, used_mm           /* remember how many bits to drop */

        testb   $16, %al                /* if ((op & 16) == 0) */
        jz      .L_test_for_second_level_dist_mmx
        andl    $15, %eax               /* op &= 15: count of extra bits */
        jz      .L_check_dist_one_mmx   /* if (!op), no extra bits */

.L_add_bits_to_dist_mmx:
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    %eax, used_mm           /* save bit length of current op */
        movd    hold_mm, %ecx           /* get the next bits on input stream */
        subl    %eax, bitslong_r        /* bits -= op bits */
        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
        addl    %ecx, dist_r            /* dist += hold & mask[op] */
.L_check_window_mmx:
/* The copy source is within the current output buffer (out - dist).
 * Copy the first three bytes by hand, then let rep movsb move the rest;
 * from_r borrows in's register (%esi) while in is parked on the stack.
 */
        movl    in_r, in(%esp)          /* save in so from can use its reg */
        movl    out_r, %eax
        subl    beg(%esp), %eax         /* nbytes = out - beg */

        cmpl    dist_r, %eax
        jb      .L_clip_window_mmx      /* if (dist > nbytes) 4.2% */

        movl    len_r, %ecx
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */

        subl    $3, %ecx                /* three bytes are copied by hand */
        movb    (from_r), %al
        movb    %al, (out_r)
        movb    1(from_r), %al
        movb    2(from_r), %dl
        addl    $3, from_r
        movb    %al, 1(out_r)
        movb    %dl, 2(out_r)
        addl    $3, out_r
        rep     movsb                   /* copy the remaining len - 3 bytes */

        movl    in(%esp), in_r          /* move in back to %esi, toss from */
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
        jmp     .L_while_test_mmx
.align 16,0x90
.L_check_dist_one_mmx:
/* dist == 1 means a run of one repeated byte: replicate it with
 * rep stosb, unless out == beg (nothing behind out to repeat) or
 * dist turned out not to be 1 after all.
 */
        cmpl    $1, dist_r
        jne     .L_check_window_mmx
        cmpl    out_r, beg(%esp)
        je      .L_check_window_mmx     /* out == beg: fall back to window path */

        decl    out_r                   /* step back to the byte to repeat */
        movl    len_r, %ecx
        movb    (out_r), %al            /* al = byte to replicate */
        subl    $3, %ecx                /* three bytes are stored by hand */

        movb    %al, 1(out_r)
        movb    %al, 2(out_r)
        movb    %al, 3(out_r)
        addl    $4, out_r               /* undo decl and skip the 3 stores */
        rep     stosb                   /* store the remaining len - 3 bytes */

        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
        jmp     .L_while_test_mmx
.align 16,0x90
.L_test_for_second_level_length_mmx:
/* The length code needs a second-level table lookup; op & 64 instead
 * means end-of-block or an invalid code.
 */
        testb   $64, %al
        jnz     .L_test_for_end_of_block /* if ((op & 64) != 0) */

        andl    $15, %eax
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    hold_mm, %ecx
        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
        addl    len_r, %ecx             /* index = this.val + extra bits */
        movl    (%ebx,%ecx,4), %eax     /* eax = lcode[index] */
        jmp     .L_dolen_mmx
.align 16,0x90
.L_test_for_second_level_dist_mmx:
/* The distance code needs a second-level table lookup; op & 64 means
 * an invalid distance code.
 */
        testb   $64, %al
        jnz     .L_invalid_distance_code /* if ((op & 64) != 0) */

        andl    $15, %eax
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    hold_mm, %ecx
        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
        movl    dcode(%esp), %eax       /* eax = dcode */
        addl    dist_r, %ecx            /* index = this.val + extra bits */
        movl    (%eax,%ecx,4), %eax     /* eax = dcode[index] */
        jmp     .L_dodist_mmx
  915. .align 16,0x90
  916. .L_clip_window_mmx:
  917. #define nbytes_r %ecx
  918. movl %eax, nbytes_r
  919. movl wsize(%esp), %eax /* prepare for dist compare */
  920. negl nbytes_r /* nbytes = -nbytes */
  921. movl window(%esp), from_r /* from = window */
  922. cmpl dist_r, %eax
  923. jb .L_invalid_distance_too_far /* if (dist > wsize) */
  924. addl dist_r, nbytes_r /* nbytes = dist - nbytes */
  925. cmpl $0, write(%esp)
  926. jne .L_wrap_around_window_mmx /* if (write != 0) */
  927. subl nbytes_r, %eax
  928. addl %eax, from_r /* from += wsize - nbytes */
  929. cmpl nbytes_r, len_r
  930. jbe .L_do_copy1_mmx /* if (nbytes >= len) */
  931. subl nbytes_r, len_r /* len -= nbytes */
  932. rep movsb
  933. movl out_r, from_r
  934. subl dist_r, from_r /* from = out - dist */
  935. jmp .L_do_copy1_mmx
  936. cmpl nbytes_r, len_r
  937. jbe .L_do_copy1_mmx /* if (nbytes >= len) */
  938. subl nbytes_r, len_r /* len -= nbytes */
  939. rep movsb
  940. movl out_r, from_r
  941. subl dist_r, from_r /* from = out - dist */
  942. jmp .L_do_copy1_mmx
.L_wrap_around_window_mmx:
/* write != 0: the window has wrapped.  The bytes may come from up to
 * three places in order: the old data at the end of the window, the
 * newer data at the start of the window, and finally the output
 * buffer itself.
 */
#define write_r %eax
        movl    write(%esp), write_r
        cmpl    write_r, nbytes_r
        jbe     .L_contiguous_in_window_mmx /* if (write >= nbytes) */

        addl    wsize(%esp), from_r
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
        subl    write_r, nbytes_r       /* nbytes -= write */
#undef write_r

        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy the tail of the window */
        movl    window(%esp), from_r    /* from = window */
        movl    write(%esp), nbytes_r   /* nbytes = write */
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy the head of the window */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        jmp     .L_do_copy1_mmx
.L_contiguous_in_window_mmx:
/* write >= nbytes: everything needed from the window lies in one
 * contiguous run ending at window[write].
 */
#define write_r %eax
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += write - nbytes */
#undef write_r

        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy the part held in the window */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */

.L_do_copy1_mmx:
/* Copy the final len bytes, then restore in and lcode to their regs. */
#undef nbytes_r
#define in_r %esi
        movl    len_r, %ecx
        rep     movsb

        movl    in(%esp), in_r          /* move in back to %esi, toss from */
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
        jmp     .L_while_test_mmx

#undef hold_r
#undef bitslong_r

#endif /* USE_MMX || RUN_TIME_MMX */
/*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/

/* Error / end-of-block exits.  Each loads %ecx = message (or 0) and
 * %edx = new inflate mode, then falls into .L_update_stream_state.
 */
.L_invalid_distance_code:
/* else {
 *   strm->msg = "invalid distance code";
 *   state->mode = BAD;
 * }
 */
        movl    $.L_invalid_distance_code_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state

.L_test_for_end_of_block:
/* else if (op & 32) {
 *   state->mode = TYPE;
 *   break;
 * }
 */
        testb   $32, %al
        jz      .L_invalid_literal_length_code /* if ((op & 32) == 0) */
        movl    $0, %ecx                /* end of block: no error message */
        movl    $INFLATE_MODE_TYPE, %edx
        jmp     .L_update_stream_state

.L_invalid_literal_length_code:
/* else {
 *   strm->msg = "invalid literal/length code";
 *   state->mode = BAD;
 * }
 */
        movl    $.L_invalid_literal_length_code_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state

.L_invalid_distance_too_far:
/* strm->msg = "invalid distance too far back";
 * state->mode = BAD;
 */
        movl    in(%esp), in_r          /* from_r has in's reg, put in back */
        movl    $.L_invalid_distance_too_far_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state
.L_update_stream_state:
/* set strm->msg = %ecx (only if non-NULL), strm->state->mode = %edx */
        movl    strm_sp(%esp), %eax
        testl   %ecx, %ecx              /* if (msg != NULL) */
        jz      .L_skip_msg
        movl    %ecx, msg_strm(%eax)    /* strm->msg = msg */
.L_skip_msg:
        movl    state_strm(%eax), %eax  /* state = strm->state */
        movl    %edx, mode_state(%eax)  /* state->mode = edx (BAD | TYPE) */
        jmp     .L_break_loop
.align 32,0x90
.L_break_loop:
/*
 * Write the decode state back into strm / strm->state.
 *
 * Regs:
 *
 * bits = %ebp when mmx, and in %ebx when non-mmx
 * hold = %hold_mm when mmx, and in %ebp when non-mmx
 * in   = %esi
 * out  = %edi
 */
#if defined( USE_MMX ) || defined( RUN_TIME_MMX )

#if defined( RUN_TIME_MMX )
        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
        jne     .L_update_next_in
#endif /* RUN_TIME_MMX */

        movl    %ebp, %ebx              /* mmx path: bits into %ebx too */

.L_update_next_in:

#endif

#define strm_r  %eax
#define state_r %edx
/* len = bits >> 3;
 * in -= len;
 * bits -= len << 3;
 * hold &= (1U << bits) - 1;
 * state->hold = hold;
 * state->bits = bits;
 * strm->next_in = in;
 * strm->next_out = out;
 */
        movl    strm_sp(%esp), strm_r
        movl    %ebx, %ecx
        movl    state_strm(strm_r), state_r
        shrl    $3, %ecx                /* ecx = whole bytes still in hold */
        subl    %ecx, in_r              /* give them back to the input */
        shll    $3, %ecx
        subl    %ecx, %ebx              /* bits -= len << 3 */
        movl    out_r, next_out_strm(strm_r)
        movl    %ebx, bits_state(state_r)
        movl    %ebx, %ecx              /* cl = bits, for the mask below */

        leal    buf(%esp), %ebx
        cmpl    %ebx, last(%esp)
        jne     .L_buf_not_used         /* if buf != last */

/* We were reading from the on-stack copy of the input tail; translate
 * in back into strm->next_in coordinates and rebuild last.
 */
        subl    %ebx, in_r              /* in -= buf */
        movl    next_in_strm(strm_r), %ebx
        movl    %ebx, last(%esp)        /* last = strm->next_in */
        addl    %ebx, in_r              /* in += strm->next_in */
        movl    avail_in_strm(strm_r), %ebx
        subl    $11, %ebx
        addl    %ebx, last(%esp)        /* last = &strm->next_in[ avail_in - 11 ] */

.L_buf_not_used:
        movl    in_r, next_in_strm(strm_r)

        movl    $1, %ebx
        shll    %cl, %ebx
        decl    %ebx                    /* ebx = (1U << bits) - 1 */
#if defined( USE_MMX ) || defined( RUN_TIME_MMX )

#if defined( RUN_TIME_MMX )
        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
        jne     .L_update_hold
#endif /* RUN_TIME_MMX */

        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    hold_mm, %ebp           /* hold back into %ebp */
        emms                            /* done with MMX state */

.L_update_hold:

#endif /* USE_MMX || RUN_TIME_MMX */

        andl    %ebx, %ebp              /* hold &= (1U << bits) - 1 */
        movl    %ebp, hold_state(state_r)

#define last_r %ebx
/* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
        movl    last(%esp), last_r
        cmpl    in_r, last_r
        jbe     .L_last_is_smaller      /* if (in >= last) */

        subl    in_r, last_r            /* last -= in */
        addl    $11, last_r             /* last += 11 */
        movl    last_r, avail_in_strm(strm_r)
        jmp     .L_fixup_out

.L_last_is_smaller:
        subl    last_r, in_r            /* in -= last */
        negl    in_r                    /* in = -in */
        addl    $11, in_r               /* in += 11 */
        movl    in_r, avail_in_strm(strm_r)

#undef last_r
#define end_r %ebx
.L_fixup_out:
/* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end) */
        movl    end(%esp), end_r
        cmpl    out_r, end_r
        jbe     .L_end_is_smaller       /* if (out >= end) */

        subl    out_r, end_r            /* end -= out */
        addl    $257, end_r             /* end += 257 */
        movl    end_r, avail_out_strm(strm_r)
        jmp     .L_done

.L_end_is_smaller:
        subl    end_r, out_r            /* out -= end */
        negl    out_r                   /* out = -out */
        addl    $257, out_r             /* out += 257 */
        movl    out_r, avail_out_strm(strm_r)

#undef end_r
#undef strm_r
#undef state_r

.L_done:
/* release the locals, restore callee-saved registers, and return */
        addl    $local_var_size, %esp
        popf
        popl    %ebx
        popl    %ebp
        popl    %esi
        popl    %edi
        ret
#if defined( GAS_ELF )
/* elf info: mark inflate_fast as a function and record its size */
.type inflate_fast,@function
.size inflate_fast,.-inflate_fast
#endif