summaryrefslogtreecommitdiff
path: root/zlib/contrib/asm386/gvmat32.asm
blob: 28d527f47f885e4cd957e1c2559f0ef74697b5cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
;
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
; File written by Gilles Vollant, by modifiying the longest_match
;  from Jean-loup Gailly in deflate.c
; It need wmask == 0x7fff
;     (assembly code is faster with a fixed wmask)
;
; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
;   I compile with : "ml /coff /Zi /c gvmat32.asm"
;

;uInt longest_match_7fff(s, cur_match)
;    deflate_state *s;
;    IPos cur_match;                             /* current match */

        NbStack         equ     76
        cur_match       equ     dword ptr[esp+NbStack-0]
        str_s           equ     dword ptr[esp+NbStack-4]
; 5 dword on top (ret,ebp,esi,edi,ebx)
        adrret          equ     dword ptr[esp+NbStack-8]
        pushebp         equ     dword ptr[esp+NbStack-12]
        pushedi         equ     dword ptr[esp+NbStack-16]
        pushesi         equ     dword ptr[esp+NbStack-20]
        pushebx         equ     dword ptr[esp+NbStack-24]

        chain_length    equ     dword ptr [esp+NbStack-28]
        limit           equ     dword ptr [esp+NbStack-32]
        best_len        equ     dword ptr [esp+NbStack-36]
        window          equ     dword ptr [esp+NbStack-40]
        prev            equ     dword ptr [esp+NbStack-44]
        scan_start      equ      word ptr [esp+NbStack-48]
        wmask           equ     dword ptr [esp+NbStack-52]
        match_start_ptr equ     dword ptr [esp+NbStack-56]
        nice_match      equ     dword ptr [esp+NbStack-60]
        scan            equ     dword ptr [esp+NbStack-64]

        windowlen       equ     dword ptr [esp+NbStack-68]
        match_start     equ     dword ptr [esp+NbStack-72]
        strend          equ     dword ptr [esp+NbStack-76]
        NbStackAdd      equ     (NbStack-24)

    .386p

    name    gvmatch
    .MODEL  FLAT



;  all the +4 offsets are due to the addition of pending_buf_size (in zlib
;  in the deflate_state structure since the asm code was first written
;  (if you compile with zlib 1.0.4 or older, remove the +4).
;  Note : these value are good with a 8 bytes boundary pack structure
    dep_chain_length    equ     70h+4
    dep_window          equ     2ch+4
    dep_strstart        equ     60h+4
    dep_prev_length     equ     6ch+4
    dep_nice_match      equ     84h+4
    dep_w_size          equ     20h+4
    dep_prev            equ     34h+4
    dep_w_mask          equ     28h+4
    dep_good_match      equ     80h+4
    dep_match_start     equ     64h+4
    dep_lookahead       equ     68h+4


_TEXT                   segment

IFDEF NOUNDERLINE
                        public  longest_match_7fff
;                        public  match_init
ELSE
                        public  _longest_match_7fff
;                        public  _match_init
ENDIF

    MAX_MATCH           equ     258
    MIN_MATCH           equ     3
    MIN_LOOKAHEAD       equ     (MAX_MATCH+MIN_MATCH+1)



IFDEF NOUNDERLINE
;match_init      proc near
;                ret
;match_init      endp
ELSE
;_match_init     proc near
;                ret
;_match_init     endp
ENDIF


IFDEF NOUNDERLINE
longest_match_7fff   proc near
ELSE
_longest_match_7fff  proc near
ENDIF

        mov     edx,[esp+4]



        push    ebp
        push    edi
        push    esi
        push    ebx

        sub     esp,NbStackAdd

; initialize or check the variables used in match.asm.
        mov     ebp,edx

; chain_length = s->max_chain_length
; if (prev_length>=good_match) chain_length >>= 2
        mov     edx,[ebp+dep_chain_length]
        mov     ebx,[ebp+dep_prev_length]
        cmp     [ebp+dep_good_match],ebx
        ja      noshr
        shr     edx,2
noshr:
; we increment chain_length because in the asm, the --chain_lenght is in the beginning of the loop
        inc     edx
        mov     edi,[ebp+dep_nice_match]
        mov     chain_length,edx
        mov     eax,[ebp+dep_lookahead]
        cmp     eax,edi
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
        jae     nolookaheadnicematch
        mov     edi,eax
nolookaheadnicematch:
; best_len = s->prev_length
        mov     best_len,ebx

; window = s->window
        mov     esi,[ebp+dep_window]
        mov     ecx,[ebp+dep_strstart]
        mov     window,esi

        mov     nice_match,edi
; scan = window + strstart
        add     esi,ecx
        mov     scan,esi
; dx = *window
        mov     dx,word ptr [esi]
; bx = *(window+best_len-1)
        mov     bx,word ptr [esi+ebx-1]
        add     esi,MAX_MATCH-1
; scan_start = *scan
        mov     scan_start,dx
; strend = scan + MAX_MATCH-1
        mov     strend,esi
; bx = scan_end = *(window+best_len-1)

;    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;        s->strstart - (IPos)MAX_DIST(s) : NIL;

        mov     esi,[ebp+dep_w_size]
        sub     esi,MIN_LOOKAHEAD
; here esi = MAX_DIST(s)
        sub     ecx,esi
        ja      nodist
        xor     ecx,ecx
nodist:
        mov     limit,ecx

; prev = s->prev
        mov     edx,[ebp+dep_prev]
        mov     prev,edx

;
        mov     edx,dword ptr [ebp+dep_match_start]
        mov     bp,scan_start
        mov     eax,cur_match
        mov     match_start,edx

        mov     edx,window
        mov     edi,edx
        add     edi,best_len
        mov     esi,prev
        dec     edi
; windowlen = window + best_len -1
        mov     windowlen,edi

        jmp     beginloop2
        align   4

; here, in the loop
;       eax = ax = cur_match
;       ecx = limit
;        bx = scan_end
;        bp = scan_start
;       edi = windowlen (window + best_len -1)
;       esi = prev


;// here; chain_length <=16
normalbeg0add16:
        add     chain_length,16
        jz      exitloop
normalbeg0:
        cmp     word ptr[edi+eax],bx
        je      normalbeg2noroll
rcontlabnoroll:
; cur_match = prev[cur_match & wmask]
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
; if cur_match > limit, go to exitloop
        cmp     ecx,eax
        jnb     exitloop
; if --chain_length != 0, go to exitloop
        dec     chain_length
        jnz     normalbeg0
        jmp     exitloop

normalbeg2noroll:
; if (scan_start==*(cur_match+window)) goto normalbeg2
        cmp     bp,word ptr[edx+eax]
        jne     rcontlabnoroll
        jmp     normalbeg2

contloop3:
        mov     edi,windowlen

; cur_match = prev[cur_match & wmask]
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
; if cur_match > limit, go to exitloop
        cmp     ecx,eax
jnbexitloopshort1:
        jnb     exitloop
; if --chain_length != 0, go to exitloop


; begin the main loop
beginloop2:
        sub     chain_length,16+1
; if chain_length <=16, don't use the unrolled loop
        jna     normalbeg0add16

do16:
        cmp     word ptr[edi+eax],bx
        je      normalbeg2dc0

maccn   MACRO   lab
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
        cmp     ecx,eax
        jnb     exitloop
        cmp     word ptr[edi+eax],bx
        je      lab
        ENDM

rcontloop0:
        maccn   normalbeg2dc1

rcontloop1:
        maccn   normalbeg2dc2

rcontloop2:
        maccn   normalbeg2dc3

rcontloop3:
        maccn   normalbeg2dc4

rcontloop4:
        maccn   normalbeg2dc5

rcontloop5:
        maccn   normalbeg2dc6

rcontloop6:
        maccn   normalbeg2dc7

rcontloop7:
        maccn   normalbeg2dc8

rcontloop8:
        maccn   normalbeg2dc9

rcontloop9:
        maccn   normalbeg2dc10

rcontloop10:
        maccn   short normalbeg2dc11

rcontloop11:
        maccn   short normalbeg2dc12

rcontloop12:
        maccn   short normalbeg2dc13

rcontloop13:
        maccn   short normalbeg2dc14

rcontloop14:
        maccn   short normalbeg2dc15

rcontloop15:
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
        cmp     ecx,eax
        jnb     exitloop

        sub     chain_length,16
        ja      do16
        jmp     normalbeg0add16

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

normbeg MACRO   rcontlab,valsub
; if we are here, we know that *(match+best_len-1) == scan_end
        cmp     bp,word ptr[edx+eax]
; if (match != scan_start) goto rcontlab
        jne     rcontlab
; calculate the good chain_length, and we'll compare scan and match string
        add     chain_length,16-valsub
        jmp     iseq
        ENDM


normalbeg2dc11:
        normbeg rcontloop11,11

normalbeg2dc12:
        normbeg short rcontloop12,12

normalbeg2dc13:
        normbeg short rcontloop13,13

normalbeg2dc14:
        normbeg short rcontloop14,14

normalbeg2dc15:
        normbeg short rcontloop15,15

normalbeg2dc10:
        normbeg rcontloop10,10

normalbeg2dc9:
        normbeg rcontloop9,9

normalbeg2dc8:
        normbeg rcontloop8,8

normalbeg2dc7:
        normbeg rcontloop7,7

normalbeg2dc6:
        normbeg rcontloop6,6

normalbeg2dc5:
        normbeg rcontloop5,5

normalbeg2dc4:
        normbeg rcontloop4,4

normalbeg2dc3:
        normbeg rcontloop3,3

normalbeg2dc2:
        normbeg rcontloop2,2

normalbeg2dc1:
        normbeg rcontloop1,1

normalbeg2dc0:
        normbeg rcontloop0,0


; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end

normalbeg2:
        mov     edi,window

        cmp     bp,word ptr[edi+eax]
        jne     contloop3                   ; if *(ushf*)match != scan_start, continue

iseq:
; if we are here, we know that *(match+best_len-1) == scan_end
; and (match == scan_start)

        mov     edi,edx
        mov     esi,scan                    ; esi = scan
        add     edi,eax                     ; edi = window + cur_match = match

        mov     edx,[esi+3]                 ; compare manually dword at match+3
        xor     edx,[edi+3]                 ; and scan +3

        jz      begincompare                ; if equal, go to long compare

; we will determine the unmatch byte and calculate len (in esi)
        or      dl,dl
        je      eq1rr
        mov     esi,3
        jmp     trfinval
eq1rr:
        or      dx,dx
        je      eq1

        mov     esi,4
        jmp     trfinval
eq1:
        and     edx,0ffffffh
        jz      eq11
        mov     esi,5
        jmp     trfinval
eq11:
        mov     esi,6
        jmp     trfinval

begincompare:
        ; here we now scan and match begin same
        add     edi,6
        add     esi,6
        mov     ecx,(MAX_MATCH-(2+4))/4     ; scan for at most MAX_MATCH bytes
        repe    cmpsd                       ; loop until mismatch

        je      trfin                       ; go to trfin if not unmatch
; we determine the unmatch byte
        sub     esi,4
        mov     edx,[edi-4]
        xor     edx,[esi]

        or      dl,dl
        jnz     trfin
        inc     esi

        or      dx,dx
        jnz     trfin
        inc     esi

        and     edx,0ffffffh
        jnz     trfin
        inc     esi

trfin:
        sub     esi,scan          ; esi = len
trfinval:
; here we have finised compare, and esi contain len of equal string
        cmp     esi,best_len        ; if len > best_len, go newbestlen
        ja      short newbestlen
; now we restore edx, ecx and esi, for the big loop
        mov     esi,prev
        mov     ecx,limit
        mov     edx,window
        jmp     contloop3

newbestlen:
        mov     best_len,esi        ; len become best_len

        mov     match_start,eax     ; save new position as match_start
        cmp     esi,nice_match      ; if best_len >= nice_match, exit
        jae     exitloop
        mov     ecx,scan
        mov     edx,window          ; restore edx=window
        add     ecx,esi
        add     esi,edx

        dec     esi
        mov     windowlen,esi       ; windowlen = window + best_len-1
        mov     bx,[ecx-1]          ; bx = *(scan+best_len-1) = scan_end

; now we restore ecx and esi, for the big loop :
        mov     esi,prev
        mov     ecx,limit
        jmp     contloop3

exitloop:
; exit : s->match_start=match_start
        mov     ebx,match_start
        mov     ebp,str_s
        mov     ecx,best_len
        mov     dword ptr [ebp+dep_match_start],ebx        
        mov     eax,dword ptr [ebp+dep_lookahead]
        cmp     ecx,eax
        ja      minexlo
        mov     eax,ecx
minexlo:
; return min(best_len,s->lookahead)
        
; restore stack and register ebx,esi,edi,ebp
        add     esp,NbStackAdd

        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
InfoAuthor:
; please don't remove this string !
; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary!
        db     0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah



IFDEF NOUNDERLINE
longest_match_7fff   endp
ELSE
_longest_match_7fff  endp
ENDIF


IFDEF NOUNDERLINE
cpudetect32     proc near
ELSE
_cpudetect32    proc near
ENDIF


	pushfd                  ; push original EFLAGS
	pop     eax             ; get original EFLAGS
	mov     ecx, eax        ; save original EFLAGS
	xor     eax, 40000h     ; flip AC bit in EFLAGS
	push    eax             ; save new EFLAGS value on stack
	popfd                   ; replace current EFLAGS value
	pushfd                  ; get new EFLAGS
	pop     eax             ; store new EFLAGS in EAX
	xor     eax, ecx        ; can’t toggle AC bit, processor=80386
	jz      end_cpu_is_386  ; jump if 80386 processor
	push    ecx
	popfd                   ; restore AC bit in EFLAGS first

	pushfd
	pushfd
	pop     ecx
			
	mov     eax, ecx        ; get original EFLAGS
	xor     eax, 200000h    ; flip ID bit in EFLAGS
	push    eax             ; save new EFLAGS value on stack
	popfd                   ; replace current EFLAGS value
	pushfd                  ; get new EFLAGS
	pop		eax	            ; store new EFLAGS in EAX
	popfd                   ; restore original EFLAGS
	xor		eax, ecx        ; can’t toggle ID bit,
	je		is_old_486		; processor=old

	mov     eax,1
	db      0fh,0a2h        ;CPUID   

exitcpudetect:
	ret

end_cpu_is_386:
	mov     eax,0300h
	jmp     exitcpudetect

is_old_486:
	mov     eax,0400h
	jmp     exitcpudetect

IFDEF NOUNDERLINE
cpudetect32     endp
ELSE
_cpudetect32    endp
ENDIF

_TEXT   ends
end