diff options
Diffstat (limited to 'human68k/deflate.s')
-rw-r--r-- | human68k/deflate.s | 1076 |
1 files changed, 1076 insertions, 0 deletions
diff --git a/human68k/deflate.s b/human68k/deflate.s new file mode 100644 index 0000000..246962c --- /dev/null +++ b/human68k/deflate.s @@ -0,0 +1,1076 @@ +;=========================================================================== +; Copyright (c) 1990-1999 Info-ZIP. All rights reserved. +; +; See the accompanying file LICENSE, version 1999-Oct-05 or later +; (the contents of which are also included in zip.h) for terms of use. +; If, for some reason, both of these files are missing, the Info-ZIP license +; also may be found at: ftp://ftp.cdrom.com/pub/infozip/license.html +;=========================================================================== +; This is a 680x0 assembly language translation of the Info-ZIP source file +; deflate.c, by Paul Kienitz. No rights reserved. The function longest_match +; is based in part on match.a by Carsten Steger, which in turn is partly based +; on match.s for 386 by Jean-loup Gailly and Kai Uwe Rommel. Mostly, however, +; this material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko. +; This code is not commented very much; see deflate.c for comments that explain +; what the functions are doing. +; +; The symbols that can be used to select different versions are as follows: +; +; CPU020 if defined, use 68020 instructions always. +; +; CPUTEST if defined, check at runtime for CPU type. Another symbol +; specifying the platform-specific test must be used with this. +; If neither of these is defined, use 68000 instructions only. +; Runtime test is nonportable; it is different for each OS. +; +; AMIGA use Amiga-specific test for 68020, if CPUTEST defined. Also +; tells it that registers d0/a0/d1/a1 are not preserved by +; function calls. At present, if AMIGA is not defined, it +; causes functions to preserve all registers. ALL OF THIS CODE +; CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED +; BY ANY FUNCTIONS THAT IT CALLS. +; +; DYN_ALLOC should be defined here if it is defined for C source; tells us +; that big arrays are allocated instead of static. +; +; WSIZE must be defined as the same number used for WSIZE in the C +; source, and must be a power of two <= 32768. As elsewhere, +; the default value is 32768. +; +; INT16 define this if ints are 16 bits; otherwise 32 bit ints assumed. +; +; SMALL_MEM define this if it is defined in the C source; otherwise it uses +; the MEDIUM_MEM model. BIG_MEM and MMAP are *not* supported. +; The FULL_SEARCH option in deflate.c is also not supported. +; +; DEBUG activates some tracing output, as in the C source. +; +; QUADLONG this selects a different version of the innermost longest_match +; loop code for 68020 operations, comparing bytes four at a time +; instead of two at a time. It seems to be a tiny bit faster on +; average, but it's slower often enough that one can't generalize. +; +; This code currently assumes that function results are returned in D0 for +; all platforms. It assumes that args to functions are pushed onto the stack, +; last arg first. It also currently assumes that all C symbols have an +; underscore prepended when referenced from assembly. +; +; 1999/09/23: for Human68k: Modified by Shimazaki Ryo. + + IFNDEF CPU020 + IFNDEF CPUTEST +CPU000 equ 1 + ENDC + ENDC + +; Use these macros for accessing variables of type int: + IFDEF INT16 +MOVINT MACRO _1,_2 + move.w _1,_2 + ENDM +CLRINT MACRO _1 + clr.w _1 + ENDM +INTSIZE equ 2 + ELSE ; !INT16 +MOVINT MACRO _1,_2 + move.l _1,_2 + ENDM +CLRINT MACRO _1 + clr.l _1 + ENDM +INTSIZE equ 4 + ENDC + + IFDEF DYN_ALLOC +BASEPTR MACRO _1,_2 + move.l _1,_2 + ENDM + ELSE +BASEPTR MACRO _1,_2 + lea _1,_2 + ENDM + ENDC + +; constants we use, many of them adjustable: + +MAX_MATCH equ 258 +MIN_MATCH equ 3 +TOO_FAR equ 4096 + IFNDEF WSIZE +WSIZE equ 32768 + ENDC +WMASK equ WSIZE-1 +MAX_DIST equ WSIZE-MAX_MATCH-MIN_MATCH-1 +MIN_LOOKAHEAD equ MAX_MATCH+MIN_MATCH+1 +; IFD BIG_MEM ; NOT supported -- type Pos needs to be 32 bits +;HASH_BITS equ 15 +; ELSE + IFDEF SMALL_MEM +HASH_BITS equ 13 + ELSE +HASH_BITS equ 14 ; default -- MEDIUM_MEM + ENDC +; ENDC ; BIG_MEM +HASH_SIZE equ 1<<HASH_BITS +HASH_MASK equ HASH_SIZE-1 +H_SHIFT equ (HASH_BITS+MIN_MATCH-1)/MIN_MATCH +B_SLOW equ 1 +B_FAST equ 2 +ZE_MEM equ 4 +EOF equ -1 + +; struct config is defined by these offsets: +Good_length equ 0 +Max_lazy equ 2 +Nice_length equ 4 +Max_chain equ 6 +Sizeof_config equ 8 + + +; external functions we call: + xref _ct_tally ; int ct_tally(int, int) + xref _flush_block ; unsigned long F(char *, unsigned long, int) + xref _ziperr ; void ziperr(int, char *) + xref _error ; void error(char *) + xref _calloc ; stdlib function: void *calloc(size_t, size_t) + xref _free ; stdlib function: void free(void *) + IFDEF DEBUG + xref _fputc ; stdio function: int fputc(int, FILE *) + xref _stderr ; pointer to FILE, which we pass to fputc + ENDC + +; our entry points: + xdef _lm_init ; void lm_init(int level, unsigned short *flags) + xdef _lm_free ; void lm_free(void) + xdef _deflate ; void deflate(void) ...the big one + xdef _fill_window ; this line is just for debugging + + +; ============================================================================ +; Here is where we have our global variables. + +;;; section deflatevars,data + +; external global variables we reference: + xref _verbose ; signed int + xref _level ; signed int + xref _read_buf ; int (*read_buf)(char *, unsigned int) + +; global variables we make available: + + xdef _window + xdef _prev + xdef _head + xdef _window_size + xdef _block_start + xdef _strstart + + bss + quad + + IFDEF DYN_ALLOC +_prev: ds.l 1 ; pointer to calloc()'d unsigned short array +_head: ds.l 1 ; pointer to calloc()'d unsigned short array +_window: ds.l 1 ; pointer to calloc()'d unsigned char array + ELSE ; !DYN_ALLOC +_prev: ds.w WSIZE ; array of unsigned short +_head: ds.w HASH_SIZE ; array of unsigned short +_window: ds.b 2*WSIZE ; array of unsigned char + ENDC ; ?DYN_ALLOC + + text + quad +_window_size: ds.l 1 ; unsigned long +_block_start: ds.l 1 ; unsigned long +_strstart: ds.w INTSIZE/2 ; unsigned int + +; Now here are our private variables: + + IFDEF CPUTEST +is020: ds.w 1 ; bool: CPU type is '020 or higher + ENDC +ins_h: ds.w 1 ; unsigned short +sliding: ds.w 1 ; bool: the file is read a piece at a time +eofile: ds.w 1 ; bool: we have read in the end of the file +max_lazy_match: ds.w 1 ; unsigned short +lookahead: ds.w 1 ; unsigned short + +; These are NOT DECLARED AS STATIC in deflate.c, but currently could be: +max_chain_len: ds.w 1 ; unsigned short (unsigned int in deflate.c) +prev_length: ds.w 1 ; unsigned short (unsigned int in deflate.c) +good_match: ds.w 1 ; unsigned short (unsigned int in deflate.c) +nice_match: ds.w 1 ; unsigned short (signed int in deflate.c) +match_start: ds.w 1 ; unsigned short (unsigned int in deflate.c) + +; This array of struct config is a constant and could be in the code section: +config_table: dc.w 0,0,0,0 ; level 0: store uncompressed + dc.w 4,4,8,4 ; level 1: fastest, loosest compression + dc.w 4,5,16,8 ; level 2 + dc.w 4,6,32,32 ; level 3: highest to use deflate_fast + dc.w 4,4,16,16 ; level 4: lowest to use lazy matches + dc.w 8,16,32,32 ; level 5 + dc.w 8,16,128,128 ; level 6: the default level + dc.w 8,32,128,256 ; level 7 + dc.w 32,128,258,1024 ; level 8 + dc.w 32,258,258,4096 ; level 9: maximum compression, slow + + +;;CAL_SH MACRO ; macro for calling zcalloc() +;; IFD INT16 +;; move.w #2,-(sp) +;; move.w #\1,-(sp) +;; jsr _zcalloc +;; addq #4,sp +;; ELSE +;; pea 2 +;; pea \1 +;; jsr _zcalloc +;; addq #8,sp +;; ENDC +;; ENDM + +CAL_SH MACRO _1 ; Okay, we're back to using regular calloc()... + movem.l d2/a2,-(sp) + pea 2 + pea _1 + jsr _calloc + addq #8,sp + movem.l (sp)+,d2/a2 + ENDM + +; ============================================================================ +; And here we begin our functions. match_init is for internal use only: + +;; section deflate,code + +match_init: + IFDEF CPUTEST ; now check for platform type + IFDEF AMIGA ; Amiga specific test for '020 CPU: + xref _SysBase + NOLIST + INCLUDE 'exec/execbase.i' + LIST + + clr.w is020 ; default value is 68000 + move.l _SysBase,a0 + btst #AFB_68020,AttnFlags+1(a0) + beq.s cheap + move.w #1,is020 +cheap: + ELSE ; !AMIGA + + FAIL Write an '020-detector for your system here! +; On the Macintosh, I believe GetEnvironment() provides the information. + + ENDC ; AMIGA + ENDC ; CPUTEST + rts ; match_init consists only of rts if CPUTEST unset + + +; ============================================================================ +; Here is longest_match(), the function that the rest of this was built up +; from, the hottest hot spot in the program and therefore the most heavily +; optimized. It has two different versions, one for '020 and higher CPUs, and +; one for 68000/68010. It can test at runtime which version to use if you +; create a test function in match_init for your platform. Currently such a +; test is implemented for the Amiga. It can also be assembled to use '000 or +; '020 code only. + +Cur_Match reg d0 ; unsigned int, kept valid as long +Best_Len reg d1 ; unsigned int, kept valid as long +Scan_Start reg d3 ; pair of bytes +Scan_End reg d4 ; pair of bytes +Limit reg d5 ; unsigned int +Chain_Length reg d6 ; unsigned int +Scan_Test reg d7 ; counter, pair of bytes sometimes +Scan reg a0 ; pointer to unsigned char +Match reg a1 ; pointer to unsigned char +Prev_Address reg a2 ; pointer to unsigned short +Scan_Ini reg a3 ; pointer to unsigned char +Match_Ini reg a5 ; pointer to unsigned char +; Note: "pair of bytes" means the two low order bytes of the register in +; 68020 code, but means the lowest and third lowest bytes on the 68000. +SAVEREGS reg d3-d7/a2/a3/a5 ; don't protect d0/d1/a0/a1 +; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model + + +longest_match: + movem.l SAVEREGS,-(sp) + +; setup steps common to byte and word versions: + IFDEF INT16 + and.l #$0000FFFF,Cur_Match ; upper half must be zero! +; we use an and.l down here for the sake of ATSIGN/REGARGS. + moveq #0,Limit ; so adding to Scan_Ini works + ENDC + move.w (max_chain_len,pc),Chain_Length + move.w (prev_length,pc),Best_Len + MOVINT (_strstart,pc),Limit + BASEPTR _prev,Prev_Address + BASEPTR _window,Match_Ini + move.l Match_Ini,Scan_Ini + addq #MIN_MATCH,Match_Ini ; optimizes inner loop + add.l Limit,Scan_Ini + sub.w #MAX_DIST,Limit + bhi.s limit_ok + moveq #0,Limit +limit_ok: + cmp.w (good_match,pc),Best_Len + blo.s length_ok + lsr.w #2,Chain_Length +length_ok: + subq.w #1,Chain_Length + + IFDEF CPUTEST + tst.w is020 ; can we use '020 stuff today? + bne WORD_match + ENDC + + IFNDEF CPU020 + +; for 68000 or 68010, use byte operations: + moveq #0,Scan_Start ; clear 2nd & 4th bytes, use 1st & 3rd + moveq #0,Scan_End ; likewise + moveq #0,Scan_Test ; likewise + move.b (Scan_Ini),Scan_Start + swap Scan_Start ; swap is faster than 8 bit shift + move.b 1(Scan_Ini),Scan_Start + move.b -1(Scan_Ini,Best_Len.w),Scan_End + swap Scan_End + move.b 0(Scan_Ini,Best_Len.w),Scan_End + bra.s bdo_scan + +blong_loop: + move.b -1(Scan_Ini,Best_Len.w),Scan_End + swap Scan_End + move.b 0(Scan_Ini,Best_Len.w),Scan_End + +bshort_loop: + add.w Cur_Match,Cur_Match ; assert value before doubling < 32K + IFNE 32768-WSIZE + and.w #(WMASK*2),Cur_Match + ENDC + move.w (Prev_Address,Cur_Match.l),Cur_Match + cmp.w Limit,Cur_Match + dbls Chain_Length,bdo_scan + bra return + +bdo_scan: + move.l Match_Ini,Match + add.l Cur_Match,Match + move.b -MIN_MATCH-1(Match,Best_Len.w),Scan_Test + swap Scan_Test + move.b -MIN_MATCH(Match,Best_Len.w),Scan_Test + cmp.l Scan_Test,Scan_End + bne.s bshort_loop + move.b -MIN_MATCH(Match),Scan_Test + swap Scan_Test + move.b -MIN_MATCH+1(Match),Scan_Test + cmp.l Scan_Test,Scan_Start + bne.s bshort_loop + move.w #(MAX_MATCH-3),Scan_Test + lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop + +bscan_loop: + cmp.b (Match)+,(Scan)+ + dbne Scan_Test,bscan_loop + subq #1,Scan + + sub.l Scan_Ini,Scan ; assert difference is 16 bits + cmp.w Best_Len,Scan + bls.s bshort_loop + MOVINT Scan,Best_Len + move.w Cur_Match,match_start + cmp.w (nice_match,pc),Best_Len + blo.s blong_loop + IFDEF CPUTEST + bra return + ENDC + + ENDC ; !CPU020 + + IFNDEF CPU000 +;;; MACHINE MC68020 + +; for 68020 or higher, use word operations even on odd addresses: +WORD_match: + move.w (Scan_Ini),Scan_Start + move.w -1(Scan_Ini,Best_Len.w),Scan_End + bra.s wdo_scan + +wlong_loop: + move.w -1(Scan_Ini,Best_Len.w),Scan_End + +wshort_loop: + and.w #WMASK,Cur_Match + move.w (Prev_Address,Cur_Match.w*2),Cur_Match ; '020 addressing mode + cmp.w Limit,Cur_Match + dbls Chain_Length,wdo_scan + bra.s return + +wdo_scan: + move.l Match_Ini,Match + add.l Cur_Match,Match + cmp.w -MIN_MATCH-1(Match,Best_Len.w),Scan_End + bne.s wshort_loop + cmp.w -MIN_MATCH(Match),Scan_Start + bne.s wshort_loop + IFDEF QUADLONG +; By some measurements, this version of the code is a little tiny bit faster. +; But on some files it's slower. It probably pays off only when there are +; long match strings, and costs in the most common case of three-byte matches. + moveq #((MAX_MATCH-MIN_MATCH)/16),Scan_Test ; value = 15 + lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop + +wscan_loop: + cmp.l (Match)+,(Scan)+ ; test four bytes at a time + bne.s odd + cmp.l (Match)+,(Scan)+ + bne.s odd + cmp.l (Match)+,(Scan)+ + bne.s odd + cmp.l (Match)+,(Scan)+ + dbne Scan_Test,wscan_loop ; '020 can cache a bigger loop +odd: + subq #4,Scan + subq #4,Match + cmp.b (Match)+,(Scan)+ ; find good bytes in bad longword + bne.s even + cmp.b (Match)+,(Scan)+ + bne.s even + cmp.b (Match)+,(Scan)+ + beq.s steven +even: subq #1,Scan + ELSE ; !QUADLONG + moveq #((MAX_MATCH-MIN_MATCH)/2),Scan_Test ; value = 127 + lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop + +wscan_loop: + cmp.w (Match)+,(Scan)+ + dbne Scan_Test,wscan_loop + subq #2,Scan + move.b -2(Match),Scan_Test + cmp.b (Scan),Scan_Test + bne.s steven + addq #1,Scan + ENDC ; ?QUADLONG +steven: + sub.l Scan_Ini,Scan ; assert: difference is 16 bits + cmp.w Best_Len,Scan + bls.s wshort_loop + MOVINT Scan,Best_Len + move.w Cur_Match,match_start + cmp.w (nice_match,pc),Best_Len + blo.s wlong_loop + +;;; MACHINE MC68000 + ENDC ; !CPU000 + +return: + MOVINT Best_Len,d0 ; return value (upper half should be clear) + movem.l (sp)+,SAVEREGS + rts + + +; ============================================================================= +; This is the deflate() function itself, our main entry point. It calls +; longest_match, above, and some outside functions. It is a hot spot, but not +; as hot as longest_match. It uses no special '020 code. + +; ================== Several macros used in deflate() and later functions: + +; Arg 1 is D-reg that new ins_h value is to be left in, +; arg 2 is the byte value to be hashed into it, which must not be the same reg +UP_HASH MACRO _1,_2 + move.w (ins_h,pc),_1 + asl.w #H_SHIFT,_1 + eor.b _2,_1 + and.w #HASH_MASK,_1 ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK + move.w _1,ins_h ; ins_h = that + ENDM + +; Arg 1 is scratch A, arg 2 is scratch D +IN_STR MACRO _1,_2 + move.l Strst,_2 + addq.w #MIN_MATCH-1,_2 + move.b (Window,_2.l),_2 ; window[strstart + MIN_MATCH - 1] + UP_HASH Head,_2 + add.l Head,Head ; assert upper word is zero before add + BASEPTR _head,_1 + add.l Head,_1 + move.w (_1),Head ; hash_head = head[ins_h] + move.w Strst,(_1) ; head[ins_h] = strstart + move.l Strst,_2 + IFNE WSIZE-32768 + and.w #WMASK,_2 + ENDC + add.w _2,_2 ; masks implicitly when WSIZE == 32768 + move.w Head,(Prev,_2.l) ; prev[str_start & WMASK] = hash_head + ENDM + +; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1 +FLUSH_B MACRO _1 + local nenu,nun + movem.l d2/a2,-(sp) + IF _1==0 + CLRINT -(sp) + ELSEIF (INTSIZE==4).and.(_1<$8000) + pea (_1).w + ELSE + MOVINT #_1,-(sp) + ENDC + move.l (_block_start,pc),d0 + blt.s nenu + move.l Window,a0 + add.l d0,a0 + bra.s nun +nenu: sub.l a0,a0 ; if block_start < 0, push NULL +nun: sub.l Strst,d0 + neg.l d0 + move.l d0,-(sp) + move.l a0,-(sp) + jsr _flush_block + lea 8+INTSIZE(sp),sp + movem.l (sp)+,d2/a2 + ENDM + +; This expands to nothing unless DEBUG is defined. +; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int +TRACE_C MACRO _1 + local qui + IFDEF DEBUG + cmp.w #1,_verbose+INTSIZE-2 ; test lower word only + ble.s qui + moveq #0,d0 + move.b ea,d0 + movem.l d2/a2,-(sp) + move.l _stderr,-(sp) + MOVINT d0,-(sp) + jsr _fputc + addq.l #4+INTSIZE,sp + movem.l (sp)+,d2/a2 +qui: + ENDC ; DEBUG + ENDM + +; ================== Here are the register vars we use, and deflate() itself: + +Window reg a2 ; cached address of window[] +Prev reg a3 ; cached address of prev[] +Strst reg d7 ; strstart cached as a longword +Look reg d6 ; lookahead cached as short +Head reg d5 ; local variable hash_head, short +PrevL reg d4 ; prev_length cached as short +MatchL reg d3 ; local variable match_length, unsigned short +Avail reg d2 ; local variable available_match, bool +PrevM reg a5 ; local variable prev_match, int in an A-reg + +DEFREGS reg d3-d7/a3/a5 + + +_deflate: ; first, setup steps common to deflate and deflate_fast: + movem.l DEFREGS,-(sp) + IFDEF INT16 + moveq #0,Strst ; make sure strstart is valid as a long + ENDC + moveq #0,Head ; ditto for hash_head + MOVINT (_strstart,pc),Strst + move.w (lookahead,pc),Look + move.w (prev_length,pc),PrevL + BASEPTR _window,Window + BASEPTR _prev,Prev + MOVINT _level,d0 + cmp.w #3,d0 + ble deflate_fast + moveq #MIN_MATCH-1,MatchL + moveq #0,Avail + +look_loop: + tst.w Look + beq last_tally + IN_STR a0,d0 + move.w MatchL,PrevL + move.w (match_start,pc),PrevM + move.w #MIN_MATCH-1,MatchL + + tst.w Head + beq.s no_new_match + cmp.w (max_lazy_match,pc),PrevL + bhs.s no_new_match + move.w Strst,d0 + sub.w Head,d0 + cmp.w #MAX_DIST,d0 + bhi.s no_new_match + move.w PrevL,prev_length ; longest_match reads these variables + MOVINT Strst,_strstart + MOVINT Head,d0 ; parm for longest_match + bsr longest_match ; sets match_start + cmp.w Look,d0 ; does length exceed valid data? + bls.s stml + move.w Look,d0 +stml: move.w d0,MatchL ; valid length of match + cmp.w #MIN_MATCH,MatchL ; is the match only three bytes? + bne.s no_new_match + move.w (match_start,pc),d0 + sub.w Strst,d0 + cmp.w #-TOO_FAR,d0 + bge.s no_new_match + moveq #MIN_MATCH-1,MatchL ; mark the current match as no good + +no_new_match: + cmp.w #MIN_MATCH,PrevL + blo literal + cmp.w MatchL,PrevL + blo literal + ; CHECK_MATCH Strst-1,PrevM,PrevL + MOVINT Strst,_strstart ; ct_tally reads this variable + move.l PrevL,d0 + subq.w #MIN_MATCH,d0 + movem.l d2/a2,-(sp) + MOVINT d0,-(sp) + move.l Strst,d0 + sub.w PrevM,d0 + subq.w #1,d0 + MOVINT d0,-(sp) + jsr _ct_tally ; sets d0 true if we have to flush + addq #2*INTSIZE,sp + movem.l (sp)+,d2/a2 + subq.w #3,PrevL ; convert for dbra (prev_length - 2) + sub.w PrevL,Look + subq.w #2,Look +insertmatch: + addq.w #1,Strst + IN_STR a0,d1 ; don't clobber d0 + dbra PrevL,insertmatch + moveq #0,Avail + moveq #0,PrevL ; not needed? + moveq #MIN_MATCH-1,MatchL + addq.w #1,Strst + tst.w d0 + beq refill + FLUSH_B 0 + move.l Strst,_block_start + bra.s refill + +literal: + tst.w Avail + bne.s yeslit + moveq #1,Avail + bra.s skipliteral +yeslit: TRACE_C <-1(Window,Strst.l)> + MOVINT Strst,_strstart ; ct_tally reads this variable + moveq #0,d0 + move.b -1(Window,Strst.l),d0 + movem.l d2/a2,-(sp) + MOVINT d0,-(sp) + CLRINT -(sp) + jsr _ct_tally + addq #2*INTSIZE,sp + movem.l (sp)+,d2/a2 + tst.w d0 + beq.s skipliteral + FLUSH_B 0 + move.l Strst,_block_start +skipliteral: + addq.w #1,Strst + subq.w #1,Look + +refill: + cmp.w #MIN_LOOKAHEAD,Look + bhs look_loop + bsr fill_window + bra look_loop + +last_tally: + tst.w Avail + beq last_flush + MOVINT Strst,_strstart ; ct_tally reads this variable + moveq #0,d0 + move.b -1(Window,Strst.l),d0 + movem.l d2/a2,-(sp) + MOVINT d0,-(sp) + CLRINT -(sp) + jsr _ct_tally + addq #2*INTSIZE,sp + movem.l (sp)+,d2/a2 +last_flush: + FLUSH_B 1 + bra deflate_exit + +; ================== This is another version used for low compression levels: + +deflate_fast: + moveq #0,MatchL + moveq #MIN_MATCH-1,PrevL +flook_loop: + tst.w Look + beq flast_flush + + IN_STR a0,d0 + tst.w Head + beq.s fno_new_match + move.w Strst,d0 + sub.w Head,d0 + cmp.w #MAX_DIST,d0 + bhi.s fno_new_match + move.w PrevL,prev_length ; longest_match reads these variables + MOVINT Strst,_strstart + MOVINT Head,d0 ; parm for longest_match + bsr longest_match ; sets match_start + cmp.w Look,d0 ; does length exceed valid data? + bls.s fstml + move.w Look,d0 +fstml: move.w d0,MatchL ; valid length of match + +fno_new_match: + cmp.w #MIN_MATCH,MatchL + blo fliteral + ; CHECK_MATCH Strst,match_start,MatchL + MOVINT Strst,_strstart ; ct_tally reads this variable + move.l MatchL,d0 + subq.w #MIN_MATCH,d0 + movem.l d2/a2,-(sp) + MOVINT d0,-(sp) + move.l Strst,d0 + sub.w (match_start,pc),d0 + MOVINT d0,-(sp) + jsr _ct_tally ; sets d0 true if we have to flush + addq #2*INTSIZE,sp + movem.l (sp)+,d2/a2 + sub.w MatchL,Look + cmp.w (max_lazy_match,pc),MatchL + bhi ftoolong + subq.w #2,MatchL +finsertmatch: + addq.w #1,Strst + IN_STR a0,d1 ; preserve d0 + dbra MatchL,finsertmatch + moveq #0,MatchL ; not needed? + addq.w #1,Strst + bra.s flushfill + +ftoolong: + add.w MatchL,Strst + moveq #0,MatchL + moveq #0,d1 ; preserve d0 + move.b (Window,Strst.l),d1 + move.w d1,ins_h +; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH... + move.b 1(Window,Strst.l),Avail ; Avail is not used in deflate_fast + UP_HASH d1,Avail ; preserve d0 + IFNE MIN_MATCH-3 + FAIL needs to UP_HASH another MIN_MATCH-3 times, but with what arg? + ENDC + bra.s flushfill + +fliteral: + TRACE_C <(Window,Strst.l)> + MOVINT Strst,_strstart ; ct_tally reads this variable + moveq #0,d0 + move.b (Window,Strst.l),d0 + movem.l d2/a2,-(sp) + MOVINT d0,-(sp) + CLRINT -(sp) + jsr _ct_tally ; d0 set if we need to flush + addq #2*INTSIZE,sp + movem.l (sp)+,d2/a2 + addq.w #1,Strst + subq.w #1,Look + +flushfill: + tst.w d0 + beq.s frefill + FLUSH_B 0 + move.l Strst,_block_start +frefill: + cmp.w #MIN_LOOKAHEAD,Look + bhs flook_loop + bsr fill_window + bra flook_loop + +flast_flush: + FLUSH_B 1 ; sets our return value + +deflate_exit: + MOVINT Strst,_strstart ; save back cached values + move.w PrevL,prev_length + move.w Look,lookahead + movem.l (sp)+,DEFREGS + rts + + +; ========================================================================= +; void fill_window(void) calls the input function to refill the sliding +; window that we use to find substring matches in. + +More reg Head ; local variable in fill_window +WindTop reg Prev ; local variable used for sliding +SlidIx reg PrevL ; local variable used for sliding + +FWREGS reg d2-d5/a2-a6 ; does NOT include Look and Strst +; all registers available to be clobbered by the sliding operation: +; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7. +SPAREGS reg d0-d3/a0-a1/a5-a6 +SPCOUNT equ 8 ; number of registers in SPAREGS + + +_fill_window: ; C-callable entry point + movem.l Look/Strst,-(sp) + IFDEF INT16 + moveq #0,Strst ; Strst must be valid as a long + ENDC + MOVINT (_strstart,pc),Strst + move.w (lookahead,pc),Look + BASEPTR _window,Window + bsr.s fill_window + MOVINT Strst,_strstart + move.w Look,lookahead + movem.l (sp)+,Look/Strst + rts + +; strstart, lookahead, and window must be cached in Strst, Look, and Window: +fill_window: ; asm-callable entry point + movem.l FWREGS,-(sp) + move.w (eofile,pc),d0 ; we put this up here for speed + bne fwdone + and.l #$FFFF,Look ; make sure Look is valid as long +fw_refill: + move.l (_window_size,pc),More ; <= 64K + sub.l Look,More + sub.l Strst,More ; Strst is already valid as long + cmp.w #EOF,More + bne.s notboundary + subq.w #1,More + bra checkend + +notboundary: + move.w (sliding,pc),d0 + beq checkend + cmp.w #WSIZE+MAX_DIST,Strst + blo checkend + IF (32768-WSIZE)>0 + lea WSIZE(Window),WindTop ; WindTop is aligned when Window is + ELSE + move.l Window,WindTop + add.l #WSIZE,WindTop + ENDC + move.l Window,d0 + and.w #3,d0 + beq.s isaligned + subq.w #1,d0 +align: move.b (WindTop)+,(Window)+ ; copy up to a longword boundary + dbra d0,align +isaligned: +; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop: + move.w #(WSIZE-1)/(4*SPCOUNT),SlidIx +slide: movem.l (WindTop)+,SPAREGS ; copy, 32 bytes at a time! + movem.l SPAREGS,(Window) ; a slight overshoot doesn't matter. + lea 4*SPCOUNT(Window),Window ; can't use (aN)+ as movem.l dest + dbra SlidIx,slide + BASEPTR _window,Window ; restore cached value + sub.w #WSIZE,match_start + sub.w #WSIZE,Strst + sub.l #WSIZE,_block_start + add.w #WSIZE,More + BASEPTR _head,a0 + move.w #HASH_SIZE-1,d0 +fixhead: + move.w (a0),d1 + sub.w #WSIZE,d1 + bpl.s headok + moveq #0,d1 +headok: move.w d1,(a0)+ + dbra d0,fixhead + BASEPTR _prev,a0 + move.w #WSIZE-1,d0 +fixprev: + move.w (a0),d1 + sub.w #WSIZE,d1 + bpl.s prevok + moveq #0,d1 +prevok: move.w d1,(a0)+ + dbra d0,fixprev + TRACE_C #'.' + + move _verbose+INTSIZE-2,d0 + beq checkend + movem.l d2/a2,-(sp) + xref _print_period + jsr _print_period + movem.l (sp)+,d2/a2 + +checkend: ; assert eofile is false + movem.l d2/a2,-(sp) + MOVINT More,-(sp) ; assert More's upper word is zero + move.l Strst,d0 + add.w Look,d0 + add.l Window,d0 + move.l d0,-(sp) + move.l _read_buf,a0 + jsr (a0) ; refill the upper part of the window + addq #4+INTSIZE,sp + movem.l (sp)+,d2/a2 + tst.w d0 + beq.s iseof + cmp.w #EOF,d0 + beq.s iseof + add.w d0,Look + cmp.w #MIN_LOOKAHEAD,Look + blo fw_refill ; eofile is still false + + bra.s fwdone +iseof: move.w #1,eofile +fwdone: movem.l (sp)+,FWREGS + rts + + +; ========================================================================= +; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version. + +;;; xdef _lm_free ; the entry point + +_lm_free: + IFDEF DYN_ALLOC + move.l _window,d0 + beq.s lf_no_window + movem.l d2/a2,-(sp) + move.l d0,-(sp) + jsr _free + addq #4,sp + movem.l (sp)+,d2/a2 + clr.l _window +lf_no_window: + move.l _prev,d0 + beq.s lf_no_prev + movem.l d2/a2,-(sp) + move.l d0,-(sp) + jsr _free + move.l _head,(sp) ; reuse the same stack arg slot + jsr _free + addq #4,sp + movem.l (sp)+,d2/a2 + clr.l _prev + clr.l _head +lf_no_prev: + ENDC + rts + +; ============================================================================ +; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays +; if any, and initializes all variables so that deflate() is ready to go. + +;;; xdef _lm_init ; the entry point + +Level reg d2 +;Window reg a2 ; as in deflate() + +_lm_init: + MOVINT 4(sp),d0 + move.l 4+INTSIZE(sp),a0 + move.w d0,Level + cmp.w #1,Level + blt.s levelerr + bgt.s try9 + bset.b #B_FAST,1(a0) +try9: cmp.w #9,Level + bgt.s levelerr + blt.s levelok + bset.b #B_SLOW,1(a0) + bra.s levelok +levelerr: + pea (level_message,pc) + jsr _error ; never returns +levelok: + clr.w sliding + move.l (_window_size,pc),d0 + bne.s gotawindowsize + move.w #1,sliding + move.l #2*WSIZE,_window_size +gotawindowsize: + + BASEPTR _window,Window + IFDEF DYN_ALLOC + move.l Window,d0 ; fake tst.l + bne.s gotsomewind + CAL_SH WSIZE + move.l d0,Window + move.l d0,_window + bne.s gotsomewind + pea (window_message,pc) + bra error +gotsomewind: + tst.l _prev + bne.s gotsomehead + CAL_SH WSIZE + move.l d0,_prev + beq.s nohead + CAL_SH HASH_SIZE + move.l d0,_head + bne.s gotfreshhead ; newly calloc'd memory is zeroed +nohead: pea (hash_message,pc) +error: MOVINT #ZE_MEM,-(sp) + jsr _ziperr ; never returns +gotsomehead: + ENDC ; DYN_ALLOC + + move.w #(HASH_SIZE/2)-1,d0 ; two shortwords per loop + BASEPTR _head,a0 +wipeh: clr.l (a0)+ + dbra d0,wipeh +gotfreshhead: + move.l Level,d0 + IFEQ Sizeof_config-8 + asl.l #3,d0 + ELSE + mulu #Sizeof_config,d0 + ENDC + lea (config_table,pc),a0 + add.l d0,a0 + move.w Max_lazy(a0),max_lazy_match + move.w Good_length(a0),good_match + move.w Nice_length(a0),nice_match + move.w Max_chain(a0),max_chain_len + CLRINT _strstart + clr.l _block_start + bsr match_init + + clr.w eofile + movem.l d2/a2,-(sp) + MOVINT #WSIZE,-(sp) ; We read only 32K because lookahead is short + move.l Window,-(sp) ; even when int size is long, as if deflate.c + move.l _read_buf,a0 ; were compiled with MAXSEG_64K defined. + jsr (a0) + addq #4+INTSIZE,sp + movem.l (sp)+,d2/a2 + move.w d0,lookahead + beq.s noread + cmp.w #EOF,d0 + bne.s irefill +noread: move.w #1,eofile + clr.w lookahead + bra.s init_done + +irefill: + move.w (lookahead,pc),d0 + cmp.w #MIN_LOOKAHEAD,d0 + bhs.s hashify + bsr _fill_window ; use the C-callable version +hashify: + clr.w ins_h + moveq #MIN_MATCH-2,d0 +hash1: move.b (Window)+,d1 + UP_HASH Level,d1 + dbra d0,hash1 + +init_done: + rts + +; strings for error messages: + IFDEF DYN_ALLOC +hash_message dc.b 'hash table allocation',0 +window_message dc.b 'window allocation',0 + ENDC +level_message dc.b 'bad pack level',0 + + end |