summaryrefslogtreecommitdiff
path: root/amiga/deflate.a
diff options
context:
space:
mode:
Diffstat (limited to 'amiga/deflate.a')
-rw-r--r--amiga/deflate.a1053
1 files changed, 1053 insertions, 0 deletions
diff --git a/amiga/deflate.a b/amiga/deflate.a
new file mode 100644
index 0000000..b21adae
--- /dev/null
+++ b/amiga/deflate.a
@@ -0,0 +1,1053 @@
+;===========================================================================
+; Copyright (c) 1990-1999 Info-ZIP. All rights reserved.
+;
+; See the accompanying file LICENSE, version 1999-Oct-05 or later
+; (the contents of which are also included in zip.h) for terms of use.
+; If, for some reason, both of these files are missing, the Info-ZIP license
+; also may be found at: ftp://ftp.cdrom.com/pub/infozip/license.html
+;===========================================================================
+; This is a 680x0 assembly language translation of the Info-ZIP source file
+; deflate.c, by Paul Kienitz. The function longest_match is based in part
+; on match.a by Carsten Steger, which in turn is partly based on match.s
+; for 386 by Jean-loup Gailly and Kai Uwe Rommel. Mostly, however, this
+; material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko.
+; This code is not commented very much; see deflate.c for comments that explain
+; what the functions are doing.
+;
+; The symbols that can be used to select different versions are as follows:
+;
+; CPU020 if defined, use 68020 instructions always.
+;
+; CPUTEST if defined, check at runtime for CPU type. Another symbol
+; specifying the platform-specific test must be used with this.
+; If neither of these is defined, use 68000 instructions only.
+; Runtime test is nonportable; it is different for each OS.
+;
+; AMIGA use Amiga-specific test for 68020, if CPUTEST defined. Also
+; tells it that registers d0/a0/d1/a1 are not preserved by
+; function calls. At present, if AMIGA is not defined, it
+; causes functions to preserve all registers. ALL OF THIS CODE
+; CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED
+; BY ANY FUNCTIONS THAT IT CALLS.
+;
+; DYN_ALLOC should be defined here if it is defined for C source; tells us
+; that big arrays are allocated instead of static.
+;
+; WSIZE must be defined as the same number used for WSIZE in the C
+; source, and must be a power of two <= 32768. As elsewhere,
+; the default value is 32768.
+;
+; INT16 define this if ints are 16 bits; otherwise 32 bit ints assumed.
+;
+; SMALL_MEM define this if it is defined in the C source; otherwise it uses
+; the MEDIUM_MEM model. BIG_MEM and MMAP are *not* supported.
+; The FULL_SEARCH option in deflate.c is also not supported.
+;
+; DEBUG activates some tracing output, as in the C source.
+;
+; QUADLONG this selects a different version of the innermost longest_match
+; loop code for 68020 operations, comparing bytes four at a time
+; instead of two at a time. It seems to be a tiny bit faster on
+; average, but it's slower often enough that one can't generalize.
+;
+; This code currently assumes that function results are returned in D0 for
+; all platforms. It assumes that args to functions are pushed onto the stack,
+; last arg first. It also currently assumes that all C symbols have an
+; underscore prepended when referenced from assembly.
+
+ IFND CPU020
+ IFND CPUTEST
+CPU000 equ 1
+ ENDC
+ ENDC
+
+; Use these macros for accessing variables of type int:
+ IFD INT16
+MOVINT MACRO
+ move.w \1,\2
+ ENDM
+CLRINT MACRO
+ clr.w \1
+ ENDM
+INTSIZE equ 2
+ ELSE ; !INT16
+MOVINT MACRO
+ move.l \1,\2
+ ENDM
+CLRINT MACRO
+ clr.l \1
+ ENDM
+INTSIZE equ 4
+ ENDC
+
+ IFD DYN_ALLOC
+BASEPTR MACRO
+ move.l \1,\2
+ ENDM
+ ELSE
+BASEPTR MACRO
+ lea \1,\2
+ ENDM
+ ENDC
+
+; constants we use, many of them adjustable:
+
+MAX_MATCH equ 258
+MIN_MATCH equ 3
+TOO_FAR equ 4096
+ IFND WSIZE
+WSIZE equ 32768
+ ENDC
+WMASK equ WSIZE-1
+MAX_DIST equ WSIZE-MAX_MATCH-MIN_MATCH-1
+MIN_LOOKAHEAD equ MAX_MATCH+MIN_MATCH+1
+; IFD BIG_MEM ; NOT supported -- type Pos needs to be 32 bits
+;HASH_BITS equ 15
+; ELSE
+ IFD SMALL_MEM
+HASH_BITS equ 13
+ ELSE
+HASH_BITS equ 14 ; default -- MEDIUM_MEM
+ ENDC
+; ENDC ; BIG_MEM
+HASH_SIZE equ 1<<HASH_BITS
+HASH_MASK equ HASH_SIZE-1
+H_SHIFT equ (HASH_BITS+MIN_MATCH-1)/MIN_MATCH
+B_SLOW equ 1
+B_FAST equ 2
+ZE_MEM equ 4
+EOF equ -1
+
+; struct config is defined by these offsets:
+Good_length equ 0
+Max_lazy equ 2
+Nice_length equ 4
+Max_chain equ 6
+Sizeof_config equ 8
+
+
+; external functions we call:
+ xref _ct_tally ; int ct_tally(int, int)
+ xref _flush_block ; unsigned long F(char *, unsigned long, int)
+ xref _ziperr ; void ziperr(int, char *)
+ xref _error ; void error(char *)
+ xref _calloc ; stdlib function: void *calloc(size_t, size_t)
+ xref _free ; stdlib function: void free(void *)
+ IFD DEBUG
+ xref _fputc ; stdio function: int fputc(int, FILE *)
+ xref _stderr ; pointer to FILE, which we pass to fputc
+ ENDC
+
+; our entry points:
+ xdef _lm_init ; void lm_init(int level, unsigned short *flags)
+ xdef _lm_free ; void lm_free(void)
+ xdef _deflate ; void deflate(void) ...the big one
+ xdef _fill_window ; this line is just for debugging
+
+
+; ============================================================================
+; Here is where we have our global variables.
+
+ section deflatevars,data
+
+; external global variables we reference:
+ xref _verbose ; signed int
+ xref _level ; signed int
+ xref _read_buf ; int (*read_buf)(char *, unsigned int)
+
+; global variables we make available:
+
+ xdef _window
+ xdef _prev
+ xdef _head
+ xdef _window_size
+ xdef _block_start
+ xdef _strstart
+
+ IFD DYN_ALLOC
+_prev: ds.l 1 ; pointer to calloc()'d unsigned short array
+_head: ds.l 1 ; pointer to calloc()'d unsigned short array
+_window: ds.l 1 ; pointer to calloc()'d unsigned char array
+ ELSE ; !DYN_ALLOC
+_prev: ds.w WSIZE ; array of unsigned short
+_head: ds.w HASH_SIZE ; array of unsigned short
+_window: ds.b 2*WSIZE ; array of unsigned char
+ ENDC ; ?DYN_ALLOC
+_window_size: ds.l 1 ; unsigned long
+_block_start: ds.l 1 ; unsigned long
+_strstart: ds.w INTSIZE/2 ; unsigned int
+
+; Now here are our private variables:
+
+ IFD CPUTEST
+is020: ds.w 1 ; bool: CPU type is '020 or higher
+ ENDC
+ins_h: ds.w 1 ; unsigned short
+sliding: ds.w 1 ; bool: the file is read a piece at a time
+eofile: ds.w 1 ; bool: we have read in the end of the file
+max_lazy_match: ds.w 1 ; unsigned short
+lookahead: ds.w 1 ; unsigned short
+
+; These are NOT DECLARED AS STATIC in deflate.c, but currently could be:
+max_chain_len: ds.w 1 ; unsigned short (unsigned int in deflate.c)
+prev_length: ds.w 1 ; unsigned short (unsigned int in deflate.c)
+good_match: ds.w 1 ; unsigned short (unsigned int in deflate.c)
+nice_match: ds.w 1 ; unsigned short (signed int in deflate.c)
+match_start: ds.w 1 ; unsigned short (unsigned int in deflate.c)
+
+; This array of struct config is a constant and could be in the code section:
+config_table: dc.w 0,0,0,0 ; level 0: store uncompressed
+ dc.w 4,4,8,4 ; level 1: fastest, loosest compression
+ dc.w 4,5,16,8 ; level 2
+ dc.w 4,6,32,32 ; level 3: highest to use deflate_fast
+ dc.w 4,4,16,16 ; level 4: lowest to use lazy matches
+ dc.w 8,16,32,32 ; level 5
+ dc.w 8,16,128,128 ; level 6: the default level
+ dc.w 8,32,128,256 ; level 7
+ dc.w 32,128,258,1024 ; level 8
+ dc.w 32,258,258,4096 ; level 9: maximum compression, slow
+
+
+;;CAL_SH MACRO ; macro for calling zcalloc()
+;; IFD INT16
+;; move.w #2,-(sp)
+;; move.w #\1,-(sp)
+;; jsr _zcalloc
+;; addq #4,sp
+;; ELSE
+;; pea 2
+;; pea \1
+;; jsr _zcalloc
+;; addq #8,sp
+;; ENDC
+;; ENDM
+
+CAL_SH MACRO ; Okay, we're back to using regular calloc()...
+ pea 2
+ pea \1
+ jsr _calloc
+ addq #8,sp
+ ENDM
+
+; ============================================================================
+; And here we begin our functions. match_init is for internal use only:
+
+ section deflate,code
+
+match_init:
+ IFD CPUTEST ; now check for platform type
+ IFD AMIGA ; Amiga specific test for '020 CPU:
+ xref _SysBase
+ NOLIST
+ INCLUDE 'exec/execbase.i'
+ LIST
+
+ clr.w is020 ; default value is 68000
+ move.l _SysBase,a0
+ btst #AFB_68020,AttnFlags+1(a0)
+ beq.s cheap
+ move.w #1,is020
+cheap:
+ ELSE ; !AMIGA
+
+ FAIL Write an '020-detector for your system here!
+; On the Macintosh, I believe GetEnvironment() provides the information.
+
+ ENDC ; AMIGA
+ ENDC ; CPUTEST
+ rts ; match_init consists only of rts if CPUTEST unset
+
+
+; ============================================================================
+; Here is longest_match(), the function that the rest of this was built up
+; from, the hottest hot spot in the program and therefore the most heavily
+; optimized. It has two different versions, one for '020 and higher CPUs, and
+; one for 68000/68010. It can test at runtime which version to use if you
+; create a test function in match_init for your platform. Currently such a
+; test is implemented for the Amiga. It can also be assembled to use '000 or
+; '020 code only.
+
+Cur_Match equr d0 ; unsigned int, kept valid as long
+Best_Len equr d1 ; unsigned int, kept valid as long
+Scan_Start equr d3 ; pair of bytes
+Scan_End equr d4 ; pair of bytes
+Limit equr d5 ; unsigned int
+Chain_Length equr d6 ; unsigned int
+Scan_Test equr d7 ; counter, pair of bytes sometimes
+Scan equr a0 ; pointer to unsigned char
+Match equr a1 ; pointer to unsigned char
+Prev_Address equr a2 ; pointer to unsigned short
+Scan_Ini equr a3 ; pointer to unsigned char
+Match_Ini equr a5 ; pointer to unsigned char
+; Note: "pair of bytes" means the two low order bytes of the register in
+; 68020 code, but means the lowest and third lowest bytes on the 68000.
+ IFD AMIGA
+SAVEREGS reg d3-d7/a2/a3/a5 ; don't protect d0/d1/a0/a1
+ ELSE ; !AMIGA
+SAVEREGS reg d1/d3-d7/a0-a3/a5 ; protect all except d0 return val
+ ENDC ; ?AMIGA
+; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model
+
+
+longest_match:
+ movem.l SAVEREGS,-(sp)
+
+; setup steps common to byte and word versions:
+ IFD INT16
+ and.l #$0000FFFF,Cur_Match ; upper half must be zero!
+; we use an and.l down here for the sake of ATSIGN/REGARGS.
+ moveq #0,Limit ; so adding to Scan_Ini works
+ ENDC
+ move.w max_chain_len,Chain_Length
+ move.w prev_length,Best_Len
+ MOVINT _strstart,Limit
+ BASEPTR _prev,Prev_Address
+ BASEPTR _window,Match_Ini
+ move.l Match_Ini,Scan_Ini
+ addq #MIN_MATCH,Match_Ini ; optimizes inner loop
+ add.l Limit,Scan_Ini
+ sub.w #MAX_DIST,Limit
+ bhi.s limit_ok
+ moveq #0,Limit
+limit_ok:
+ cmp.w good_match,Best_Len
+ blo.s length_ok
+ lsr.w #2,Chain_Length
+length_ok:
+ subq.w #1,Chain_Length
+
+ IFD CPUTEST
+ tst.w is020 ; can we use '020 stuff today?
+ bne WORD_match
+ ENDC
+
+ IFND CPU020
+
+; for 68000 or 68010, use byte operations:
+ moveq #0,Scan_Start ; clear 2nd & 4th bytes, use 1st & 3rd
+ moveq #0,Scan_End ; likewise
+ moveq #0,Scan_Test ; likewise
+ move.b (Scan_Ini),Scan_Start
+ swap Scan_Start ; swap is faster than 8 bit shift
+ move.b 1(Scan_Ini),Scan_Start
+ move.b -1(Scan_Ini,Best_Len.w),Scan_End
+ swap Scan_End
+ move.b 0(Scan_Ini,Best_Len.w),Scan_End
+ bra.s bdo_scan
+
+blong_loop:
+ move.b -1(Scan_Ini,Best_Len.w),Scan_End
+ swap Scan_End
+ move.b 0(Scan_Ini,Best_Len.w),Scan_End
+
+bshort_loop:
+ add.w Cur_Match,Cur_Match ; assert value before doubling < 32K
+ IFNE 32768-WSIZE
+ and.w #(WMASK*2),Cur_Match
+ ENDC
+ move.w (Prev_Address,Cur_Match.l),Cur_Match
+ cmp.w Limit,Cur_Match
+ dbls Chain_Length,bdo_scan
+ bra return
+
+bdo_scan:
+ move.l Match_Ini,Match
+ add.l Cur_Match,Match
+ move.b -MIN_MATCH-1(Match,Best_Len.w),Scan_Test
+ swap Scan_Test
+ move.b -MIN_MATCH(Match,Best_Len.w),Scan_Test
+ cmp.l Scan_Test,Scan_End
+ bne.s bshort_loop
+ move.b -MIN_MATCH(Match),Scan_Test
+ swap Scan_Test
+ move.b -MIN_MATCH+1(Match),Scan_Test
+ cmp.l Scan_Test,Scan_Start
+ bne.s bshort_loop
+ move.w #(MAX_MATCH-3),Scan_Test
+ lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
+
+bscan_loop:
+ cmp.b (Match)+,(Scan)+
+ dbne Scan_Test,bscan_loop
+ subq #1,Scan
+
+ sub.l Scan_Ini,Scan ; assert difference is 16 bits
+ cmp.w Best_Len,Scan
+ bls.s bshort_loop
+ MOVINT Scan,Best_Len
+ move.w Cur_Match,match_start
+ cmp.w nice_match,Best_Len
+ blo.s blong_loop
+ IFD CPUTEST
+ bra return
+ ENDC
+
+ ENDC ; !CPU020
+
+ IFND CPU000
+ MACHINE MC68020
+
+; for 68020 or higher, use word operations even on odd addresses:
+WORD_match:
+ move.w (Scan_Ini),Scan_Start
+ move.w -1(Scan_Ini,Best_Len.w),Scan_End
+ bra.s wdo_scan
+
+wlong_loop:
+ move.w -1(Scan_Ini,Best_Len.w),Scan_End
+
+wshort_loop:
+ and.w #WMASK,Cur_Match
+ move.w (Prev_Address,Cur_Match.w*2),Cur_Match ; '020 addressing mode
+ cmp.w Limit,Cur_Match
+ dbls Chain_Length,wdo_scan
+ bra.s return
+
+wdo_scan:
+ move.l Match_Ini,Match
+ add.l Cur_Match,Match
+ cmp.w -MIN_MATCH-1(Match,Best_Len.w),Scan_End
+ bne.s wshort_loop
+ cmp.w -MIN_MATCH(Match),Scan_Start
+ bne.s wshort_loop
+ IFD QUADLONG
+; By some measurements, this version of the code is a little tiny bit faster.
+; But on some files it's slower. It probably pays off only when there are
+; long match strings, and costs in the most common case of three-byte matches.
+ moveq #((MAX_MATCH-MIN_MATCH)/16),Scan_Test ; value = 15
+ lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
+
+wscan_loop:
+ cmp.l (Match)+,(Scan)+ ; test four bytes at a time
+ bne.s odd
+ cmp.l (Match)+,(Scan)+
+ bne.s odd
+ cmp.l (Match)+,(Scan)+
+ bne.s odd
+ cmp.l (Match)+,(Scan)+
+ dbne Scan_Test,wscan_loop ; '020 can cache a bigger loop
+odd:
+ subq #4,Scan
+ subq #4,Match
+ cmp.b (Match)+,(Scan)+ ; find good bytes in bad longword
+ bne.s even
+ cmp.b (Match)+,(Scan)+
+ bne.s even
+ cmp.b (Match)+,(Scan)+
+ beq.s steven
+even: subq #1,Scan
+ ELSE ; !QUADLONG
+ moveq #((MAX_MATCH-MIN_MATCH)/2),Scan_Test ; value = 127
+ lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
+
+wscan_loop:
+ cmp.w (Match)+,(Scan)+
+ dbne Scan_Test,wscan_loop
+ subq #2,Scan
+ move.b -2(Match),Scan_Test
+ cmp.b (Scan),Scan_Test
+ bne.s steven
+ addq #1,Scan
+ ENDC ; ?QUADLONG
+steven:
+ sub.l Scan_Ini,Scan ; assert: difference is 16 bits
+ cmp.w Best_Len,Scan
+ bls.s wshort_loop
+ MOVINT Scan,Best_Len
+ move.w Cur_Match,match_start
+ cmp.w nice_match,Best_Len
+ blo.s wlong_loop
+
+ MACHINE MC68000
+ ENDC ; !CPU000
+
+return:
+ MOVINT Best_Len,d0 ; return value (upper half should be clear)
+ movem.l (sp)+,SAVEREGS
+ rts
+
+
+; =============================================================================
+; This is the deflate() function itself, our main entry point. It calls
+; longest_match, above, and some outside functions. It is a hot spot, but not
+; as hot as longest_match. It uses no special '020 code.
+
+; ================== Several macros used in deflate() and later functions:
+
+; Arg 1 is D-reg that new ins_h value is to be left in,
+; arg 2 is the byte value to be hashed into it, which must not be the same reg
+UP_HASH MACRO
+ move.w ins_h,\1
+ asl.w #H_SHIFT,\1
+ eor.b \2,\1
+ and.w #HASH_MASK,\1 ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK
+ move.w \1,ins_h ; ins_h = that
+ ENDM
+
+; Arg 1 is scratch A, arg 2 is scratch D
+IN_STR MACRO
+ move.l Strst,\2
+ addq.w #MIN_MATCH-1,\2
+ move.b (Window,\2.l),\2 ; window[strstart + MIN_MATCH - 1]
+ UP_HASH Head,\2
+ add.l Head,Head ; assert upper word is zero before add
+ BASEPTR _head,\1
+ add.l Head,\1
+ move.w (\1),Head ; hash_head = head[ins_h]
+ move.w Strst,(\1) ; head[ins_h] = strstart
+ move.l Strst,\2
+ IFNE WSIZE-32768
+ and.w #WMASK,\2
+ ENDC
+ add.w \2,\2 ; masks implicitly when WSIZE == 32768
+ move.w Head,(Prev,\2.l) ; prev[str_start & WMASK] = hash_head
+ ENDM
+
+; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1
+FLUSH_B MACRO
+ IFC '\1','#0'
+ CLRINT -(sp)
+ ELSE
+ MOVINT \1,-(sp)
+ ENDC
+ move.l _block_start,d0
+ blt.s nenu\@
+ move.l Window,a0
+ add.l d0,a0
+ bra.s nun\@
+nenu\@: sub.l a0,a0 ; if block_start < 0, push NULL
+nun\@: sub.l Strst,d0
+ neg.l d0
+ move.l d0,-(sp)
+ move.l a0,-(sp)
+ jsr _flush_block
+ lea 8+INTSIZE(sp),sp
+ ENDM
+
+; This expands to nothing unless DEBUG is defined.
+; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int
+TRACE_C MACRO
+ IFD DEBUG
+ cmp.w #1,_verbose+INTSIZE-2 ; test lower word only
+ ble.s qui\@
+ IFNC '\1','d0'
+ moveq #0,d0
+ move.b \1,d0
+ ENDC
+ move.l _stderr,-(sp)
+ MOVINT d0,-(sp)
+ jsr _fputc
+ addq #4+INTSIZE,sp
+qui\@:
+ ENDC ; DEBUG
+ ENDM
+
+; ================== Here are the register vars we use, and deflate() itself:
+
+Window equr a2 ; cached address of window[]
+Prev equr a3 ; cached address of prev[]
+Strst equr d7 ; strstart cached as a longword
+Look equr d6 ; lookahead cached as short
+Head equr d5 ; local variable hash_head, short
+PrevL equr d4 ; prev_length cached as short
+MatchL equr d3 ; local variable match_length, unsigned short
+Avail equr d2 ; local variable available_match, bool
+PrevM equr a5 ; local variable prev_match, int in an A-reg
+
+ IFD AMIGA
+DEFREGS reg d2-d7/a2/a3/a5
+ ELSE
+DEFREGS reg d0-d7/a0/a2/a3/a5 ; play it safe, preserve all regs
+ ENDC
+
+
+_deflate: ; first, setup steps common to deflate and deflate_fast:
+ movem.l DEFREGS,-(sp)
+ IFD INT16
+ moveq #0,Strst ; make sure strstart is valid as a long
+ ENDC
+ moveq #0,Head ; ditto for hash_head
+ MOVINT _strstart,Strst
+ move.w lookahead,Look
+ move.w prev_length,PrevL
+ BASEPTR _window,Window
+ BASEPTR _prev,Prev
+ MOVINT _level,d0
+ cmp.w #3,d0
+ ble deflate_fast
+ moveq #MIN_MATCH-1,MatchL
+ moveq #0,Avail
+
+look_loop:
+ tst.w Look
+ beq last_tally
+ IN_STR a0,d0
+ move.w MatchL,PrevL
+ move.w match_start,PrevM
+ move.w #MIN_MATCH-1,MatchL
+
+ tst.w Head
+ beq.s no_new_match
+ cmp.w max_lazy_match,PrevL
+ bhs.s no_new_match
+ move.w Strst,d0
+ sub.w Head,d0
+ cmp.w #MAX_DIST,d0
+ bhi.s no_new_match
+ move.w PrevL,prev_length ; longest_match reads these variables
+ MOVINT Strst,_strstart
+ MOVINT Head,d0 ; parm for longest_match
+ bsr longest_match ; sets match_start
+ cmp.w Look,d0 ; does length exceed valid data?
+ bls.s stml
+ move.w Look,d0
+stml: move.w d0,MatchL ; valid length of match
+ cmp.w #MIN_MATCH,MatchL ; is the match only three bytes?
+ bne.s no_new_match
+ move.w match_start,d0
+ sub.w Strst,d0
+ cmp.w #-TOO_FAR,d0
+ bge.s no_new_match
+ moveq #MIN_MATCH-1,MatchL ; mark the current match as no good
+
+no_new_match:
+ cmp.w #MIN_MATCH,PrevL
+ blo literal
+ cmp.w MatchL,PrevL
+ blo literal
+ ; CHECK_MATCH Strst-1,PrevM,PrevL
+ MOVINT Strst,_strstart ; ct_tally reads this variable
+ move.l PrevL,d0
+ subq.w #MIN_MATCH,d0
+ MOVINT d0,-(sp)
+ move.l Strst,d0
+ sub.w PrevM,d0
+ subq.w #1,d0
+ MOVINT d0,-(sp)
+ jsr _ct_tally ; sets d0 true if we have to flush
+ addq #2*INTSIZE,sp
+ subq.w #3,PrevL ; convert for dbra (prev_length - 2)
+ sub.w PrevL,Look
+ subq.w #2,Look
+insertmatch:
+ addq.w #1,Strst
+ IN_STR a0,d1 ; don't clobber d0
+ dbra PrevL,insertmatch
+ moveq #0,Avail
+ moveq #0,PrevL ; not needed?
+ moveq #MIN_MATCH-1,MatchL
+ addq.w #1,Strst
+ tst.w d0
+ beq refill
+ FLUSH_B #0
+ move.l Strst,_block_start
+ bra.s refill
+
+literal:
+ tst.w Avail
+ bne.s yeslit
+ moveq #1,Avail
+ bra.s skipliteral
+yeslit: TRACE_C <-1(Window,Strst.l)>
+ MOVINT Strst,_strstart ; ct_tally reads this variable
+ moveq #0,d0
+ move.b -1(Window,Strst.l),d0
+ MOVINT d0,-(sp)
+ CLRINT -(sp)
+ jsr _ct_tally
+ addq #2*INTSIZE,sp
+ tst.w d0
+ beq.s skipliteral
+ FLUSH_B #0
+ move.l Strst,_block_start
+skipliteral:
+ addq.w #1,Strst
+ subq.w #1,Look
+
+refill:
+ cmp.w #MIN_LOOKAHEAD,Look
+ bhs look_loop
+ bsr fill_window
+ bra look_loop
+
+last_tally:
+ tst.w Avail
+ beq last_flush
+ MOVINT Strst,_strstart ; ct_tally reads this variable
+ moveq #0,d0
+ move.b -1(Window,Strst.l),d0
+ MOVINT d0,-(sp)
+ CLRINT -(sp)
+ jsr _ct_tally
+ addq #2*INTSIZE,sp
+last_flush:
+ FLUSH_B #1
+ bra deflate_exit
+
+; ================== This is another version used for low compression levels:
+
+deflate_fast:
+ moveq #0,MatchL
+ moveq #MIN_MATCH-1,PrevL
+flook_loop:
+ tst.w Look
+ beq flast_flush
+
+ IN_STR a0,d0
+ tst.w Head
+ beq.s fno_new_match
+ move.w Strst,d0
+ sub.w Head,d0
+ cmp.w #MAX_DIST,d0
+ bhi.s fno_new_match
+ move.w PrevL,prev_length ; longest_match reads these variables
+ MOVINT Strst,_strstart
+ MOVINT Head,d0 ; parm for longest_match
+ bsr longest_match ; sets match_start
+ cmp.w Look,d0 ; does length exceed valid data?
+ bls.s fstml
+ move.w Look,d0
+fstml: move.w d0,MatchL ; valid length of match
+
+fno_new_match:
+ cmp.w #MIN_MATCH,MatchL
+ blo fliteral
+ ; CHECK_MATCH Strst,match_start,MatchL
+ MOVINT Strst,_strstart ; ct_tally reads this variable
+ move.l MatchL,d0
+ subq.w #MIN_MATCH,d0
+ MOVINT d0,-(sp)
+ move.l Strst,d0
+ sub.w match_start,d0
+ MOVINT d0,-(sp)
+ jsr _ct_tally ; sets d0 true if we have to flush
+ addq #2*INTSIZE,sp
+ sub.w MatchL,Look
+ cmp.w max_lazy_match,MatchL
+ bhi ftoolong
+ subq.w #2,MatchL
+finsertmatch:
+ addq.w #1,Strst
+ IN_STR a0,d1 ; preserve d0
+ dbra MatchL,finsertmatch
+ moveq #0,MatchL ; not needed?
+ addq.w #1,Strst
+ bra.s flushfill
+
+ftoolong:
+ add.w MatchL,Strst
+ moveq #0,MatchL
+ moveq #0,d1 ; preserve d0
+ move.b (Window,Strst.l),d1
+ move.w d1,ins_h
+; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH...
+ move.b 1(Window,Strst.l),Avail ; Avail is not used in deflate_fast
+ UP_HASH d1,Avail ; preserve d0
+ IFNE MIN_MATCH-3
+ FAIL needs to UP_HASH another MIN_MATCH-3 times, but with what arg?
+ ENDC
+ bra.s flushfill
+
+fliteral:
+ TRACE_C <(Window,Strst.l)>
+ MOVINT Strst,_strstart ; ct_tally reads this variable
+ moveq #0,d0
+ move.b (Window,Strst.l),d0
+ MOVINT d0,-(sp)
+ CLRINT -(sp)
+ jsr _ct_tally ; d0 set if we need to flush
+ addq #2*INTSIZE,sp
+ addq.w #1,Strst
+ subq.w #1,Look
+
+flushfill:
+ tst.w d0
+ beq.s frefill
+ FLUSH_B #0
+ move.l Strst,_block_start
+frefill:
+ cmp.w #MIN_LOOKAHEAD,Look
+ bhs flook_loop
+ bsr fill_window
+ bra flook_loop
+
+flast_flush:
+ FLUSH_B #1 ; sets our return value
+
+deflate_exit:
+ MOVINT Strst,_strstart ; save back cached values
+ move.w PrevL,prev_length
+ move.w Look,lookahead
+ movem.l (sp)+,DEFREGS
+ rts
+
+
+; =========================================================================
+; void fill_window(void) calls the input function to refill the sliding
+; window that we use to find substring matches in.
+
+More equr Head ; local variable in fill_window
+WindTop equr Prev ; local variable used for sliding
+SlidIx equr PrevL ; local variable used for sliding
+
+ IFD AMIGA
+FWREGS reg d2-d5/a2-a6 ; does NOT include Look and Strst
+ ELSE
+FWREGS reg d1-d5/a0-a6 ; likewise
+ ENDC
+; all registers available to be clobbered by the sliding operation:
+; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7.
+SPAREGS reg d0-d3/a0-a1/a5-a6
+SPCOUNT equ 8 ; number of registers in SPAREGS
+
+
+_fill_window: ; C-callable entry point
+ movem.l Strst/Look,-(sp)
+ IFD INT16
+ moveq #0,Strst ; Strst must be valid as a long
+ ENDC
+ MOVINT _strstart,Strst
+ move.w lookahead,Look
+ BASEPTR _window,Window
+ bsr.s fill_window
+ MOVINT Strst,_strstart
+ move.w Look,lookahead
+ movem.l (sp)+,Strst/Look
+ rts
+
+; strstart, lookahead, and window must be cached in Strst, Look, and Window:
+fill_window: ; asm-callable entry point
+ movem.l FWREGS,-(sp)
+ tst.w eofile ; we put this up here for speed
+ bne fwdone
+ and.l #$FFFF,Look ; make sure Look is valid as long
+fw_refill:
+ move.l _window_size,More ; <= 64K
+ sub.l Look,More
+ sub.l Strst,More ; Strst is already valid as long
+ cmp.w #EOF,More
+ bne.s notboundary
+ subq.w #1,More
+ bra checkend
+
+notboundary:
+ tst.w sliding
+ beq checkend
+ cmp.w #WSIZE+MAX_DIST,Strst
+ blo checkend
+ IFGT 32768-WSIZE
+ lea WSIZE(Window),WindTop ; WindTop is aligned when Window is
+ ELSE
+ move.l Window,WindTop
+ add.l #WSIZE,WindTop
+ ENDC
+ move.l Window,d0
+ and.w #3,d0
+ beq.s isaligned
+ subq.w #1,d0
+align: move.b (WindTop)+,(Window)+ ; copy up to a longword boundary
+ dbra d0,align
+isaligned:
+; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop:
+ move.w #(WSIZE-1)/(4*SPCOUNT),SlidIx
+slide: movem.l (WindTop)+,SPAREGS ; copy, 32 bytes at a time!
+ movem.l SPAREGS,(Window) ; a slight overshoot doesn't matter.
+ lea 4*SPCOUNT(Window),Window ; can't use (aN)+ as movem.l dest
+ dbra SlidIx,slide
+ BASEPTR _window,Window ; restore cached value
+ sub.w #WSIZE,match_start
+ sub.w #WSIZE,Strst
+ sub.l #WSIZE,_block_start
+ add.w #WSIZE,More
+ BASEPTR _head,a0
+ move.w #HASH_SIZE-1,d0
+fixhead:
+ move.w (a0),d1
+ sub.w #WSIZE,d1
+ bpl.s headok
+ moveq #0,d1
+headok: move.w d1,(a0)+
+ dbra d0,fixhead
+ BASEPTR _prev,a0
+ move.w #WSIZE-1,d0
+fixprev:
+ move.w (a0),d1
+ sub.w #WSIZE,d1
+ bpl.s prevok
+ moveq #0,d1
+prevok: move.w d1,(a0)+
+ dbra d0,fixprev
+ TRACE_C #'.'
+
+checkend: ; assert eofile is false
+ MOVINT More,-(sp) ; assert More's upper word is zero
+ move.l Strst,d0
+ add.w Look,d0
+ add.l Window,d0
+ move.l d0,-(sp)
+ move.l _read_buf,a0
+ jsr (a0) ; refill the upper part of the window
+ addq #4+INTSIZE,sp
+ tst.w d0
+ beq.s iseof
+ cmp.w #EOF,d0
+ beq.s iseof
+ add.w d0,Look
+ cmp.w #MIN_LOOKAHEAD,Look
+ blo fw_refill ; eofile is still false
+
+ bra.s fwdone
+iseof: move.w #1,eofile
+fwdone: movem.l (sp)+,FWREGS
+ rts
+
+
+; =========================================================================
+; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version.
+
+ xdef _lm_free ; the entry point
+
+_lm_free:
+ IFD DYN_ALLOC
+ move.l _window,d0
+ beq.s lf_no_window
+ move.l d0,-(sp)
+ jsr _free
+ addq #4,sp
+ clr.l _window
+lf_no_window:
+ move.l _prev,d0
+ beq.s lf_no_prev
+ move.l d0,-(sp)
+ jsr _free
+ move.l _head,(sp) ; reuse the same stack arg slot
+ jsr _free
+ addq #4,sp
+ clr.l _prev
+ clr.l _head
+lf_no_prev:
+ ENDC
+ rts
+
+; ============================================================================
+; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays
+; if any, and initializes all variables so that deflate() is ready to go.
+
+ xdef _lm_init ; the entry point
+
+Level equr d2
+;Window equr a2 ; as in deflate()
+ IFD AMIGA
+INIREGS reg d2/a2
+ ELSE
+INIREGS reg d0-d2/a0-a1
+ ENDC
+
+_lm_init:
+ MOVINT 4(sp),d0
+ move.l 4+INTSIZE(sp),a0
+ movem.l INIREGS,-(sp)
+ move.w d0,Level
+ cmp.w #1,Level
+ blt.s levelerr
+ bgt.s try9
+ bset.b #B_FAST,1(a0)
+try9: cmp.w #9,Level
+ bgt.s levelerr
+ blt.s levelok
+ bset.b #B_SLOW,1(a0)
+ bra.s levelok
+levelerr:
+ pea level_message
+ jsr _error ; never returns
+levelok:
+ clr.w sliding
+ tst.l _window_size
+ bne.s gotawindowsize
+ move.w #1,sliding
+ move.l #2*WSIZE,_window_size
+gotawindowsize:
+
+ BASEPTR _window,Window
+ IFD DYN_ALLOC
+ move.l Window,d0 ; fake tst.l
+ bne.s gotsomewind
+ CAL_SH WSIZE
+ move.l d0,Window
+ move.l d0,_window
+ bne.s gotsomewind
+ pea window_message
+ MOVINT #ZE_MEM,-(sp)
+ jsr _ziperr ; never returns
+gotsomewind:
+ tst.l _prev
+ bne.s gotsomehead
+ CAL_SH WSIZE
+ move.l d0,_prev
+ beq.s nohead
+ CAL_SH HASH_SIZE
+ move.l d0,_head
+ bne.s gotfreshhead ; newly calloc'd memory is zeroed
+nohead: pea hash_message
+ MOVINT #ZE_MEM,-(sp)
+ jsr _ziperr ; never returns
+gotsomehead:
+ ENDC ; DYN_ALLOC
+
+ move.w #(HASH_SIZE/2)-1,d0 ; two shortwords per loop
+ BASEPTR _head,a0
+wipeh: clr.l (a0)+
+ dbra d0,wipeh
+gotfreshhead:
+ move.l Level,d0
+ IFEQ Sizeof_config-8
+ asl.l #3,d0
+ ELSE
+ mulu #Sizeof_config,d0
+ ENDC
+ lea config_table,a0
+ add.l d0,a0
+ move.w Max_lazy(a0),max_lazy_match
+ move.w Good_length(a0),good_match
+ move.w Nice_length(a0),nice_match
+ move.w Max_chain(a0),max_chain_len
+ CLRINT _strstart
+ clr.l _block_start
+ bsr match_init
+
+ clr.w eofile
+ MOVINT #WSIZE,-(sp) ; We read only 32K because lookahead is short
+ move.l Window,-(sp) ; even when int size is long, as if deflate.c
+ move.l _read_buf,a0 ; were compiled with MAXSEG_64K defined.
+ jsr (a0)
+ addq #4+INTSIZE,sp
+ move.w d0,lookahead
+ beq.s noread
+ cmp.w #EOF,d0
+ bne.s irefill
+noread: move.w #1,eofile
+ clr.w lookahead
+ bra.s init_done
+
+irefill:
+ move.w lookahead,d0
+ cmp.w #MIN_LOOKAHEAD,d0
+ bhs.s hashify
+ bsr _fill_window ; use the C-callable version
+hashify:
+ clr.w ins_h
+ moveq #MIN_MATCH-2,d0
+hash1: move.b (Window)+,d1
+ UP_HASH Level,d1
+ dbra d0,hash1
+
+init_done:
+ movem.l (sp)+,INIREGS
+ rts
+
+; strings for error messages:
+hash_message dc.b 'hash table allocation',0
+window_message dc.b 'window allocation',0
+level_message dc.b 'bad pack level',0
+
+ end