summaryrefslogtreecommitdiff
path: root/vp8/decoder/arm/neon/dboolhuff_neon.asm
blob: 01315a40e6077574631654caad1e27b4b860f3d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
;
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license 
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may 
;  be found in the AUTHORS file in the root of the source tree.
;


    ; Entry points made visible to the linker.
    EXPORT  |vp8_decode_value_neon|
    EXPORT  |vp8dx_start_decode_neon|
    EXPORT  |vp8dx_stop_decode_neon|
    EXPORT  |vp8dx_decode_bool_neon|

    ARM                     ; assemble as ARM (32-bit) instructions, not Thumb
    REQUIRE8                ; this file requires 8-byte stack alignment
    PRESERVE8               ; this file declares that it preserves 8-byte stack alignment

    ; The bool_decoder_* structure offsets used by the routines below are
    ; not defined in this file; presumably they come from this include
    ; (TODO confirm).
    INCLUDE vpx_asm_offsets.asm

    AREA    |.text|, CODE, READONLY  ; name this block of code

;-----------------------------------------------------------------------
; int vp8_decode_value_neon ( BOOL_DECODER *br, int bits )
;
; Reads `bits` booleans at probability 0x80 and packs them into an
; integer, first boolean in the highest position.  C equivalent:
;
;   int z = 0;
;   int bit;
;   for ( bit=bits-1; bit>=0; bit-- )
;       z |= (vp8dx_decode_bool(br, 0x80)<<bit);
;   return z;
;
; In:   r0 = br, r1 = bits
; Out:  r0 = z
; Regs: r4 = br, r5 = bit index, r6 = z.  These must survive the call to
;       the bool decoder, so they are saved here together with lr.
;-----------------------------------------------------------------------
|vp8_decode_value_neon| PROC
    stmdb   sp!, {r4 - r6, lr}

    mov     r6, #0                          ; z = 0
    mov     r4, r0                          ; keep br across the calls
    mov     r5, r1                          ; r5 = bits; the test below turns it into bit
    b       decode_value_test

decode_value_next
    mov     r0, r4                          ; arg0 = br
    mov     r1, #0x80                       ; arg1 = probability
    bl      vp8dx_decode_bool_neon_internal       ; needed for conversion to s file
    orr     r6, r6, r0, lsl r5              ; z |= decoded bit << bit

decode_value_test
    subs    r5, r5, #1                      ; bit--
    bpl     decode_value_next               ; keep going while bit >= 0

    mov     r0, r6                          ; return z
    ldmia   sp!, {r4 - r6, pc}
    ENDP    ; |vp8_decode_value_neon|


;-----------------------------------------------------------------------
; void vp8dx_start_decode_neon ( BOOL_DECODER *br, unsigned char *source )
;
; Initialises the boolean decoder state for a new buffer:
;   br->lowvalue = 0
;   br->range    = 255
;   br->buffer   = source
;   br->count    = 8
;   br->pos      = 4
;   br->value    = source[0]<<24 | source[1]<<16 | source[2]<<8 | source[3]
;
; In:    r0 = br, r1 = source (its first four bytes are read unconditionally)
; Out:   none (r0 is left unchanged)
; Clobb: r1-r3, r12
; Stack: none.  Only caller-saved scratch registers are used, so nothing
;        is pushed and sp stays 8-byte aligned throughout.
;-----------------------------------------------------------------------
|vp8dx_start_decode_neon| PROC
    mov     r2, #0
    mov     r3, #255

    str     r2, [r0, #bool_decoder_lowvalue]    ; lowvalue = 0
    str     r3, [r0, #bool_decoder_range]       ; range = 255
    str     r1, [r0, #bool_decoder_buffer]      ; buffer = source

    mov     r3, #8
    mov     r2, #4
    str     r3, [r0, #bool_decoder_count]       ; count = 8
    str     r2, [r0, #bool_decoder_pos]         ; pos = 4 (four bytes consumed below)

    ; Assemble the first four bytes big-endian, one byte at a time so the
    ; source pointer needs no particular alignment.
    ldrb    r2, [r1, #3]
    ldrb    r3, [r1, #2]
    ldrb    r12, [r1, #1]
    ldrb    r1, [r1]                            ; last use of source; r1 = source[0]

    orr     r2, r2, r3, lsl #8
    orr     r2, r2, r12, lsl #16
    orr     r2, r2, r1, lsl #24

    str     r2, [r0, #bool_decoder_value]

    bx      lr
    ENDP    ; |vp8dx_start_decode_neon|


;-----------------------------------------------------------------------
; void vp8dx_stop_decode_neon ( BOOL_DECODER *bc );
;
; Nothing to tear down: returns immediately without reading bc and
; without touching any register or memory.
;
; The return uses BX so that the caller's instruction-set state is taken
; from bit 0 of lr (interworking return).
;-----------------------------------------------------------------------
|vp8dx_stop_decode_neon| PROC
    bx      lr
    ENDP    ; |vp8dx_stop_decode_neon|


;-----------------------------------------------------------------------
; int vp8dx_decode_bool_neon ( BOOL_DECODER *br, int probability )
;
; Decodes one boolean from the stream described by br, updates
; br->range / br->value, and renormalises the state (shifting in a new
; byte from br->buffer when needed) whenever range falls below 0x80.
;
; In:   r0 = br, r1 = probability
; Out:  r0 = decoded bit (0 or 1)
;
; Register use:
;   r0   br on entry, then the decoded bit
;   r1   probability; later &br->buffer[br->pos]; finally the -count shift
;   r2   range
;   r3   value
;   r4   split; later count
;   r5   value - bigsplit (bigsplit = split << 24); later pos
;   r12  constant 1; later shift; later the byte fetched from the buffer
;   lr   copy of br (the return address is restored from the stack)
;
; vp8dx_decode_bool_neon_internal is a second label on the same entry
; point; vp8_decode_value_neon calls the routine through it.
;-----------------------------------------------------------------------
|vp8dx_decode_bool_neon| PROC
vp8dx_decode_bool_neon_internal
; Original author's note: LDRD and STRD doubleword data transfers must be
; eight-byte aligned.  Use ALIGN 8 before memory allocation.
; NOTE(review): three words are pushed here, so sp is only 4-byte aligned
; inside this routine.  That looks harmless while the routine stays a
; leaf, but confirm it is acceptable given the file's PRESERVE8.
    stmdb   sp!, {r4 - r5, lr}

    ldr     r2, [r0, #bool_decoder_range]       ;r2 = br->range
    ldr     r3, [r0, #bool_decoder_value]       ;r3 = br->value
    ;ldrd   r2, r3, [r0, #bool_decoder_range]   ;ldrd costs 2 cycles

    ; r4 = (range-1) << 8, so that SMLAWB's ">> 16" leaves a net ">> 8".
    mov     r4, r2, lsl #8
    sub     r4, r4, #256
    mov     r12, #1

    ; SMLAWB: r4 = ((r4 * bottom halfword of r1) >> 16) + r12
    smlawb  r4, r4, r1, r12         ;split = 1 +  (((range-1) * probability) >> 8)

    mov     lr, r0                  ;lr = br, freeing r0 for the result
    mov     r0, #0                  ;bit = 0

    ; r5 = value - bigsplit.  HS (no borrow) means value >= bigsplit as
    ; unsigned numbers; the four conditional instructions below all use
    ; the flags set here.
    subs    r5, r3, r4, lsl #24

    subhs   r2, r2, r4              ;range = br->range-split
    movlo   r2, r4                  ;range = split
    movhs   r0, #1                  ;bit = 1
    movhs   r3, r5                  ;value = value-bigsplit

    cmp     r2, #0x80
    blt     range_less_0x80         ;range < 0x80: renormalise first
    ; Fast path: write range (r2) and the word after it (r3 = value) in one
    ; store.  This assumes bool_decoder_value == bool_decoder_range + 4;
    ; the offsets are defined outside this file.
    strd    r2, r3, [lr, #bool_decoder_range]   ;store result

    ldmia   sp!, {r4 - r5, pc}

range_less_0x80

    ; r4 = count, r5 = the word after it, used as pos (assumes
    ; bool_decoder_pos == bool_decoder_count + 4).
    ldrd    r4, r5, [lr, #bool_decoder_count]   ;load count (r4) and pos (r5)
    ldr     r1, [lr, #bool_decoder_buffer]      ;r1 = br->buffer

    clz     r12, r2
    add     r1, r1, r5              ;r1 = &br->buffer[br->pos]

    sub     r12, r12, #24           ;shift = clz(range) - 24
    ; The flags from this SUBS select every "le" instruction below; the
    ; two plain MOVs in between leave the flags untouched.
    subs    r4, r4, r12             ;count -= shift
    mov     r2, r2, lsl r12         ;range <<= shift
    mov     r3, r3, lsl r12         ;value <<= shift
    addle   r4, r4, #8              ;if (count <= 0) count += 8
    ldrleb  r12, [r1], #1           ;   r12 = br->buffer[br->pos]

    ; r4 already has 8 added, so 8 - r4 is minus the pre-add count.
    rsble   r1, r4, #8              ;   r1 = -count (shift for the new byte)
    addle   r5, r5, #1              ;   br->pos++
    orrle   r3, r3, r12, lsl r1     ;   value |= (br->buffer[br->pos]) << (-count)

    strd    r2, r3, [lr, #bool_decoder_range]   ;store range, value
    strd    r4, r5, [lr, #bool_decoder_count]   ;store count, pos

    ldmia   sp!, {r4 - r5, pc}
    ENDP    ; |vp8dx_decode_bool_neon|

    END