1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_decode_value_neon|
EXPORT |vp8dx_start_decode_neon|
EXPORT |vp8dx_stop_decode_neon|
EXPORT |vp8dx_decode_bool_neon|
ARM
REQUIRE8
PRESERVE8
INCLUDE vpx_asm_offsets.asm
AREA |.text|, CODE, READONLY ; name this block of code
; int z = 0;
; int bit;
; for ( bit=bits-1; bit>=0; bit-- )
; {
; z |= (vp8dx_decode_bool(br, 0x80)<<bit);
; }
; return z;
;int vp8_decode_value_neon ( BOOL_DECODER *br, int bits )
|vp8_decode_value_neon| PROC
stmdb sp!, {r4 - r6, lr}
mov r4, r0
mov r5, r1
mov r6, #0
subs r5, r5, #1
bmi decode_value_exit
decode_value_loop
mov r1, #0x80
mov r0, r4
bl vp8dx_decode_bool_neon_internal ; needed for conversion to s file
orr r6, r6, r0, lsl r5
subs r5, r5, #1
bpl decode_value_loop
decode_value_exit
mov r0, r6
ldmia sp!, {r4 - r6, pc}
ENDP ; |vp8_decode_value_neon|
;void vp8dx_start_decode_neon ( BOOL_DECODER *br, unsigned char *source )
|vp8dx_start_decode_neon| PROC
stmdb sp!, {r4 - r5, lr}
mov r2, #0
mov r3, #255
str r2, [r0, #bool_decoder_lowvalue]
str r3, [r0, #bool_decoder_range]
str r1, [r0, #bool_decoder_buffer]
mov r3, #8
mov r2, #4
str r3, [r0, #bool_decoder_count]
str r2, [r0, #bool_decoder_pos]
ldrb r2, [r1, #3]
ldrb r3, [r1, #2]
ldrb r4, [r1, #1]
ldrb r5, [r1]
orr r1, r2, r3, lsl #8
orr r1, r1, r4, lsl #16
orr r1, r1, r5, lsl #24
str r1, [r0, #bool_decoder_value]
ldmia sp!, {r4 - r5, pc}
ENDP ; |vp8dx_start_decode_neon|
;void vp8dx_stop_decode_neon ( BOOL_DECODER *bc );
|vp8dx_stop_decode_neon| PROC
mov pc, lr
ENDP ; |vp8dx_stop_decode_neon|
; bigsplit RN r1
; buffer_v RN r1
; count_v RN r4
; range_v RN r2
; value_v RN r3
; pos_v RN r5
; split RN r6
; bit RN lr
;int vp8dx_decode_bool_neon ( BOOL_DECODER *br, int probability )
|vp8dx_decode_bool_neon| PROC
vp8dx_decode_bool_neon_internal
;LDRD and STRD doubleword data transfers must be eight-byte aligned. Use ALIGN 8
;before memory allocation
stmdb sp!, {r4 - r5, lr}
ldr r2, [r0, #bool_decoder_range] ;load range (r2), value(r3)
ldr r3, [r0, #bool_decoder_value]
;ldrd r2, r3, [r0, #bool_decoder_range] ;ldrd costs 2 cycles
;
mov r4, r2, lsl #8
sub r4, r4, #256
mov r12, #1
smlawb r4, r4, r1, r12 ;split = 1 + (((range-1) * probability) >> 8)
mov lr, r0
mov r0, #0 ;bit = 0
;
subs r5, r3, r4, lsl #24
subhs r2, r2, r4 ;range = br->range-split
movlo r2, r4 ;range = split
movhs r0, #1 ;bit = 1
movhs r3, r5 ;value = value-bigsplit
cmp r2, #0x80
blt range_less_0x80
strd r2, r3, [lr, #bool_decoder_range] ;store result
ldmia sp!, {r4 - r5, pc}
range_less_0x80
ldrd r4, r5, [lr, #bool_decoder_count] ;load count, pos, buffer
ldr r1, [lr, #bool_decoder_buffer]
clz r12, r2
add r1, r1, r5
sub r12, r12, #24
subs r4, r4, r12 ;count -= shift
mov r2, r2, lsl r12 ;range <<= shift
mov r3, r3, lsl r12 ;value <<= shift
addle r4, r4, #8 ;count += 8
ldrleb r12, [r1], #1 ;br->buffer[br->pos]
rsble r1, r4, #8 ;-count
addle r5, r5, #1 ;br->pos++
orrle r3, r3, r12, lsl r1 ;value |= (br->buffer[br->pos]) << (-count)
strd r2, r3, [lr, #bool_decoder_range] ;store result
strd r4, r5, [lr, #bool_decoder_count]
ldmia sp!, {r4 - r5, pc}
ENDP ; |vp8dx_decode_bool_neon|
END
|