Diffstat (limited to 'vp8/encoder/ppc/encodemb_altivec.asm')
-rw-r--r--   vp8/encoder/ppc/encodemb_altivec.asm   152
1 file changed, 152 insertions, 0 deletions
diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm
new file mode 100644
index 000000000..e0e976d71
--- /dev/null
+++ b/vp8/encoder/ppc/encodemb_altivec.asm
@@ -0,0 +1,152 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    .globl vp8_subtract_mbuv_ppc
+    .globl vp8_subtract_mby_ppc
+
+;# r3 short *diff
+;# r4 unsigned char *usrc
+;# r5 unsigned char *vsrc
+;# r6 unsigned char *pred
+;# r7 int stride
+vp8_subtract_mbuv_ppc:
+    mfspr   r11, 256            ;# get old VRSAVE
+    oris    r12, r11, 0xf000
+    mtspr   256, r12            ;# set VRSAVE
+
+    li      r9, 256
+    add     r3, r3, r9
+    add     r3, r3, r9
+    add     r6, r6, r9
+
+    li      r10, 16
+    li      r9, 4
+    mtctr   r9
+
+    vspltisw v0, 0
+
+mbu_loop:
+    lvsl    v5, 0, r4           ;# permutate value for alignment
+    lvx     v1, 0, r4           ;# src
+    lvx     v2, 0, r6           ;# pred
+
+    add     r4, r4, r7
+    addi    r6, r6, 16
+
+    vperm   v1, v1, v0, v5
+
+    vmrghb  v3, v0, v1          ;# unpack high src to short
+    vmrghb  v4, v0, v2          ;# unpack high pred to short
+
+    lvsl    v5, 0, r4           ;# permutate value for alignment
+    lvx     v1, 0, r4           ;# src
+
+    add     r4, r4, r7
+
+    vsubshs v3, v3, v4
+
+    stvx    v3, 0, r3           ;# store out diff
+
+    vperm   v1, v1, v0, v5
+
+    vmrghb  v3, v0, v1          ;# unpack high src to short
+    vmrglb  v4, v0, v2          ;# unpack low pred to short
+
+    vsubshs v3, v3, v4
+
+    stvx    v3, r10, r3         ;# store out diff
+
+    addi    r3, r3, 32
+
+    bdnz    mbu_loop
+
+    mtctr   r9
+
+mbv_loop:
+    lvsl    v5, 0, r5           ;# permutate value for alignment
+    lvx     v1, 0, r5           ;# src
+    lvx     v2, 0, r6           ;# pred
+
+    add     r5, r5, r7
+    addi    r6, r6, 16
+
+    vperm   v1, v1, v0, v5
+
+    vmrghb  v3, v0, v1          ;# unpack high src to short
+    vmrghb  v4, v0, v2          ;# unpack high pred to short
+
+    lvsl    v5, 0, r5           ;# permutate value for alignment
+    lvx     v1, 0, r5           ;# src
+
+    add     r5, r5, r7
+
+    vsubshs v3, v3, v4
+
+    stvx    v3, 0, r3           ;# store out diff
+
+    vperm   v1, v1, v0, v5
+
+    vmrghb  v3, v0, v1          ;# unpack high src to short
+    vmrglb  v4, v0, v2          ;# unpack low pred to short
+
+    vsubshs v3, v3, v4
+
+    stvx    v3, r10, r3         ;# store out diff
+
+    addi    r3, r3, 32
+
+    bdnz    mbv_loop
+
+    mtspr   256, r11            ;# reset old VRSAVE
+
+    blr
+
+;# r3 short *diff
+;# r4 unsigned char *src
+;# r5 unsigned char *pred
+;# r6 int stride
+vp8_subtract_mby_ppc:
+    mfspr   r11, 256            ;# get old VRSAVE
+    oris    r12, r11, 0xf800
+    mtspr   256, r12            ;# set VRSAVE
+
+    li      r10, 16
+    mtctr   r10
+
+    vspltisw v0, 0
+
+mby_loop:
+    lvx     v1, 0, r4           ;# src
+    lvx     v2, 0, r5           ;# pred
+
+    add     r4, r4, r6
+    addi    r5, r5, 16
+
+    vmrghb  v3, v0, v1          ;# unpack high src to short
+    vmrghb  v4, v0, v2          ;# unpack high pred to short
+
+    vsubshs v3, v3, v4
+
+    stvx    v3, 0, r3           ;# store out diff
+
+    vmrglb  v3, v0, v1          ;# unpack low src to short
+    vmrglb  v4, v0, v2          ;# unpack low pred to short
+
+    vsubshs v3, v3, v4
+
+    stvx    v3, r10, r3         ;# store out diff
+
+    addi    r3, r3, 32
+
+    bdnz    mby_loop
+
+    mtspr   256, r11            ;# reset old VRSAVE
+
+    blr
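For readers unfamiliar with AltiVec, the scalar C sketch below shows what the two routines compute: per-pixel src - pred residuals for the 16x16 luma block and the two 8x8 chroma blocks of a macroblock, written as 16-bit values into one diff buffer, with luma first, U at offset 256, and V at offset 320 (this layout matches the li r9, 256 / double add r3 setup at the top of the mbuv routine). The standalone function names and signatures here are illustrative, not code from this commit.

/* Illustrative scalar equivalent of vp8_subtract_mby_ppc:
 * residual for the 16x16 luma block of a macroblock. */
void subtract_mby(short *diff, unsigned char *src,
                  unsigned char *pred, int stride)
{
    int r, c;

    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
            diff[c] = src[c] - pred[c];  /* always fits in 16 bits */

        diff += 16;
        pred += 16;      /* prediction rows are packed 16 bytes apart */
        src  += stride;  /* source rows use the frame stride */
    }
}

/* Illustrative scalar equivalent of vp8_subtract_mbuv_ppc:
 * residuals for the 8x8 U and V blocks, stored after the
 * 256 luma entries in the same diff buffer. */
void subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
                   unsigned char *pred, int stride)
{
    int r, c;
    short *udiff = diff + 256;          /* r3 advanced by 512 bytes in the asm */
    short *vdiff = diff + 320;
    unsigned char *upred = pred + 256;  /* r6 advanced by 256 bytes in the asm */
    unsigned char *vpred = pred + 320;

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
            udiff[c] = usrc[c] - upred[c];

        udiff += 8;
        upred += 8;
        usrc  += stride;
    }

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
            vdiff[c] = vsrc[c] - vpred[c];

        vdiff += 8;
        vpred += 8;
        vsrc  += stride;
    }
}

The AltiVec version produces the same result 8 or 16 pixels at a time: vmrghb/vmrglb interleave each byte vector with the zero vector v0 to zero-extend pixels to 16 bits (big-endian, so the zeros land in the high byte), vsubshs performs the subtraction (saturating, though zero-extended 8-bit inputs can never saturate), and the lvsl/vperm pair realigns potentially unaligned source rows before unpacking, while the prediction buffer is loaded with plain lvx and therefore assumed 16-byte aligned.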