summaryrefslogtreecommitdiff
path: root/libavcodec/x86/h264_chromamc.asm
diff options
context:
space:
mode:
authorHenrik Gramner <hengar-6@student.ltu.se>2012-04-04 20:03:15 +0000
committerJustin Ruggles <justin.ruggles@gmail.com>2012-04-11 15:47:00 -0400
commit729f90e26802057f06905ab15a34612168eeac80 (patch)
tree41f8c4cedf10851b5b437aeeb558ce3d0f8db1dc /libavcodec/x86/h264_chromamc.asm
parente1ce756844e684876318570dcebc74bc66c084f0 (diff)
downloadffmpeg-729f90e26802057f06905ab15a34612168eeac80.tar.gz
x86inc improvements for 64-bit
Add support for all x86-64 registers Prefer caller-saved register over callee-saved on WIN64 Support up to 15 function arguments Also (by Ronald S. Bultje) Fix up our asm to work with new x86inc.asm. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
Diffstat (limited to 'libavcodec/x86/h264_chromamc.asm')
-rw-r--r--libavcodec/x86/h264_chromamc.asm48
1 files changed, 30 insertions, 18 deletions
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index 8b621fa8bb..64a4efe057 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -91,9 +91,22 @@ SECTION .text
%endmacro
%macro chroma_mc8_mmx_func 3
+%ifidn %2, rv40
+%ifdef PIC
+%define rnd_1d_rv40 r8
+%define rnd_2d_rv40 r8
+%define extra_regs 2
+%else ; no-PIC
+%define rnd_1d_rv40 rnd_rv40_1d_tbl
+%define rnd_2d_rv40 rnd_rv40_2d_tbl
+%define extra_regs 1
+%endif ; PIC
+%else
+%define extra_regs 0
+%endif ; rv40
; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
; int stride, int h, int mx, int my)
-cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
+cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
%if ARCH_X86_64
movsxd r2, r2d
%endif
@@ -106,19 +119,12 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
.at_least_one_non_zero
%ifidn %2, rv40
-%ifdef PIC
-%define rnd_1d_rv40 r11
-%define rnd_2d_rv40 r11
-%else ; no-PIC
-%define rnd_1d_rv40 rnd_rv40_1d_tbl
-%define rnd_2d_rv40 rnd_rv40_2d_tbl
-%endif
%if ARCH_X86_64
- mov r10, r5
- and r10, 6 ; &~1 for mx/my=[0,7]
- lea r10, [r10*4+r4]
- sar r10d, 1
-%define rnd_bias r10
+ mov r7, r5
+ and r7, 6 ; &~1 for mx/my=[0,7]
+ lea r7, [r7*4+r4]
+ sar r7d, 1
+%define rnd_bias r7
%define dest_reg r0
%else ; x86-32
mov r0, r5
@@ -145,7 +151,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%ifidn %2, rv40
%ifdef PIC
- lea r11, [rnd_rv40_1d_tbl]
+ lea r8, [rnd_rv40_1d_tbl]
%endif
%if ARCH_X86_64 == 0
mov r5, r0m
@@ -196,7 +202,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
movd m6, r5d ; y
%ifidn %2, rv40
%ifdef PIC
- lea r11, [rnd_rv40_2d_tbl]
+ lea r8, [rnd_rv40_2d_tbl]
%endif
%if ARCH_X86_64 == 0
mov r5, r0m
@@ -278,7 +284,13 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%endmacro
%macro chroma_mc4_mmx_func 3
-cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
+%define extra_regs 0
+%ifidn %2, rv40
+%ifdef PIC
+%define extra_regs 1
+%endif ; PIC
+%endif ; rv40
+cglobal %1_%2_chroma_mc4_%3, 6, 6 + extra_regs, 0
%if ARCH_X86_64
movsxd r2, r2d
%endif
@@ -296,8 +308,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
%ifidn %2, rv40
%ifdef PIC
- lea r11, [rnd_rv40_2d_tbl]
-%define rnd_2d_rv40 r11
+ lea r6, [rnd_rv40_2d_tbl]
+%define rnd_2d_rv40 r6
%else
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif