diff options
author | Henrik Gramner <hengar-6@student.ltu.se> | 2012-04-04 20:03:15 +0000 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-04-11 15:47:00 -0400 |
commit | 729f90e26802057f06905ab15a34612168eeac80 (patch) | |
tree | 41f8c4cedf10851b5b437aeeb558ce3d0f8db1dc /libavcodec/x86/h264_chromamc.asm | |
parent | e1ce756844e684876318570dcebc74bc66c084f0 (diff) | |
download | ffmpeg-729f90e26802057f06905ab15a34612168eeac80.tar.gz |
x86inc improvements for 64-bit
Add support for all x86-64 registers
Prefer caller-saved register over callee-saved on WIN64
Support up to 15 function arguments
Also (by Ronald S. Bultje)
Fix up our asm to work with new x86inc.asm.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
Diffstat (limited to 'libavcodec/x86/h264_chromamc.asm')
-rw-r--r-- | libavcodec/x86/h264_chromamc.asm | 48 |
1 files changed, 30 insertions, 18 deletions
```diff
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index 8b621fa8bb..64a4efe057 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -91,9 +91,22 @@ SECTION .text
 %endmacro
 
 %macro chroma_mc8_mmx_func 3
+%ifidn %2, rv40
+%ifdef PIC
+%define rnd_1d_rv40 r8
+%define rnd_2d_rv40 r8
+%define extra_regs 2
+%else ; no-PIC
+%define rnd_1d_rv40 rnd_rv40_1d_tbl
+%define rnd_2d_rv40 rnd_rv40_2d_tbl
+%define extra_regs 1
+%endif ; PIC
+%else
+%define extra_regs 0
+%endif ; rv40
 ; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
 ; int stride, int h, int mx, int my)
-cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
+cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 %if ARCH_X86_64
     movsxd r2, r2d
 %endif
@@ -106,19 +119,12 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
 
 .at_least_one_non_zero
 %ifidn %2, rv40
-%ifdef PIC
-%define rnd_1d_rv40 r11
-%define rnd_2d_rv40 r11
-%else ; no-PIC
-%define rnd_1d_rv40 rnd_rv40_1d_tbl
-%define rnd_2d_rv40 rnd_rv40_2d_tbl
-%endif
 %if ARCH_X86_64
-    mov r10, r5
-    and r10, 6 ; &~1 for mx/my=[0,7]
-    lea r10, [r10*4+r4]
-    sar r10d, 1
-%define rnd_bias r10
+    mov r7, r5
+    and r7, 6 ; &~1 for mx/my=[0,7]
+    lea r7, [r7*4+r4]
+    sar r7d, 1
+%define rnd_bias r7
 %define dest_reg r0
 %else ; x86-32
     mov r0, r5
@@ -145,7 +151,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
 
 %ifidn %2, rv40
 %ifdef PIC
-    lea r11, [rnd_rv40_1d_tbl]
+    lea r8, [rnd_rv40_1d_tbl]
 %endif
 %if ARCH_X86_64 == 0
     mov r5, r0m
@@ -196,7 +202,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
     movd m6, r5d ; y
 %ifidn %2, rv40
 %ifdef PIC
-    lea r11, [rnd_rv40_2d_tbl]
+    lea r8, [rnd_rv40_2d_tbl]
 %endif
 %if ARCH_X86_64 == 0
     mov r5, r0m
@@ -278,7 +284,13 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
 %endmacro
 
 %macro chroma_mc4_mmx_func 3
-cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
+%define extra_regs 0
+%ifidn %2, rv40
+%ifdef PIC
+%define extra_regs 1
+%endif ; PIC
+%endif ; rv40
+cglobal %1_%2_chroma_mc4_%3, 6, 6 + extra_regs, 0
 %if ARCH_X86_64
     movsxd r2, r2d
 %endif
@@ -296,8 +308,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
 
 %ifidn %2, rv40
 %ifdef PIC
-    lea r11, [rnd_rv40_2d_tbl]
-%define rnd_2d_rv40 r11
+    lea r6, [rnd_rv40_2d_tbl]
+%define rnd_2d_rv40 r6
 %else
 %define rnd_2d_rv40 rnd_rv40_2d_tbl
 %endif
```