diff options
author | Marton Balint <cus@passwd.hu> | 2018-04-05 01:37:25 +0200 |
---|---|---|
committer | Marton Balint <cus@passwd.hu> | 2018-11-11 20:30:50 +0100 |
commit | 6c2a7a8e9a3698f37913d3f24723fbb8fa895798 (patch) | |
tree | e078a0d996756411dd2087fbdce35474cce539cf /libavfilter/x86 | |
parent | 6df9020f45eaff66ba2c2bac98cda9ddaacb03f3 (diff) | |
download | ffmpeg-6c2a7a8e9a3698f37913d3f24723fbb8fa895798.tar.gz |
avfilter/vf_framerate: factorize SAD functions which compute SAD for a whole frame
Also add SIMD which works on lines because it is faster than calculating it on
8x8 blocks using pixelutils.
Signed-off-by: Marton Balint <cus@passwd.hu>
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- | libavfilter/x86/Makefile | 4 | ||||
-rw-r--r-- | libavfilter/x86/scene_sad.asm | 74 | ||||
-rw-r--r-- | libavfilter/x86/scene_sad_init.c | 52 |
3 files changed, 130 insertions, 0 deletions
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index b484c8bd1c..6eecb94359 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -1,3 +1,5 @@ +OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o + OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o @@ -29,6 +31,8 @@ OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o +X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o + X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm new file mode 100644 index 0000000000..d38d71ccca --- /dev/null +++ b/libavfilter/x86/scene_sad.asm @@ -0,0 +1,74 @@ +;***************************************************************************** +;* x86-optimized functions for scene SAD +;* +;* Copyright (C) 2018 Marton Balint +;* +;* Based on vf_blend.asm, Copyright (C) 2015 Paul B Mahol +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + + +%macro SAD_INIT 0 +cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x + add src1q, widthq + add src2q, widthq + neg widthq + pxor m1, m1 +%endmacro + + +%macro SAD_LOOP 0 +.nextrow: + mov xq, widthq + + .loop: + movu m0, [src1q + xq] + psadbw m0, [src2q + xq] + paddq m1, m0 + add xq, mmsize + jl .loop + add src1q, stride1q + add src2q, stride2q + sub endd, 1 + jg .nextrow + + mov r0q, r6mp + movu [r0q], m1 ; sum +REP_RET +%endmacro + + +%macro SAD_FRAMES 0 + SAD_INIT + SAD_LOOP +%endmacro + + +INIT_XMM sse2 +SAD_FRAMES + +%if HAVE_AVX2_EXTERNAL + +INIT_YMM avx2 +SAD_FRAMES + +%endif diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c new file mode 100644 index 0000000000..461fa406d9 --- /dev/null +++ b/libavfilter/x86/scene_sad_init.c @@ -0,0 +1,52 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavfilter/scene_sad.h" + +#define SCENE_SAD_FUNC(FUNC_NAME, ASM_FUNC_NAME, MMSIZE) \ +void ASM_FUNC_NAME(SCENE_SAD_PARAMS); \ + \ +static void FUNC_NAME(SCENE_SAD_PARAMS) { \ + uint64_t sad[MMSIZE / 8] = {0}; \ + ptrdiff_t awidth = width & ~(MMSIZE - 1); \ + *sum = 0; \ + ASM_FUNC_NAME(src1, stride1, src2, stride2, awidth, height, sad); \ + for (int i = 0; i < MMSIZE / 8; i++) \ + *sum += sad[i]; \ + ff_scene_sad_c(src1 + awidth, stride1, \ + src2 + awidth, stride2, \ + width - awidth, height, sad); \ + *sum += sad[0]; \ +} + +SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16); +SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32); + +ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth) +{ + int cpu_flags = av_get_cpu_flags(); + if (depth == 8) { + if (EXTERNAL_AVX2_FAST(cpu_flags)) + return scene_sad_avx2; + else if (EXTERNAL_SSE2(cpu_flags)) + return scene_sad_sse2; + } + return NULL; +} |