Diffstat (limited to 'vp8/encoder/x86/csystemdependent.c')
-rw-r--r-- | vp8/encoder/x86/csystemdependent.c | 289
1 file changed, 289 insertions, 0 deletions
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
new file mode 100644
index 000000000..186ee6856
--- /dev/null
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "variance.h"
+#include "onyx_int.h"
+
+SADFunction *vp8_sad16x16;
+SADFunction *vp8_sad16x8;
+SADFunction *vp8_sad8x16;
+SADFunction *vp8_sad8x8;
+SADFunction *vp8_sad4x4;
+
+variance_function *vp8_variance4x4;
+variance_function *vp8_variance8x8;
+variance_function *vp8_variance8x16;
+variance_function *vp8_variance16x8;
+variance_function *vp8_variance16x16;
+
+
+variance_function *vp8_mse16x16;
+
+sub_pixel_variance_function *vp8_sub_pixel_variance4x4;
+sub_pixel_variance_function *vp8_sub_pixel_variance8x8;
+sub_pixel_variance_function *vp8_sub_pixel_variance8x16;
+sub_pixel_variance_function *vp8_sub_pixel_variance16x8;
+sub_pixel_variance_function *vp8_sub_pixel_variance16x16;
+
+int (*vp8_block_error)(short *, short *);
+int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc);
+void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride);
+
+extern void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride);
+extern void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride);
+
+extern int vp8_block_error_c(short *, short *);
+extern int vp8_mbblock_error_c(MACROBLOCK *x, int dc);
+
+extern int vp8_block_error_mmx(short *, short *);
+extern int vp8_mbblock_error_mmx(MACROBLOCK *x, int dc);
+
+extern int vp8_block_error_xmm(short *, short *);
+extern int vp8_mbblock_error_xmm(MACROBLOCK *x, int dc);
+
+
+
+int (*vp8_mbuverror)(MACROBLOCK *mb);
+unsigned int (*vp8_get_mb_ss)(short *);
+void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
+void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
+void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
+void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
+
+void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
+void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
+unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
+
+// c imports
+extern int vp8_mbuverror_c(MACROBLOCK *mb);
+extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
+extern void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
+extern void vp8_fast_fdct4x4_c(short *input, short *output, int pitch);
+extern void vp8_fast_fdct8x4_c(short *input, short *output, int pitch);
+
+
+extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
+extern void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);
+
+extern SADFunction vp8_sad16x16_c;
+extern SADFunction vp8_sad16x8_c;
+extern SADFunction vp8_sad8x16_c;
+extern SADFunction vp8_sad8x8_c;
+extern SADFunction vp8_sad4x4_c;
+
+extern SADFunction vp8_sad16x16_wmt;
+extern SADFunction vp8_sad16x8_wmt;
+extern SADFunction vp8_sad8x16_wmt;
+extern SADFunction vp8_sad8x8_wmt;
+extern SADFunction vp8_sad4x4_wmt;
+
+extern SADFunction vp8_sad16x16_mmx;
+extern SADFunction vp8_sad16x8_mmx;
+extern SADFunction vp8_sad8x16_mmx;
+extern SADFunction vp8_sad8x8_mmx;
+extern SADFunction vp8_sad4x4_mmx;
+
+extern variance_function vp8_variance16x16_c;
+extern variance_function vp8_variance8x16_c;
+extern variance_function vp8_variance16x8_c;
+extern variance_function vp8_variance8x8_c;
+extern variance_function vp8_variance4x4_c;
+extern variance_function vp8_mse16x16_c;
+
+extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_c;
+
+extern unsigned int vp8_get_mb_ss_c(short *);
+extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
+
+// mmx imports
+extern int vp8_mbuverror_mmx(MACROBLOCK *mb);
+extern void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d);
+extern void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch);
+extern void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+extern void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch);
+extern void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch);
+extern void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch);
+extern void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch);
+extern variance_function vp8_variance4x4_mmx;
+extern variance_function vp8_variance8x8_mmx;
+extern variance_function vp8_variance8x16_mmx;
+extern variance_function vp8_variance16x8_mmx;
+extern variance_function vp8_variance16x16_mmx;
+
+extern variance_function vp8_mse16x16_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_mmx;
+
+extern unsigned int vp8_get16x16pred_error_mmx(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+extern unsigned int vp8_get_mb_ss_mmx(short *);
+extern unsigned int vp8_get8x8var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get16x16var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get4x4sse_cs_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
+
+
+// wmt imports
+extern int vp8_mbuverror_xmm(MACROBLOCK *mb);
+extern void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d);
+extern void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch);
+extern variance_function vp8_variance4x4_wmt;
+extern variance_function vp8_variance8x8_wmt;
+extern variance_function vp8_variance8x16_wmt;
+extern variance_function vp8_variance16x8_wmt;
+extern variance_function vp8_variance16x16_wmt;
+
+extern variance_function vp8_mse16x16_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_wmt;
+extern unsigned int vp8_get16x16pred_error_sse2(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+extern unsigned int vp8_get_mb_ss_sse2(short *src_ptr);
+extern unsigned int vp8_get8x8var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get16x16var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+
+extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
+
+void vp8_cmachine_specific_config(void)
+{
+    int mmx_enabled;
+    int xmm_enabled;
+    int wmt_enabled;
+
+    vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
+
+    if (wmt_enabled) // Willamette
+    {
+        // Willamette instruction set available:
+        vp8_mbuverror               = vp8_mbuverror_xmm;
+        vp8_fast_quantize_b         = vp8_fast_quantize_b_sse;
+        vp8_short_fdct4x4           = vp8_short_fdct4x4_mmx;
+        vp8_short_fdct8x4           = vp8_short_fdct8x4_mmx;
+        vp8_fast_fdct4x4            = vp8_fast_fdct4x4_mmx;
+        vp8_fast_fdct8x4            = vp8_fast_fdct8x4_wmt;
+        vp8_subtract_b              = vp8_subtract_b_mmx;
+        vp8_subtract_mbuv           = vp8_subtract_mbuv_mmx;
+        vp8_variance4x4             = vp8_variance4x4_mmx;
+        vp8_variance8x8             = vp8_variance8x8_mmx;
+        vp8_variance8x16            = vp8_variance8x16_wmt;
+        vp8_variance16x8            = vp8_variance16x8_wmt;
+        vp8_variance16x16           = vp8_variance16x16_wmt;
+        vp8_mse16x16                = vp8_mse16x16_wmt;
+        vp8_sub_pixel_variance4x4   = vp8_sub_pixel_variance4x4_wmt;
+        vp8_sub_pixel_variance8x8   = vp8_sub_pixel_variance8x8_wmt;
+        vp8_sub_pixel_variance8x16  = vp8_sub_pixel_variance8x16_wmt;
+        vp8_sub_pixel_variance16x8  = vp8_sub_pixel_variance16x8_wmt;
+        vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt;
+        vp8_get_mb_ss               = vp8_get_mb_ss_sse2;
+        vp8_get16x16pred_error      = vp8_get16x16pred_error_sse2;
+        vp8_get8x8var               = vp8_get8x8var_sse2;
+        vp8_get16x16var             = vp8_get16x16var_sse2;
+        vp8_get4x4sse_cs            = vp8_get4x4sse_cs_mmx;
+        vp8_sad16x16                = vp8_sad16x16_wmt;
+        vp8_sad16x8                 = vp8_sad16x8_wmt;
+        vp8_sad8x16                 = vp8_sad8x16_wmt;
+        vp8_sad8x8                  = vp8_sad8x8_wmt;
+        vp8_sad4x4                  = vp8_sad4x4_wmt;
+        vp8_block_error             = vp8_block_error_xmm;
+        vp8_mbblock_error           = vp8_mbblock_error_xmm;
+        vp8_subtract_mby            = vp8_subtract_mby_mmx;
+
+    }
+    else if (mmx_enabled)
+    {
+        // MMX instruction set available:
+        vp8_mbuverror               = vp8_mbuverror_mmx;
+        vp8_fast_quantize_b         = vp8_fast_quantize_b_mmx;
+        vp8_short_fdct4x4           = vp8_short_fdct4x4_mmx;
+        vp8_short_fdct8x4           = vp8_short_fdct8x4_mmx;
+        vp8_fast_fdct4x4            = vp8_fast_fdct4x4_mmx;
+        vp8_fast_fdct8x4            = vp8_fast_fdct8x4_mmx;
+        vp8_subtract_b              = vp8_subtract_b_mmx;
+        vp8_subtract_mbuv           = vp8_subtract_mbuv_mmx;
+        vp8_variance4x4             = vp8_variance4x4_mmx;
+        vp8_variance8x8             = vp8_variance8x8_mmx;
+        vp8_variance8x16            = vp8_variance8x16_mmx;
+        vp8_variance16x8            = vp8_variance16x8_mmx;
+        vp8_variance16x16           = vp8_variance16x16_mmx;
+        vp8_mse16x16                = vp8_mse16x16_mmx;
+        vp8_sub_pixel_variance4x4   = vp8_sub_pixel_variance4x4_mmx;
+        vp8_sub_pixel_variance8x8   = vp8_sub_pixel_variance8x8_mmx;
+        vp8_sub_pixel_variance8x16  = vp8_sub_pixel_variance8x16_mmx;
+        vp8_sub_pixel_variance16x8  = vp8_sub_pixel_variance16x8_mmx;
+        vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx;
+        vp8_get_mb_ss               = vp8_get_mb_ss_mmx;
+        vp8_get16x16pred_error      = vp8_get16x16pred_error_mmx;
+        vp8_get8x8var               = vp8_get8x8var_mmx;
+        vp8_get16x16var             = vp8_get16x16var_mmx;
+        vp8_get4x4sse_cs            = vp8_get4x4sse_cs_mmx;
+        vp8_sad16x16                = vp8_sad16x16_mmx;
+        vp8_sad16x8                 = vp8_sad16x8_mmx;
+        vp8_sad8x16                 = vp8_sad8x16_mmx;
+        vp8_sad8x8                  = vp8_sad8x8_mmx;
+        vp8_sad4x4                  = vp8_sad4x4_mmx;
+        vp8_block_error             = vp8_block_error_mmx;
+        vp8_mbblock_error           = vp8_mbblock_error_mmx;
+        vp8_subtract_mby            = vp8_subtract_mby_mmx;
+
+    }
+    else
+    {
+        // Pure C:
+        vp8_mbuverror               = vp8_mbuverror_c;
+        vp8_fast_quantize_b         = vp8_fast_quantize_b_c;
+        vp8_short_fdct4x4           = vp8_short_fdct4x4_c;
+        vp8_short_fdct8x4           = vp8_short_fdct8x4_c;
+        vp8_fast_fdct4x4            = vp8_fast_fdct4x4_c;
+        vp8_fast_fdct8x4            = vp8_fast_fdct8x4_c;
+        vp8_subtract_b              = vp8_subtract_b_c;
+        vp8_subtract_mbuv           = vp8_subtract_mbuv_c;
+        vp8_variance4x4             = vp8_variance4x4_c;
+        vp8_variance8x8             = vp8_variance8x8_c;
+        vp8_variance8x16            = vp8_variance8x16_c;
+        vp8_variance16x8            = vp8_variance16x8_c;
+        vp8_variance16x16           = vp8_variance16x16_c;
+        vp8_mse16x16                = vp8_mse16x16_c;
+        vp8_sub_pixel_variance4x4   = vp8_sub_pixel_variance4x4_c;
+        vp8_sub_pixel_variance8x8   = vp8_sub_pixel_variance8x8_c;
+        vp8_sub_pixel_variance8x16  = vp8_sub_pixel_variance8x16_c;
+        vp8_sub_pixel_variance16x8  = vp8_sub_pixel_variance16x8_c;
+        vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c;
+        vp8_get_mb_ss               = vp8_get_mb_ss_c;
+        vp8_get16x16pred_error      = vp8_get16x16pred_error_c;
+        vp8_get8x8var               = vp8_get8x8var_c;
+        vp8_get16x16var             = vp8_get16x16var_c;
+        vp8_get4x4sse_cs            = vp8_get4x4sse_cs_c;
+        vp8_sad16x16                = vp8_sad16x16_c;
+        vp8_sad16x8                 = vp8_sad16x8_c;
+        vp8_sad8x16                 = vp8_sad8x16_c;
+        vp8_sad8x8                  = vp8_sad8x8_c;
+        vp8_sad4x4                  = vp8_sad4x4_c;
+        vp8_block_error             = vp8_block_error_c;
+        vp8_mbblock_error           = vp8_mbblock_error_c;
+        vp8_subtract_mby            = vp8_subtract_mby_c;
+    }
+
+}
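Note: the new file implements run-time CPU dispatch for the encoder's hot primitives (SAD, variance, FDCT, quantize, subtract). Each primitive is a global function pointer, and vp8_cmachine_specific_config() binds every pointer to the Willamette (SSE2), MMX, or plain-C routine reported by vpx_get_processor_flags(). A minimal caller-side sketch, using only signatures that appear in this diff; the header name and buffer setup are illustrative assumptions, not part of the change:

    /* Sketch: assumes this file is linked into the encoder and that the
     * function pointers are visible to the caller via the assumed header. */
    #include "onyx_int.h"   /* assumption: makes the pointers visible here */

    unsigned int sse_of_two_4x4_blocks(unsigned char *src, unsigned char *ref)
    {
        /* Bind the pointers once, at encoder start-up. */
        vp8_cmachine_specific_config();

        /* The same call site then runs vp8_get4x4sse_cs_mmx on CPUs that
         * support it and falls back to vp8_get4x4sse_cs_c elsewhere. */
        return vp8_get4x4sse_cs(src, 4, ref, 4);
    }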