diff options
author | David Schleef <ds@schleef.org> | 2008-09-13 05:52:49 -0700 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2008-09-13 05:52:49 -0700 |
commit | a28a9de744adfa810537fa5c5cdd6baef08e7919 (patch) | |
tree | 49715818221a1176a6132693d15f4b32a42f85d2 | |
parent | 361296501c71c6ee7a1039c553e0f767b60c492f (diff) | |
download | liboil-a28a9de744adfa810537fa5c5cdd6baef08e7919.tar.gz |
Add avg2_32xn_u8 and combine4_32xn_u8
-rw-r--r-- | liboil/i386_amd64/sad8x8.c | 71 | ||||
-rw-r--r-- | liboil/liboilclasses.h | 2 | ||||
-rw-r--r-- | liboil/liboilfuncs-04.h | 2 | ||||
-rw-r--r-- | liboil/liboilfuncs-doc.h | 2 | ||||
-rw-r--r-- | liboil/liboilfuncs.h | 6 | ||||
-rw-r--r-- | liboil/liboiltrampolines.c | 20 | ||||
-rw-r--r-- | liboil/ref/wavelet.c | 49 |
7 files changed, 152 insertions, 0 deletions
diff --git a/liboil/i386_amd64/sad8x8.c b/liboil/i386_amd64/sad8x8.c
index f72cbf5..fbc268e 100644
--- a/liboil/i386_amd64/sad8x8.c
+++ b/liboil/i386_amd64/sad8x8.c
@@ -617,6 +617,47 @@ combine4_16xn_u8_mmx (uint8_t *d, int ds1,
 OIL_DEFINE_IMPL_FULL (combine4_16xn_u8_mmx, combine4_16xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
 
 void
+combine4_32xn_u8_mmx (uint8_t *d, int ds1, /* MMX version: n rows, DO_4 at byte offsets 0..28 per row */
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int j;
+
+  asm volatile ("\n" /* one-time setup: zero mm7, load the s5_6 words into mm6/mm4 */
+      " pxor %%mm7, %%mm7\n"
+      " movq 0(%0), %%mm6\n"
+      " movd 8(%0), %%mm4\n"
+      " pshufw $0x00, %%mm4, %%mm4\n"
+      ::"r" (s5_6) : "memory"); /* the asm reads *s5_6, which is not an operand */
+
+  for(j=0;j<n;j++){
+    asm volatile ("\n" /* DO_4 is defined earlier in sad8x8.c (not part of this hunk) */
+        DO_4(0)
+        DO_4(4)
+        DO_4(8)
+        DO_4(12)
+        DO_4(16)
+        DO_4(20)
+        DO_4(24)
+        DO_4(28)
+
+        :
+        : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4) : "memory"); /* memory is accessed through these pointers */
+
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+    d += ds1;
+  }
+  asm volatile ("emms"); /* leave MMX state before returning */
+}
+OIL_DEFINE_IMPL_FULL (combine4_32xn_u8_mmx, combine4_32xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
+void
 combine2_12xn_u8_mmx (uint8_t *d, int ds1,
     uint8_t *s1, int ss1,
     uint8_t *s2, int ss2,
@@ -819,3 +860,33 @@ avg2_16xn_u8_mmx (uint8_t *d, int ds1, uint8_t *s1, int ss1,
 }
 OIL_DEFINE_IMPL_FULL (avg2_16xn_u8_mmx, avg2_16xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
 
+void
+avg2_32xn_u8_mmx (uint8_t *d, int ds1, uint8_t *s1, int ss1, /* pavgb of s1 and s2 rows into d, 32 bytes per row */
+    uint8_t *s2, int ss2, int n)
+{
+  int j;
+  for(j=0;j<n;j++){
+    asm volatile ("\n" /* four 8-byte groups at offsets 0, 8, 16, 24 */
+        " movq 0(%[s1]), %%mm0\n"
+        " pavgb 0(%[s2]), %%mm0\n"
+        " movq %%mm0, 0(%[d])\n"
+        " movq 8(%[s1]), %%mm0\n"
+        " pavgb 8(%[s2]), %%mm0\n"
+        " movq %%mm0, 8(%[d])\n"
+        " movq 16(%[s1]), %%mm0\n"
+        " pavgb 16(%[s2]), %%mm0\n"
+        " movq %%mm0, 16(%[d])\n"
+        " movq 24(%[s1]), %%mm0\n"
+        " pavgb 24(%[s2]), %%mm0\n"
+        " movq %%mm0, 24(%[d])\n"
+        :
+        : [d] "r" (d), [s1] "r" (s1), [s2] "r" (s2) : "memory"); /* loads from s1/s2 and stores to d are not operands */
+
+    s1 += ss1;
+    s2 += ss2;
+    d += ds1;
+  }
+  asm volatile ("emms"); /* leave MMX state before returning */
+}
+OIL_DEFINE_IMPL_FULL (avg2_32xn_u8_mmx, avg2_32xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h
index 8cfa0b2..3fc6e03 100644
--- a/liboil/liboilclasses.h
+++ b/liboil/liboilclasses.h
@@ -54,6 +54,7 @@ OIL_DECLARE_CLASS(argb_paint_u8);
 OIL_DECLARE_CLASS(average2_u8);
 OIL_DECLARE_CLASS(avg2_12xn_u8);
 OIL_DECLARE_CLASS(avg2_16xn_u8);
+OIL_DECLARE_CLASS(avg2_32xn_u8);
 OIL_DECLARE_CLASS(avg2_8xn_u8);
 OIL_DECLARE_CLASS(ayuv2argb_u8);
 OIL_DECLARE_CLASS(ayuv2uyvy);
@@ -129,6 +130,7 @@ OIL_DECLARE_CLASS(combine2_16xn_u8);
 OIL_DECLARE_CLASS(combine2_8xn_u8);
 OIL_DECLARE_CLASS(combine4_12xn_u8);
 OIL_DECLARE_CLASS(combine4_16xn_u8);
+OIL_DECLARE_CLASS(combine4_32xn_u8);
 OIL_DECLARE_CLASS(combine4_8xn_u8);
 OIL_DECLARE_CLASS(compare_u8);
 OIL_DECLARE_CLASS(composite_add_argb);
diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h
index 39a07de..103c19e 100644
--- a/liboil/liboilfuncs-04.h
+++ b/liboil/liboilfuncs-04.h
@@ -54,6 +54,7 @@ void oil_argb_paint_u8 (uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s
 void oil_average2_u8 (uint8_t * d, int dstr, const uint8_t * s1, int sstr1, const uint8_t * s2, int sstr2, int n);
 void oil_avg2_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
 void oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
 void oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 void oil_ayuv2argb_u8 (uint8_t * d_4xn, const uint8_t * s_4xn, int n);
 void oil_ayuv2uyvy (uint32_t * d_n, const uint32_t * s_n, int n);
@@ -129,6 +130,7 @@ void oil_combine2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, i
 void oil_combine2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const int16_t * s3_4, int n);
 void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
 void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
diff --git a/liboil/liboilfuncs-doc.h b/liboil/liboilfuncs-doc.h
index c5b3b9e..b914e19 100644
--- a/liboil/liboilfuncs-doc.h
+++ b/liboil/liboilfuncs-doc.h
@@ -18,6 +18,7 @@ void oil_argb_paint_u8 (uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s
 void oil_average2_u8 (uint8_t * d, int dstr, const uint8_t * s1, int sstr1, const uint8_t * s2, int sstr2, int n);
 void oil_avg2_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
 void oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
 void oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 void oil_ayuv2argb_u8 (uint8_t * d_4xn, const uint8_t * s_4xn, int n);
 void oil_ayuv2uyvy (uint32_t * d_n, const uint32_t * s_n, int n);
@@ -93,6 +94,7 @@ void oil_combine2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, i
 void oil_combine2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const int16_t * s3_4, int n);
 void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
 void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index e326a77..cd03099 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -90,6 +90,9 @@ typedef void (*_oil_type_avg2_12xn_u8)(uint8_t * d_12xn, int ds1, const uint8_t
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_16xn_u8;
 typedef void (*_oil_type_avg2_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
 #define oil_avg2_16xn_u8 ((_oil_type_avg2_16xn_u8)(*(void **)oil_function_class_ptr_avg2_16xn_u8))
+OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_32xn_u8;
+typedef void (*_oil_type_avg2_32xn_u8)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
+#define oil_avg2_32xn_u8 ((_oil_type_avg2_32xn_u8)(*(void **)oil_function_class_ptr_avg2_32xn_u8))
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_8xn_u8;
 typedef void (*_oil_type_avg2_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 #define oil_avg2_8xn_u8 ((_oil_type_avg2_8xn_u8)(*(void **)oil_function_class_ptr_avg2_8xn_u8))
@@ -315,6 +318,9 @@ typedef void (*_oil_type_combine4_12xn_u8)(uint8_t * d_12xn, int ds1, const uint
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_16xn_u8;
 typedef void (*_oil_type_combine4_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
 #define oil_combine4_16xn_u8 ((_oil_type_combine4_16xn_u8)(*(void **)oil_function_class_ptr_combine4_16xn_u8))
+OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_32xn_u8;
+typedef void (*_oil_type_combine4_32xn_u8)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_32xn_u8 ((_oil_type_combine4_32xn_u8)(*(void **)oil_function_class_ptr_combine4_32xn_u8))
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_8xn_u8;
 typedef void (*_oil_type_combine4_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 #define oil_combine4_8xn_u8 ((_oil_type_combine4_8xn_u8)(*(void **)oil_function_class_ptr_combine4_8xn_u8))
diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c
index 9009dff..7096766 100644
--- a/liboil/liboiltrampolines.c
+++ b/liboil/liboiltrampolines.c
@@ -211,6 +211,16 @@ oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, c
   ((void (*)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n))(_oil_function_class_avg2_16xn_u8.func))(d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, n);
 }
 
+#undef oil_avg2_32xn_u8
+void
+oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n)
+{
+  if (_oil_function_class_avg2_32xn_u8.func == NULL) {
+    oil_class_optimize (&_oil_function_class_avg2_32xn_u8);
+  }
+  ((void (*)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n))(_oil_function_class_avg2_32xn_u8.func))(d_32xn, ds1, s1_32xn, ss1, s2_32xn, ss2, n);
+}
+
 #undef oil_avg2_8xn_u8
 void
 oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n)
@@ -961,6 +971,16 @@ oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss
   ((void (*)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_16xn_u8.func))(d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, s3_16xn, ss3, s4_16xn, ss4, s5_6, n);
 }
 
+#undef oil_combine4_32xn_u8
+void
+oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n)
+{
+  if (_oil_function_class_combine4_32xn_u8.func == NULL) {
+    oil_class_optimize (&_oil_function_class_combine4_32xn_u8);
+  }
+  ((void (*)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_32xn_u8.func))(d_32xn, ds1, s1_32xn, ss1, s2_32xn, ss2, s3_32xn, ss3, s4_32xn, ss4, s5_6, n);
+}
+
 #undef oil_combine4_8xn_u8
 void
 oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n)
diff --git a/liboil/ref/wavelet.c b/liboil/ref/wavelet.c
index 77f5e54..7295545 100644
--- a/liboil/ref/wavelet.c
+++ b/liboil/ref/wavelet.c
@@ -149,6 +149,9 @@ OIL_DEFINE_CLASS_FULL (combine4_12xn_u8, "uint8_t *d_12xn, int ds1, "
 OIL_DEFINE_CLASS_FULL (combine4_16xn_u8, "uint8_t *d_16xn, int ds1, "
     "uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, uint8_t *s3_16xn, "
     "int ss3, uint8_t *s4_16xn, int ss4, int16_t *s5_6, int n", combine4_test);
+OIL_DEFINE_CLASS_FULL (combine4_32xn_u8, "uint8_t *d_32xn, int ds1, "
+    "uint8_t *s1_32xn, int ss1, uint8_t *s2_32xn, int ss2, uint8_t *s3_32xn, "
+    "int ss3, uint8_t *s4_32xn, int ss4, int16_t *s5_6, int n", combine4_test);
 OIL_DEFINE_CLASS_FULL (add2_rshift_add_s16, "int16_t *d, int16_t *s1, "
     "int16_t *s2, int16_t *s3, int16_t *s4_2, int n", add2_test);
 OIL_DEFINE_CLASS_FULL (add2_rshift_sub_s16, "int16_t *d, int16_t *s1, "
@@ -159,6 +162,8 @@ OIL_DEFINE_CLASS (avg2_12xn_u8, "uint8_t *d_12xn, int ds1, "
     "uint8_t *s1_12xn, int ss1, uint8_t *s2_12xn, int ss2, int n");
 OIL_DEFINE_CLASS (avg2_16xn_u8, "uint8_t *d_16xn, int ds1, "
     "uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, int n");
+OIL_DEFINE_CLASS (avg2_32xn_u8, "uint8_t *d_32xn, int ds1, "
+    "uint8_t *s1_32xn, int ss1, uint8_t *s2_32xn, int ss2, int n");
 
 void
 deinterleave_ref (int16_t *d_2xn, int16_t *s_2xn, int n)
@@ -791,6 +796,34 @@ combine4_16xn_u8_ref (uint8_t *d, int ds1,
 OIL_DEFINE_IMPL_REF (combine4_16xn_u8_ref, combine4_16xn_u8);
 
 void
+combine4_32xn_u8_ref (uint8_t *d, int ds1, /* reference: weighted sum of four sources, 32 bytes x n rows */
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int i;
+  int j;
+  for(j=0;j<n;j++){
+    for(i=0;i<32;i++){
+      int x = 0;
+      x += s5_6[0] * s1[i]; /* s5_6[0..3] are the per-source weights */
+      x += s5_6[1] * s2[i];
+      x += s5_6[2] * s3[i];
+      x += s5_6[3] * s4[i];
+      d[i] = (x + s5_6[4]) >> s5_6[5]; /* add s5_6[4], shift right by s5_6[5]; stored as uint8_t without clamping */
+    }
+    s1 += ss1; /* each pointer advances by its own stride per row */
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+    d += ds1;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_32xn_u8_ref, combine4_32xn_u8);
+
+void
 combine2_8xn_u8_ref (uint8_t *d, int ds1,
     uint8_t *s1, int ss1,
     uint8_t *s2, int ss2,
@@ -929,3 +962,19 @@ avg2_16xn_u8_ref (uint8_t *d, int ds1, uint8_t *s1, int ss1,
 }
 OIL_DEFINE_IMPL_REF (avg2_16xn_u8_ref, avg2_16xn_u8);
 
+void
+avg2_32xn_u8_ref (uint8_t *d, int ds1, uint8_t *s1, int ss1, /* reference: byte-wise average, 32 bytes x n rows */
+    uint8_t *s2, int ss2, int n)
+{
+  int i;
+  int j;
+  for(j=0;j<n;j++){
+    for(i=0;i<32;i++){
+      d[i] = (s1[i] + s2[i] + 1)>>1; /* average of the two bytes, halves rounded up */
+    }
+    s1 += ss1; /* each pointer advances by its own stride per row */
+    s2 += ss2;
+    d += ds1;
+  }
+}
+OIL_DEFINE_IMPL_REF (avg2_32xn_u8_ref, avg2_32xn_u8);