diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/enc.c')
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/enc.c | 101 |
1 files changed, 77 insertions, 24 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/enc.c b/src/3rdparty/libwebp/src/dsp/enc.c index f4e72d4..8899d50 100644 --- a/src/3rdparty/libwebp/src/dsp/enc.c +++ b/src/3rdparty/libwebp/src/dsp/enc.c @@ -40,10 +40,27 @@ const int VP8DspScan[16 + 4 + 4] = { 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V }; +// general-purpose util function +void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], + VP8Histogram* const histo) { + int max_value = 0, last_non_zero = 1; + int k; + for (k = 0; k <= MAX_COEFF_THRESH; ++k) { + const int value = distribution[k]; + if (value > 0) { + if (value > max_value) max_value = value; + last_non_zero = k; + } + } + histo->max_value = max_value; + histo->last_non_zero = last_non_zero; +} + static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo) { int j; + int distribution[MAX_COEFF_THRESH + 1] = { 0 }; for (j = start_block; j < end_block; ++j) { int k; int16_t out[16]; @@ -54,9 +71,10 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, for (k = 0; k < 16; ++k) { const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? const int clipped_value = clip_max(v, MAX_COEFF_THRESH); - histo->distribution[clipped_value]++; + ++distribution[clipped_value]; } } + VP8SetHistogramData(distribution, histo); } //------------------------------------------------------------------------------ @@ -68,7 +86,7 @@ static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] // and make sure it's set to true _last_ (so as to be thread-safe) static volatile int tables_ok = 0; -static void InitTables(void) { +static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) { if (!tables_ok) { int i; for (i = -255; i <= 255 + 255; ++i) { @@ -159,6 +177,11 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { } } +static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) { + VP8FTransform(src, ref, out); + VP8FTransform(src + 4, ref + 4, out + 16); +} + static void FTransformWHT(const int16_t* in, int16_t* out) { // input is 12b signed int32_t tmp[16]; @@ -195,8 +218,6 @@ static void FTransformWHT(const int16_t* in, int16_t* out) { //------------------------------------------------------------------------------ // Intra predictions -#define DST(x, y) dst[(x) + (y) * BPS] - static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { int j; for (j = 0; j < size; ++j) { @@ -207,7 +228,7 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { static WEBP_INLINE void VerticalPred(uint8_t* dst, const uint8_t* top, int size) { int j; - if (top) { + if (top != NULL) { for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); } else { Fill(dst, 127, size); @@ -216,7 +237,7 @@ static WEBP_INLINE void VerticalPred(uint8_t* dst, static WEBP_INLINE void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) { - if (left) { + if (left != NULL) { int j; for (j = 0; j < size; ++j) { memset(dst + j * BPS, left[j], size); @@ -229,8 +250,8 @@ static WEBP_INLINE void HorizontalPred(uint8_t* dst, static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, const uint8_t* top, int size) { int y; - if (left) { - if (top) { + if (left != NULL) { + if (top != NULL) { const uint8_t* const clip = clip1 + 255 - left[-1]; for (y = 0; y < size; ++y) { const uint8_t* const clip_table = clip + left[y]; @@ -248,7 +269,7 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, // is equivalent to VE prediction where you just copy the top samples. // Note that if top samples are not available, the default value is // then 129, and not 127 as in the VerticalPred case. - if (top) { + if (top != NULL) { VerticalPred(dst, top, size); } else { Fill(dst, 129, size); @@ -261,15 +282,15 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, int size, int round, int shift) { int DC = 0; int j; - if (top) { + if (top != NULL) { for (j = 0; j < size; ++j) DC += top[j]; - if (left) { // top and left present + if (left != NULL) { // top and left present for (j = 0; j < size; ++j) DC += left[j]; } else { // top, but no left DC += DC; } DC = (DC + round) >> shift; - } else if (left) { // left but no top + } else if (left != NULL) { // left but no top for (j = 0; j < size; ++j) DC += left[j]; DC += DC; DC = (DC + round) >> shift; @@ -291,8 +312,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, TrueMotion(C8TM8 + dst, left, top, 8); // V block dst += 8; - if (top) top += 8; - if (left) left += 16; + if (top != NULL) top += 8; + if (left != NULL) left += 16; DCMode(C8DC8 + dst, left, top, 8, 8, 4); VerticalPred(C8VE8 + dst, top, 8); HorizontalPred(C8HE8 + dst, left, 8); @@ -313,6 +334,7 @@ static void Intra16Preds(uint8_t* dst, //------------------------------------------------------------------------------ // luma 4x4 prediction +#define DST(x, y) dst[(x) + (y) * BPS] #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) #define AVG2(a, b) (((a) + (b) + 1) >> 1) @@ -335,10 +357,10 @@ static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal const int J = top[-3]; const int K = top[-4]; const int L = top[-5]; - *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J); - *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K); - *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L); - *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L); + WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(X, I, J)); + WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(I, J, K)); + WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(J, K, L)); + WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L)); } static void DC4(uint8_t* dst, const uint8_t* top) { @@ -625,6 +647,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], return (last >= 0); } +static int Quantize2Blocks(int16_t in[32], int16_t out[32], + const VP8Matrix* const mtx) { + int nz; + nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; + nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; + return nz; +} + static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) { int n, last = -1; @@ -654,16 +684,22 @@ static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], //------------------------------------------------------------------------------ // Block copy -static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { +static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) { int y; - for (y = 0; y < size; ++y) { - memcpy(dst, src, size); + for (y = 0; y < h; ++y) { + memcpy(dst, src, w); src += BPS; dst += BPS; } } -static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } +static void Copy4x4(const uint8_t* src, uint8_t* dst) { + Copy(src, dst, 4, 4); +} + +static void Copy16x8(const uint8_t* src, uint8_t* dst) { + Copy(src, dst, 16, 8); +} //------------------------------------------------------------------------------ // Initialization @@ -673,6 +709,7 @@ static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } VP8CHisto VP8CollectHistogram; VP8Idct VP8ITransform; VP8Fdct VP8FTransform; +VP8Fdct VP8FTransform2; VP8WHT VP8FTransformWHT; VP8Intra4Preds VP8EncPredLuma4; VP8IntraPreds VP8EncPredLuma16; @@ -684,18 +721,22 @@ VP8Metric VP8SSE4x4; VP8WMetric VP8TDisto4x4; VP8WMetric VP8TDisto16x16; VP8QuantizeBlock VP8EncQuantizeBlock; +VP8Quantize2Blocks VP8EncQuantize2Blocks; VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; VP8BlockCopy VP8Copy4x4; +VP8BlockCopy VP8Copy16x8; extern void VP8EncDspInitSSE2(void); +extern void VP8EncDspInitSSE41(void); extern void VP8EncDspInitAVX2(void); extern void VP8EncDspInitNEON(void); extern void VP8EncDspInitMIPS32(void); +extern void VP8EncDspInitMIPSdspR2(void); static volatile VP8CPUInfo enc_last_cpuinfo_used = (VP8CPUInfo)&enc_last_cpuinfo_used; -void VP8EncDspInit(void) { +WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; VP8DspInit(); // common inverse transforms @@ -705,6 +746,7 @@ void VP8EncDspInit(void) { VP8CollectHistogram = CollectHistogram; VP8ITransform = ITransform; VP8FTransform = FTransform; + VP8FTransform2 = FTransform2; VP8FTransformWHT = FTransformWHT; VP8EncPredLuma4 = Intra4Preds; VP8EncPredLuma16 = Intra16Preds; @@ -716,14 +758,21 @@ void VP8EncDspInit(void) { VP8TDisto4x4 = Disto4x4; VP8TDisto16x16 = Disto16x16; VP8EncQuantizeBlock = QuantizeBlock; + VP8EncQuantize2Blocks = Quantize2Blocks; VP8EncQuantizeBlockWHT = QuantizeBlockWHT; VP8Copy4x4 = Copy4x4; + VP8Copy16x8 = Copy16x8; // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8EncDspInitSSE2(); +#if defined(WEBP_USE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + VP8EncDspInitSSE41(); + } +#endif } #endif #if defined(WEBP_USE_AVX2) @@ -741,7 +790,11 @@ void VP8EncDspInit(void) { VP8EncDspInitMIPS32(); } #endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8EncDspInitMIPSdspR2(); + } +#endif } enc_last_cpuinfo_used = VP8GetCPUInfo; } - |