summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaryla <maryla@google.com>2022-05-17 15:02:31 +0200
committerMaryla <maryla@google.com>2022-05-23 20:48:44 +0200
commit93c54371156990f326307fc028b6b8249b52c735 (patch)
tree60658ad73a2b6c9cff6dfd60a73f0ae61e648b41
parentd3006f4b968192e0ba4006f057ed79189381b1ed (diff)
downloadlibwebp-93c54371156990f326307fc028b6b8249b52c735.tar.gz
sharpyuv: add support for 10/12/16 bit rgb and 10/12 bit yuv.
10bit+ input is truncated to 10bits for now. Change-Id: I7ac00ca54c623d94c76ccd8954418e11095997d2
-rw-r--r--sharpyuv/sharpyuv.c399
-rw-r--r--sharpyuv/sharpyuv.h36
-rw-r--r--sharpyuv/sharpyuv_csp.c15
-rw-r--r--sharpyuv/sharpyuv_csp.h2
-rw-r--r--sharpyuv/sharpyuv_dsp.c25
-rw-r--r--sharpyuv/sharpyuv_dsp.h5
-rw-r--r--sharpyuv/sharpyuv_neon.c23
-rw-r--r--sharpyuv/sharpyuv_sse2.c23
-rw-r--r--src/enc/picture_csp_enc.c8
9 files changed, 347 insertions, 189 deletions
diff --git a/sharpyuv/sharpyuv.c b/sharpyuv/sharpyuv.c
index 302a4f23..84de6b0a 100644
--- a/sharpyuv/sharpyuv.c
+++ b/sharpyuv/sharpyuv.c
@@ -31,131 +31,173 @@ static const int kMinDimensionIterativeConversion = 4;
#define YUV_FIX 16 // fixed-point precision for RGB->YUV
static const int kYuvHalf = 1 << (YUV_FIX - 1);
-// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
-// banding sometimes. Better use extra precision.
-#define SFIX 2 // fixed-point precision of RGB and Y/W
-#define MAX_Y_T ((256 << SFIX) - 1)
-typedef int16_t fixed_t; // signed type with extra SFIX precision for UV
-typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W
+// Max bit depth so that intermediate calculations fit in 16 bits.
+// TODO(b/194336375): the C code can handle up to 14 bits, but the SIMD code
+// currently needs more room.
+static const int kMaxBitDepth = 10;
+
+// Returns the precision shift to use based on the input rgb_bit_depth.
+static int GetPrecisionShift(int rgb_bit_depth) {
+ // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
+ // bits if needed.
+ return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
+ : (kMaxBitDepth - rgb_bit_depth);
+}
-static const int kYuvRounder = (1 << (YUV_FIX + SFIX - 1));
+typedef int16_t fixed_t; // signed type with extra precision for UV
+typedef uint16_t fixed_y_t; // unsigned type with extra precision for W
//------------------------------------------------------------------------------
// Code for gamma correction
// Gamma correction compensates loss of resolution during chroma subsampling.
-static const double kGammaF = 1./0.45;
-#define GAMMA_TAB_FIX 8
-#define GAMMA_TAB_SIZE (1 << GAMMA_TAB_FIX)
-static uint32_t kLinearToGammaTabS[GAMMA_TAB_SIZE + 2];
+// Size of pre-computed table for converting from gamma to linear.
+#define GAMMA_TO_LINEAR_TAB_BITS 10
+#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
+static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
+// Size of pre-computed table for converting from linear to gamma.
+#define LINEAR_TO_GAMMA_TAB_BITS 8
+#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
+static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
+
+static const double kGammaF = 1. / 0.45;
#define GAMMA_TO_LINEAR_BITS 14
-static const int kGammaToLinearHalf = 1 << (GAMMA_TO_LINEAR_BITS - 1);
-static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX
-static volatile int kGammaTablesSOk = 0;
+static volatile int kGammaTablesSOk = 0;
static void InitGammaTablesS(void) {
assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values
if (!kGammaTablesSOk) {
int v;
- const double norm = 1. / MAX_Y_T;
- const double scale = 1. / GAMMA_TAB_SIZE;
const double a = 0.09929682680944;
const double thresh = 0.018053968510807;
- const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
- for (v = 0; v <= MAX_Y_T; ++v) {
- const double g = norm * v;
- double value;
- if (g <= thresh * 4.5) {
- value = g / 4.5;
- } else {
- const double a_rec = 1. / (1. + a);
- value = pow(a_rec * (g + a), kGammaF);
+ // Precompute gamma to linear table.
+ {
+ const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
+ const double a_rec = 1. / (1. + a);
+ const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
+ for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
+ const double g = norm * v;
+ double value;
+ if (g <= thresh * 4.5) {
+ value = g / 4.5;
+ } else {
+ value = pow(a_rec * (g + a), kGammaF);
+ }
+ kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
}
- kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
+ // to prevent small rounding errors to cause read-overflow:
+ kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
+ kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
}
- for (v = 0; v <= GAMMA_TAB_SIZE; ++v) {
- const double g = scale * v;
- double value;
- if (g <= thresh) {
- value = 4.5 * g;
- } else {
- value = (1. + a) * pow(g, 1. / kGammaF) - a;
+ // Precompute linear to gamma table.
+ {
+ const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
+ for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
+ const double g = scale * v;
+ double value;
+ if (g <= thresh) {
+ value = 4.5 * g;
+ } else {
+ value = (1. + a) * pow(g, 1. / kGammaF) - a;
+ }
+ kLinearToGammaTabS[v] =
+ (uint32_t)(GAMMA_TO_LINEAR_TAB_SIZE * value + 0.5);
}
- kLinearToGammaTabS[v] = (uint32_t)(MAX_Y_T * value + 0.5);
+ // to prevent small rounding errors to cause read-overflow:
+ kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
+ kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
}
- // to prevent small rounding errors to cause read-overflow:
- kLinearToGammaTabS[GAMMA_TAB_SIZE + 1] = kLinearToGammaTabS[GAMMA_TAB_SIZE];
kGammaTablesSOk = 1;
}
}
-// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
-static WEBP_INLINE uint32_t GammaToLinearS(int v) {
- return kGammaToLinearTabS[v];
-}
-
-static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
- // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
- const uint32_t v = value * GAMMA_TAB_SIZE;
- const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
- // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
- const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part
- // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
- const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
- const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
+static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
+ int tab_pos_shift,
+ int tab_value_shift) {
+ const uint32_t tab_pos = v >> tab_pos_shift;
+ // fractional part, in 'tab_pos_shift' fixed-point precision
+ const uint32_t x = v - (tab_pos << tab_pos_shift); // fractional part
+ // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1])
+ const uint32_t v0 = tab[tab_pos + 0] << tab_value_shift;
+ const uint32_t v1 = tab[tab_pos + 1] << tab_value_shift;
// Final interpolation.
- const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0.
- const uint32_t result =
- v0 + ((v2 + kGammaToLinearHalf) >> GAMMA_TO_LINEAR_BITS);
+ const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0.
+ const int half = (tab_pos_shift > 0) ? 1 << (tab_pos_shift - 1) : 0;
+ const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift);
return result;
}
+static WEBP_INLINE uint32_t GammaToLinear(int v, int bit_depth) {
+ const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
+ if (shift > 0) {
+ return kGammaToLinearTabS[v << shift];
+ }
+ return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
+}
+
+static WEBP_INLINE uint32_t LinearToGamma(uint32_t value, int bit_depth) {
+ const uint32_t v = value << LINEAR_TO_GAMMA_TAB_BITS;
+ return FixedPointInterpolation(v, kLinearToGammaTabS, GAMMA_TO_LINEAR_BITS,
+ bit_depth - GAMMA_TO_LINEAR_TAB_BITS);
+}
+
//------------------------------------------------------------------------------
static uint8_t clip_8b(fixed_t v) {
return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
}
-static fixed_y_t clip_y(int y) {
- return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
+static uint16_t clip(fixed_t v, int max) {
+ return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
+}
+
+static fixed_y_t clip_bit_depth(int y, int bit_depth) {
+ const int max = (1 << bit_depth) - 1;
+ return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
}
//------------------------------------------------------------------------------
-static int RGBToGray(int r, int g, int b) {
- const int luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
- return (luma >> YUV_FIX);
+static int RGBToGray(int64_t r, int64_t g, int64_t b) {
+ const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
+ return (int)(luma >> YUV_FIX);
}
-static uint32_t ScaleDown(int a, int b, int c, int d) {
- const uint32_t A = GammaToLinearS(a);
- const uint32_t B = GammaToLinearS(b);
- const uint32_t C = GammaToLinearS(c);
- const uint32_t D = GammaToLinearS(d);
- return LinearToGammaS((A + B + C + D + 2) >> 2);
+static uint32_t ScaleDown(int a, int b, int c, int d, int rgb_bit_depth) {
+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
+ const uint32_t A = GammaToLinear(a, bit_depth);
+ const uint32_t B = GammaToLinear(b, bit_depth);
+ const uint32_t C = GammaToLinear(c, bit_depth);
+ const uint32_t D = GammaToLinear(d, bit_depth);
+ return LinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
}
-static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
+ int rgb_bit_depth) {
+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
int i;
for (i = 0; i < w; ++i) {
- const uint32_t R = GammaToLinearS(src[0 * w + i]);
- const uint32_t G = GammaToLinearS(src[1 * w + i]);
- const uint32_t B = GammaToLinearS(src[2 * w + i]);
+ const uint32_t R = GammaToLinear(src[0 * w + i], bit_depth);
+ const uint32_t G = GammaToLinear(src[1 * w + i], bit_depth);
+ const uint32_t B = GammaToLinear(src[2 * w + i], bit_depth);
const uint32_t Y = RGBToGray(R, G, B);
- dst[i] = (fixed_y_t)LinearToGammaS(Y);
+ dst[i] = (fixed_y_t)LinearToGamma(Y, bit_depth);
}
}
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
- fixed_t* dst, int uv_w) {
+ fixed_t* dst, int uv_w, int rgb_bit_depth) {
int i;
for (i = 0; i < uv_w; ++i) {
- const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1],
- src2[0 * uv_w + 0], src2[0 * uv_w + 1]);
- const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1],
- src2[2 * uv_w + 0], src2[2 * uv_w + 1]);
- const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1],
- src2[4 * uv_w + 0], src2[4 * uv_w + 1]);
+ const int r =
+ ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
+ src2[0 * uv_w + 1], rgb_bit_depth);
+ const int g =
+ ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
+ src2[2 * uv_w + 1], rgb_bit_depth);
+ const int b =
+ ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
+ src2[4 * uv_w + 1], rgb_bit_depth);
const int W = RGBToGray(r, g, b);
dst[0 * uv_w] = (fixed_t)(r - W);
dst[1 * uv_w] = (fixed_t)(g - W);
@@ -176,30 +218,50 @@ static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
//------------------------------------------------------------------------------
-static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
+static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
const int v0 = (A * 3 + B + 2) >> 2;
- return clip_y(v0 + W0);
+ return clip_bit_depth(v0 + W0, bit_depth);
}
//------------------------------------------------------------------------------
-static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX
- return ((fixed_y_t)a << SFIX);
+static WEBP_INLINE int Shift(int v, int shift) {
+ return (shift >= 0) ? (v << shift) : (v >> -shift);
+}
+
+static WEBP_INLINE fixed_y_t ChangePrecision(uint16_t a, int shift) {
+ if (shift == 0) return a;
+ if (shift < 0) {
+ const int rounding = 1 << (-shift - 1);
+ return (a + rounding) >> -shift;
+ }
+ return ((fixed_y_t)a << shift);
}
static void ImportOneRow(const uint8_t* const r_ptr,
const uint8_t* const g_ptr,
const uint8_t* const b_ptr,
- int step,
+ int rgb_step,
+ int rgb_bit_depth,
int pic_width,
fixed_y_t* const dst) {
+ // Convert the rgb_step from a number of bytes to a number of uint8_t or
+ // uint16_t values depending the bit depth.
+ const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
int i;
const int w = (pic_width + 1) & ~1;
for (i = 0; i < pic_width; ++i) {
const int off = i * step;
- dst[i + 0 * w] = UpLift(r_ptr[off]);
- dst[i + 1 * w] = UpLift(g_ptr[off]);
- dst[i + 2 * w] = UpLift(b_ptr[off]);
+ const int shift = GetPrecisionShift(rgb_bit_depth);
+ if (rgb_bit_depth == 8) {
+ dst[i + 0 * w] = ChangePrecision(r_ptr[off], shift);
+ dst[i + 1 * w] = ChangePrecision(g_ptr[off], shift);
+ dst[i + 2 * w] = ChangePrecision(b_ptr[off], shift);
+ } else {
+ dst[i + 0 * w] = ChangePrecision(((uint16_t*)r_ptr)[off], shift);
+ dst[i + 1 * w] = ChangePrecision(((uint16_t*)g_ptr)[off], shift);
+ dst[i + 2 * w] = ChangePrecision(((uint16_t*)b_ptr)[off], shift);
+ }
}
if (pic_width & 1) { // replicate rightmost pixel
dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
@@ -214,24 +276,28 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
const fixed_t* next_uv,
int w,
fixed_y_t* out1,
- fixed_y_t* out2) {
+ fixed_y_t* out2,
+ int rgb_bit_depth) {
const int uv_w = w >> 1;
const int len = (w - 1) >> 1; // length to filter
int k = 3;
+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
while (k-- > 0) { // process each R/G/B segments in turn
// special boundary case for i==0
- out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
- out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);
+ out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
+ out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
- SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
- SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);
+ SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
+ bit_depth);
+ SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
+ bit_depth);
// special boundary case for i == w - 1 when w is even
if (!(w & 1)) {
out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
- best_y[w - 1 + 0]);
+ best_y[w - 1 + 0], bit_depth);
out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
- best_y[w - 1 + w]);
+ best_y[w - 1 + w], bit_depth);
}
out1 += w;
out2 += w;
@@ -241,17 +307,19 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
}
}
-static WEBP_INLINE uint8_t RGBToYUVComponent(int r, int g, int b,
- const int coeffs[4]) {
+static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
+ const int coeffs[4], int sfix) {
+ const int srounder = 1 << (YUV_FIX + sfix - 1);
const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
- (coeffs[3] << SFIX) + kYuvRounder;
- return clip_8b((luma >> (YUV_FIX + SFIX)));
+ coeffs[3] + srounder;
+ return (luma >> (YUV_FIX + sfix));
}
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
- uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
- int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
- int width, int height,
+ uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
+ int u_stride, uint8_t* v_ptr, int v_stride,
+ int rgb_bit_depth,
+ int yuv_bit_depth, int width, int height,
const SharpYuvConversionMatrix* yuv_matrix) {
int i, j;
const fixed_t* const best_uv_base = best_uv;
@@ -259,6 +327,9 @@ static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
const int h = (height + 1) & ~1;
const int uv_w = w >> 1;
const int uv_h = h >> 1;
+ const int sfix = GetPrecisionShift(rgb_bit_depth);
+ const int yuv_max = (1 << yuv_bit_depth) - 1;
+
for (best_uv = best_uv_base, j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
const int off = (i >> 1);
@@ -266,24 +337,38 @@ static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
const int r = best_uv[off + 0 * uv_w] + W;
const int g = best_uv[off + 1 * uv_w] + W;
const int b = best_uv[off + 2 * uv_w] + W;
- dst_y[i] = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y);
+ const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
+ if (yuv_bit_depth <= 8) {
+ y_ptr[i] = clip_8b(y);
+ } else {
+ ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
+ }
}
best_y += w;
best_uv += (j & 1) * 3 * uv_w;
- dst_y += dst_stride_y;
+ y_ptr += y_stride;
}
for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
for (i = 0; i < uv_w; ++i) {
const int off = i;
+ // Note r, g and b values here are off by W, but a constant offset on all
+ // 3 components doesn't change the value of u and v with a YCbCr matrix.
const int r = best_uv[off + 0 * uv_w];
const int g = best_uv[off + 1 * uv_w];
const int b = best_uv[off + 2 * uv_w];
- dst_u[i] = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u);
- dst_v[i] = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v);
+ const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
+ const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
+ if (yuv_bit_depth <= 8) {
+ u_ptr[i] = clip_8b(u);
+ v_ptr[i] = clip_8b(v);
+ } else {
+ ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
+ ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
+ }
}
best_uv += 3 * uv_w;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
+ u_ptr += u_stride;
+ v_ptr += v_stride;
}
return 1;
}
@@ -300,10 +385,11 @@ static void* SafeMalloc(uint64_t nmemb, size_t size) {
#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
- const uint8_t* b_ptr, int step, int rgb_stride,
- uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
- int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
- int width, int height,
+ const uint8_t* b_ptr, int rgb_step, int rgb_stride,
+ int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
+ uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
+ int v_stride, int yuv_bit_depth, int width,
+ int height,
const SharpYuvConversionMatrix* yuv_matrix) {
// we expand the right/bottom border if needed
const int w = (width + 1) & ~1;
@@ -344,19 +430,20 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
fixed_y_t* const src2 = tmp_buffer + 3 * w;
// prepare two rows of input
- ImportOneRow(r_ptr, g_ptr, b_ptr, step, width, src1);
+ ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
+ src1);
if (!is_last_row) {
ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
- step, width, src2);
+ rgb_step, rgb_bit_depth, width, src2);
} else {
memcpy(src2, src1, 3 * w * sizeof(*src2));
}
StoreGray(src1, best_y + 0, w);
StoreGray(src2, best_y + w, w);
- UpdateW(src1, target_y, w);
- UpdateW(src2, target_y + w, w);
- UpdateChroma(src1, src2, target_uv, uv_w);
+ UpdateW(src1, target_y, w, rgb_bit_depth);
+ UpdateW(src2, target_y + w, w, rgb_bit_depth);
+ UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth);
memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
best_y += 2 * w;
best_uv += 3 * uv_w;
@@ -382,17 +469,20 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
fixed_y_t* const src2 = tmp_buffer + 3 * w;
{
const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
- InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
+ InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
+ src1, src2, rgb_bit_depth);
prev_uv = cur_uv;
cur_uv = next_uv;
}
- UpdateW(src1, best_rgb_y + 0 * w, w);
- UpdateW(src2, best_rgb_y + 1 * w, w);
- UpdateChroma(src1, src2, best_rgb_uv, uv_w);
+ UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth);
+ UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth);
+ UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth);
// update two rows of Y and one row of RGB
- diff_y_sum += SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w);
+ diff_y_sum +=
+ SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
+ rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
best_y += 2 * w;
@@ -407,10 +497,11 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
}
prev_diff_y_sum = diff_y_sum;
}
+
// final reconstruction
- ok = ConvertWRGBToYUV(best_y_base, best_uv_base, dst_y, dst_stride_y, dst_u,
- dst_stride_u, dst_v, dst_stride_v, width, height,
- yuv_matrix);
+ ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
+ u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
+ width, height, yuv_matrix);
End:
free(best_y_base);
@@ -444,20 +535,66 @@ void SharpYuvInit(VP8CPUInfo cpu_info_func) {
sharpyuv_last_cpuinfo_used = cpu_info_func;
}
-int SharpYuvConvert(const uint8_t* r_ptr, const uint8_t* g_ptr,
- const uint8_t* b_ptr, int step, int rgb_stride,
- uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
- int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
- int width, int height,
- const SharpYuvConversionMatrix* yuv_matrix) {
+int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
+ const void* b_ptr, int rgb_step, int rgb_stride,
+ int rgb_bit_depth, void* y_ptr, int y_stride,
+ void* u_ptr, int u_stride, void* v_ptr,
+ int v_stride, int yuv_bit_depth, int width,
+ int height, const SharpYuvConversionMatrix* yuv_matrix) {
+ SharpYuvConversionMatrix scaled_matrix;
+ const int rgb_max = (1 << rgb_bit_depth) - 1;
+ const int rgb_round = 1 << (rgb_bit_depth - 1);
+ const int yuv_max = (1 << yuv_bit_depth) - 1;
+ const int sfix = GetPrecisionShift(rgb_bit_depth);
+
if (width < kMinDimensionIterativeConversion ||
- height < kMinDimensionIterativeConversion) {
+ height < kMinDimensionIterativeConversion ||
+ r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
+ u_ptr == NULL || v_ptr == NULL) {
+ return 0;
+ }
+ if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
+ rgb_bit_depth != 16) {
+ return 0;
+ }
+ if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
+ return 0;
+ }
+ if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
+ // Step/stride should be even for uint16_t buffers.
+ return 0;
+ }
+ if (yuv_bit_depth > 8 &&
+ (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
+ // Stride should be even for uint16_t buffers.
return 0;
}
SharpYuvInit(NULL);
- return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, step, rgb_stride, dst_y,
- dst_stride_y, dst_u, dst_stride_u, dst_v,
- dst_stride_v, width, height, yuv_matrix);
+
+ // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
+ // rgb->yuv conversion matrix.
+ if (rgb_bit_depth == yuv_bit_depth) {
+ memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
+ } else {
+ int i;
+ for (i = 0; i < 3; ++i) {
+ scaled_matrix.rgb_to_y[i] =
+ (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
+ scaled_matrix.rgb_to_u[i] =
+ (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
+ scaled_matrix.rgb_to_v[i] =
+ (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
+ }
+ }
+ // Also incorporate precision change scaling.
+ scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
+ scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
+ scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
+
+ return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
+ rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
+ v_ptr, v_stride, yuv_bit_depth, width, height,
+ &scaled_matrix);
}
//------------------------------------------------------------------------------
diff --git a/sharpyuv/sharpyuv.h b/sharpyuv/sharpyuv.h
index d0ef379f..70f9b998 100644
--- a/sharpyuv/sharpyuv.h
+++ b/sharpyuv/sharpyuv.h
@@ -35,15 +35,33 @@ typedef struct {
// Assumes that the image will be upsampled using a bilinear filter. If nearest
// neighbor is used instead, the upsampled image might look worse than with
// standard downsampling.
-// TODO(maryla): add 10 bits support. Add YUV444 to YUV420 conversion.
-// Maybe also add 422 support (it's rarely used in practice, especially for
-// images).
-int SharpYuvConvert(const uint8_t* r_ptr, const uint8_t* g_ptr,
- const uint8_t* b_ptr, int step, int rgb_stride,
- uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
- int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
- int width, int height,
- const SharpYuvConversionMatrix* yuv_matrix);
+// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point
+// to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise.
+// rgb_step: distance in bytes between two horizontally adjacent pixels on the
+// r, g and b channels. If rgb_bit_depth is > 8, it should be a
+// multiple of 2.
+// rgb_stride: distance in bytes between two vertically adjacent pixels on the
+// r, g, and b channels. If rgb_bit_depth is > 8, it should be a
+// multiple of 2.
+// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16.
+// Note: for 10+ bit, input is truncated to 10 bits.
+// TODO(b/194336375): increase precision.
+// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12.
+// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels. Should
+// point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers
+// otherwise.
+// y_stride, u_stride, v_stride: distance in bytes between two vertically
+// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
+// should be multiples of 2.
+// width, height: width and height of the image in pixels
+int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
+ int rgb_step, int rgb_stride, int rgb_bit_depth,
+ void* y_ptr, int y_stride, void* u_ptr, int u_stride,
+ void* v_ptr, int v_stride, int yuv_bit_depth, int width,
+ int height, const SharpYuvConversionMatrix* yuv_matrix);
+
+// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
+// support (it's rarely used in practice, especially for images).
#ifdef __cplusplus
} // extern "C"
diff --git a/sharpyuv/sharpyuv_csp.c b/sharpyuv/sharpyuv_csp.c
index 4dc8142c..5334fa64 100644
--- a/sharpyuv/sharpyuv_csp.c
+++ b/sharpyuv/sharpyuv_csp.c
@@ -15,7 +15,7 @@
#include <math.h>
#include <string.h>
-static int ToFixed16(float f) { return (int)round(f * (1 << 16)); }
+static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
SharpYuvConversionMatrix* matrix) {
@@ -25,28 +25,27 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
const float cr = 0.5f / (1.0f - kb);
const float cb = 0.5f / (1.0f - kr);
- const int shift = yuv_color_space->bits - 8;
+ const int shift = yuv_color_space->bit_depth - 8;
- const float denom = (float)((1 << yuv_color_space->bits) - 1);
+ const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
float scale_y = 1.0f;
- float addY = 0.0f;
+ float add_y = 0.0f;
float scale_u = cr;
float scale_v = cb;
float add_uv = (float)(128 << shift);
-
- assert(yuv_color_space->bits >= 8);
+ assert(yuv_color_space->bit_depth >= 8);
if (yuv_color_space->range == kSharpYuvRangeLimited) {
scale_y *= (219 << shift) / denom;
scale_u *= (224 << shift) / denom;
scale_v *= (224 << shift) / denom;
- addY = (float)(16 << shift);
+ add_y = (float)(16 << shift);
}
matrix->rgb_to_y[0] = ToFixed16(kr * scale_y);
matrix->rgb_to_y[1] = ToFixed16(kg * scale_y);
matrix->rgb_to_y[2] = ToFixed16(kb * scale_y);
- matrix->rgb_to_y[3] = ToFixed16(addY);
+ matrix->rgb_to_y[3] = ToFixed16(add_y);
matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u);
matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u);
diff --git a/sharpyuv/sharpyuv_csp.h b/sharpyuv/sharpyuv_csp.h
index 153c1156..63c99ef5 100644
--- a/sharpyuv/sharpyuv_csp.h
+++ b/sharpyuv/sharpyuv_csp.h
@@ -30,7 +30,7 @@ typedef struct {
// Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb.
float kr;
float kb;
- int bits; // Only 8 bit is supported by SharpYuvConvert.
+ int bit_depth; // 8, 10 or 12
SharpYuvRange range;
} SharpYuvColorSpace;
diff --git a/sharpyuv/sharpyuv_dsp.c b/sharpyuv/sharpyuv_dsp.c
index 93f1f01e..956fa7ce 100644
--- a/sharpyuv/sharpyuv_dsp.c
+++ b/sharpyuv/sharpyuv_dsp.c
@@ -21,19 +21,19 @@
//-----------------------------------------------------------------------------
#if !WEBP_NEON_OMIT_C_CODE
-#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+static uint16_t clip(int v, int max) {
+ return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}
static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src,
- uint16_t* dst, int len) {
+ uint16_t* dst, int len, int bit_depth) {
uint64_t diff = 0;
int i;
+ const int max_y = (1 << bit_depth) - 1;
for (i = 0; i < len; ++i) {
const int diff_y = ref[i] - src[i];
const int new_y = (int)dst[i] + diff_y;
- dst[i] = clip_y(new_y);
+ dst[i] = clip(new_y, max_y);
diff += (uint64_t)abs(diff_y);
}
return diff;
@@ -49,27 +49,28 @@ static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src,
}
static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out) {
+ const uint16_t* best_y, uint16_t* out,
+ int bit_depth) {
int i;
+ const int max_y = (1 << bit_depth) - 1;
for (i = 0; i < len; ++i, ++A, ++B) {
const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
- out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
- out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
+ out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y);
+ out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
-#undef MAX_Y
-
//-----------------------------------------------------------------------------
uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
- uint16_t* dst, int len);
+ uint16_t* dst, int len, int bit_depth);
void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst,
int len);
void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out);
+ const uint16_t* best_y, uint16_t* out,
+ int bit_depth);
extern void InitSharpYuvSSE2(void);
extern void InitSharpYuvNEON(void);
diff --git a/sharpyuv/sharpyuv_dsp.h b/sharpyuv/sharpyuv_dsp.h
index 5cacd27c..e561d8d3 100644
--- a/sharpyuv/sharpyuv_dsp.h
+++ b/sharpyuv/sharpyuv_dsp.h
@@ -17,11 +17,12 @@
#include "src/dsp/cpu.h"
extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
- uint16_t* dst, int len);
+ uint16_t* dst, int len, int bit_depth);
extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref,
int16_t* dst, int len);
extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out);
+ const uint16_t* best_y, uint16_t* out,
+ int bit_depth);
void SharpYuvInitDsp(VP8CPUInfo cpu_info_func);
diff --git a/sharpyuv/sharpyuv_neon.c b/sharpyuv/sharpyuv_neon.c
index d9ff24af..e15ec8a3 100644
--- a/sharpyuv/sharpyuv_neon.c
+++ b/sharpyuv/sharpyuv_neon.c
@@ -23,16 +23,16 @@ extern void InitSharpYuvNEON(void);
#if defined(WEBP_USE_NEON)
-#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
-static uint16_t clip_y_NEON(int v) {
- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+static uint16_t clip_NEON(int v, int max) {
+ return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}
static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
- uint16_t* dst, int len) {
+ uint16_t* dst, int len, int bit_depth) {
+ const int max_y = (1 << bit_depth) - 1;
int i;
const int16x8_t zero = vdupq_n_s16(0);
- const int16x8_t max = vdupq_n_s16(MAX_Y);
+ const int16x8_t max = vdupq_n_s16(max_y);
uint64x2_t sum = vdupq_n_u64(0);
uint64_t diff;
@@ -52,7 +52,7 @@ static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
for (; i < len; ++i) {
const int diff_y = ref[i] - src[i];
const int new_y = (int)(dst[i]) + diff_y;
- dst[i] = clip_y_NEON(new_y);
+ dst[i] = clip_NEON(new_y, max_y);
diff += (uint64_t)(abs(diff_y));
}
return diff;
@@ -76,9 +76,11 @@ static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
}
static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out) {
+ const uint16_t* best_y, uint16_t* out,
+ int bit_depth) {
+ const int max_y = (1 << bit_depth) - 1;
int i;
- const int16x8_t max = vdupq_n_s16(MAX_Y);
+ const int16x8_t max = vdupq_n_s16(max_y);
const int16x8_t zero = vdupq_n_s16(0);
for (i = 0; i + 8 <= len; i += 8) {
const int16x8_t a0 = vld1q_s16(A + i + 0);
@@ -112,11 +114,10 @@ static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
const int a0a1b0b1 = a0b1 + a1b0 + 8;
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
- out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
- out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
+ out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
+ out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
}
}
-#undef MAX_Y
//------------------------------------------------------------------------------
diff --git a/sharpyuv/sharpyuv_sse2.c b/sharpyuv/sharpyuv_sse2.c
index b52a1910..cfa519dc 100644
--- a/sharpyuv/sharpyuv_sse2.c
+++ b/sharpyuv/sharpyuv_sse2.c
@@ -22,18 +22,18 @@ extern void InitSharpYuvSSE2(void);
#if defined(WEBP_USE_SSE2)
-#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
+static uint16_t clip_SSE2(int v, int max) {
+ return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}
static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
- uint16_t* dst, int len) {
+ uint16_t* dst, int len, int bit_depth) {
+ const int max_y = (1 << bit_depth) - 1;
uint64_t diff = 0;
uint32_t tmp[4];
int i;
const __m128i zero = _mm_setzero_si128();
- const __m128i max = _mm_set1_epi16(MAX_Y);
+ const __m128i max = _mm_set1_epi16(max_y);
const __m128i one = _mm_set1_epi16(1);
__m128i sum = zero;
@@ -55,7 +55,7 @@ static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
for (; i < len; ++i) {
const int diff_y = ref[i] - src[i];
const int new_y = (int)dst[i] + diff_y;
- dst[i] = clip_y(new_y);
+ dst[i] = clip_SSE2(new_y, max_y);
diff += (uint64_t)abs(diff_y);
}
return diff;
@@ -79,10 +79,12 @@ static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
}
static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out) {
+ const uint16_t* best_y, uint16_t* out,
+ int bit_depth) {
+ const int max_y = (1 << bit_depth) - 1;
int i;
const __m128i kCst8 = _mm_set1_epi16(8);
- const __m128i max = _mm_set1_epi16(MAX_Y);
+ const __m128i max = _mm_set1_epi16(max_y);
const __m128i zero = _mm_setzero_si128();
for (i = 0; i + 8 <= len; i += 8) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
@@ -121,11 +123,10 @@ static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
const int a0a1b0b1 = a0b1 + a1b0 + 8;
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
- out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
- out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
+ out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
+ out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
}
}
-#undef MAX_Y
//------------------------------------------------------------------------------
diff --git a/src/enc/picture_csp_enc.c b/src/enc/picture_csp_enc.c
index 08826331..5e60f5ba 100644
--- a/src/enc/picture_csp_enc.c
+++ b/src/enc/picture_csp_enc.c
@@ -191,10 +191,10 @@ static int PreprocessARGB(const uint8_t* r_ptr,
int step, int rgb_stride,
WebPPicture* const picture) {
const int ok = SharpYuvConvert(
- r_ptr, g_ptr, b_ptr, step, rgb_stride, picture->y, picture->y_stride,
- picture->u, picture->uv_stride, picture->v, picture->uv_stride,
- picture->width, picture->height,
- SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp));
+ r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8,
+ picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v,
+ picture->uv_stride, /*yuv_bit_depth=*/8, picture->width,
+ picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp));
if (!ok) {
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
}