1 files changed, 57 insertions, 27 deletions
diff --git a/src/3rdparty/libwebp/src/enc/quant.c b/src/3rdparty/libwebp/src/enc/quant.c
index dd6885a..549ad26 100644
--- a/src/3rdparty/libwebp/src/enc/quant.c
+++ b/src/3rdparty/libwebp/src/enc/quant.c
@@ -30,8 +30,6 @@
 #define SNS_TO_DQ 0.9     // Scaling constant between the sns value and the QP
                           // power-law modulation. Must be strictly less than 1.
 
-#define I4_PENALTY 14000  // Rate-penalty for quick i4/i16 decision
-
 // number of non-zero coeffs below which we consider the block very flat
 // (and apply a penalty to complex predictions)
 #define FLATNESS_LIMIT_I16 10      // I16 mode
@@ -236,6 +234,8 @@ static int ExpandMatrix(VP8Matrix* const m, int type) {
   return (sum + 8) >> 4;
 }
 
+static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; }
+
 static void SetupMatrices(VP8Encoder* enc) {
   int i;
   const int tlambda_scale =
@@ -245,7 +245,7 @@ static void SetupMatrices(VP8Encoder* enc) {
   for (i = 0; i < num_segments; ++i) {
     VP8SegmentInfo* const m = &enc->dqm_[i];
     const int q = m->quant_;
-    int q4, q16, quv;
+    int q_i4, q_i16, q_uv;
     m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
     m->y1_.q_[1] = kAcTable[clip(q,                  0, 127)];
 
@@ -255,21 +255,33 @@ static void SetupMatrices(VP8Encoder* enc) {
     m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
     m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
 
-    q4  = ExpandMatrix(&m->y1_, 0);
-    q16 = ExpandMatrix(&m->y2_, 1);
-    quv = ExpandMatrix(&m->uv_, 2);
-
-    m->lambda_i4_          = (3 * q4 * q4) >> 7;
-    m->lambda_i16_         = (3 * q16 * q16);
-    m->lambda_uv_          = (3 * quv * quv) >> 6;
-    m->lambda_mode_        = (1 * q4 * q4) >> 7;
-    m->lambda_trellis_i4_  = (7 * q4 * q4) >> 3;
-    m->lambda_trellis_i16_ = (q16 * q16) >> 2;
-    m->lambda_trellis_uv_  = (quv *quv) << 1;
-    m->tlambda_            = (tlambda_scale * q4) >> 5;
+    q_i4  = ExpandMatrix(&m->y1_, 0);
+    q_i16 = ExpandMatrix(&m->y2_, 1);
+    q_uv  = ExpandMatrix(&m->uv_, 2);
+
+    m->lambda_i4_          = (3 * q_i4 * q_i4) >> 7;
+    m->lambda_i16_         = (3 * q_i16 * q_i16);
+    m->lambda_uv_          = (3 * q_uv * q_uv) >> 6;
+    m->lambda_mode_        = (1 * q_i4 * q_i4) >> 7;
+    m->lambda_trellis_i4_  = (7 * q_i4 * q_i4) >> 3;
+    m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2;
+    m->lambda_trellis_uv_  = (q_uv * q_uv) << 1;
+    m->tlambda_            = (tlambda_scale * q_i4) >> 5;
+
+    // none of these constants should be < 1
+    CheckLambdaValue(&m->lambda_i4_);
+    CheckLambdaValue(&m->lambda_i16_);
+    CheckLambdaValue(&m->lambda_uv_);
+    CheckLambdaValue(&m->lambda_mode_);
+    CheckLambdaValue(&m->lambda_trellis_i4_);
+    CheckLambdaValue(&m->lambda_trellis_i16_);
+    CheckLambdaValue(&m->lambda_trellis_uv_);
+    CheckLambdaValue(&m->tlambda_);
 
     m->min_disto_ = 10 * m->y1_.q_[0];   // quantization-aware min disto
     m->max_edge_  = 0;
+
+    m->i4_penalty_ = 1000 * q_i4 * q_i4;
   }
 }
 
@@ -348,7 +360,12 @@ static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
 
 static void SimplifySegments(VP8Encoder* const enc) {
   int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
-  const int num_segments = enc->segment_hdr_.num_segments_;
+  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
+  // explicit check is needed to avoid a spurious warning about 'i' exceeding
+  // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9).
+  const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS)
+                               ? enc->segment_hdr_.num_segments_
+                               : NUM_MB_SEGMENTS;
   int num_final_segments = 1;
   int s1, s2;
   for (s1 = 1; s1 < num_segments; ++s1) {    // find similar segments
@@ -1127,19 +1144,29 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
                                   int try_both_modes, int refine_uv_mode,
                                   VP8ModeScore* const rd) {
   score_t best_score = MAX_COST;
-  score_t score_i4 = (score_t)I4_PENALTY;
-  int16_t tmp_levels[16][16];
-  uint8_t modes_i4[16];
   int nz = 0;
   int mode;
   int is_i16 = try_both_modes || (it->mb_->type_ == 1);
 
+  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
+  // Some empiric constants, of approximate order of magnitude.
+  const int lambda_d_i16 = 106;
+  const int lambda_d_i4 = 11;
+  const int lambda_d_uv = 120;
+  score_t score_i4 = dqm->i4_penalty_;
+  score_t i4_bit_sum = 0;
+  const score_t bit_limit = it->enc_->mb_header_limit_;
+
   if (is_i16) {   // First, evaluate Intra16 distortion
     int best_mode = -1;
     const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
     for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
       const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
-      const score_t score = VP8SSE16x16(src, ref);
+      const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT
+                          + VP8FixedCostsI16[mode] * lambda_d_i16;
+      if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
+        continue;
+      }
       if (score < best_score) {
         best_mode = mode;
         best_score = score;
@@ -1159,25 +1186,28 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
       int best_i4_mode = -1;
       score_t best_i4_score = MAX_COST;
       const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+      const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
 
       VP8MakeIntra4Preds(it);
       for (mode = 0; mode < NUM_BMODES; ++mode) {
         const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
-        const score_t score = VP8SSE4x4(src, ref);
+        const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
+                            + mode_costs[mode] * lambda_d_i4;
         if (score < best_i4_score) {
           best_i4_mode = mode;
           best_i4_score = score;
         }
       }
-      modes_i4[it->i4_] = best_i4_mode;
+      i4_bit_sum += mode_costs[best_i4_mode];
+      rd->modes_i4[it->i4_] = best_i4_mode;
       score_i4 += best_i4_score;
-      if (score_i4 >= best_score) {
+      if (score_i4 >= best_score || i4_bit_sum > bit_limit) {
         // Intra4 won't be better than Intra16. Bail out and pick Intra16.
         is_i16 = 1;
         break;
       } else {  // reconstruct partial block inside yuv_out2_ buffer
         uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
-        nz |= ReconstructIntra4(it, tmp_levels[it->i4_],
+        nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
                                 src, tmp_dst, best_i4_mode) << it->i4_;
       }
     } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
@@ -1185,8 +1215,7 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
 
   // Final reconstruction, depending on which mode is selected.
   if (!is_i16) {
-    VP8SetIntra4Mode(it, modes_i4);
-    memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels));
+    VP8SetIntra4Mode(it, rd->modes_i4);
     SwapOut(it);
     best_score = score_i4;
   } else {
@@ -1200,7 +1229,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
     const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
     for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
       const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
-      const score_t score = VP8SSE16x8(src, ref);
+      const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT
+                          + VP8FixedCostsUV[mode] * lambda_d_uv;
       if (score < best_uv_score) {
         best_mode = mode;
         best_uv_score = score;