1 files changed, 578 insertions, 422 deletions
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
index 1b559fad34a..ad30951afa3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -43,6 +43,7 @@
 #include "vp9/encoder/vp9_ethread.h"
 #include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_multi_thread.h"
+#include "vp9/encoder/vp9_partition_models.h"
 #include "vp9/encoder/vp9_pickmode.h"
 #include "vp9/encoder/vp9_rd.h"
 #include "vp9/encoder/vp9_rdopt.h"
@@ -2026,8 +2027,10 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) {
                             [has_second_ref(mi)]++;
 
         if (has_second_ref(mi)) {
-          counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
-                          [ref0 == GOLDEN_FRAME]++;
+          const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+          const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
+          const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1];
+          counts->comp_ref[ctx][bit]++;
         } else {
           counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
                             [ref0 != LAST_FRAME]++;
@@ -3039,22 +3042,6 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
 }
 #endif
 
-#define NN_MAX_HIDDEN_LAYERS 10
-#define NN_MAX_NODES_PER_LAYER 128
-
-// Neural net model config.
-typedef struct {
-  int num_inputs;         // Number of input nodes, i.e. features.
-  int num_outputs;        // Number of output nodes.
-  int num_hidden_layers;  // Number of hidden layers, maximum 10.
-  // Number of nodes for each hidden layer.
-  int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS];
-  // Weight parameters, indexed by layer.
-  const float *weights[NN_MAX_HIDDEN_LAYERS + 1];
-  // Bias parameters, indexed by layer.
-  const float *bias[NN_MAX_HIDDEN_LAYERS + 1];
-} NN_CONFIG;
-
 // Calculate prediction based on the given input features and neural net config.
 // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
 // layer.
@@ -3102,164 +3089,7 @@ static void nn_predict(const float *features, const NN_CONFIG *nn_config,
   }
 }
 
-static const float partition_nn_weights_64x64_layer0[7 * 8] = {
-  -3.571348f, 0.014835f,  -3.255393f, -0.098090f, -0.013120f, 0.000221f,
-  0.056273f,  0.190179f,  -0.268130f, -1.828242f, -0.010655f, 0.937244f,
-  -0.435120f, 0.512125f,  1.610679f,  0.190816f,  -0.799075f, -0.377348f,
-  -0.144232f, 0.614383f,  -0.980388f, 1.754150f,  -0.185603f, -0.061854f,
-  -0.807172f, 1.240177f,  1.419531f,  -0.438544f, -5.980774f, 0.139045f,
-  -0.032359f, -0.068887f, -1.237918f, 0.115706f,  0.003164f,  2.924212f,
-  1.246838f,  -0.035833f, 0.810011f,  -0.805894f, 0.010966f,  0.076463f,
-  -4.226380f, -2.437764f, -0.010619f, -0.020935f, -0.451494f, 0.300079f,
-  -0.168961f, -3.326450f, -2.731094f, 0.002518f,  0.018840f,  -1.656815f,
-  0.068039f,  0.010586f,
-};
-
-static const float partition_nn_bias_64x64_layer0[8] = {
-  -3.469882f, 0.683989f, 0.194010f,  0.313782f,
-  -3.153335f, 2.245849f, -1.946190f, -3.740020f,
-};
-
-static const float partition_nn_weights_64x64_layer1[8] = {
-  -8.058566f, 0.108306f, -0.280620f, -0.818823f,
-  -6.445117f, 0.865364f, -1.127127f, -8.808660f,
-};
-
-static const float partition_nn_bias_64x64_layer1[1] = {
-  6.46909416f,
-};
-
-static const NN_CONFIG partition_nnconfig_64x64 = {
-  7,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  {
-      partition_nn_weights_64x64_layer0,
-      partition_nn_weights_64x64_layer1,
-  },
-  {
-      partition_nn_bias_64x64_layer0,
-      partition_nn_bias_64x64_layer1,
-  },
-};
-
-static const float partition_nn_weights_32x32_layer0[7 * 8] = {
-  -0.295437f, -4.002648f, -0.205399f, -0.060919f, 0.708037f,  0.027221f,
-  -0.039137f, -0.907724f, -3.151662f, 0.007106f,  0.018726f,  -0.534928f,
-  0.022744f,  0.000159f,  -1.717189f, -3.229031f, -0.027311f, 0.269863f,
-  -0.400747f, -0.394366f, -0.108878f, 0.603027f,  0.455369f,  -0.197170f,
-  1.241746f,  -1.347820f, -0.575636f, -0.462879f, -2.296426f, 0.196696f,
-  -0.138347f, -0.030754f, -0.200774f, 0.453795f,  0.055625f,  -3.163116f,
-  -0.091003f, -0.027028f, -0.042984f, -0.605185f, 0.143240f,  -0.036439f,
-  -0.801228f, 0.313409f,  -0.159942f, 0.031267f,  0.886454f,  -1.531644f,
-  -0.089655f, 0.037683f,  -0.163441f, -0.130454f, -0.058344f, 0.060011f,
-  0.275387f,  1.552226f,
-};
-
-static const float partition_nn_bias_32x32_layer0[8] = {
-  -0.838372f, -2.609089f, -0.055763f, 1.329485f,
-  -1.297638f, -2.636622f, -0.826909f, 1.012644f,
-};
-
-static const float partition_nn_weights_32x32_layer1[8] = {
-  -1.792632f, -7.322353f, -0.683386f, 0.676564f,
-  -1.488118f, -7.527719f, 1.240163f,  0.614309f,
-};
-
-static const float partition_nn_bias_32x32_layer1[1] = {
-  4.97422546f,
-};
-
-static const NN_CONFIG partition_nnconfig_32x32 = {
-  7,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  {
-      partition_nn_weights_32x32_layer0,
-      partition_nn_weights_32x32_layer1,
-  },
-  {
-      partition_nn_bias_32x32_layer0,
-      partition_nn_bias_32x32_layer1,
-  },
-};
-
-static const float partition_nn_weights_16x16_layer0[7 * 8] = {
-  -1.717673f, -4.718130f, -0.125725f, -0.183427f, -0.511764f, 0.035328f,
-  0.130891f,  -3.096753f, 0.174968f,  -0.188769f, -0.640796f, 1.305661f,
-  1.700638f,  -0.073806f, -4.006781f, -1.630999f, -0.064863f, -0.086410f,
-  -0.148617f, 0.172733f,  -0.018619f, 2.152595f,  0.778405f,  -0.156455f,
-  0.612995f,  -0.467878f, 0.152022f,  -0.236183f, 0.339635f,  -0.087119f,
-  -3.196610f, -1.080401f, -0.637704f, -0.059974f, 1.706298f,  -0.793705f,
-  -6.399260f, 0.010624f,  -0.064199f, -0.650621f, 0.338087f,  -0.001531f,
-  1.023655f,  -3.700272f, -0.055281f, -0.386884f, 0.375504f,  -0.898678f,
-  0.281156f,  -0.314611f, 0.863354f,  -0.040582f, -0.145019f, 0.029329f,
-  -2.197880f, -0.108733f,
-};
-
-static const float partition_nn_bias_16x16_layer0[8] = {
-  0.411516f,  -2.143737f, -3.693192f, 2.123142f,
-  -1.356910f, -3.561016f, -0.765045f, -2.417082f,
-};
-
-static const float partition_nn_weights_16x16_layer1[8] = {
-  -0.619755f, -2.202391f, -4.337171f, 0.611319f,
-  0.377677f,  -4.998723f, -1.052235f, 1.949922f,
-};
-
-static const float partition_nn_bias_16x16_layer1[1] = {
-  3.20981717f,
-};
-
-static const NN_CONFIG partition_nnconfig_16x16 = {
-  7,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  {
-      partition_nn_weights_16x16_layer0,
-      partition_nn_weights_16x16_layer1,
-  },
-  {
-      partition_nn_bias_16x16_layer0,
-      partition_nn_bias_16x16_layer1,
-  },
-};
-
-static const float partition_feature_mean[24] = {
-  303501.697372f, 3042630.372158f, 24.694696f, 1.392182f,
-  689.413511f,    162.027012f,     1.478213f,  0.0,
-  135382.260230f, 912738.513263f,  28.845217f, 1.515230f,
-  544.158492f,    131.807995f,     1.436863f,  0.0f,
-  43682.377587f,  208131.711766f,  28.084737f, 1.356677f,
-  138.254122f,    119.522553f,     1.252322f,  0.0f,
-};
-
-static const float partition_feature_std[24] = {
-  673689.212982f, 5996652.516628f, 0.024449f, 1.989792f,
-  985.880847f,    0.014638f,       2.001898f, 0.0f,
-  208798.775332f, 1812548.443284f, 0.018693f, 1.838009f,
-  396.986910f,    0.015657f,       1.332541f, 0.0f,
-  55888.847031f,  448587.962714f,  0.017900f, 1.904776f,
-  98.652832f,     0.016598f,       1.320992f, 0.0f,
-};
-
-// Error tolerance: 0.01%-0.0.05%-0.1%
-static const float partition_linear_weights[24] = {
-  0.111736f, 0.289977f, 0.042219f, 0.204765f, 0.120410f, -0.143863f,
-  0.282376f, 0.847811f, 0.637161f, 0.131570f, 0.018636f, 0.202134f,
-  0.112797f, 0.028162f, 0.182450f, 1.124367f, 0.386133f, 0.083700f,
-  0.050028f, 0.150873f, 0.061119f, 0.109318f, 0.127255f, 0.625211f,
-};
-
+#define FEATURES 7
 // Machine-learning based partition search early termination.
 // Return 1 to skip split and rect partitions.
 static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -3280,7 +3110,7 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   const NN_CONFIG *nn_config = NULL;
   const float *mean, *sd, *linear_weights;
   float nn_score, linear_score;
-  float features[7];
+  float features[FEATURES];
 
   assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
   vpx_clear_system_state();
@@ -3288,15 +3118,15 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   switch (bsize) {
     case BLOCK_64X64:
       offset = 0;
-      nn_config = &partition_nnconfig_64x64;
+      nn_config = &vp9_partition_nnconfig_64x64;
       break;
     case BLOCK_32X32:
       offset = 8;
-      nn_config = &partition_nnconfig_32x32;
+      nn_config = &vp9_partition_nnconfig_32x32;
       break;
     case BLOCK_16X16:
       offset = 16;
-      nn_config = &partition_nnconfig_16x16;
+      nn_config = &vp9_partition_nnconfig_16x16;
       break;
     default: assert(0 && "Unexpected block size."); return 0;
   }
@@ -3325,8 +3155,8 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
       last_par = 1;
   }
 
-  mean = &partition_feature_mean[offset];
-  sd = &partition_feature_std[offset];
+  mean = &vp9_partition_feature_mean[offset];
+  sd = &vp9_partition_feature_std[offset];
   features[0] = ((float)ctx->rate - mean[0]) / sd[0];
   features[1] = ((float)ctx->dist - mean[1]) / sd[1];
   features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2];
@@ -3336,9 +3166,10 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   features[6] = ((float)last_par - mean[6]) * sd[6];
 
   // Predict using linear model.
-  linear_weights = &partition_linear_weights[offset];
-  linear_score = linear_weights[7];
-  for (i = 0; i < 7; ++i) linear_score += linear_weights[i] * features[i];
+  linear_weights = &vp9_partition_linear_weights[offset];
+  linear_score = linear_weights[FEATURES];
+  for (i = 0; i < FEATURES; ++i)
+    linear_score += linear_weights[i] * features[i];
   if (linear_score > 0.1f) return 0;
 
   // Predict using neural net model.
@@ -3348,210 +3179,9 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   if (nn_score < -0.0f && linear_score < 0.1f) return 1;
   return 0;
 }
+#undef FEATURES
 
 #define FEATURES 4
-#define Q_CTX 3
-#define RESOLUTION_CTX 2
-static const float partition_breakout_weights_64[RESOLUTION_CTX][Q_CTX]
-                                                [FEATURES + 1] = {
-                                                  {
-                                                      {
-                                                          -0.016673f,
-                                                          -0.001025f,
-                                                          -0.000032f,
-                                                          0.000833f,
-                                                          1.94261885f - 2.1f,
-                                                      },
-                                                      {
-                                                          -0.160867f,
-                                                          -0.002101f,
-                                                          0.000011f,
-                                                          0.002448f,
-                                                          1.65738142f - 2.5f,
-                                                      },
-                                                      {
-                                                          -0.628934f,
-                                                          -0.011459f,
-                                                          -0.000009f,
-                                                          0.013833f,
-                                                          1.47982645f - 1.6f,
-                                                      },
-                                                  },
-                                                  {
-                                                      {
-                                                          -0.064309f,
-                                                          -0.006121f,
-                                                          0.000232f,
-                                                          0.005778f,
-                                                          0.7989465f - 5.0f,
-                                                      },
-                                                      {
-                                                          -0.314957f,
-                                                          -0.009346f,
-                                                          -0.000225f,
-                                                          0.010072f,
-                                                          2.80695581f - 5.5f,
-                                                      },
-                                                      {
-                                                          -0.635535f,
-                                                          -0.015135f,
-                                                          0.000091f,
-                                                          0.015247f,
-                                                          2.90381241f - 5.0f,
-                                                      },
-                                                  },
-                                                };
-
-static const float partition_breakout_weights_32[RESOLUTION_CTX][Q_CTX]
-                                                [FEATURES + 1] = {
-                                                  {
-                                                      {
-                                                          -0.010554f,
-                                                          -0.003081f,
-                                                          -0.000134f,
-                                                          0.004491f,
-                                                          1.68445992f - 3.5f,
-                                                      },
-                                                      {
-                                                          -0.051489f,
-                                                          -0.007609f,
-                                                          0.000016f,
-                                                          0.009792f,
-                                                          1.28089404f - 2.5f,
-                                                      },
-                                                      {
-                                                          -0.163097f,
-                                                          -0.013081f,
-                                                          0.000022f,
-                                                          0.019006f,
-                                                          1.36129403f - 3.2f,
-                                                      },
-                                                  },
-                                                  {
-                                                      {
-                                                          -0.024629f,
-                                                          -0.006492f,
-                                                          -0.000254f,
-                                                          0.004895f,
-                                                          1.27919173f - 4.5f,
-                                                      },
-                                                      {
-                                                          -0.083936f,
-                                                          -0.009827f,
-                                                          -0.000200f,
-                                                          0.010399f,
-                                                          2.73731065f - 4.5f,
-                                                      },
-                                                      {
-                                                          -0.279052f,
-                                                          -0.013334f,
-                                                          0.000289f,
-                                                          0.023203f,
-                                                          2.43595719f - 3.5f,
-                                                      },
-                                                  },
-                                                };
-
-static const float partition_breakout_weights_16[RESOLUTION_CTX][Q_CTX]
-                                                [FEATURES + 1] = {
-                                                  {
-                                                      {
-                                                          -0.013154f,
-                                                          -0.002404f,
-                                                          -0.000977f,
-                                                          0.008450f,
-                                                          2.57404566f - 5.5f,
-                                                      },
-                                                      {
-                                                          -0.019146f,
-                                                          -0.004018f,
-                                                          0.000064f,
-                                                          0.008187f,
-                                                          2.15043926f - 2.5f,
-                                                      },
-                                                      {
-                                                          -0.075755f,
-                                                          -0.010858f,
-                                                          0.000030f,
-                                                          0.024505f,
-                                                          2.06848121f - 2.5f,
-                                                      },
-                                                  },
-                                                  {
-                                                      {
-                                                          -0.007636f,
-                                                          -0.002751f,
-                                                          -0.000682f,
-                                                          0.005968f,
-                                                          0.19225763f - 4.5f,
-                                                      },
-                                                      {
-                                                          -0.047306f,
-                                                          -0.009113f,
-                                                          -0.000518f,
-                                                          0.016007f,
-                                                          2.61068869f - 4.0f,
-                                                      },
-                                                      {
-                                                          -0.069336f,
-                                                          -0.010448f,
-                                                          -0.001120f,
-                                                          0.023083f,
-                                                          1.47591054f - 5.5f,
-                                                      },
-                                                  },
-                                                };
-
-static const float partition_breakout_weights_8[RESOLUTION_CTX][Q_CTX]
-                                               [FEATURES + 1] = {
-                                                 {
-                                                     {
-                                                         -0.011807f,
-                                                         -0.009873f,
-                                                         -0.000931f,
-                                                         0.034768f,
-                                                         1.32254851f - 2.0f,
-                                                     },
-                                                     {
-                                                         -0.003861f,
-                                                         -0.002701f,
-                                                         0.000100f,
-                                                         0.013876f,
-                                                         1.96755111f - 1.5f,
-                                                     },
-                                                     {
-                                                         -0.013522f,
-                                                         -0.008677f,
-                                                         -0.000562f,
-                                                         0.034468f,
-                                                         1.53440356f - 1.5f,
-                                                     },
-                                                 },
-                                                 {
-                                                     {
-                                                         -0.003221f,
-                                                         -0.002125f,
-                                                         0.000993f,
-                                                         0.012768f,
-                                                         0.03541421f - 2.0f,
-                                                     },
-                                                     {
-                                                         -0.006069f,
-                                                         -0.007335f,
-                                                         0.000229f,
-                                                         0.026104f,
-                                                         0.17135315f - 1.5f,
-                                                     },
-                                                     {
-                                                         -0.039894f,
-                                                         -0.011419f,
-                                                         0.000070f,
-                                                         0.061817f,
-                                                         0.6739977f - 1.5f,
-                                                     },
-                                                 },
-                                               };
-
 // ML-based partition search breakout.
 static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                                const MACROBLOCK *const x,
@@ -3568,16 +3198,16 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
 
   switch (bsize) {
     case BLOCK_64X64:
-      linear_weights = partition_breakout_weights_64[resolution_ctx][q_ctx];
+      linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx];
       break;
     case BLOCK_32X32:
-      linear_weights = partition_breakout_weights_32[resolution_ctx][q_ctx];
+      linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx];
       break;
     case BLOCK_16X16:
-      linear_weights = partition_breakout_weights_16[resolution_ctx][q_ctx];
+      linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx];
       break;
     case BLOCK_8X8:
-      linear_weights = partition_breakout_weights_8[resolution_ctx][q_ctx];
+      linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx];
       break;
     default: assert(0 && "Unexpected block size."); return 0;
   }
@@ -3619,8 +3249,283 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
   return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx];
 }
 #undef FEATURES
-#undef Q_CTX
-#undef RESOLUTION_CTX
+
+#define FEATURES 17
+#define LABELS 4
+static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
+                                    BLOCK_SIZE bsize,
+                                    const PC_TREE *const pc_tree,
+                                    int *allow_horz, int *allow_vert,
+                                    int64_t ref_rd, int mi_row, int mi_col) {
+  const NN_CONFIG *nn_config = NULL;
+  float score[LABELS] = {
+    0.0f,
+  };
+  int thresh = -1;
+  int i;
+
+  if (ref_rd <= 0 || ref_rd > 1000000000) return;
+
+  switch (bsize) {
+    case BLOCK_8X8: break;
+    case BLOCK_16X16:
+      nn_config = &vp9_rect_part_nnconfig_16;
+      thresh = cpi->sf.ml_prune_rect_partition_threhold[1];
+      break;
+    case BLOCK_32X32:
+      nn_config = &vp9_rect_part_nnconfig_32;
+      thresh = cpi->sf.ml_prune_rect_partition_threhold[2];
+      break;
+    case BLOCK_64X64:
+      nn_config = &vp9_rect_part_nnconfig_64;
+      thresh = cpi->sf.ml_prune_rect_partition_threhold[3];
+      break;
+    default: assert(0 && "Unexpected block size."); return;
+  }
+  if (!nn_config || thresh < 0) return;
+
+  // Feature extraction and model score calculation.
+  {
+    const int64_t none_rdcost = pc_tree->none.rdcost;
+    const VP9_COMMON *const cm = &cpi->common;
+    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+    int feature_index = 0;
+    unsigned int block_var = 0;
+    unsigned int sub_block_var[4] = { 0 };
+    float features[FEATURES];
+
+    features[feature_index++] =
+        (float)(pc_tree->partitioning == PARTITION_NONE);
+    features[feature_index++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+
+    // Calculate source pixel variance.
+    {
+      struct buf_2d buf;
+      const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+      const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+      const MACROBLOCKD *const xd = &x->e_mbd;
+      vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
+      (void)xd;
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+        block_var = vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
+                                                       bsize, xd->bd);
+      } else {
+        block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+      }
+#else
+      block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+      buf.stride = x->plane[0].src.stride;
+      for (i = 0; i < 4; ++i) {
+        const int x_idx = (i & 1) * bs / 2;
+        const int y_idx = (i >> 1) * bs / 2;
+        buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+          sub_block_var[i] =
+              vp9_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
+        } else {
+          sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
+        }
+#else
+        sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      }
+    }
+
+    features[feature_index++] = logf((float)block_var + 1.0f);
+    features[feature_index++] = logf((float)ref_rd + 1.0f);
+    features[feature_index++] = (none_rdcost > 0 && none_rdcost < 1000000000)
+                                    ? (float)pc_tree->none.skippable
+                                    : 0.0f;
+
+    for (i = 0; i < 4; ++i) {
+      const int64_t this_rd = pc_tree->split[i]->none.rdcost;
+      const int rd_valid = this_rd > 0 && this_rd < 1000000000;
+      // Ratio between sub-block RD and whole block RD.
+      features[feature_index++] =
+          rd_valid ? ((float)this_rd / (float)ref_rd) : 1.0f;
+      // Sub-block skippable.
+      features[feature_index++] =
+          rd_valid ? ((float)pc_tree->split[i]->none.skippable) : 0.0f;
+    }
+
+    {
+      const float denom = (float)(block_var + 1);
+      const float low_b = 0.1f;
+      const float high_b = 10.0f;
+      for (i = 0; i < 4; ++i) {
+        // Ratio between the quarter sub-block variance and the
+        // whole-block variance.
+        float var_ratio = (float)(sub_block_var[i] + 1) / denom;
+        if (var_ratio < low_b) var_ratio = low_b;
+        if (var_ratio > high_b) var_ratio = high_b;
+        features[feature_index++] = var_ratio;
+      }
+    }
+    assert(feature_index == FEATURES);
+    nn_predict(features, nn_config, score);
+  }
+
+  // Make decisions based on the model score.
+  {
+    int max_score = -1000;
+    int horz = 0, vert = 0;
+    int int_score[LABELS];
+    for (i = 0; i < LABELS; ++i) {
+      int_score[i] = (int)(100 * score[i]);
+      max_score = VPXMAX(int_score[i], max_score);
+    }
+    thresh = max_score - thresh;
+    for (i = 0; i < LABELS; ++i) {
+      if (int_score[i] >= thresh) {
+        if ((i >> 0) & 1) horz = 1;
+        if ((i >> 1) & 1) vert = 1;
+      }
+    }
+    *allow_horz = *allow_horz && horz;
+    *allow_vert = *allow_vert && vert;
+  }
+}
+#undef FEATURES
+#undef LABELS
+
+// Use a neural net model to prune partition-none and partition-split search.
+// The model uses prediction residue variance and quantization step size as
+// input features.
+#define FEATURES 6
+static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+                                          BLOCK_SIZE bsize, int mi_row,
+                                          int mi_col, int *none, int *split) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  MODE_INFO *mi = xd->mi[0];
+  const NN_CONFIG *nn_config = NULL;
+  DECLARE_ALIGNED(16, uint8_t, pred_buf[64 * 64]);
+  int i;
+  float thresh_low = -1.0f;
+  float thresh_high = 0.0f;
+
+  switch (bsize) {
+    case BLOCK_64X64:
+      nn_config = &vp9_var_rd_part_nnconfig_64;
+      thresh_low = -3.0f;
+      thresh_high = 3.0f;
+      break;
+    case BLOCK_32X32:
+      nn_config = &vp9_var_rd_part_nnconfig_32;
+      thresh_low = -3.0;
+      thresh_high = 3.0f;
+      break;
+    case BLOCK_16X16:
+      nn_config = &vp9_var_rd_part_nnconfig_16;
+      thresh_low = -4.0;
+      thresh_high = 4.0f;
+      break;
+    case BLOCK_8X8:
+      nn_config = &vp9_var_rd_part_nnconfig_8;
+      thresh_low = -2.0;
+      thresh_high = 2.0f;
+      break;
+    default: assert(0 && "Unexpected block size."); return;
+  }
+
+  if (!nn_config) return;
+
+  mi->ref_frame[1] = NONE;
+  mi->sb_type = bsize;
+  // Do a simple single motion search to find a prediction for current block.
+  // The variance of the residue will be used as input features.
+  {
+    const MV_REFERENCE_FRAME ref =
+        cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
+    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref);
+    MV ref_mv = { 0, 0 };
+    MV ref_mv_full = { 0, 0 };
+    const int step_param = 1;
+    const MvLimits tmp_mv_limits = x->mv_limits;
+    const SEARCH_METHODS search_method = NSTEP;
+    const int sadpb = x->sadperbit16;
+    MV best_mv = { 0, 0 };
+    int cost_list[5];
+
+    assert(yv12 != NULL);
+    if (!yv12) return;
+    vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                         &cm->frame_refs[ref - 1].sf);
+    mi->ref_frame[0] = ref;
+    vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
+    vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param,
+                          search_method, sadpb, cond_cost_list(cpi, cost_list),
+                          &ref_mv, &best_mv, 0, 0);
+    best_mv.row *= 8;
+    best_mv.col *= 8;
+    x->mv_limits = tmp_mv_limits;
+    mi->mv[0].as_mv = best_mv;
+
+    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+    xd->plane[0].dst.buf = pred_buf;
+    xd->plane[0].dst.stride = 64;
+    vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+  }
+
+  vpx_clear_system_state();
+
+  {
+    float features[FEATURES] = { 0.0f };
+    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+    int feature_idx = 0;
+    float score;
+
+    // Generate model input features.
+    features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+    vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+    // Get the variance of the residue as input features.
+    {
+      const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+      const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+      const uint8_t *pred = pred_buf;
+      const uint8_t *src = x->plane[0].src.buf;
+      const int src_stride = x->plane[0].src.stride;
+      const int pred_stride = 64;
+      unsigned int sse;
+      // Variance of whole block.
+      const unsigned int var =
+          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+
+      features[feature_idx++] = logf((float)var + 1.0f);
+      for (i = 0; i < 4; ++i) {
+        const int x_idx = (i & 1) * bs / 2;
+        const int y_idx = (i >> 1) * bs / 2;
+        const int src_offset = y_idx * src_stride + x_idx;
+        const int pred_offset = y_idx * pred_stride + x_idx;
+        // Variance of quarter block.
+        const unsigned int sub_var =
+            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+                                    pred + pred_offset, pred_stride, &sse);
+        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
+        features[feature_idx++] = var_ratio;
+      }
+    }
+    assert(feature_idx == FEATURES);
+
+    // Feed the features into the model to get the confidence score.
+    nn_predict(features, nn_config, &score);
+
+    // Higher score means that the model has higher confidence that the split
+    // partition is better than the non-split partition. So if the score is
+    // high enough, we skip the none-split partition search; if the score is
+    // low enough, we skip the split partition search.
+    if (score > thresh_high) *none = 0;
+    if (score < thresh_low) *split = 0;
+  }
+}
+#undef FEATURES
+#undef LABELS
 
 int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
                      int orig_rdmult) {
@@ -3687,7 +3592,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
   PARTITION_CONTEXT sl[8], sa[8];
   TOKENEXTRA *tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+  PICK_MODE_CONTEXT *const ctx = &pc_tree->none;
   int i;
   const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   BLOCK_SIZE subsize;
@@ -3777,7 +3682,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   }
 
   if (cpi->sf.use_square_partition_only &&
-      bsize > cpi->sf.use_square_only_threshold) {
+      (bsize > cpi->sf.use_square_only_thresh_high ||
+       bsize < cpi->sf.use_square_only_thresh_low)) {
     if (cpi->use_svc) {
       if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
         partition_horz_allowed &= force_horz_split;
@@ -3850,10 +3756,28 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   }
 #endif
 
+  pc_tree->partitioning = PARTITION_NONE;
+
+  if (cpi->sf.ml_var_partition_pruning) {
+    int do_ml_var_partition_pruning =
+        !frame_is_intra_only(cm) && partition_none_allowed && do_split &&
+        mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows &&
+        mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols;
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+      do_ml_var_partition_pruning = 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    if (do_ml_var_partition_pruning) {
+      ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col,
+                                    &partition_none_allowed, &do_split);
+    }
+  }
+
   // PARTITION_NONE
   if (partition_none_allowed) {
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
                      best_rdc.rdcost);
+    ctx->rdcost = this_rdc.rdcost;
     if (this_rdc.rate != INT_MAX) {
       if (cpi->sf.prune_ref_frame_for_rect_partitions) {
         const int ref1 = ctx->mic.ref_frame[0];
@@ -3963,6 +3887,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
       }
     }
     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+  } else {
+    vp9_zero(ctx->pred_mv);
+    ctx->mic.interp_filter = EIGHTTAP;
   }
 
   // store estimated motion vector
@@ -3979,6 +3906,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   // PARTITION_SPLIT
   // TODO(jingning): use the motion vectors given by the above search as
   // the starting point of motion search in the following partition type check.
+  pc_tree->split[0]->none.rdcost = 0;
+  pc_tree->split[1]->none.rdcost = 0;
+  pc_tree->split[2]->none.rdcost = 0;
+  pc_tree->split[3]->none.rdcost = 0;
   if (do_split || must_split) {
     subsize = get_subsize(bsize, PARTITION_SPLIT);
     load_pred_mv(x, ctx);
@@ -4063,9 +3994,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
     } else {
       // skip rectangular partition test when larger block size
       // gives better rd cost
-      if ((cpi->sf.less_rectangular_check) &&
-          ((bsize > cpi->sf.use_square_only_threshold) ||
-           (best_rdc.dist < dist_breakout_thr)))
+      if (cpi->sf.less_rectangular_check &&
+          (bsize > cpi->sf.use_square_only_thresh_high ||
+           best_rdc.dist < dist_breakout_thr))
         do_rect &= !partition_none_allowed;
     }
     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -4087,6 +4018,21 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
     if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
   }
 
+  {
+    int do_ml_rect_partition_pruning =
+        !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split &&
+        (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8;
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+      do_ml_rect_partition_pruning = 0;
+#endif
+    if (do_ml_rect_partition_pruning) {
+      ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed,
+                              &partition_vert_allowed, best_rdc.rdcost, mi_row,
+                              mi_col);
+    }
+  }
+
   // PARTITION_HORZ
   if (partition_horz_allowed &&
       (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
@@ -4130,8 +4076,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
       best_rdc = sum_rdc;
       pc_tree->partitioning = PARTITION_HORZ;
 
-      if ((cpi->sf.less_rectangular_check) &&
-          (bsize > cpi->sf.use_square_only_threshold))
+      if (cpi->sf.less_rectangular_check &&
+          bsize > cpi->sf.use_square_only_thresh_high)
         do_rect = 0;
     }
     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -4300,6 +4246,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
         rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
                                 &x->min_partition_size, &x->max_partition_size);
       }
+      td->pc_root->none.rdcost = 0;
       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rdc, INT64_MAX, td->pc_root);
     }
@@ -4550,6 +4497,83 @@ static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
   }
 }
 
+#if CONFIG_ML_VAR_PARTITION
+#define FEATURES 6
+#define LABELS 2
+static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+                                      BLOCK_SIZE bsize, int mi_row,
+                                      int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  const NN_CONFIG *nn_config = NULL;
+  float thresh_low = -0.2f;
+  float thresh_high = 0.0f;
+
+  switch (bsize) {
+    case BLOCK_64X64:
+      nn_config = &vp9_var_part_nnconfig_64;
+      thresh_low = -0.3f;
+      thresh_high = -0.1f;
+      break;
+    case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
+    case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
+    case BLOCK_8X8: break;
+    default: assert(0 && "Unexpected block size."); return -1;
+  }
+
+  if (!nn_config) return -1;
+
+  vpx_clear_system_state();
+
+  {
+    float features[FEATURES] = { 0.0f };
+    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+    int feature_idx = 0;
+    float score[LABELS];
+
+    features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+    vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+    {
+      const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+      const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+      const int sb_offset_row = 8 * (mi_row & 7);
+      const int sb_offset_col = 8 * (mi_col & 7);
+      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
+      const uint8_t *src = x->plane[0].src.buf;
+      const int src_stride = x->plane[0].src.stride;
+      const int pred_stride = 64;
+      unsigned int sse;
+      int i;
+      // Variance of whole block.
+      const unsigned int var =
+          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+
+      features[feature_idx++] = logf((float)var + 1.0f);
+      for (i = 0; i < 4; ++i) {
+        const int x_idx = (i & 1) * bs / 2;
+        const int y_idx = (i >> 1) * bs / 2;
+        const int src_offset = y_idx * src_stride + x_idx;
+        const int pred_offset = y_idx * pred_stride + x_idx;
+        // Variance of quarter block.
+        const unsigned int sub_var =
+            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+                                    pred + pred_offset, pred_stride, &sse);
+        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
+        features[feature_idx++] = var_ratio;
+      }
+    }
+
+    assert(feature_idx == FEATURES);
+    nn_predict(features, nn_config, score);
+    if (score[0] > thresh_high) return 3;
+    if (score[0] < thresh_low) return 0;
+    return -1;
+  }
+}
+#undef FEATURES
+#undef LABELS
+#endif  // CONFIG_ML_VAR_PARTITION
+
 static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                                  TileDataEnc *tile_data, TOKENEXTRA **tp,
                                  int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -4579,6 +4603,11 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
       !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
   int partition_vert_allowed =
       !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
+#if CONFIG_ML_VAR_PARTITION
+  const int use_ml_based_partitioning =
+      sf->partition_search_type == ML_BASED_PARTITION;
+#endif  // CONFIG_ML_VAR_PARTITION
+
   (void)*tp_orig;
 
   // Avoid checking for rectangular partitions for speed >= 6.
@@ -4609,6 +4638,20 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
     partition_vert_allowed &= force_vert_split;
   }
 
+#if CONFIG_ML_VAR_PARTITION
+  if (use_ml_based_partitioning) {
+    if (partition_none_allowed || do_split) do_rect = 0;
+    if (partition_none_allowed && do_split) {
+      const int ml_predicted_partition =
+          ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
+      if (ml_predicted_partition == 0) do_split = 0;
+      if (ml_predicted_partition == 3) partition_none_allowed = 0;
+    }
+  }
+#endif  // CONFIG_ML_VAR_PARTITION
+
+  if (!partition_none_allowed && !do_split) do_rect = 1;
+
   ctx->pred_pixel_ready =
       !(partition_vert_allowed || partition_horz_allowed || do_split);
 
@@ -4622,26 +4665,28 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
     ctx->skip = x->skip;
 
     if (this_rdc.rate != INT_MAX) {
-      int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+      const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
       this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
       this_rdc.rdcost =
           RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
       if (this_rdc.rdcost < best_rdc.rdcost) {
-        int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
-        int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
-
-        dist_breakout_thr >>=
-            8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
-
-        rate_breakout_thr *= num_pels_log2_lookup[bsize];
-
         best_rdc = this_rdc;
         if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
 
-        if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
-            this_rdc.dist < dist_breakout_thr) {
-          do_split = 0;
-          do_rect = 0;
+#if CONFIG_ML_VAR_PARTITION
+        if (!use_ml_based_partitioning)
+#endif  // CONFIG_ML_VAR_PARTITION
+        {
+          int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
+          int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
+          dist_breakout_thr >>=
+              8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+          rate_breakout_thr *= num_pels_log2_lookup[bsize];
+          if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
+              this_rdc.dist < dist_breakout_thr) {
+            do_split = 0;
+            do_rect = 0;
+          }
         }
       }
     }
@@ -5040,6 +5085,111 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
     update_partition_context(xd, mi_row, mi_col, subsize, bsize);
 }
 
+#if CONFIG_ML_VAR_PARTITION
+// Get a prediction(stored in x->est_pred) for the whole 64x64 superblock.
+static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
+                               MACROBLOCK *x, int mi_row, int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int is_key_frame = frame_is_intra_only(cm);
+
+  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+
+  if (!is_key_frame) {
+    MACROBLOCKD *xd = &x->e_mbd;
+    MODE_INFO *mi = xd->mi[0];
+    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+    const YV12_BUFFER_CONFIG *yv12_g = NULL;
+    const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
+                             (mi_row + 4 < cm->mi_rows);
+    int pixels_wide = 64, pixels_high = 64;
+    unsigned int y_sad_g, y_sad_thr;
+    unsigned int y_sad = UINT_MAX;
+
+    assert(yv12 != NULL);
+
+    if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
+    if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
+        cpi->svc.use_gf_temporal_ref_current_layer) {
+      // For now, GOLDEN will not be used for non-zero spatial layers, since
+      // it may not be a temporal reference.
+      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+    }
+
+    // Only compute y_sad_g (sad for golden reference) for speed < 8.
+    if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
+        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+      y_sad_g = cpi->fn_ptr[bsize].sdf(
+          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+          xd->plane[0].pre[0].stride);
+    } else {
+      y_sad_g = UINT_MAX;
+    }
+
+    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+        cpi->rc.is_src_frame_alt_ref) {
+      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
+      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
+      mi->ref_frame[0] = ALTREF_FRAME;
+      y_sad_g = UINT_MAX;
+    } else {
+      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                           &cm->frame_refs[LAST_FRAME - 1].sf);
+      mi->ref_frame[0] = LAST_FRAME;
+    }
+    mi->ref_frame[1] = NONE;
+    mi->sb_type = BLOCK_64X64;
+    mi->mv[0].as_int = 0;
+    mi->interp_filter = BILINEAR;
+
+    {
+      const MV dummy_mv = { 0, 0 };
+      y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
+                                            &dummy_mv);
+      x->sb_use_mv_part = 1;
+      x->sb_mvcol_part = mi->mv[0].as_mv.col;
+      x->sb_mvrow_part = mi->mv[0].as_mv.row;
+    }
+
+    // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
+    // are close if short_circuit_low_temp_var is on.
+    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+    if (y_sad_g < y_sad_thr) {
+      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+      mi->ref_frame[0] = GOLDEN_FRAME;
+      mi->mv[0].as_int = 0;
+      y_sad = y_sad_g;
+    } else {
+      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+    }
+
+    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+    xd->plane[0].dst.buf = x->est_pred;
+    xd->plane[0].dst.stride = 64;
+    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
+  } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+    switch (xd->bd) {
+      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
+      case 10:
+        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
+        break;
+      case 12:
+        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
+        break;
+    }
+#else
+    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  }
+}
+#endif  // CONFIG_ML_VAR_PARTITION
+
 static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TOKENEXTRA **tp) {
@@ -5131,6 +5281,17 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
         break;
+#if CONFIG_ML_VAR_PARTITION
+      case ML_BASED_PARTITION:
+        get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
+        x->max_partition_size = BLOCK_64X64;
+        x->min_partition_size = BLOCK_8X8;
+        x->sb_pickmode_part = 1;
+        nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
+                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
+                             td->pc_root);
+        break;
+#endif  // CONFIG_ML_VAR_PARTITION
       case SOURCE_VAR_BASED_PARTITION:
         set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
@@ -5685,16 +5846,11 @@ void vp9_encode_frame(VP9_COMP *cpi) {
   // side behavior is where the ALT ref buffer has opposite sign bias to
   // the other two.
   if (!frame_is_intra_only(cm)) {
-    if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
-         cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
-        (cm->ref_frame_sign_bias[ALTREF_FRAME] ==
-         cm->ref_frame_sign_bias[LAST_FRAME])) {
-      cpi->allow_comp_inter_inter = 0;
-    } else {
+    if (vp9_compound_reference_allowed(cm)) {
       cpi->allow_comp_inter_inter = 1;
-      cm->comp_fixed_ref = ALTREF_FRAME;
-      cm->comp_var_ref[0] = LAST_FRAME;
-      cm->comp_var_ref[1] = GOLDEN_FRAME;
+      vp9_setup_compound_reference_mode(cm);
+    } else {
+      cpi->allow_comp_inter_inter = 0;
     }
   }