author    | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-08-14 11:38:45 +0200
committer | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-08-14 17:16:47 +0000
commit    | 3a97ca8dd9b96b599ae2d33e40df0dd2f7ea5859 (patch)
tree      | 43cc572ba067417c7341db81f71ae7cc6e0fcc3e /chromium/third_party/libvpx
parent    | f61ab1ac7f855cd281809255c0aedbb1895e1823 (diff)
download  | qtwebengine-chromium-3a97ca8dd9b96b599ae2d33e40df0dd2f7ea5859.tar.gz
BASELINE: Update chromium to 45.0.2454.40
Change-Id: Id2121d9f11a8fc633677236c65a3e41feef589e4
Reviewed-by: Andras Becsi <andras.becsi@theqtcompany.com>
Diffstat (limited to 'chromium/third_party/libvpx')
347 files changed, 22658 insertions, 13118 deletions
diff --git a/chromium/third_party/libvpx/BUILD.gn b/chromium/third_party/libvpx/BUILD.gn
index bb55816b80b..d33761594d9 100644
--- a/chromium/third_party/libvpx/BUILD.gn
+++ b/chromium/third_party/libvpx/BUILD.gn
@@ -89,7 +89,7 @@ static_library("libvpx_intrinsics_sse2") {
   configs += [ ":libvpx_config" ]
   configs -= [ "//build/config/compiler:chromium_code" ]
   configs += [ "//build/config/compiler:no_chromium_code" ]
-  if (!is_win) {
+  if (!is_win || is_clang) {
     cflags = [ "-msse2" ]
   }
   if (current_cpu == "x86") {
@@ -103,7 +103,7 @@ static_library("libvpx_intrinsics_ssse3") {
   configs += [ ":libvpx_config" ]
   configs -= [ "//build/config/compiler:chromium_code" ]
   configs += [ "//build/config/compiler:no_chromium_code" ]
-  if (!is_win) {
+  if (!is_win || is_clang) {
     cflags = [ "-mssse3" ]
   }
   if (current_cpu == "x86") {
@@ -117,7 +117,7 @@ static_library("libvpx_intrinsics_sse4_1") {
   configs += [ ":libvpx_config" ]
   configs -= [ "//build/config/compiler:chromium_code" ]
   configs += [ "//build/config/compiler:no_chromium_code" ]
-  if (!is_win) {
+  if (!is_win || is_clang) {
     cflags = [ "-msse4.1" ]
   }
   if (current_cpu == "x86") {
diff --git a/chromium/third_party/libvpx/README.chromium b/chromium/third_party/libvpx/README.chromium
index a8025f3705c..af0dcef0703 100644
--- a/chromium/third_party/libvpx/README.chromium
+++ b/chromium/third_party/libvpx/README.chromium
@@ -5,9 +5,9 @@ License: BSD
 License File: source/libvpx/LICENSE
 Security Critical: yes
 
-Date: Monday May 11 2015
+Date: Monday June 29 2015
 Branch: master
-Commit: 6d227137221ea4eead9d81daf3d2f0915560bbff
+Commit: f3a1295cffe62806255bace4abb49c8831b7a61f
 
 Description:
 Contains the sources used to compile libvpx binaries used by Google Chrome and
diff --git a/chromium/third_party/libvpx/codereview.settings b/chromium/third_party/libvpx/codereview.settings
index db0d04cc885..0e7d0e87548 100644
--- a/chromium/third_party/libvpx/codereview.settings
+++ b/chromium/third_party/libvpx/codereview.settings
@@ -1,4 +1,9 @@
-# This file is used by gcl to get repository specific information.
CODE_REVIEW_SERVER: codereview.chromium.org +CC_LIST: chromium-reviews@chromium.org VIEW_VC: https://chromium.googlesource.com/chromium/deps/libvpx/+/ STATUS: http://chromium-status.appspot.com/status +TRY_ON_UPLOAD: False +TRYSERVER_SVN_URL: svn://svn.chromium.org/chrome-try/try +GITCL_PREUPLOAD: http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/git-cl-upload-hook?revision=HEAD +GITCL_PREDCOMMIT: http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/git-cl-upload-hook?revision=HEAD +PROJECT: chromium_deps diff --git a/chromium/third_party/libvpx/libvpx_srcs.gni b/chromium/third_party/libvpx/libvpx_srcs.gni index 39f445180ea..1f94e7ce601 100644 --- a/chromium/third_party/libvpx/libvpx_srcs.gni +++ b/chromium/third_party/libvpx/libvpx_srcs.gni @@ -188,6 +188,7 @@ libvpx_srcs_x86 = [ "//third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h", "//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.c", "//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.h", + "//third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c", @@ -280,7 +281,10 @@ libvpx_srcs_x86 = [ "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h", + "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h", + "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.h", + "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h", "//third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c", "//third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c", "//third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h", @@ -301,6 +305,7 @@ libvpx_srcs_x86 = [ "//third_party/libvpx/source/libvpx/vpx/vpx_image.h", "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -309,6 +314,7 @@ libvpx_srcs_x86 = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_ports/x86.h", @@ -338,9 +344,9 @@ libvpx_srcs_x86_assembly = [ "//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_mmx.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_sse2.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm", - "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_ssse3.asm", + "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_mmx.asm", 
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_sse2.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm", @@ -369,30 +375,31 @@ libvpx_srcs_x86_assembly = [ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse3.asm", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse4.asm", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_ssse3.asm", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm", "//third_party/libvpx/source/libvpx/vpx_ports/emms.asm", "//third_party/libvpx/source/libvpx/vpx_ports/x86_abi_support.asm", ] libvpx_srcs_x86_mmx = [ "//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_mmx.c", - "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c", + "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_mmx.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c", ] libvpx_srcs_x86_sse2 = [ "//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_sse2.c", "//third_party/libvpx/source/libvpx/vp8/common/x86/recon_wrapper_sse2.c", - "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c", + "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/denoising_sse2.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_sse2.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c", ] libvpx_srcs_x86_sse3 = [ ] @@ -410,14 +417,14 @@ libvpx_srcs_x86_avx = [ libvpx_srcs_x86_avx2 = [ "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c", ] libvpx_srcs_x86_64 = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", @@ -604,6 +611,7 @@ libvpx_srcs_x86_64 = [ "//third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h", "//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.c", 
"//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.h", + "//third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c", @@ -696,7 +704,10 @@ libvpx_srcs_x86_64 = [ "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h", + "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h", + "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.h", + "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h", "//third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c", "//third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c", "//third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h", @@ -717,6 +728,7 @@ libvpx_srcs_x86_64 = [ "//third_party/libvpx/source/libvpx/vpx/vpx_image.h", "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -725,6 +737,7 @@ libvpx_srcs_x86_64 = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_ports/x86.h", @@ -755,9 +768,9 @@ libvpx_srcs_x86_64_assembly = [ "//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_mmx.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_sse2.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm", - "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm", "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_ssse3.asm", + "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_mmx.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_sse2.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm", @@ -791,30 +804,31 @@ libvpx_srcs_x86_64_assembly = [ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse3.asm", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse4.asm", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_ssse3.asm", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm", "//third_party/libvpx/source/libvpx/vpx_ports/emms.asm", "//third_party/libvpx/source/libvpx/vpx_ports/x86_abi_support.asm", ] libvpx_srcs_x86_64_mmx = [ "//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_mmx.c", - "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c", + "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_mmx.c", + 
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c", ] libvpx_srcs_x86_64_sse2 = [ "//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_sse2.c", "//third_party/libvpx/source/libvpx/vp8/common/x86/recon_wrapper_sse2.c", - "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c", + "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/denoising_sse2.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_sse2.c", "//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c", ] libvpx_srcs_x86_64_sse3 = [ ] @@ -832,14 +846,14 @@ libvpx_srcs_x86_64_avx = [ libvpx_srcs_x86_64_avx2 = [ "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c", - "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c", ] libvpx_srcs_arm = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", @@ -1140,6 +1154,7 @@ libvpx_srcs_arm = [ "//third_party/libvpx/source/libvpx/vpx/vpx_image.h", "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -1150,6 +1165,7 @@ libvpx_srcs_arm = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c", @@ -1175,15 +1191,13 @@ libvpx_srcs_arm_assembly = [ "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm", 
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/sixtappredict8x4_v6.asm", - "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm", - "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm", - "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/walsh_v6.asm", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_media.asm", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm", ] libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", @@ -1210,7 +1224,6 @@ libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/reconintra_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/sixtappredict_neon.c", - "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c", "//third_party/libvpx/source/libvpx/vp8/common/blockd.c", @@ -1288,7 +1301,6 @@ libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c", - "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.c", "//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.h", @@ -1343,6 +1355,7 @@ libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_loopfilter_neon.c", + "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c", "//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.h", "//third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.c", @@ -1519,7 +1532,9 @@ libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -1530,6 +1545,7 @@ libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", 
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c", @@ -1555,12 +1571,9 @@ libvpx_srcs_arm_neon_assembly = [ "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/sixtappredict8x4_v6.asm", - "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm", - "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm", - "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/walsh_v6.asm", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm", @@ -1582,6 +1595,7 @@ libvpx_srcs_arm_neon_assembly = [ "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_save_reg_neon.asm", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_media.asm", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm", ] libvpx_srcs_arm_neon_cpu_detect = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", @@ -1882,6 +1896,7 @@ libvpx_srcs_arm_neon_cpu_detect = [ "//third_party/libvpx/source/libvpx/vpx/vpx_image.h", "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -1892,6 +1907,7 @@ libvpx_srcs_arm_neon_cpu_detect = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c", @@ -1917,15 +1933,13 @@ libvpx_srcs_arm_neon_cpu_detect_assembly = [ "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/sixtappredict8x4_v6.asm", - "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm", - "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm", 
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm", - "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/walsh_v6.asm", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_media.asm", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve8_neon_asm.asm", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm", @@ -1962,19 +1976,18 @@ libvpx_srcs_arm_neon_cpu_detect_neon = [ "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/reconintra_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/sixtappredict_neon.c", - "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c", - "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c", "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_loopfilter_neon.c", + "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_avg_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c", @@ -1982,6 +1995,7 @@ libvpx_srcs_arm_neon_cpu_detect_neon = [ "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c", ] libvpx_srcs_arm64 = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", @@ -2005,7 +2019,6 @@ libvpx_srcs_arm64 = [ "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/reconintra_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/sixtappredict_neon.c", - "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c", "//third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c", "//third_party/libvpx/source/libvpx/vp8/common/blockd.c", @@ -2083,7 +2096,6 @@ libvpx_srcs_arm64 = [ 
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c", - "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c", "//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.c", "//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.h", @@ -2329,7 +2341,9 @@ libvpx_srcs_arm64 = [ "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -2340,6 +2354,7 @@ libvpx_srcs_arm64 = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c", @@ -2645,6 +2660,7 @@ libvpx_srcs_mips = [ "//third_party/libvpx/source/libvpx/vpx/vpx_image.h", "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -2653,6 +2669,7 @@ libvpx_srcs_mips = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c", @@ -2956,6 +2973,7 @@ libvpx_srcs_nacl = [ "//third_party/libvpx/source/libvpx/vpx/vpx_image.h", "//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -2964,6 +2982,7 @@ libvpx_srcs_nacl = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c", @@ -3267,6 +3286,7 @@ libvpx_srcs_generic = [ "//third_party/libvpx/source/libvpx/vpx/vpx_image.h", 
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h", "//third_party/libvpx/source/libvpx/vpx_dsp/sad.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c", "//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c", "//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h", "//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c", @@ -3275,6 +3295,7 @@ libvpx_srcs_generic = [ "//third_party/libvpx/source/libvpx/vpx_ports/mem.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h", "//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h", + "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h", "//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h", "//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c", diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm.gypi index bd6d32b07b7..1f457e299f6 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_arm.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_arm.gypi @@ -22,8 +22,6 @@ '<(libvpx_source)/vp8/common/arm/armv6/loopfilter_v6.asm', '<(libvpx_source)/vp8/common/arm/armv6/simpleloopfilter_v6.asm', '<(libvpx_source)/vp8/common/arm/armv6/sixtappredict8x4_v6.asm', - '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm', - '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm', @@ -103,7 +101,6 @@ '<(libvpx_source)/vp8/decoder/onyxd_int.h', '<(libvpx_source)/vp8/decoder/threading.c', '<(libvpx_source)/vp8/decoder/treereader.h', - '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm', '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', @@ -325,7 +322,9 @@ '<(libvpx_source)/vpx/vpx_image.h', '<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/arm/sad_media.asm', + '<(libvpx_source)/vpx_dsp/arm/variance_media.asm', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', @@ -336,6 +335,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi index 31e6fca63f0..5b4acd877ea 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi @@ -26,7 +26,6 @@ '<(libvpx_source)/vp8/common/arm/neon/reconintra_neon.c', '<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c', '<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance_neon.c', '<(libvpx_source)/vp8/common/arm/variance_arm.c', '<(libvpx_source)/vp8/common/blockd.c', @@ -104,7 +103,6 @@ 
'<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c', - '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c', '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', @@ -350,7 +348,9 @@ '<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/arm/sad4d_neon.c', '<(libvpx_source)/vpx_dsp/arm/sad_neon.c', + '<(libvpx_source)/vpx_dsp/arm/variance_neon.c', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', @@ -361,6 +361,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi index 277180640de..81e03e049b2 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi @@ -22,8 +22,6 @@ '<(libvpx_source)/vp8/common/arm/armv6/loopfilter_v6.asm', '<(libvpx_source)/vp8/common/arm/armv6/simpleloopfilter_v6.asm', '<(libvpx_source)/vp8/common/arm/armv6/sixtappredict8x4_v6.asm', - '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm', - '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm', @@ -48,7 +46,6 @@ '<(libvpx_source)/vp8/common/arm/neon/reconintra_neon.c', '<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c', '<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance_neon.c', '<(libvpx_source)/vp8/common/arm/variance_arm.c', '<(libvpx_source)/vp8/common/blockd.c', @@ -121,7 +118,6 @@ '<(libvpx_source)/vp8/decoder/onyxd_int.h', '<(libvpx_source)/vp8/decoder/threading.c', '<(libvpx_source)/vp8/decoder/treereader.h', - '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm', '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', @@ -129,7 +125,6 @@ '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c', - '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c', '<(libvpx_source)/vp8/encoder/bitstream.c', '<(libvpx_source)/vp8/encoder/bitstream.h', @@ -200,6 +195,7 @@ '<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm', '<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_neon.c', '<(libvpx_source)/vp9/common/arm/neon/vp9_mb_lpf_neon.asm', + '<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon.c', 
'<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm', '<(libvpx_source)/vp9/common/arm/neon/vp9_save_reg_neon.asm', '<(libvpx_source)/vp9/common/vp9_alloccommon.c', @@ -379,7 +375,10 @@ '<(libvpx_source)/vpx_dsp/arm/sad4d_neon.c', '<(libvpx_source)/vpx_dsp/arm/sad_media.asm', '<(libvpx_source)/vpx_dsp/arm/sad_neon.c', + '<(libvpx_source)/vpx_dsp/arm/variance_media.asm', + '<(libvpx_source)/vpx_dsp/arm/variance_neon.c', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', @@ -390,6 +389,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi index bd6d32b07b7..1f457e299f6 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi @@ -22,8 +22,6 @@ '<(libvpx_source)/vp8/common/arm/armv6/loopfilter_v6.asm', '<(libvpx_source)/vp8/common/arm/armv6/simpleloopfilter_v6.asm', '<(libvpx_source)/vp8/common/arm/armv6/sixtappredict8x4_v6.asm', - '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm', - '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm', '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm', @@ -103,7 +101,6 @@ '<(libvpx_source)/vp8/decoder/onyxd_int.h', '<(libvpx_source)/vp8/decoder/threading.c', '<(libvpx_source)/vp8/decoder/treereader.h', - '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm', '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm', '<(libvpx_source)/vp8/encoder/arm/dct_arm.c', @@ -325,7 +322,9 @@ '<(libvpx_source)/vpx/vpx_image.h', '<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/arm/sad_media.asm', + '<(libvpx_source)/vpx_dsp/arm/variance_media.asm', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', @@ -336,6 +335,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi index 6b60be6cfe0..b60479e37c5 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi @@ -29,13 +29,11 @@ '<(libvpx_source)/vp8/common/arm/neon/reconintra_neon.c', '<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c', 
'<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c', - '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c', '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c', - '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.c', '<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c', '<(libvpx_source)/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm', '<(libvpx_source)/vp9/common/arm/neon/vp9_convolve8_neon_asm.asm', @@ -58,6 +56,7 @@ '<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm', '<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_neon.c', '<(libvpx_source)/vp9/common/arm/neon/vp9_mb_lpf_neon.asm', + '<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon.c', '<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm', '<(libvpx_source)/vp9/common/arm/neon/vp9_save_reg_neon.asm', '<(libvpx_source)/vp9/encoder/arm/neon/vp9_avg_neon.c', @@ -67,6 +66,7 @@ '<(libvpx_source)/vp9/encoder/arm/neon/vp9_variance_neon.c', '<(libvpx_source)/vpx_dsp/arm/sad4d_neon.c', '<(libvpx_source)/vpx_dsp/arm/sad_neon.c', + '<(libvpx_source)/vpx_dsp/arm/variance_neon.c', ], 'includes': [ 'ads2gas.gypi' ], 'cflags!': [ '-mfpu=vfpv3-d16' ], diff --git a/chromium/third_party/libvpx/libvpx_srcs_generic.gypi b/chromium/third_party/libvpx/libvpx_srcs_generic.gypi index eb808cc1436..71a5c4adcb8 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_generic.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_generic.gypi @@ -295,6 +295,7 @@ '<(libvpx_source)/vpx/vpx_image.h', '<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', @@ -303,6 +304,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', diff --git a/chromium/third_party/libvpx/libvpx_srcs_mips.gypi b/chromium/third_party/libvpx/libvpx_srcs_mips.gypi index 8abf1082a11..df3f66fa791 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_mips.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_mips.gypi @@ -297,6 +297,7 @@ '<(libvpx_source)/vpx/vpx_image.h', '<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', @@ -305,6 +306,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', diff --git a/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi b/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi index eb808cc1436..71a5c4adcb8 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi @@ -295,6 +295,7 @@ '<(libvpx_source)/vpx/vpx_image.h', 
'<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', @@ -303,6 +304,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_scale/generic/gen_scalers.c', diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86.gypi index d61c186aca5..63f05e35f01 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_x86.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_x86.gypi @@ -85,10 +85,10 @@ '<(libvpx_source)/vp8/common/x86/subpixel_mmx.asm', '<(libvpx_source)/vp8/common/x86/subpixel_sse2.asm', '<(libvpx_source)/vp8/common/x86/subpixel_ssse3.asm', - '<(libvpx_source)/vp8/common/x86/variance_impl_mmx.asm', '<(libvpx_source)/vp8/common/x86/variance_impl_sse2.asm', '<(libvpx_source)/vp8/common/x86/variance_impl_ssse3.asm', '<(libvpx_source)/vp8/common/x86/vp8_asm_stubs.c', + '<(libvpx_source)/vp8/common/x86/vp8_variance_impl_mmx.asm', '<(libvpx_source)/vp8/decoder/dboolhuff.c', '<(libvpx_source)/vp8/decoder/dboolhuff.h', '<(libvpx_source)/vp8/decoder/decodeframe.c', @@ -216,6 +216,7 @@ '<(libvpx_source)/vp9/common/vp9_thread_common.h', '<(libvpx_source)/vp9/common/vp9_tile_common.c', '<(libvpx_source)/vp9/common/vp9_tile_common.h', + '<(libvpx_source)/vp9/common/x86/convolve.h', '<(libvpx_source)/vp9/common/x86/vp9_asm_stubs.c', '<(libvpx_source)/vp9/common/x86/vp9_copy_sse2.asm', '<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.h', @@ -318,8 +319,11 @@ '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h', '<(libvpx_source)/vp9/encoder/vp9_writer.c', '<(libvpx_source)/vp9/encoder/vp9_writer.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2_impl.h', '<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_subpel_variance.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_subtract_sse2.asm', @@ -344,6 +348,7 @@ '<(libvpx_source)/vpx/vpx_image.h', '<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_dsp/x86/sad4d_sse2.asm', '<(libvpx_source)/vpx_dsp/x86/sad_mmx.asm', @@ -351,6 +356,7 @@ '<(libvpx_source)/vpx_dsp/x86/sad_sse3.asm', '<(libvpx_source)/vpx_dsp/x86/sad_sse4.asm', '<(libvpx_source)/vpx_dsp/x86/sad_ssse3.asm', + '<(libvpx_source)/vpx_dsp/x86/variance_impl_mmx.asm', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', '<(libvpx_source)/vpx_mem/vpx_mem.h', @@ -359,6 +365,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_ports/x86.h', diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi index c99ebee722a..5072d33fa0c 100644 --- 
a/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi @@ -86,10 +86,10 @@ '<(libvpx_source)/vp8/common/x86/subpixel_mmx.asm', '<(libvpx_source)/vp8/common/x86/subpixel_sse2.asm', '<(libvpx_source)/vp8/common/x86/subpixel_ssse3.asm', - '<(libvpx_source)/vp8/common/x86/variance_impl_mmx.asm', '<(libvpx_source)/vp8/common/x86/variance_impl_sse2.asm', '<(libvpx_source)/vp8/common/x86/variance_impl_ssse3.asm', '<(libvpx_source)/vp8/common/x86/vp8_asm_stubs.c', + '<(libvpx_source)/vp8/common/x86/vp8_variance_impl_mmx.asm', '<(libvpx_source)/vp8/decoder/dboolhuff.c', '<(libvpx_source)/vp8/decoder/dboolhuff.h', '<(libvpx_source)/vp8/decoder/decodeframe.c', @@ -218,6 +218,7 @@ '<(libvpx_source)/vp9/common/vp9_thread_common.h', '<(libvpx_source)/vp9/common/vp9_tile_common.c', '<(libvpx_source)/vp9/common/vp9_tile_common.h', + '<(libvpx_source)/vp9/common/x86/convolve.h', '<(libvpx_source)/vp9/common/x86/vp9_asm_stubs.c', '<(libvpx_source)/vp9/common/x86/vp9_copy_sse2.asm', '<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.h', @@ -321,8 +322,11 @@ '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h', '<(libvpx_source)/vp9/encoder/vp9_writer.c', '<(libvpx_source)/vp9/encoder/vp9_writer.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.h', + '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2_impl.h', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm', '<(libvpx_source)/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm', @@ -350,6 +354,7 @@ '<(libvpx_source)/vpx/vpx_image.h', '<(libvpx_source)/vpx/vpx_integer.h', '<(libvpx_source)/vpx_dsp/sad.c', + '<(libvpx_source)/vpx_dsp/variance.c', '<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c', '<(libvpx_source)/vpx_dsp/x86/sad4d_sse2.asm', '<(libvpx_source)/vpx_dsp/x86/sad_mmx.asm', @@ -357,6 +362,7 @@ '<(libvpx_source)/vpx_dsp/x86/sad_sse3.asm', '<(libvpx_source)/vpx_dsp/x86/sad_sse4.asm', '<(libvpx_source)/vpx_dsp/x86/sad_ssse3.asm', + '<(libvpx_source)/vpx_dsp/x86/variance_impl_mmx.asm', '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h', '<(libvpx_source)/vpx_mem/vpx_mem.c', '<(libvpx_source)/vpx_mem/vpx_mem.h', @@ -365,6 +371,7 @@ '<(libvpx_source)/vpx_ports/mem.h', '<(libvpx_source)/vpx_ports/mem_ops.h', '<(libvpx_source)/vpx_ports/mem_ops_aligned.h', + '<(libvpx_source)/vpx_ports/msvc.h', '<(libvpx_source)/vpx_ports/vpx_once.h', '<(libvpx_source)/vpx_ports/vpx_timer.h', '<(libvpx_source)/vpx_ports/x86.h', diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi index ecf9fb7e810..f8590f576ac 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi @@ -14,8 +14,9 @@ ], 'sources': [ '<(libvpx_source)/vp8/common/x86/idct_blk_mmx.c', - '<(libvpx_source)/vp8/common/x86/variance_mmx.c', + '<(libvpx_source)/vp8/common/x86/vp8_variance_mmx.c', '<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_mmx.c', + '<(libvpx_source)/vpx_dsp/x86/variance_mmx.c', ], 'cflags': [ '-mmmx', ], 'xcode_settings': { 'OTHER_CFLAGS': [ '-mmmx' ] }, @@ -30,19 +31,18 @@ 'sources': [ '<(libvpx_source)/vp8/common/x86/idct_blk_sse2.c', '<(libvpx_source)/vp8/common/x86/recon_wrapper_sse2.c', - 
'<(libvpx_source)/vp8/common/x86/variance_sse2.c', + '<(libvpx_source)/vp8/common/x86/vp8_variance_sse2.c', '<(libvpx_source)/vp8/encoder/x86/denoising_sse2.c', '<(libvpx_source)/vp8/encoder/x86/quantize_sse2.c', '<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_sse2.c', '<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.c', '<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_avg_intrin_sse2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_dct_impl_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_denoiser_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_quantize_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_variance_sse2.c', + '<(libvpx_source)/vpx_dsp/x86/variance_sse2.c', ], 'cflags': [ '-msse2', ], 'xcode_settings': { 'OTHER_CFLAGS': [ '-msse2' ] }, @@ -104,14 +104,14 @@ 'sources': [ '<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_avx2.c', '<(libvpx_source)/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_error_intrin_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_variance_avx2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c', '<(libvpx_source)/vpx_dsp/x86/sad4d_avx2.c', '<(libvpx_source)/vpx_dsp/x86/sad_avx2.c', + '<(libvpx_source)/vpx_dsp/x86/variance_avx2.c', + '<(libvpx_source)/vpx_dsp/x86/variance_impl_avx2.c', ], 'cflags': [ '-mavx2', ], 'xcode_settings': { 'OTHER_CFLAGS': [ '-mavx2' ] }, diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi index ecf9fb7e810..f8590f576ac 100644 --- a/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi +++ b/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi @@ -14,8 +14,9 @@ ], 'sources': [ '<(libvpx_source)/vp8/common/x86/idct_blk_mmx.c', - '<(libvpx_source)/vp8/common/x86/variance_mmx.c', + '<(libvpx_source)/vp8/common/x86/vp8_variance_mmx.c', '<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_mmx.c', + '<(libvpx_source)/vpx_dsp/x86/variance_mmx.c', ], 'cflags': [ '-mmmx', ], 'xcode_settings': { 'OTHER_CFLAGS': [ '-mmmx' ] }, @@ -30,19 +31,18 @@ 'sources': [ '<(libvpx_source)/vp8/common/x86/idct_blk_sse2.c', '<(libvpx_source)/vp8/common/x86/recon_wrapper_sse2.c', - '<(libvpx_source)/vp8/common/x86/variance_sse2.c', + '<(libvpx_source)/vp8/common/x86/vp8_variance_sse2.c', '<(libvpx_source)/vp8/encoder/x86/denoising_sse2.c', '<(libvpx_source)/vp8/encoder/x86/quantize_sse2.c', '<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_sse2.c', '<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.c', '<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_avg_intrin_sse2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_dct_impl_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_denoiser_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_quantize_sse2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_variance_sse2.c', + '<(libvpx_source)/vpx_dsp/x86/variance_sse2.c', ], 'cflags': [ '-msse2', ], 'xcode_settings': { 'OTHER_CFLAGS': [ '-msse2' ] }, @@ -104,14 +104,14 @@ 'sources': [ 
'<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_avx2.c', '<(libvpx_source)/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_dct_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_error_intrin_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c', '<(libvpx_source)/vp9/encoder/x86/vp9_variance_avx2.c', - '<(libvpx_source)/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c', '<(libvpx_source)/vpx_dsp/x86/sad4d_avx2.c', '<(libvpx_source)/vpx_dsp/x86/sad_avx2.c', + '<(libvpx_source)/vpx_dsp/x86/variance_avx2.c', + '<(libvpx_source)/vpx_dsp/x86/variance_impl_avx2.c', ], 'cflags': [ '-mavx2', ], 'xcode_settings': { 'OTHER_CFLAGS': [ '-mavx2' ] }, diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h index 21caf9804bc..16cd808528c 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h @@ -33,8 +33,7 @@ RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_bilinear_predict4x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_armv6 void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_bilinear_predict8x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); @@ -136,13 +135,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_armv6 @@ -199,11 +191,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned 
char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define vp8_plane_add_noise vp8_plane_add_noise_c @@ -263,9 +250,6 @@ void vp8_sixtap_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, i void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -282,8 +266,7 @@ unsigned int vp8_sub_pixel_variance8x16_c(const unsigned char *src_ptr, int so unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_variance8x8_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_variance8x8)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); +#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_armv6 void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch); void vp8_subtract_b_neon(struct block *be, struct blockd *bd, int pitch); @@ -297,27 +280,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_armv6(const unsigned char *src_ptr, int 
source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -347,8 +309,6 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_armv6; if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_armv6; - if (flags & HAS_NEON) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_neon; vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_armv6; if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon; vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_armv6; @@ -379,8 +339,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon; vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_NEON) vp8_fast_quantize_b = vp8_fast_quantize_b_neon; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_NEON) vp8_get4x4sse_cs = vp8_get4x4sse_cs_neon; vp8_loop_filter_bh = vp8_loop_filter_bh_armv6; if (flags & 
HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon; vp8_loop_filter_bv = vp8_loop_filter_bv_armv6; @@ -397,8 +355,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon; vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon; - vp8_mse16x16 = vp8_mse16x16_armv6; - if (flags & HAS_NEON) vp8_mse16x16 = vp8_mse16x16_neon; vp8_short_fdct4x4 = vp8_short_fdct4x4_armv6; if (flags & HAS_NEON) vp8_short_fdct4x4 = vp8_short_fdct4x4_neon; vp8_short_fdct8x4 = vp8_short_fdct8x4_armv6; @@ -417,22 +373,12 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_armv6; if (flags & HAS_NEON) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_neon; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_armv6; - if (flags & HAS_NEON) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_neon; vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_NEON) vp8_subtract_b = vp8_subtract_b_neon; vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_NEON) vp8_subtract_mbuv = vp8_subtract_mbuv_neon; vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_NEON) vp8_subtract_mby = vp8_subtract_mby_neon; - vp8_variance16x16 = vp8_variance16x16_armv6; - if (flags & HAS_NEON) vp8_variance16x16 = vp8_variance16x16_neon; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_NEON) vp8_variance16x8 = vp8_variance16x8_neon; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_NEON) vp8_variance8x16 = vp8_variance8x16_neon; - vp8_variance8x8 = vp8_variance8x8_armv6; - if (flags & HAS_NEON) vp8_variance8x8 = vp8_variance8x8_neon; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon; vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6; diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h index eefc3fd7a47..a4108d1f790 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h @@ -93,7 +93,8 @@ void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c +void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c @@ -123,16 +124,19 @@ void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a #define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c +void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c +void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c +void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c @@ -147,52 +151,68 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab #define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void 
vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c +void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c +void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c +void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c +void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void 
(*vp9_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude); #define vp9_denoiser_filter vp9_denoiser_filter_c @@ -257,17 +277,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -410,18 +419,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define 
vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -565,51 +562,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c @@ -643,16 +595,52 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp9_convolve_avg = vp9_convolve_avg_neon; vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_NEON) vp9_convolve_copy = vp9_convolve_copy_neon; + vp9_d135_predictor_4x4 = vp9_d135_predictor_4x4_c; + if (flags & HAS_NEON) vp9_d135_predictor_4x4 = vp9_d135_predictor_4x4_neon; + vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; + if (flags & HAS_NEON) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_neon; + vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; + if (flags & HAS_NEON) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_neon; + vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; + if (flags & HAS_NEON) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_neon; + vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_c; + if (flags & HAS_NEON) vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_neon; + vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_c; + if (flags & HAS_NEON) vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_neon; + vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_c; + if (flags & HAS_NEON) vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_neon; + vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_neon; + vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_c; + if (flags & HAS_NEON) vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_neon; + vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_c; + if (flags & HAS_NEON) vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_neon; + vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_c; + if (flags & HAS_NEON) vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_neon; + vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_neon; + vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; + if (flags & HAS_NEON) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_neon; + vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; + if (flags & HAS_NEON) vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_neon; + vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_c; + if (flags & HAS_NEON) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_neon; + vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_neon; + vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_c; + if (flags & HAS_NEON) vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_neon; + 
vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_c; + if (flags & HAS_NEON) vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_neon; + vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_c; + if (flags & HAS_NEON) vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_neon; + vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_c; + if (flags & HAS_NEON) vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_neon; vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_NEON) vp9_fdct8x8 = vp9_fdct8x8_neon; vp9_fdct8x8_1 = vp9_fdct8x8_1_c; if (flags & HAS_NEON) vp9_fdct8x8_1 = vp9_fdct8x8_1_neon; vp9_fdct8x8_quant = vp9_fdct8x8_quant_c; if (flags & HAS_NEON) vp9_fdct8x8_quant = vp9_fdct8x8_quant_neon; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_NEON) vp9_get16x16var = vp9_get16x16var_neon; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_NEON) vp9_get8x8var = vp9_get8x8var_neon; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_NEON) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_neon; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -737,18 +725,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_neon; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_NEON) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_neon; - vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_NEON) vp9_variance16x16 = vp9_variance16x16_neon; - vp9_variance32x32 = vp9_variance32x32_c; - if (flags & HAS_NEON) vp9_variance32x32 = vp9_variance32x32_neon; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_NEON) vp9_variance32x64 = vp9_variance32x64_neon; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_NEON) vp9_variance64x32 = vp9_variance64x32_neon; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_NEON) vp9_variance64x64 = vp9_variance64x64_neon; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_NEON) vp9_variance8x8 = vp9_variance8x8_neon; } #endif diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm index 10c565ed61b..2fcb7d25b85 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm @@ -23,7 +23,6 @@ .equ HAVE_AVX2 , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c index 9b5a0ea3583..721ab0ae421 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. 
*/ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h index 3fb03579215..a1a3dcd04a7 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h index 5d3e9df812b..f926cf29a59 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h @@ -18,6 +18,38 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*vpx_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_media(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -194,6 +226,55 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned 
int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + void vpx_dsp_rtcd(void); #include "vpx_config.h" @@ -206,6 +287,14 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_NEON) vpx_get16x16var = vpx_get16x16var_neon; + vpx_get4x4sse_cs = vpx_get4x4sse_cs_c; + if (flags & HAS_NEON) vpx_get4x4sse_cs = vpx_get4x4sse_cs_neon; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_NEON) vpx_get8x8var = vpx_get8x8var_neon; + vpx_mse16x16 = vpx_mse16x16_media; + if (flags & HAS_NEON) vpx_mse16x16 = vpx_mse16x16_neon; vpx_sad16x16 = vpx_sad16x16_media; if (flags & HAS_NEON) vpx_sad16x16 = vpx_sad16x16_neon; vpx_sad16x16x4d = vpx_sad16x16x4d_c; @@ -226,6 +315,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) vpx_sad8x16 = vpx_sad8x16_neon; vpx_sad8x8 = vpx_sad8x8_c; if (flags & HAS_NEON) vpx_sad8x8 = vpx_sad8x8_neon; + vpx_variance16x16 = vpx_variance16x16_media; + if (flags & HAS_NEON) vpx_variance16x16 = vpx_variance16x16_neon; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_NEON) vpx_variance16x8 = vpx_variance16x8_neon; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_NEON) vpx_variance32x32 = vpx_variance32x32_neon; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_NEON) vpx_variance32x64 = vpx_variance32x64_neon; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_NEON) 
vpx_variance64x32 = vpx_variance64x32_neon; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_NEON) vpx_variance64x64 = vpx_variance64x64_neon; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_NEON) vpx_variance8x16 = vpx_variance8x16_neon; + vpx_variance8x8 = vpx_variance8x8_media; + if (flags & HAS_NEON) vpx_variance8x8 = vpx_variance8x8_neon; } #endif diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h index 8710a5784d1..a7a593501ad 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h @@ -33,8 +33,7 @@ void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_bilinear_predict4x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_neon +#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_armv6 void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_bilinear_predict8x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); @@ -136,13 +135,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_neon - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_armv6 @@ -199,11 +191,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_neon - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define 
vp8_plane_add_noise vp8_plane_add_noise_c @@ -263,9 +250,6 @@ void vp8_sixtap_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, i void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_neon -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -282,8 +266,7 @@ unsigned int vp8_sub_pixel_variance8x16_c(const unsigned char *src_ptr, int so unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_variance8x8_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_neon +#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_armv6 void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch); void vp8_subtract_b_neon(struct block *be, struct blockd *bd, int pitch); @@ -297,27 +280,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_neon -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_neon - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_neon - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_neon(const 
unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_neon - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_neon - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h index 9e445d5476f..8cc98e853c4 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h @@ -93,7 +93,8 @@ void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c +void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_neon void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c @@ -123,16 +124,19 @@ void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a #define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c +void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_neon void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c +void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_neon void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c +void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_neon void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left); #define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c @@ -147,52 +151,68 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab #define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_neon void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_neon void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_neon void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_neon void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_neon void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_neon void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_neon void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_neon void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c +void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_neon void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); 
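
Aside (editorial note, not part of the diff): most of the rtcd.h hunks above toggle between libvpx's two RTCD dispatch modes. In the static arm-neon configs a generic symbol is bound at build time with a plain `#define name name_neon`, while in the arm-neon-cpu-detect configs the same symbol becomes an `RTCD_EXTERN` function pointer that `setup_rtcd_internal()` repoints once the runtime CPU flags (`HAS_NEON`, etc.) are known. The sketch below is a minimal, self-contained illustration of that pattern only; it is not libvpx source, and every name in it (`my_predictor*`, `MY_HAS_NEON`, `MY_STATIC_NEON_BUILD`) is hypothetical.

```c
/* Illustrative sketch only -- not libvpx code. It mirrors the two RTCD
 * dispatch styles seen in the generated headers: a static config binds the
 * generic name with a #define, a cpu-detect config uses a function pointer
 * that a setup routine repoints after reading the CPU feature flags. */
#include <stdint.h>
#include <stdio.h>

static void my_predictor_c(uint8_t *dst)    { dst[0] = 1; }  /* portable C fallback */
static void my_predictor_neon(uint8_t *dst) { dst[0] = 2; }  /* stand-in for a NEON variant */

#define MY_HAS_NEON 0x1  /* hypothetical feature flag */

#ifdef MY_STATIC_NEON_BUILD
/* Static config (cf. linux/arm-neon): resolved at compile time, no indirection. */
#define my_predictor my_predictor_neon
#else
/* CPU-detect config (cf. linux/arm-neon-cpu-detect): resolved at runtime,
 * analogous to the RTCD_EXTERN pointer plus setup_rtcd_internal(). */
static void (*my_predictor)(uint8_t *dst) = my_predictor_c;

static void setup_dispatch(int cpu_flags)
{
    my_predictor = my_predictor_c;                        /* safe default */
    if (cpu_flags & MY_HAS_NEON) my_predictor = my_predictor_neon;
}
#endif

int main(void)
{
    uint8_t pixel = 0;
#ifndef MY_STATIC_NEON_BUILD
    setup_dispatch(MY_HAS_NEON);   /* pretend the CPU reported NEON support */
#endif
    my_predictor(&pixel);
    printf("dispatched variant wrote %u\n", pixel);
    return 0;
}
```

The same trade-off explains why this update deletes so many per-codec `vp8_variance*`/`vp9_variance*` entries here: those kernels moved into the shared `vpx_dsp` RTCD tables, so only the dispatch glue changes in these generated headers.
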
-#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c +void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_neon void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c +void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_neon void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c +void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_neon void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_neon void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_neon void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_neon void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_neon int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude); #define vp9_denoiser_filter vp9_denoiser_filter_c @@ -257,17 +277,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_neon - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_neon - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_neon @@ -410,18 +419,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -565,51 +562,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_neon -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_neon - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_neon - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_neon - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_neon - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_neon - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_neon - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm index fb523269888..b1043e656d9 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm @@ -23,7 +23,6 @@ .equ HAVE_AVX2 , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c index 58a32f0238e..e05d6991036 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. 
*/ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h index 477014ee9c6..4aaf47ba413 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h index 955c7640f79..82aa34db379 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h @@ -18,6 +18,38 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_neon + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_neon + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_neon + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_neon + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 
vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_media(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -194,6 +226,55 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_neon + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_neon + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_neon + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_neon + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_neon + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_neon + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, 
int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_neon + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_neon + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h index 37e4a3529d3..6d28eb60d2f 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h @@ -119,12 +119,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_armv6 @@ -173,10 +167,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_armv6 - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define vp8_plane_add_noise vp8_plane_add_noise_c @@ -228,9 +218,6 @@ void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int y void vp8_sixtap_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_armv6 -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned 
char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); #define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_armv6 @@ -257,23 +244,6 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_c -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_armv6 - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_c - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_c - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_armv6 - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6 diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h index fcca3063998..d1974a6f368 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h @@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_c - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_c - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c @@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, 
int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_c - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_c - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_c - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_c - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_c - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_c - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm index cf7330af874..c0e9fb24ff9 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm @@ -23,7 +23,6 @@ .equ HAVE_AVX2 , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c index 336b29cfd0c..0b06ef9adf1 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h index 99f4a7cea0a..0328467f029 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h index 20948755101..ea77e14aa19 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h @@ -18,6 +18,34 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_c + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_c + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_media + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_media(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); #define vpx_sad16x16 vpx_sad16x16_media @@ -184,6 +212,47 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_media + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_c + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_c + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_c + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_c + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_c + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_c + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_media + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h index 60f13cf8812..af969c3617f 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h @@ -31,8 +31,7 @@ void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst #define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_neon void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_neon +#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); @@ -124,13 +123,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_neon - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -178,10 +170,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_neon - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define vp8_plane_add_noise vp8_plane_add_noise_c @@ -232,9 +220,6 @@ void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int y void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 
vp8_sixtap_predict8x8_neon -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); #define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c @@ -262,25 +247,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_neon -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_neon - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_neon - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_neon - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_neon - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h index a976f9a7731..1541cf05522 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h @@ -93,7 +93,8 @@ void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c +void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define 
vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_neon void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c @@ -123,16 +124,19 @@ void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a #define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c +void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_neon void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c +void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_neon void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c +void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_neon void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c @@ -147,52 +151,68 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab #define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c +void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_neon void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c +void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_neon void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c +void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_neon void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c +void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_neon void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c +void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_neon void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c +void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_neon void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c +void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_neon void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c +void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_neon void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c +void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_neon void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c +void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_neon void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c +void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_neon void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c +void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_neon void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c +void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_neon void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c +void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_neon void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c +void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_4x4 
vp9_dc_top_predictor_4x4_neon void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c +void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_neon int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude); #define vp9_denoiser_filter vp9_denoiser_filter_c @@ -257,17 +277,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_neon - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_neon - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_neon @@ -402,18 +411,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -557,51 +554,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_neon -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 
vp9_variance16x16_neon - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_neon - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_neon - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_neon - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_neon - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_neon - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm index c7ef4366028..8d1201d83be 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm @@ -23,7 +23,6 @@ .equ HAVE_AVX2 , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ 
HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c index 84cebcdae2f..6674d94aed4 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--force-target=armv8-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h index 0409b886a57..f54e589ef32 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h index a0b6898b29b..af0741cca13 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h @@ -18,6 +18,37 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_neon + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_neon + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_neon + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_neon + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); #define vpx_sad16x16 vpx_sad16x16_neon @@ -193,6 +224,53 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_neon + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_neon + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_neon + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_neon + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_neon + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_neon + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_neon + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_neon + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h index 1bd7496fe55..7a33ed2effb 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h @@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_c - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define vp8_plane_add_noise vp8_plane_add_noise_c @@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); #define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c @@ -224,21 +212,6 @@ void 
vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_c -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_c - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_c - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_c - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_c - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h index 84bd45f2876..8cf86073577 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h @@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_c - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_c - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c @@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char 
*noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_c - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_c - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_c - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_c - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_c - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_c - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm index 20db06465bf..a2a18fdc7fd 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm @@ -23,7 +23,6 @@ .equ HAVE_AVX2 , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c 
b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c index 0e8d7265f36..2a18a45e3e3 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=generic-gnu --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h index ec8222e18c2..290508c1c88 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h index 86c5b4425dc..f086946da23 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h @@ -18,6 +18,33 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_c + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_c + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_c + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); #define vpx_sad16x16 vpx_sad16x16_c @@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const 
uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_c + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_c + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_c + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_c + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_c + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_c + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_c + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h index b60e7ac7301..38fe6d360b0 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); RTCD_EXTERN void (*vp8_clear_system_state)(); -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int 
ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *); - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb); -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); 
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char 
*ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -448,11 +404,6 @@ static void setup_rtcd_internal(void) vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; - if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; - if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; @@ -488,9 +439,6 @@ static void setup_rtcd_internal(void) vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; - if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; @@ -529,9 +477,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; - if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) 
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; @@ -558,21 +503,6 @@ static void setup_rtcd_internal(void) vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; - if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; - if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; - if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; - if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; - if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; - if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; - if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; - if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h index 7425dc281bd..1da20dc1bcc 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h @@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c +void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -320,19 +321,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride); -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, 
int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -484,23 +472,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); RTCD_EXTERN void (*vp9_plane_add_noise)(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); @@ -700,63 +671,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int 
(*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl); @@ -807,6 +721,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; + vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) 
vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; @@ -907,13 +823,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; vp9_fwht4x4 = vp9_fwht4x4_c; if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2; - vp9_get_mb_ss = vp9_get_mb_ss_c; - if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -987,15 +896,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_mbpost_proc_down = vp9_mbpost_proc_down_xmm; vp9_minmax_8x8 = vp9_minmax_8x8_c; if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2; - vp9_mse16x16 = vp9_mse16x16_c; - if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; - vp9_mse16x8 = vp9_mse16x8_c; - if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2; - vp9_mse8x16 = vp9_mse8x16_c; - if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2; - vp9_mse8x8 = vp9_mse8x8_c; - if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2; vp9_plane_add_noise = vp9_plane_add_noise_c; if (flags & HAS_SSE2) vp9_plane_add_noise = vp9_plane_add_noise_wmt; vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c; @@ -1106,37 +1006,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance16x32 = vp9_variance16x32_c; - if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; - if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; - if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_c; - if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; - if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; - if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; - vp9_variance8x16 = vp9_variance8x16_c; - if (flags & HAS_SSE2) vp9_variance8x16 = 
vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; - if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; vp9_vector_var = vp9_vector_var_c; if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2; } diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm index b9418dda315..9550f440153 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm @@ -20,7 +20,6 @@ %define HAVE_AVX2 1 %define HAVE_VPX_PORTS 1 %define HAVE_STDINT_H 1 -%define HAVE_ALT_TREE_LAYOUT 0 %define HAVE_PTHREAD_H 1 %define HAVE_SYS_MMAN_H 1 %define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c index f254f6d7167..08b58cc7244 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=x86-linux-gcc --disable-ccache --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h index 3715b4aef3e..c20daf6517f 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h index 8cc2fa5b737..32ee77e25ce 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h @@ -18,6 +18,45 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned 
char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +unsigned int 
vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + void vpx_dsp_rtcd(void); #ifdef RTCD_C @@ -261,6 +362,25 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_c; + if (flags & 
HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; + if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; + vpx_get_mb_ss = vpx_get_mb_ss_c; + if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx; + if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2; + vpx_mse16x16 = vpx_mse16x16_c; + if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; + if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_c; + if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + vpx_mse8x16 = vpx_mse8x16_c; + if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; + vpx_mse8x8 = vpx_mse8x8_c; + if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2; vpx_sad16x16 = vpx_sad16x16_c; if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx; if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2; @@ -378,6 +498,42 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_c; + if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; + if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_c; + if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx; + if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + vpx_variance32x16 = vpx_variance32x16_c; + if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + vpx_variance4x4 = vpx_variance4x4_c; + if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx; + if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; + vpx_variance4x8 = vpx_variance4x8_c; + if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; + if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; + vpx_variance8x4 = vpx_variance8x4_c; + if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2; + vpx_variance8x8 = vpx_variance8x8_c; + if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx; + if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2; } #endif diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h index ec819184a76..3befb52db1e 100644 --- 
a/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h @@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_c - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define vp8_plane_add_noise vp8_plane_add_noise_c @@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); #define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c @@ -227,21 +215,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); #define vp8_temporal_filter_apply vp8_temporal_filter_apply_c -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_c - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_c - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_c - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int 
source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_c - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h index 84bd45f2876..8cf86073577 100644 --- a/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h @@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_c - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_c - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c @@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_c - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_c - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_c - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_c - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_c - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_c - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c index 1000fbb39b3..9e6876b5859 100644 --- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. 
*/ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=mips64-linux-gcc --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h index 194dfa5d91d..a9e7f3ac8d9 100644 --- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h index 86c5b4425dc..f086946da23 100644 --- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h @@ -18,6 +18,33 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_c + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_c + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_c + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); #define vpx_sad16x16 vpx_sad16x16_c @@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_c + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
+#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_c + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_c + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_c + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_c + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_c + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_c + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h index ec819184a76..3befb52db1e 100644 --- a/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h @@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const 
unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_c - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define vp8_plane_add_noise vp8_plane_add_noise_c @@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); #define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c @@ -227,21 +215,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count); #define vp8_temporal_filter_apply vp8_temporal_filter_apply_c -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_c - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_c - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_c - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_c - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h index 84bd45f2876..8cf86073577 100644 --- a/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h @@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_c - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_c - 
-unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c @@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_c - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_c - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_c - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_c - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_c - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_c - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c index 7f5c1f6e858..2c1375d883a 100644 --- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=mips32-linux-gcc --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h index 1783d40cca5..4210a7c868d 100644 --- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h index 86c5b4425dc..f086946da23 100644 --- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h @@ -18,6 +18,33 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_c + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_c + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_c + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); #define vpx_sad16x16 vpx_sad16x16_c @@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_c + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_c + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_c + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_c + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_c + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_c + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_c + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h index 9989eb09995..5285ac71055 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h +++ 
b/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_sse2 - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_xmm -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_wmt - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char 
bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_sse2 -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_wmt - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_wmt - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_wmt - -unsigned int vp8_variance8x16_c(const unsigned char 
*src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_wmt - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_wmt - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h index 10a6b8401db..0834e762f34 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h @@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c +void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -321,19 +322,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_mmx -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_sse2 - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_sse2 - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -488,23 +476,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_sse2 -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_sse2 - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_sse2 - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_sse2 - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_wmt @@ -709,63 +680,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int 
*sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_sse2 - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_sse2 - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_sse2 - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_sse2 - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_sse2 - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_sse2 - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_sse2 - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_sse2 - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_sse2 @@ -799,6 +713,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; + vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; @@ -838,8 +754,6 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -856,8 +770,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; - vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -922,16 +834,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance32x16 = 
vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; } #endif diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm index 68a0d73808d..0c4b550b78a 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm @@ -20,7 +20,6 @@ %define HAVE_AVX2 1 %define HAVE_VPX_PORTS 1 %define HAVE_STDINT_H 1 -%define HAVE_ALT_TREE_LAYOUT 0 %define HAVE_PTHREAD_H 1 %define HAVE_SYS_MMAN_H 1 %define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c index 1221f4849ea..4f190b1958d 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=x86_64-linux-gcc --disable-ccache --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h index 72f336395ce..27d91efdbd4 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h index b5df5e08c57..d93c56eb765 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h @@ -18,6 +18,45 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_sse2 + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_sse2 + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_sse2 + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_sse2 + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_sse2 + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_sse2 + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_sse2 + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_sse2 + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_sse2 + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_sse2 + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_sse2 + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_sse2 + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_sse2 + void vpx_dsp_rtcd(void); #ifdef RTCD_C @@ -261,6 +362,10 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_sad16x16x3 = vpx_sad16x16x3_c; if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; @@ -307,6 +412,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; } #endif diff --git 
a/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h index b60e7ac7301..38fe6d360b0 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); RTCD_EXTERN void (*vp8_clear_system_state)(); -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *); - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb); -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char 
*ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const 
unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -448,11 +404,6 @@ static void setup_rtcd_internal(void) vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; - if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; - if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; @@ -488,9 +439,6 @@ static void setup_rtcd_internal(void) vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = 
vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; - if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; @@ -529,9 +477,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; - if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; @@ -558,21 +503,6 @@ static void setup_rtcd_internal(void) vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; - if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; - if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; - if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; - if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; - if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; - if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; - if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; - if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h index 7425dc281bd..1da20dc1bcc 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h @@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c +void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ 
-320,19 +321,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride); -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -484,23 +472,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); RTCD_EXTERN void (*vp9_plane_add_noise)(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); @@ -700,63 +671,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl); @@ -807,6 +721,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; + vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; @@ -907,13 +823,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; vp9_fwht4x4 = vp9_fwht4x4_c; if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2; - vp9_get_mb_ss = vp9_get_mb_ss_c; - if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -987,15 +896,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_mbpost_proc_down = vp9_mbpost_proc_down_xmm; vp9_minmax_8x8 = vp9_minmax_8x8_c; if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2; - vp9_mse16x16 = vp9_mse16x16_c; - if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; - vp9_mse16x8 = vp9_mse16x8_c; - if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2; - vp9_mse8x16 = vp9_mse8x16_c; - if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2; - vp9_mse8x8 = vp9_mse8x8_c; - if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2; vp9_plane_add_noise = vp9_plane_add_noise_c; if (flags & HAS_SSE2) vp9_plane_add_noise = vp9_plane_add_noise_wmt; vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c; @@ -1106,37 +1006,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance16x32 = vp9_variance16x32_c; - if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; - if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; - if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = 
vp9_variance32x32_c; - if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; - if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; - if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; - vp9_variance8x16 = vp9_variance8x16_c; - if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; - if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; vp9_vector_var = vp9_vector_var_c; if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2; } diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm index b9418dda315..9550f440153 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm @@ -20,7 +20,6 @@ %define HAVE_AVX2 1 %define HAVE_VPX_PORTS 1 %define HAVE_STDINT_H 1 -%define HAVE_ALT_TREE_LAYOUT 0 %define HAVE_PTHREAD_H 1 %define HAVE_SYS_MMAN_H 1 %define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c index d8cb73c99ab..90849ad6fbf 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. 
*/ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=x86-darwin9-gcc --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h index 3715b4aef3e..c20daf6517f 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h index 8cc2fa5b737..32ee77e25ce 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h @@ -18,6 +18,45 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned 
int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + void vpx_dsp_rtcd(void); #ifdef RTCD_C @@ -261,6 +362,25 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; + if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; + vpx_get_mb_ss = vpx_get_mb_ss_c; + if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx; + if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2; + vpx_mse16x16 = vpx_mse16x16_c; + if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; + if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_c; + if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + vpx_mse8x16 = vpx_mse8x16_c; + if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; + vpx_mse8x8 = vpx_mse8x8_c; + if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2; vpx_sad16x16 = vpx_sad16x16_c; if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx; if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2; @@ -378,6 +498,42 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_c; + if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; + if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & 
HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_c; + if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx; + if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + vpx_variance32x16 = vpx_variance32x16_c; + if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + vpx_variance4x4 = vpx_variance4x4_c; + if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx; + if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; + vpx_variance4x8 = vpx_variance4x8_c; + if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; + if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; + vpx_variance8x4 = vpx_variance8x4_c; + if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2; + vpx_variance8x8 = vpx_variance8x8_c; + if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx; + if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2; } #endif diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h index 9989eb09995..5285ac71055 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int 
vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_sse2 - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_xmm -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_wmt - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned 
char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_sse2 -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_wmt - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_wmt - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_wmt - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_wmt - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_wmt - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); diff --git 
a/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h index 10a6b8401db..0834e762f34 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h @@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c +void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -321,19 +322,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_mmx -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_sse2 - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_sse2 - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -488,23 +476,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_sse2 -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int 
vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_sse2 - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_sse2 - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_sse2 - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_wmt @@ -709,63 +680,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_sse2 - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_sse2 - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_sse2 - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_sse2 - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_sse2 - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_sse2 - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_sse2 - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_sse2 - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int 
vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_sse2 @@ -799,6 +713,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; + vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; @@ -838,8 +754,6 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -856,8 +770,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; - vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -922,16 +834,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; } #endif diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm index 68a0d73808d..0c4b550b78a 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm @@ -20,7 +20,6 @@ %define HAVE_AVX2 1 %define HAVE_VPX_PORTS 1 %define HAVE_STDINT_H 1 -%define HAVE_ALT_TREE_LAYOUT 0 %define HAVE_PTHREAD_H 1 %define HAVE_SYS_MMAN_H 1 %define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c index 62f0db7d0c7..cba8a3d5809 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. 
All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=x86_64-darwin9-gcc --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h index 72f336395ce..27d91efdbd4 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h index b5df5e08c57..d93c56eb765 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h @@ -18,6 +18,45 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_sse2 + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_sse2 + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN 
unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_sse2 + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_sse2 + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_sse2 + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_sse2 + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_sse2 + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_sse2 + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_sse2 + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_sse2 + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_sse2 
+ +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_sse2 + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_sse2 + void vpx_dsp_rtcd(void); #ifdef RTCD_C @@ -261,6 +362,10 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_sad16x16x3 = vpx_sad16x16x3_c; if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; @@ -307,6 +412,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; } #endif diff --git a/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h index 1bd7496fe55..7a33ed2effb 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h @@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); #define vp8_full_search_sad vp8_full_search_sad_c -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c - -unsigned int vp8_get_mb_ss_c(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_c - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in int vp8_mbuverror_c(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_c -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_c - void 
vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); #define vp8_plane_add_noise vp8_plane_add_noise_c @@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); #define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c @@ -224,21 +212,6 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_c -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_c - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_c - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_c - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_c - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_c - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c diff --git a/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h index 84bd45f2876..8cf86073577 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h @@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_c -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get16x16var vp9_get16x16var_c - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_c - -unsigned int vp9_get_mb_ss_c(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_c - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c @@ 
-368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_c -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x16 vp9_mse16x16_c - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_c - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_c - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_c - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_c @@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x16 vp9_variance16x16_c - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_c - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_c - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x16 vp9_variance32x16_c - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x32 vp9_variance32x32_c - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_c - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_c - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_c - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x32 vp9_variance64x32_c - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance64x64 vp9_variance64x64_c - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_c - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); 
-#define vp9_variance8x4 vp9_variance8x4_c - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_c - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_c diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm b/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm index 20db06465bf..a2a18fdc7fd 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm @@ -23,7 +23,6 @@ .equ HAVE_AVX2 , 0 .equ HAVE_VPX_PORTS , 1 .equ HAVE_STDINT_H , 1 -.equ HAVE_ALT_TREE_LAYOUT , 0 .equ HAVE_PTHREAD_H , 1 .equ HAVE_SYS_MMAN_H , 1 .equ HAVE_UNISTD_H , 0 diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.c b/chromium/third_party/libvpx/source/config/nacl/vpx_config.c index 0e8d7265f36..2a18a45e3e3 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=generic-gnu --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h index ec8222e18c2..290508c1c88 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 0 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 1 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 1 #define HAVE_SYS_MMAN_H 1 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h index 86c5b4425dc..f086946da23 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h @@ -18,6 +18,33 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get16x16var vpx_get16x16var_c + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_c + +unsigned int vpx_get_mb_ss_c(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_c + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
recon_stride, unsigned int *sse); +#define vpx_mse16x16 vpx_mse16x16_c + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_c + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_c + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_c + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); #define vpx_sad16x16 vpx_sad16x16_c @@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); #define vpx_sad8x8x8 vpx_sad8x8x8_c +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x16 vpx_variance16x16_c + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_c + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_c + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x16 vpx_variance32x16_c + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x32 vpx_variance32x32_c + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_c + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_c + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_c + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x32 vpx_variance64x32_c + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance64x64 vpx_variance64x64_c + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_c + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_c + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_c + void vpx_dsp_rtcd(void); #include "vpx_config.h" diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h 
b/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h index b60e7ac7301..38fe6d360b0 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); RTCD_EXTERN void (*vp8_clear_system_state)(); -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *); - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb); -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int 
(*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int 
vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); @@ -448,11 +404,6 @@ static void setup_rtcd_internal(void) vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; - if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; - if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; @@ -488,9 +439,6 @@ static void setup_rtcd_internal(void) vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; - if (flags & HAS_MMX) 
vp8_mse16x16 = vp8_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; @@ -529,9 +477,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; - if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; - if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; @@ -558,21 +503,6 @@ static void setup_rtcd_internal(void) vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; - if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; - if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; - if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; - if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; - if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; - if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; - if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; - if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; - if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; - if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h index 7425dc281bd..1da20dc1bcc 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h @@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c +void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -320,19 +321,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, 
int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride); -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -484,23 +472,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); RTCD_EXTERN void (*vp9_plane_add_noise)(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); @@ -700,63 +671,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl); @@ -807,6 +721,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; + vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; @@ -907,13 +823,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; vp9_fwht4x4 = vp9_fwht4x4_c; if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx; - vp9_get16x16var = vp9_get16x16var_c; - if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; - vp9_get8x8var = vp9_get8x8var_c; - if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2; - vp9_get_mb_ss = vp9_get_mb_ss_c; - if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -987,15 +896,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vp9_mbpost_proc_down = vp9_mbpost_proc_down_xmm; vp9_minmax_8x8 = vp9_minmax_8x8_c; if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2; - vp9_mse16x16 = vp9_mse16x16_c; - if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; - vp9_mse16x8 = vp9_mse16x8_c; - if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2; - vp9_mse8x16 = vp9_mse8x16_c; - if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2; - vp9_mse8x8 = vp9_mse8x8_c; - if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2; vp9_plane_add_noise = vp9_plane_add_noise_c; if (flags & HAS_SSE2) vp9_plane_add_noise = vp9_plane_add_noise_wmt; vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c; @@ -1106,37 +1006,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; - if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance16x32 = vp9_variance16x32_c; - if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; - if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; - if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_c; - if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & 
HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance32x64 = vp9_variance32x64_c; - if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; - if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; - if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; - if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_c; - if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; - vp9_variance8x16 = vp9_variance8x16_c; - if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; - if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; - if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; vp9_vector_var = vp9_vector_var_c; if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2; } diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm index 2509394a18d..c817164acd9 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm @@ -20,7 +20,6 @@ %define HAVE_AVX2 1 %define HAVE_VPX_PORTS 1 %define HAVE_STDINT_H 0 -%define HAVE_ALT_TREE_LAYOUT 0 %define HAVE_PTHREAD_H 0 %define HAVE_SYS_MMAN_H 0 %define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c index b2271e28dac..d88b6d68609 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. 
*/ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=x86-win32-vs12 --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h index fa8997e86c1..e5c65cf5956 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 0 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 0 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h index 8cc2fa5b737..32ee77e25ce 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h @@ -18,6 +18,45 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); 
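/*
 * Editorial illustration (not part of the commit diff above or below): the
 * vpx_dsp_rtcd.h declarations added in this hunk follow libvpx's run-time
 * CPU detection (RTCD) pattern. Each operation gets a plain-C reference
 * implementation plus optional MMX/SSE2/AVX2 variants, and an RTCD_EXTERN
 * function pointer that setup_rtcd_internal() aims at the best variant the
 * host CPU reports. A minimal sketch of that pattern, using a hypothetical
 * operation my_mse() and a hypothetical capability query cpu_flags() in
 * place of the generated names:
 *
 *   unsigned int my_mse_c(const uint8_t *src, int src_stride,
 *                         const uint8_t *ref, int ref_stride, unsigned int *sse);
 *   unsigned int my_mse_sse2(const uint8_t *src, int src_stride,
 *                            const uint8_t *ref, int ref_stride, unsigned int *sse);
 *   unsigned int my_mse_avx2(const uint8_t *src, int src_stride,
 *                            const uint8_t *ref, int ref_stride, unsigned int *sse);
 *   unsigned int (*my_mse)(const uint8_t *src, int src_stride,
 *                          const uint8_t *ref, int ref_stride, unsigned int *sse);
 *
 *   static void setup_rtcd_internal(void) {
 *       int flags = cpu_flags();         // hypothetical SIMD capability query
 *       my_mse = my_mse_c;               // always start from the C reference
 *       if (flags & HAS_SSE2) my_mse = my_mse_sse2;
 *       if (flags & HAS_AVX2) my_mse = my_mse_avx2;   // last matching flag wins
 *   }
 */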
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + void vpx_dsp_rtcd(void); #ifdef RTCD_C @@ -261,6 +362,25 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_c; + if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_get8x8var = vpx_get8x8var_c; + if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx; + if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2; + vpx_get_mb_ss = vpx_get_mb_ss_c; + if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx; + if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2; + vpx_mse16x16 = vpx_mse16x16_c; + if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx; + if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_c; + if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + vpx_mse8x16 = vpx_mse8x16_c; + if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; + vpx_mse8x8 = vpx_mse8x8_c; + if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2; vpx_sad16x16 = vpx_sad16x16_c; if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx; if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2; @@ -378,6 +498,42 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_c; + if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx; + if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = 
vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_c; + if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + vpx_variance16x8 = vpx_variance16x8_c; + if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx; + if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + vpx_variance32x16 = vpx_variance32x16_c; + if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_c; + if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_c; + if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + vpx_variance4x4 = vpx_variance4x4_c; + if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx; + if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; + vpx_variance4x8 = vpx_variance4x8_c; + if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2; + vpx_variance64x32 = vpx_variance64x32_c; + if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_c; + if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; + vpx_variance8x16 = vpx_variance8x16_c; + if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx; + if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2; + vpx_variance8x4 = vpx_variance8x4_c; + if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2; + vpx_variance8x8 = vpx_variance8x8_c; + if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx; + if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2; } #endif diff --git a/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h index 9989eb09995..5285ac71055 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h @@ -74,10 +74,10 @@ void vp8_clear_system_state_c(); void vpx_reset_mmx_state(); #define vp8_clear_system_state vpx_reset_mmx_state -void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); -RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n); +void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); +RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n); void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); @@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock 
*x, struct block *b, struct blockd * int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv); -unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); -#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx - -unsigned int vp8_get_mb_ss_c(const short *); -unsigned int vp8_get_mb_ss_mmx(const short *); -unsigned int vp8_get_mb_ss_sse2(const short *); -#define vp8_get_mb_ss vp8_get_mb_ss_sse2 - void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); #define vp8_intra4x4_predict vp8_intra4x4_predict_c @@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb); int vp8_mbuverror_xmm(struct macroblock *mb); #define vp8_mbuverror vp8_mbuverror_xmm -unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_mse16x16 vp8_mse16x16_wmt - void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch); @@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); -#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt - unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int 
xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse); @@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); #define vp8_subtract_mby vp8_subtract_mby_sse2 -unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x16 vp8_variance16x16_wmt - -unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance16x8 vp8_variance16x8_wmt - -unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance4x4 vp8_variance4x4_wmt - -unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x16 vp8_variance8x16_wmt - -unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); -#define vp8_variance8x8 vp8_variance8x8_wmt - unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse); diff --git 
a/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h index 10a6b8401db..0834e762f34 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h @@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c +void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -321,19 +322,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride); #define vp9_fwht4x4 vp9_fwht4x4_mmx -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -#define vp9_get8x8var vp9_get8x8var_sse2 - -unsigned int vp9_get_mb_ss_c(const int16_t *); -unsigned int vp9_get_mb_ss_sse2(const int16_t *); -#define vp9_get_mb_ss vp9_get_mb_ss_sse2 - void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -488,23 +476,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define vp9_minmax_8x8 vp9_minmax_8x8_sse2 -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); - -unsigned int 
vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse16x8 vp9_mse16x8_sse2 - -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x16 vp9_mse8x16_sse2 - -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); -#define vp9_mse8x8 vp9_mse8x8_sse2 - void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch); #define vp9_plane_add_noise vp9_plane_add_noise_wmt @@ -709,63 +680,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse -unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x32 vp9_variance16x32_sse2 - -unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance16x8 vp9_variance16x8_sse2 - -unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_sse2(const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance32x64 vp9_variance32x64_sse2 - -unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x4 vp9_variance4x4_sse2 - -unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance4x8 vp9_variance4x8_sse2 - -unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); - -unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x16 vp9_variance8x16_sse2 - -unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x4 vp9_variance8x4_sse2 - -unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -#define vp9_variance8x8 vp9_variance8x8_sse2 - int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl); int 
vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl); #define vp9_vector_var vp9_vector_var_sse2 @@ -799,6 +713,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2; vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; + vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3; vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; @@ -838,8 +754,6 @@ static void setup_rtcd_internal(void) vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - vp9_get16x16var = vp9_get16x16var_sse2; - if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2; vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; @@ -856,8 +770,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3; vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2; - vp9_mse16x16 = vp9_mse16x16_sse2; - if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2; vp9_quantize_b = vp9_quantize_b_sse2; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; @@ -922,16 +834,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_variance16x16 = vp9_variance16x16_sse2; - if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2; - vp9_variance32x16 = vp9_variance32x16_sse2; - if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2; - vp9_variance32x32 = vp9_variance32x32_sse2; - if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2; - vp9_variance64x32 = vp9_variance64x32_sse2; - if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2; - vp9_variance64x64 = vp9_variance64x64_sse2; - if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2; } #endif diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm index b2d9a3776f4..27abdda9cb1 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm @@ -20,7 +20,6 @@ %define HAVE_AVX2 1 %define HAVE_VPX_PORTS 1 %define HAVE_STDINT_H 0 -%define HAVE_ALT_TREE_LAYOUT 0 %define HAVE_PTHREAD_H 0 %define HAVE_SYS_MMAN_H 0 %define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c index 67a4c84a4d2..6dd3005d3c5 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c @@ -5,5 +5,6 @@ /* tree. An additional intellectual property rights grant can be found */ /* in the file PATENTS. 
All contributing project authors may */ /* be found in the AUTHORS file in the root of the source tree. */ +#include "vpx/vpx_codec.h" static const char* const cfg = "--target=x86_64-win64-vs12 --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384"; const char *vpx_codec_build_config(void) {return cfg;} diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h index 5e6837cbe9a..d97def150e8 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h @@ -32,7 +32,6 @@ #define HAVE_AVX2 1 #define HAVE_VPX_PORTS 1 #define HAVE_STDINT_H 0 -#define HAVE_ALT_TREE_LAYOUT 0 #define HAVE_PTHREAD_H 0 #define HAVE_SYS_MMAN_H 0 #define HAVE_UNISTD_H 0 diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h index b5df5e08c57..d93c56eb765 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h @@ -18,6 +18,45 @@ extern "C" { #endif +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define vpx_comp_avg_pred vpx_comp_avg_pred_c + +void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c + +void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vpx_get8x8var vpx_get8x8var_sse2 + +unsigned int vpx_get_mb_ss_c(const int16_t *); +unsigned int vpx_get_mb_ss_mmx(const int16_t *); +unsigned int vpx_get_mb_ss_sse2(const int16_t *); +#define vpx_get_mb_ss vpx_get_mb_ss_sse2 + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int 
(*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse16x8 vpx_mse16x8_sse2 + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x16 vpx_mse8x16_sse2 + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define vpx_mse8x8 vpx_mse8x8_sse2 + unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); @@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x32 vpx_variance16x32_sse2 + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance16x8 vpx_variance16x8_sse2 + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int 
vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance32x64 vpx_variance32x64_sse2 + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x4 vpx_variance4x4_sse2 + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance4x8 vpx_variance4x8_sse2 + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x16 vpx_variance8x16_sse2 
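All of the vpx_variance{W}x{H} kernels declared above share one contract: walk a WxH block of the source and reference frames using their independent strides, write the sum of squared differences through *sse, and return the block variance. As a point of reference only (a plain C loop with an illustrative name, not the optimized libvpx code), an 8x8 version looks like this:

    #include <stdint.h>

    /* Reference 8x8 variance: var = SSE - sum*sum / N, with N = 64 pixels. */
    static unsigned int variance8x8_ref(const uint8_t *src, int src_stride,
                                        const uint8_t *ref, int ref_stride,
                                        unsigned int *sse) {
      int sum = 0;
      unsigned int sq = 0;
      int r, c;
      for (r = 0; r < 8; ++r) {
        for (c = 0; c < 8; ++c) {
          const int d = src[c] - ref[c];
          sum += d;
          sq += (unsigned int)(d * d);
        }
        src += src_stride;
        ref += ref_stride;
      }
      *sse = sq;
      return sq - (unsigned int)(((int64_t)sum * sum) / 64);
    }

The SSE2/AVX2 variants declared above compute the same two accumulators with SIMD; only the dispatch differs.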
+ +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x4 vpx_variance8x4_sse2 + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define vpx_variance8x8 vpx_variance8x8_sse2 + void vpx_dsp_rtcd(void); #ifdef RTCD_C @@ -261,6 +362,10 @@ static void setup_rtcd_internal(void) (void)flags; + vpx_get16x16var = vpx_get16x16var_sse2; + if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2; + vpx_mse16x16 = vpx_mse16x16_sse2; + if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; vpx_sad16x16x3 = vpx_sad16x16x3_c; if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3; if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3; @@ -307,6 +412,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3; vpx_sad8x8x8 = vpx_sad8x8x8_c; if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1; + vpx_variance16x16 = vpx_variance16x16_sse2; + if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance32x16 = vpx_variance32x16_sse2; + if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; + vpx_variance32x32 = vpx_variance32x32_sse2; + if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance64x32 = vpx_variance64x32_sse2; + if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; + vpx_variance64x64 = vpx_variance64x64_sse2; + if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2; } #endif diff --git a/chromium/third_party/libvpx/source/libvpx/CHANGELOG b/chromium/third_party/libvpx/source/libvpx/CHANGELOG index a318784150a..b0d306442b3 100644 --- a/chromium/third_party/libvpx/source/libvpx/CHANGELOG +++ b/chromium/third_party/libvpx/source/libvpx/CHANGELOG @@ -1,3 +1,8 @@ +xxxx-yy-zz v1.4.0 "Changes for next release" + vpxenc is changed to use VP9 by default. + Encoder controls added for 1 pass SVC. + Decoder control to toggle on/off loopfilter. + 2015-04-03 v1.4.0 "Indian Runner Duck" This release includes significant improvements to the VP9 codec. 
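The setup_rtcd_internal() hunks above show libvpx's run-time CPU detection pattern: a symbol with a single usable specialization is bound at build time with a #define (for example vpx_variance8x8), while a symbol with several SIMD variants is an RTCD_EXTERN function pointer that starts at the baseline (SSE2 on this win/x64 config) and is upgraded once the CPU flags are known. A stripped-down, self-contained sketch of that pattern, with illustrative names and an illustrative flag value (the real flags and setup code live in the generated rtcd headers):

    #include <stdint.h>
    #include <stdio.h>

    #define HAS_AVX2_SKETCH 0x08  /* illustrative bit; not libvpx's value */

    /* Two stand-in specializations of the same operation. */
    static unsigned int my_op_sse2(const uint8_t *p) { return p[0] + 2u; }
    static unsigned int my_op_avx2(const uint8_t *p) { return p[0] + 2u; }

    /* The RTCD-style pointer: callers always go through my_op. */
    static unsigned int (*my_op)(const uint8_t *p);

    static void setup_sketch(int flags) {
      my_op = my_op_sse2;                               /* safe baseline */
      if (flags & HAS_AVX2_SKETCH) my_op = my_op_avx2;  /* upgrade if supported */
    }

    int main(void) {
      const uint8_t px = 40;
      setup_sketch(HAS_AVX2_SKETCH);  /* libvpx derives the flags from CPUID at init */
      printf("%u\n", my_op(&px));     /* prints 42; only the dispatch path differs */
      return 0;
    }

This also matches the vp9_variance*/vp9_mse* removals earlier in the change: those kernels now live in vpx_dsp, so their run-time dispatch moves from vp9_rtcd.h into vpx_dsp_rtcd.h.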
diff --git a/chromium/third_party/libvpx/source/libvpx/README b/chromium/third_party/libvpx/source/libvpx/README index fcd1c2e18cf..7b44ba7a22a 100644 --- a/chromium/third_party/libvpx/source/libvpx/README +++ b/chromium/third_party/libvpx/source/libvpx/README @@ -101,13 +101,6 @@ COMPILING THE APPLICATIONS/LIBRARIES: x86_64-win64-vs10 x86_64-win64-vs11 x86_64-win64-vs12 - universal-darwin8-gcc - universal-darwin9-gcc - universal-darwin10-gcc - universal-darwin11-gcc - universal-darwin12-gcc - universal-darwin13-gcc - universal-darwin14-gcc generic-gnu The generic-gnu target, in conjunction with the CROSS environment variable, diff --git a/chromium/third_party/libvpx/source/libvpx/args.c b/chromium/third_party/libvpx/source/libvpx/args.c index 9dabc9bdd6d..14b031040a4 100644 --- a/chromium/third_party/libvpx/source/libvpx/args.c +++ b/chromium/third_party/libvpx/source/libvpx/args.c @@ -14,9 +14,7 @@ #include <limits.h> #include "args.h" -#ifdef _MSC_VER -#define snprintf _snprintf -#endif +#include "vpx_ports/msvc.h" #if defined(__GNUC__) && __GNUC__ extern void die(const char *fmt, ...) __attribute__((noreturn)); diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk b/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk index 0add523f99d..e971c9d1c45 100644 --- a/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk +++ b/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk @@ -163,6 +163,7 @@ ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) endif # Add a dependency to force generation of the RTCD files. +define rtcd_dep_template ifeq ($(CONFIG_VP8), yes) $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h endif @@ -175,6 +176,9 @@ $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_dsp_rtcd.h ifeq ($(TARGET_ARCH_ABI),x86) $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm endif +endef + +$(eval $(call rtcd_dep_template)) .PHONY: clean clean: diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/Makefile b/chromium/third_party/libvpx/source/libvpx/build/make/Makefile index fc7749a5519..f1b1cca33bf 100644 --- a/chromium/third_party/libvpx/source/libvpx/build/make/Makefile +++ b/chromium/third_party/libvpx/source/libvpx/build/make/Makefile @@ -22,8 +22,10 @@ clean:: .DEFAULT exampletest: .DEFAULT install:: .DEFAULT test:: .DEFAULT +test-no-data-check:: .DEFAULT testdata:: .DEFAULT utiltest: .DEFAULT +exampletest-no-data-check utiltest-no-data-check: .DEFAULT # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be @@ -56,13 +58,10 @@ dist: fi endif +# Since we invoke make recursively for multiple targets we need to include the +# .mk file for the correct target, but only when $(target) is non-empty. ifneq ($(target),) -# Normally, we want to build the filename from the target and the toolchain. -# This disambiguates from the $(target).mk file that exists in the source tree. -# However, the toolchain is part of the target in universal builds, so we -# don't want to include TOOLCHAIN in that case. FAT_ARCHS is used to test -# if we're in the universal case. -include $(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk +include $(target)-$(TOOLCHAIN).mk endif BUILD_ROOT?=. 
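The args.c hunk above swaps a per-file _MSC_VER workaround for the new vpx_ports/msvc.h header that this change adds to the source lists. The header itself is not shown in this diff; a plausible minimal shim, matching what args.c previously did inline (the guard macro name and the version check are assumptions, not quoted from the real file), would be:

    /* vpx_ports/msvc.h, sketched: older MSVC has no conforming snprintf. */
    #ifndef VPX_PORTS_MSVC_H_SKETCH
    #define VPX_PORTS_MSVC_H_SKETCH

    #ifdef _MSC_VER
    #if _MSC_VER < 1900  /* VS2015 is the first MSVC with a C99-style snprintf */
    #define snprintf _snprintf
    #endif
    #endif

    #endif  /* VPX_PORTS_MSVC_H_SKETCH */

Centralizing the shim is also why the same vpx_ports/msvc.h entry is appended to vpxdec, vpxenc and every example's SRCS list later in this change.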
VPATH=$(SRC_PATH_BARE) @@ -116,6 +115,9 @@ test:: testdata:: .PHONY: utiltest utiltest: +.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check +test-no-data-check:: +exampletest-no-data-check utiltest-no-data-check: # Add compiler flags for intrinsic files ifeq ($(TOOLCHAIN), x86-os2-gcc) @@ -313,18 +315,15 @@ $(1): $$(filter %.o,$$^) $$(extralibs) endef - - -define lipo_lib_template -$(1): $(addsuffix /$(1),$(FAT_ARCHS)) - $(if $(quiet),@echo " [LIPO] $$@") - $(qexec)libtool -static -o $$@ $$? -endef - -define lipo_bin_template -$(1): $(addsuffix /$(1),$(FAT_ARCHS)) - $(if $(quiet),@echo " [LIPO] $$@") - $(qexec)lipo -output $$@ -create $$? +define dll_template +# Not using a pattern rule here because we don't want to generate empty +# archives when they are listed as a dependency in files not responsible +# for creating them. +$(1): + $(if $(quiet),@echo " [LD] $$@") + $(qexec)$$(LD) -Zdll $$(LDFLAGS) \ + -o $$@ \ + $$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE) endef @@ -385,6 +384,7 @@ LIBS=$(call enabled,LIBS) $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib)))) $(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib)))) $(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib)))) +$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib)))) INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS) ifeq ($(MAKECMDGOALS),dist) diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh index 68cc8bb4a5d..688fa12c52a 100644 --- a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh +++ b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh @@ -390,7 +390,7 @@ write_common_config_banner() { write_common_config_targets() { for t in ${all_targets}; do if enabled ${t}; then - if enabled universal || enabled child; then + if enabled child; then fwrite config.mk "ALL_TARGETS += ${t}-${toolchain}" else fwrite config.mk "ALL_TARGETS += ${t}" @@ -647,14 +647,6 @@ process_common_toolchain() { # detect tgt_os case "$gcctarget" in - *darwin8*) - tgt_isa=universal - tgt_os=darwin8 - ;; - *darwin9*) - tgt_isa=universal - tgt_os=darwin9 - ;; *darwin10*) tgt_isa=x86_64 tgt_os=darwin10 @@ -736,6 +728,13 @@ process_common_toolchain() { # Handle darwin variants. Newer SDKs allow targeting older # platforms, so use the newest one available. 
case ${toolchain} in + arm*-darwin*) + ios_sdk_dir="$(show_darwin_sdk_path iphoneos)" + if [ -d "${ios_sdk_dir}" ]; then + add_cflags "-isysroot ${ios_sdk_dir}" + add_ldflags "-isysroot ${ios_sdk_dir}" + fi + ;; *-darwin*) osx_sdk_dir="$(show_darwin_sdk_path macosx)" if [ -d "${osx_sdk_dir}" ]; then @@ -811,7 +810,14 @@ process_common_toolchain() { if disabled neon && enabled neon_asm; then die "Disabling neon while keeping neon-asm is not supported" fi - soft_enable media + case ${toolchain} in + *-darwin*) + # Neon is guaranteed on iOS 6+ devices, while old media extensions + # no longer assemble with iOS 9 SDK + ;; + *) + soft_enable media + esac ;; armv6) soft_enable media @@ -1215,7 +1221,7 @@ EOF ;; esac ;; - universal*|*-gcc|generic-gnu) + *-gcc|generic-gnu) link_with_cc=gcc enable_feature gcc setup_gnu_toolchain diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh index 643ebd634be..b653651030e 100755 --- a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh +++ b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh @@ -263,8 +263,8 @@ case "$target" in ;; arm*) platforms[0]="ARM" - asm_Debug_cmdline="armasm -nologo "%(FullPath)"" - asm_Release_cmdline="armasm -nologo "%(FullPath)"" + asm_Debug_cmdline="armasm -nologo -oldit "%(FullPath)"" + asm_Release_cmdline="armasm -nologo -oldit "%(FullPath)"" ;; *) die "Unsupported target $target!" ;; diff --git a/chromium/third_party/libvpx/source/libvpx/configure b/chromium/third_party/libvpx/source/libvpx/configure index 98542855a3e..6cac15aeea5 100755 --- a/chromium/third_party/libvpx/source/libvpx/configure +++ b/chromium/third_party/libvpx/source/libvpx/configure @@ -148,13 +148,6 @@ all_platforms="${all_platforms} x86_64-win64-vs9" all_platforms="${all_platforms} x86_64-win64-vs10" all_platforms="${all_platforms} x86_64-win64-vs11" all_platforms="${all_platforms} x86_64-win64-vs12" -all_platforms="${all_platforms} universal-darwin8-gcc" -all_platforms="${all_platforms} universal-darwin9-gcc" -all_platforms="${all_platforms} universal-darwin10-gcc" -all_platforms="${all_platforms} universal-darwin11-gcc" -all_platforms="${all_platforms} universal-darwin12-gcc" -all_platforms="${all_platforms} universal-darwin13-gcc" -all_platforms="${all_platforms} universal-darwin14-gcc" all_platforms="${all_platforms} generic-gnu" # all_targets is a list of all targets that can be configured @@ -191,6 +184,10 @@ if [ ${doxy_major:-0} -ge 1 ]; then [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen fi +# disable codecs when their source directory does not exist +[ -d "${source_path}/vp8" ] || disable_feature vp8 +[ -d "${source_path}/vp9" ] || disable_feature vp9 + # install everything except the sources, by default. sources will have # to be enabled when doing dist builds, since that's no longer a common # case. @@ -206,31 +203,16 @@ enable_feature multithread enable_feature os_support enable_feature temporal_denoising -[ -d "${source_path}/../include" ] && enable_feature alt_tree_layout -for d in vp8 vp9; do - [ -d "${source_path}/${d}" ] && disable_feature alt_tree_layout; -done - -if ! 
enabled alt_tree_layout; then -# development environment -[ -d "${source_path}/vp8" ] && CODECS="${CODECS} vp8_encoder vp8_decoder" -[ -d "${source_path}/vp9" ] && CODECS="${CODECS} vp9_encoder vp9_decoder" -else -# customer environment -[ -f "${source_path}/../include/vpx/vp8cx.h" ] && CODECS="${CODECS} vp8_encoder" -[ -f "${source_path}/../include/vpx/vp8dx.h" ] && CODECS="${CODECS} vp8_decoder" -[ -f "${source_path}/../include/vpx/vp9cx.h" ] && CODECS="${CODECS} vp9_encoder" -[ -f "${source_path}/../include/vpx/vp9dx.h" ] && CODECS="${CODECS} vp9_decoder" -[ -f "${source_path}/../include/vpx/vp8cx.h" ] || disable_feature vp8_encoder -[ -f "${source_path}/../include/vpx/vp8dx.h" ] || disable_feature vp8_decoder -[ -f "${source_path}/../include/vpx/vp9cx.h" ] || disable_feature vp9_encoder -[ -f "${source_path}/../include/vpx/vp9dx.h" ] || disable_feature vp9_decoder - -[ -f "${source_path}/../lib/*/*mt.lib" ] && soft_enable static_msvcrt -fi - -CODECS="$(echo ${CODECS} | tr ' ' '\n')" -CODEC_FAMILIES="$(for c in ${CODECS}; do echo ${c%_*}; done | sort | uniq)" +CODECS=" + vp8_encoder + vp8_decoder + vp9_encoder + vp9_decoder +" +CODEC_FAMILIES=" + vp8 + vp9 +" ARCH_LIST=" arm @@ -262,7 +244,6 @@ HAVE_LIST=" ${ARCH_EXT_LIST} vpx_ports stdint_h - alt_tree_layout pthread_h sys_mman_h unistd_h @@ -436,22 +417,8 @@ post_process_cmdline() { process_targets() { enabled child || write_common_config_banner - enabled universal || write_common_target_config_h ${BUILD_PFX}vpx_config.h - - # For fat binaries, call configure recursively to configure for each - # binary architecture to be included. - if enabled universal; then - # Call configure (ourselves) for each subarchitecture - for arch in $fat_bin_archs; do - BUILD_PFX=${arch}/ toolchain=${arch} $self --child $cmdline_args || exit $? - done - fi - - # The write_common_config (config.mk) logic is deferred until after the - # recursive calls to configure complete, because we want our universal - # targets to be executed last. + write_common_target_config_h ${BUILD_PFX}vpx_config.h write_common_config_targets - enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk # Calculate the default distribution name, based on the enabled features cf="" @@ -527,11 +494,11 @@ process_detect() { # Can only build shared libs on a subset of platforms. Doing this check # here rather than at option parse time because the target auto-detect # magic happens after the command line has been parsed. - if ! enabled linux; then + if ! enabled linux && ! 
enabled os2; then if enabled gnu; then echo "--enable-shared is only supported on ELF; assuming this is OK" else - die "--enable-shared only supported on ELF for now" + die "--enable-shared only supported on ELF and OS/2 for now" fi fi fi @@ -596,24 +563,6 @@ EOF process_toolchain() { process_common_toolchain - # Handle universal binaries for this architecture - case $toolchain in - universal-darwin*) - darwin_ver=${tgt_os##darwin} - - # Tiger (10.4/darwin8) brought support for x86 - if [ $darwin_ver -ge 8 ]; then - fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}" - fi - - # Leopard (10.5/darwin9) brought 64 bit support - if [ $darwin_ver -ge 9 ]; then - fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}" - fi - ;; - esac - - # Enable some useful compiler flags if enabled gcc; then enabled werror && check_add_cflags -Werror @@ -701,7 +650,7 @@ process_toolchain() { esac # Other toolchain specific defaults - case $toolchain in x86*|universal*) soft_enable postproc;; esac + case $toolchain in x86*) soft_enable postproc;; esac if enabled postproc_visualizer; then enabled postproc || die "postproc_visualizer requires postproc to be enabled" @@ -765,6 +714,7 @@ CONFIGURE_ARGS="$@" process "$@" print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */" cat <<EOF >> ${BUILD_PFX}vpx_config.c +#include "vpx/vpx_codec.h" static const char* const cfg = "$CONFIGURE_ARGS"; const char *vpx_codec_build_config(void) {return cfg;} EOF diff --git a/chromium/third_party/libvpx/source/libvpx/examples.mk b/chromium/third_party/libvpx/source/libvpx/examples.mk index 4ff1de4eeae..fad02cfcd2b 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples.mk +++ b/chromium/third_party/libvpx/source/libvpx/examples.mk @@ -56,6 +56,7 @@ UTILS-$(CONFIG_DECODERS) += vpxdec.c vpxdec.SRCS += md5_utils.c md5_utils.h vpxdec.SRCS += vpx_ports/mem_ops.h vpxdec.SRCS += vpx_ports/mem_ops_aligned.h +vpxdec.SRCS += vpx_ports/msvc.h vpxdec.SRCS += vpx_ports/vpx_timer.h vpxdec.SRCS += vpx/vpx_integer.h vpxdec.SRCS += args.c args.h @@ -80,6 +81,7 @@ vpxenc.SRCS += tools_common.c tools_common.h vpxenc.SRCS += warnings.c warnings.h vpxenc.SRCS += vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h +vpxenc.SRCS += vpx_ports/msvc.h vpxenc.SRCS += vpx_ports/vpx_timer.h vpxenc.SRCS += vpxstats.c vpxstats.h ifeq ($(CONFIG_LIBYUV),yes) @@ -98,6 +100,7 @@ ifeq ($(CONFIG_SPATIAL_SVC),yes) vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h vp9_spatial_svc_encoder.SRCS += video_common.h vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c + vp9_spatial_svc_encoder.SRCS += vpx_ports/msvc.h vp9_spatial_svc_encoder.SRCS += vpxstats.c vpxstats.h vp9_spatial_svc_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder @@ -112,6 +115,7 @@ vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h vpx_temporal_svc_encoder.SRCS += video_common.h vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c +vpx_temporal_svc_encoder.SRCS += vpx_ports/msvc.h vpx_temporal_svc_encoder.GUID = B18C08F2-A439-4502-A78E-849BE3D60947 vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c @@ -122,6 +126,7 @@ simple_decoder.SRCS += video_common.h simple_decoder.SRCS += video_reader.h video_reader.c simple_decoder.SRCS += vpx_ports/mem_ops.h simple_decoder.SRCS += vpx_ports/mem_ops_aligned.h +simple_decoder.SRCS += vpx_ports/msvc.h simple_decoder.DESCRIPTION = 
Simplified decoder loop EXAMPLES-$(CONFIG_DECODERS) += postproc.c postproc.SRCS += ivfdec.h ivfdec.c @@ -130,6 +135,7 @@ postproc.SRCS += video_common.h postproc.SRCS += video_reader.h video_reader.c postproc.SRCS += vpx_ports/mem_ops.h postproc.SRCS += vpx_ports/mem_ops_aligned.h +postproc.SRCS += vpx_ports/msvc.h postproc.GUID = 65E33355-F35E-4088-884D-3FD4905881D7 postproc.DESCRIPTION = Decoder postprocessor control EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c @@ -140,6 +146,7 @@ decode_to_md5.SRCS += video_common.h decode_to_md5.SRCS += video_reader.h video_reader.c decode_to_md5.SRCS += vpx_ports/mem_ops.h decode_to_md5.SRCS += vpx_ports/mem_ops_aligned.h +decode_to_md5.SRCS += vpx_ports/msvc.h decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42 decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c @@ -147,6 +154,7 @@ simple_encoder.SRCS += ivfenc.h ivfenc.c simple_encoder.SRCS += tools_common.h tools_common.c simple_encoder.SRCS += video_common.h simple_encoder.SRCS += video_writer.h video_writer.c +simple_encoder.SRCS += vpx_ports/msvc.h simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD simple_encoder.DESCRIPTION = Simplified encoder loop EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_lossless_encoder.c @@ -154,6 +162,7 @@ vp9_lossless_encoder.SRCS += ivfenc.h ivfenc.c vp9_lossless_encoder.SRCS += tools_common.h tools_common.c vp9_lossless_encoder.SRCS += video_common.h vp9_lossless_encoder.SRCS += video_writer.h video_writer.c +vp9_lossless_encoder.SRCS += vpx_ports/msvc.h vp9_lossless_encoder.GUID = B63C7C88-5348-46DC-A5A6-CC151EF93366 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c @@ -161,6 +170,7 @@ twopass_encoder.SRCS += ivfenc.h ivfenc.c twopass_encoder.SRCS += tools_common.h tools_common.c twopass_encoder.SRCS += video_common.h twopass_encoder.SRCS += video_writer.h video_writer.c +twopass_encoder.SRCS += vpx_ports/msvc.h twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8 twopass_encoder.DESCRIPTION = Two-pass encoder loop EXAMPLES-$(CONFIG_DECODERS) += decode_with_drops.c @@ -170,6 +180,7 @@ decode_with_drops.SRCS += video_common.h decode_with_drops.SRCS += video_reader.h video_reader.c decode_with_drops.SRCS += vpx_ports/mem_ops.h decode_with_drops.SRCS += vpx_ports/mem_ops_aligned.h +decode_with_drops.SRCS += vpx_ports/msvc.h decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26 decode_with_drops.DESCRIPTION = Drops frames while decoding EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c @@ -177,6 +188,7 @@ set_maps.SRCS += ivfenc.h ivfenc.c set_maps.SRCS += tools_common.h tools_common.c set_maps.SRCS += video_common.h set_maps.SRCS += video_writer.h video_writer.c +set_maps.SRCS += vpx_ports/msvc.h set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F set_maps.DESCRIPTION = Set active and ROI maps EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c @@ -184,6 +196,7 @@ vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c vp8cx_set_ref.SRCS += tools_common.h tools_common.c vp8cx_set_ref.SRCS += video_common.h vp8cx_set_ref.SRCS += video_writer.h video_writer.c +vp8cx_set_ref.SRCS += vpx_ports/msvc.h vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame @@ -194,6 +207,7 @@ EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c vp8_multi_resolution_encoder.SRCS += ivfenc.h ivfenc.c vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c vp8_multi_resolution_encoder.SRCS += 
video_writer.h video_writer.c +vp8_multi_resolution_encoder.SRCS += vpx_ports/msvc.h vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS) vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding @@ -254,14 +268,6 @@ CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS)) $(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk)) -# If this is a universal (fat) binary, then all the subarchitectures have -# already been built and our job is to stitch them together. The -# BUILD_OBJS variable indicates whether we should be building -# (compiling, linking) the library. The LIPO_OBJS variable indicates -# that we're stitching. -$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes) - - # Create build/install dependencies for all examples. The common case # is handled here. The MSVS case is handled below. NOT_MSVS = $(if $(CONFIG_MSVS),,yes) @@ -269,24 +275,28 @@ DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX))) INSTALL-BINS-$(NOT_MSVS) += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX))) DIST-SRCS-yes += $(ALL_SRCS) INSTALL-SRCS-yes += $(UTIL_SRCS) -OBJS-$(NOT_MSVS) += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS))) +OBJS-$(NOT_MSVS) += $(call objs,$(ALL_SRCS)) BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX))) # Instantiate linker template for all examples. CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) -SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so) +ifneq ($(filter darwin%,$(TGT_OS)),) +SHARED_LIB_SUF=.dylib +else +ifneq ($(filter os2%,$(TGT_OS)),) +SHARED_LIB_SUF=_dll.a +else +SHARED_LIB_SUF=.so +endif +endif CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a) $(foreach bin,$(BINS-yes),\ - $(if $(BUILD_OBJS),$(eval $(bin):\ - $(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\ - $(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\ + $(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\ + $(eval $(call linker_template,$(bin),\ $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \ -l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\ - )))\ - $(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\ - ) - + ))) # The following pairs define a mapping of locations in the distribution # tree to locations in the source/build trees. 
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c b/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c index a3843bed336..1ae7a4b57f5 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c @@ -71,7 +71,7 @@ static void print_md5(FILE *stream, unsigned char digest[16]) { static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); exit(EXIT_FAILURE); } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c b/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c index 36f7d80e127..2233e473d36 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c @@ -65,7 +65,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name); exit(EXIT_FAILURE); } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/postproc.c b/chromium/third_party/libvpx/source/libvpx/examples/postproc.c index e34426a6194..a8ac208d9bd 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/postproc.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/postproc.c @@ -52,7 +52,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); exit(EXIT_FAILURE); } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c b/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c index f8c35255fa2..e6fdd5bb2af 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include <string.h> +#include "../tools_common.h" #include "../vp9/encoder/vp9_resize.h" static const char *exec_name = NULL; @@ -26,7 +27,7 @@ static void usage() { printf("<output_yuv> [<frames>]\n"); } -void usage_exit() { +void usage_exit(void) { usage(); exit(EXIT_FAILURE); } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c b/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c index 5555baac22e..1dc3ac0c98f 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c @@ -55,7 +55,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", exec_name); exit(EXIT_FAILURE); diff --git a/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c b/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c index 08a21668542..8ccc81035e3 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c @@ -88,7 +88,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); exit(EXIT_FAILURE); } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c index e805c258747..a3077297318 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c +++ 
b/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c @@ -106,7 +106,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile> " "<keyframe-interval> [<error-resilient>]\nSee comments in " diff --git a/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c index 0ec83ddccdf..aecc11d3f4e 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c @@ -58,7 +58,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", exec_name); exit(EXIT_FAILURE); diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c index e623567b8fe..2b032049c0b 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c @@ -37,15 +37,14 @@ #include <unistd.h> #endif #include "vpx_ports/vpx_timer.h" -#define VPX_CODEC_DISABLE_COMPAT 1 #include "vpx/vpx_encoder.h" #include "vpx/vp8cx.h" #include "vpx_ports/mem_ops.h" -#include "./tools_common.h" +#include "../tools_common.h" #define interface (vpx_codec_vp8_cx()) #define fourcc 0x30385056 -void usage_exit() { +void usage_exit(void) { exit(EXIT_FAILURE); } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c b/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c index a2982821a42..8b4cc303d3c 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c @@ -58,7 +58,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n", exec_name); exit(EXIT_FAILURE); diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c index 54275770d5f..82725168304 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c @@ -20,7 +20,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless " "encoding feature. Supports raw input only.\n"); fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name); diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c index f4deb693b2f..5a609766561 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c @@ -14,11 +14,13 @@ * that benefit from a scalable bitstream. 
*/ +#include <math.h> #include <stdarg.h> #include <stdlib.h> #include <string.h> #include <time.h> + #include "../args.h" #include "../tools_common.h" #include "../video_writer.h" @@ -27,11 +29,18 @@ #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "../vpxstats.h" +#define OUTPUT_RC_STATS 1 static const arg_def_t skip_frames_arg = ARG_DEF("s", "skip-frames", 1, "input frames to skip"); static const arg_def_t frames_arg = ARG_DEF("f", "frames", 1, "number of frames to encode"); +static const arg_def_t threads_arg = + ARG_DEF("th", "threads", 1, "number of threads to use"); +#if OUTPUT_RC_STATS +static const arg_def_t output_rc_stats_arg = + ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats"); +#endif static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width"); static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height"); static const arg_def_t timebase_arg = @@ -42,6 +51,9 @@ static const arg_def_t spatial_layers_arg = ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers"); static const arg_def_t temporal_layers_arg = ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers"); +static const arg_def_t temporal_layering_mode_arg = + ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme." + "VP9E_TEMPORAL_LAYERING_MODE"); static const arg_def_t kf_dist_arg = ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes"); static const arg_def_t scale_factors_arg = @@ -65,6 +77,8 @@ static const arg_def_t lag_in_frame_arg = "generating any outputs"); static const arg_def_t rc_end_usage_arg = ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q"); +static const arg_def_t speed_arg = + ARG_DEF("sp", "speed", 1, "speed configuration"); #if CONFIG_VP9_HIGHBITDEPTH static const struct arg_enum_list bitdepth_enum[] = { @@ -85,10 +99,16 @@ static const arg_def_t *svc_args[] = { &timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg, &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg, - &max_bitrate_arg, &temporal_layers_arg, &lag_in_frame_arg, + &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg, + &lag_in_frame_arg, &threads_arg, +#if OUTPUT_RC_STATS + &output_rc_stats_arg, +#endif + #if CONFIG_VP9_HIGHBITDEPTH &bitdepth_arg, #endif + &speed_arg, &rc_end_usage_arg, NULL }; @@ -102,6 +122,10 @@ static const uint32_t default_bitrate = 1000; static const uint32_t default_spatial_layers = 5; static const uint32_t default_temporal_layers = 1; static const uint32_t default_kf_dist = 100; +static const uint32_t default_temporal_layering_mode = 0; +static const uint32_t default_output_rc_stats = 0; +static const int32_t default_speed = -1; // -1 means use library default. +static const uint32_t default_threads = 0; // zero means use library default. 
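A change repeated across the example sources above (decode_to_md5.c, decode_with_drops.c, postproc.c, resize_util.c, set_maps.c, simple_decoder.c, simple_encoder.c, twopass_encoder.c, the VP8 examples and vp9_lossless_encoder.c) is the switch from usage_exit() to usage_exit(void). In C an empty parameter list declares a function with unspecified arguments rather than a true prototype, so only the (void) form lets the compiler reject stray arguments and keeps -Wstrict-prototypes builds quiet. A two-line illustration with hypothetical names:

    void usage_exit_old();      /* not a prototype: usage_exit_old(42) still compiles */
    void usage_exit_new(void);  /* real prototype: usage_exit_new(42) is a hard error */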
typedef struct { const char *input_filename; @@ -116,7 +140,7 @@ typedef struct { static const char *exec_name; -void usage_exit() { +void usage_exit(void) { fprintf(stderr, "Usage: %s <options> input_filename output_filename\n", exec_name); fprintf(stderr, "Options:\n"); @@ -143,6 +167,12 @@ static void parse_command_line(int argc, const char **argv_, svc_ctx->log_level = SVC_LOG_DEBUG; svc_ctx->spatial_layers = default_spatial_layers; svc_ctx->temporal_layers = default_temporal_layers; + svc_ctx->temporal_layering_mode = default_temporal_layering_mode; +#if OUTPUT_RC_STATS + svc_ctx->output_rc_stat = default_output_rc_stats; +#endif + svc_ctx->speed = default_speed; + svc_ctx->threads = default_threads; // start with default encoder configuration res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0); @@ -184,6 +214,20 @@ static void parse_command_line(int argc, const char **argv_, svc_ctx->spatial_layers = arg_parse_uint(&arg); } else if (arg_match(&arg, &temporal_layers_arg, argi)) { svc_ctx->temporal_layers = arg_parse_uint(&arg); +#if OUTPUT_RC_STATS + } else if (arg_match(&arg, &output_rc_stats_arg, argi)) { + svc_ctx->output_rc_stat = arg_parse_uint(&arg); +#endif + } else if (arg_match(&arg, &speed_arg, argi)) { + svc_ctx->speed = arg_parse_uint(&arg); + } else if (arg_match(&arg, &threads_arg, argi)) { + svc_ctx->threads = arg_parse_uint(&arg); + } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) { + svc_ctx->temporal_layering_mode = + enc_cfg->temporal_layering_mode = arg_parse_int(&arg); + if (svc_ctx->temporal_layering_mode) { + enc_cfg->g_error_resilient = 1; + } } else if (arg_match(&arg, &kf_dist_arg, argi)) { enc_cfg->kf_min_dist = arg_parse_uint(&arg); enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; @@ -316,6 +360,185 @@ static void parse_command_line(int argc, const char **argv_, enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist); } +#if OUTPUT_RC_STATS +// For rate control encoding stats. +struct RateControlStats { + // Number of input frames per layer. + int layer_input_frames[VPX_MAX_LAYERS]; + // Total (cumulative) number of encoded frames per layer. + int layer_tot_enc_frames[VPX_MAX_LAYERS]; + // Number of encoded non-key frames per layer. + int layer_enc_frames[VPX_MAX_LAYERS]; + // Framerate per layer (cumulative). + double layer_framerate[VPX_MAX_LAYERS]; + // Target average frame size per layer (per-frame-bandwidth per layer). + double layer_pfb[VPX_MAX_LAYERS]; + // Actual average frame size per layer. + double layer_avg_frame_size[VPX_MAX_LAYERS]; + // Average rate mismatch per layer (|target - actual| / target). + double layer_avg_rate_mismatch[VPX_MAX_LAYERS]; + // Actual encoding bitrate per layer (cumulative). + double layer_encoding_bitrate[VPX_MAX_LAYERS]; + // Average of the short-time encoder actual bitrate. + // TODO(marpan): Should we add these short-time stats for each layer? + double avg_st_encoding_bitrate; + // Variance of the short-time encoder actual bitrate. + double variance_st_encoding_bitrate; + // Window (number of frames) for computing short-time encoding bitrate. + int window_size; + // Number of window measurements. + int window_count; +}; + +// Note: these rate control stats assume only 1 key frame in the +// sequence (i.e., first frame only). +static void set_rate_control_stats(struct RateControlStats *rc, + vpx_codec_enc_cfg_t *cfg) { + unsigned int sl, tl; + // Set the layer (cumulative) framerate and the target layer (non-cumulative) + // per-frame-bandwidth, for the rate control encoding stats below. 
+ const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; + + for (sl = 0; sl < cfg->ss_number_layers; ++sl) { + for (tl = 0; tl < cfg->ts_number_layers; ++tl) { + const int layer = sl * cfg->ts_number_layers + tl; + const int tlayer0 = sl * cfg->ts_number_layers; + rc->layer_framerate[layer] = + framerate / cfg->ts_rate_decimator[tl]; + if (tl > 0) { + rc->layer_pfb[layer] = 1000.0 * + (cfg->layer_target_bitrate[layer] - + cfg->layer_target_bitrate[layer - 1]) / + (rc->layer_framerate[layer] - + rc->layer_framerate[layer - 1]); + } else { + rc->layer_pfb[tlayer0] = 1000.0 * + cfg->layer_target_bitrate[tlayer0] / + rc->layer_framerate[tlayer0]; + } + rc->layer_input_frames[layer] = 0; + rc->layer_enc_frames[layer] = 0; + rc->layer_tot_enc_frames[layer] = 0; + rc->layer_encoding_bitrate[layer] = 0.0; + rc->layer_avg_frame_size[layer] = 0.0; + rc->layer_avg_rate_mismatch[layer] = 0.0; + } + } + rc->window_count = 0; + rc->window_size = 15; + rc->avg_st_encoding_bitrate = 0.0; + rc->variance_st_encoding_bitrate = 0.0; +} + +static void printout_rate_control_summary(struct RateControlStats *rc, + vpx_codec_enc_cfg_t *cfg, + int frame_cnt) { + unsigned int sl, tl; + int tot_num_frames = 0; + double perc_fluctuation = 0.0; + printf("Total number of processed frames: %d\n\n", frame_cnt - 1); + printf("Rate control layer stats for sl%d tl%d layer(s):\n\n", + cfg->ss_number_layers, cfg->ts_number_layers); + for (sl = 0; sl < cfg->ss_number_layers; ++sl) { + for (tl = 0; tl < cfg->ts_number_layers; ++tl) { + const int layer = sl * cfg->ts_number_layers + tl; + const int num_dropped = (tl > 0) ? + (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) : + (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1); + if (!sl) + tot_num_frames += rc->layer_input_frames[layer]; + rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] * + rc->layer_encoding_bitrate[layer] / tot_num_frames; + rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] / + rc->layer_enc_frames[layer]; + rc->layer_avg_rate_mismatch[layer] = + 100.0 * rc->layer_avg_rate_mismatch[layer] / + rc->layer_enc_frames[layer]; + printf("For layer#: sl%d tl%d \n", sl, tl); + printf("Bitrate (target vs actual): %d %f.0 kbps\n", + cfg->layer_target_bitrate[layer], + rc->layer_encoding_bitrate[layer]); + printf("Average frame size (target vs actual): %f %f bits\n", + rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]); + printf("Average rate_mismatch: %f\n", + rc->layer_avg_rate_mismatch[layer]); + printf("Number of input frames, encoded (non-key) frames, " + "and percent dropped frames: %d %d %f.0 \n", + rc->layer_input_frames[layer], rc->layer_enc_frames[layer], + 100.0 * num_dropped / rc->layer_input_frames[layer]); + printf("\n"); + } + } + rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; + rc->variance_st_encoding_bitrate = + rc->variance_st_encoding_bitrate / rc->window_count - + (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); + perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / + rc->avg_st_encoding_bitrate; + printf("Short-time stats, for window of %d frames: \n", rc->window_size); + printf("Average, rms-variance, and percent-fluct: %f %f %f \n", + rc->avg_st_encoding_bitrate, + sqrt(rc->variance_st_encoding_bitrate), + perc_fluctuation); + if (frame_cnt != tot_num_frames) + die("Error: Number of input frames not equal to output encoded frames != " + "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames); +} + 
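printout_rate_control_summary() above reports the short-time bitrate statistics with the usual one-pass shortcut var(X) = E[X^2] - (E[X])^2 over the per-window bitrates, then expresses the spread as 100 * sqrt(var) / avg percent. A small self-contained sketch of that accumulation with made-up window bitrates (link with -lm):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      /* Hypothetical per-window encoding bitrates, in kbps. */
      const double window_kbps[] = { 480.0, 520.0, 505.0, 495.0 };
      const int n = (int)(sizeof(window_kbps) / sizeof(window_kbps[0]));
      double sum = 0.0, sum_sq = 0.0;
      int i;
      for (i = 0; i < n; ++i) {
        sum += window_kbps[i];
        sum_sq += window_kbps[i] * window_kbps[i];
      }
      {
        const double avg = sum / n;
        const double var = sum_sq / n - avg * avg;     /* E[X^2] - (E[X])^2 */
        const double fluct = 100.0 * sqrt(var) / avg;  /* percent fluctuation */
        printf("avg %.1f kbps, rms deviation %.1f kbps, fluct %.2f%%\n",
               avg, sqrt(var), fluct);
      }
      return 0;
    }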
+vpx_codec_err_t parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count) { + // A chunk ending with a byte matching 0xc0 is an invalid chunk unless + // it is a super frame index. If the last byte of real video compression + // data is 0xc0 the encoder must add a 0 byte. If we have the marker but + // not the associated matching marker byte at the front of the index we have + // an invalid bitstream and need to return an error. + + uint8_t marker; + + marker = *(data + data_sz - 1); + *count = 0; + + + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) + return VPX_CODEC_CORRUPT_FRAME; + + { + const uint8_t marker2 = *(data + data_sz - index_sz); + + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) + return VPX_CODEC_CORRUPT_FRAME; + } + + { + // Found a valid superframe index. + uint32_t i, j; + const uint8_t *x = &data[data_sz - index_sz + 1]; + + for (i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; ++j) + this_sz |= (*x++) << (j * 8); + sizes[i] = this_sz; + } + *count = frames; + } + } + return VPX_CODEC_OK; +} +#endif + int main(int argc, const char **argv) { AppInput app_input = {0}; VpxVideoWriter *writer = NULL; @@ -332,7 +555,15 @@ int main(int argc, const char **argv) { FILE *infile = NULL; int end_of_stream = 0; int frames_received = 0; - +#if OUTPUT_RC_STATS + VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL}; + struct RateControlStats rc; + vpx_svc_layer_id_t layer_id; + int sl, tl; + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + double framerate = 30.0; +#endif memset(&svc_ctx, 0, sizeof(svc_ctx)); svc_ctx.log_print = 1; exec_name = argv[0]; @@ -359,6 +590,13 @@ int main(int argc, const char **argv) { VPX_CODEC_OK) die("Failed to initialize encoder\n"); +#if OUTPUT_RC_STATS + if (svc_ctx.output_rc_stat) { + set_rate_control_stats(&rc, &enc_cfg); + framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num; + } +#endif + info.codec_fourcc = VP9_FOURCC; info.time_base.numerator = enc_cfg.g_timebase.num; info.time_base.denominator = enc_cfg.g_timebase.den; @@ -370,11 +608,31 @@ int main(int argc, const char **argv) { if (!writer) die("Failed to open %s for writing\n", app_input.output_filename); } +#if OUTPUT_RC_STATS + // For now, just write temporal layer streams. + // TODO(wonkap): do spatial by re-writing superframe. 
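parse_superframe_index() earlier in this file reads the VP9 superframe trailer: the final byte is a marker whose top three bits are 110, the low three bits give the frame count minus one, bits 3..4 give the per-size byte width minus one, and the same marker byte must also open the index. A worked example with made-up frame sizes:

    /* Superframe trailer for 2 sub-frames whose sizes fit in 2 bytes each:
     *   marker   = 0xC0 | ((mag - 1) << 3) | (frames - 1)
     *            = 0xC0 | (1 << 3) | 1            = 0xC9
     *   index_sz = 2 + mag * frames = 2 + 2 * 2   = 6 bytes
     * With hypothetical sizes 0x0123 and 0x0456, the chunk ends with:
     *   C9  23 01  56 04  C9                      (sizes are little endian)
     * and the parser returns sizes[0] = 0x123, sizes[1] = 0x456, *count = 2.
     * If the leading and trailing marker bytes differ, or the chunk is shorter
     * than index_sz, it is rejected as VPX_CODEC_CORRUPT_FRAME.
     */

The tool only needs the per-frame sizes so that the rate-control stats below can attribute bytes to each spatial layer of the superframe.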
+ if (svc_ctx.output_rc_stat) { + for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) { + char file_name[PATH_MAX]; + + snprintf(file_name, sizeof(file_name), "%s_t%d.ivf", + app_input.output_filename, tl); + outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info); + if (!outfile[tl]) + die("Failed to open %s for writing", file_name); + } + } +#endif // skip initial frames for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile); + if (svc_ctx.speed != -1) + vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed); + if (svc_ctx.threads) + vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1)); + // Encode frames while (!end_of_stream) { vpx_codec_iter_t iter = NULL; @@ -386,7 +644,9 @@ int main(int argc, const char **argv) { } res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw), - pts, frame_duration, VPX_DL_GOOD_QUALITY); + pts, frame_duration, svc_ctx.speed >= 5 ? + VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY); + printf("%s", vpx_svc_get_message(&svc_ctx)); if (res != VPX_CODEC_OK) { die_codec(&codec, "Failed to encode frame"); @@ -395,11 +655,90 @@ int main(int argc, const char **argv) { while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) { switch (cx_pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: { - if (cx_pkt->data.frame.sz > 0) + if (cx_pkt->data.frame.sz > 0) { +#if OUTPUT_RC_STATS + uint32_t sizes[8]; + int count = 0; +#endif vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, cx_pkt->data.frame.pts); +#if OUTPUT_RC_STATS + // TODO(marpan/wonkap): Put this (to line728) in separate function. + if (svc_ctx.output_rc_stat) { + vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id); + parse_superframe_index(cx_pkt->data.frame.buf, + cx_pkt->data.frame.sz, sizes, &count); + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + + layer_id.temporal_layer_id]; + } + for (tl = layer_id.temporal_layer_id; + tl < enc_cfg.ts_number_layers; ++tl) { + vpx_video_writer_write_frame(outfile[tl], + cx_pkt->data.frame.buf, + cx_pkt->data.frame.sz, + cx_pkt->data.frame.pts); + } + + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + for (tl = layer_id.temporal_layer_id; + tl < enc_cfg.ts_number_layers; ++tl) { + const int layer = sl * enc_cfg.ts_number_layers + tl; + ++rc.layer_tot_enc_frames[layer]; + rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; + // Keep count of rate control stats per layer, for non-key + // frames. + if (tl == layer_id.temporal_layer_id && + !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { + rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl]; + rc.layer_avg_rate_mismatch[layer] += + fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) / + rc.layer_pfb[layer]; + ++rc.layer_enc_frames[layer]; + } + } + } + + // Update for short-time encoding bitrate states, for moving + // window of size rc->window, shifted by rc->window / 2. + // Ignore first window segment, due to key frame. + if (frame_cnt > rc.window_size) { + tl = layer_id.temporal_layer_id; + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; + } + if (frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate / rc.window_size) * + (sum_bitrate / rc.window_size); + sum_bitrate = 0.0; + } + } + + // Second shifted window. 
+ if (frame_cnt > rc.window_size + rc.window_size / 2) { + tl = layer_id.temporal_layer_id; + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; + } + + if (frame_cnt > 2 * rc.window_size && + frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate2 / rc.window_size) * + (sum_bitrate2 / rc.window_size); + sum_bitrate2 = 0.0; + } + } + } +#endif + } printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received, !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY), @@ -424,25 +763,30 @@ int main(int argc, const char **argv) { pts += frame_duration; } } - printf("Processed %d frames\n", frame_cnt); - fclose(infile); +#if OUTPUT_RC_STATS + if (svc_ctx.output_rc_stat) { + printout_rate_control_summary(&rc, &enc_cfg, frame_cnt); + printf("\n"); + } +#endif if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); - if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1); - if (writer) { vpx_video_writer_close(writer); } - +#if OUTPUT_RC_STATS + if (svc_ctx.output_rc_stat) { + for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) { + vpx_video_writer_close(outfile[tl]); + } + } +#endif vpx_img_free(&raw); - // display average size, psnr printf("%s", vpx_svc_dump_statistics(&svc_ctx)); - vpx_svc_release(&svc_ctx); - return EXIT_SUCCESS; } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c index 349875997b5..484deb5b360 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c @@ -28,7 +28,7 @@ static const char *exec_name; -void usage_exit() { +void usage_exit(void) { exit(EXIT_FAILURE); } @@ -70,6 +70,7 @@ struct RateControlMetrics { int window_size; // Number of window measurements. int window_count; + int layer_target_bitrate[VPX_MAX_LAYERS]; }; // Note: these rate control metrics assume only 1 key frame in the @@ -85,13 +86,13 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc, // per-frame-bandwidth, for the rate control encoding stats below. 
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0]; - rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] / + rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0]; for (i = 0; i < cfg->ts_number_layers; ++i) { if (i > 0) { rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i]; rc->layer_pfb[i] = 1000.0 * - (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) / + (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) / (rc->layer_framerate[i] - rc->layer_framerate[i - 1]); } rc->layer_input_frames[i] = 0; @@ -128,7 +129,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc, rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[i]; printf("For layer#: %d \n", i); - printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i], + printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i], rc->layer_encoding_bitrate[i]); printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i], rc->layer_avg_frame_size[i]); @@ -597,13 +598,16 @@ int main(int argc, char **argv) { for (i = min_args_base; (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) { - cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); + rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); + if (strncmp(encoder->name, "vp8", 3) == 0) + cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11]; + else if (strncmp(encoder->name, "vp9", 3) == 0) + cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11]; } // Real time parameters. cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0); cfg.rc_end_usage = VPX_CBR; - cfg.rc_resize_allowed = 0; cfg.rc_min_quantizer = 2; cfg.rc_max_quantizer = 56; if (strncmp(encoder->name, "vp9", 3) == 0) @@ -614,6 +618,9 @@ int main(int argc, char **argv) { cfg.rc_buf_optimal_sz = 600; cfg.rc_buf_sz = 1000; + // Disable dynamic resizing by default. + cfg.rc_resize_allowed = 0; + // Use 1 thread as default. cfg.g_threads = 1; @@ -625,6 +632,8 @@ int main(int argc, char **argv) { // Disable automatic keyframe placement. cfg.kf_min_dist = cfg.kf_max_dist = 3000; + cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + set_temporal_layer_pattern(layering_mode, &cfg, layer_flags, @@ -633,8 +642,8 @@ int main(int argc, char **argv) { set_rate_control_metrics(&rc, &cfg); // Target bandwidth for the whole stream. - // Set to ts_target_bitrate for highest layer (total bitrate). - cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1]; + // Set to layer_target_bitrate for highest layer (total bitrate). + cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1]; // Open input file. if (!(infile = fopen(argv[1], "rb"))) { @@ -677,15 +686,22 @@ int main(int argc, char **argv) { vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0); } else if (strncmp(encoder->name, "vp9", 3) == 0) { - vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); - vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); - vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); - vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0); - vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0); - vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); - if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 
1: 0)) { - die_codec(&codec, "Failed to set SVC"); + vpx_svc_extra_cfg_t svc_params; + vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); + vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); + vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0); + vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0); + vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); + if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) + die_codec(&codec, "Failed to set SVC"); + for (i = 0; i < cfg.ts_number_layers; ++i) { + svc_params.max_quantizers[i] = cfg.rc_max_quantizer; + svc_params.min_quantizers[i] = cfg.rc_min_quantizer; } + svc_params.scaling_factor_num[0] = cfg.g_h; + svc_params.scaling_factor_den[0] = cfg.g_h; + vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params); } if (strncmp(encoder->name, "vp8", 3) == 0) { vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0); diff --git a/chromium/third_party/libvpx/source/libvpx/libs.mk b/chromium/third_party/libvpx/source/libvpx/libs.mk index 6eee0039c29..6215990c9bf 100644 --- a/chromium/third_party/libvpx/source/libvpx/libs.mk +++ b/chromium/third_party/libvpx/source/libvpx/libs.mk @@ -25,7 +25,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2) @echo " [CREATE] $$@" $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \ --sym=$(1) \ - --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \ + --config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \ $$(RTCD_OPTIONS) $$^ > $$@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h RTCD += $$(BUILD_PFX)$(1).h @@ -34,13 +34,6 @@ endef CODEC_SRCS-yes += CHANGELOG CODEC_SRCS-yes += libs.mk -# If this is a universal (fat) binary, then all the subarchitectures have -# already been built and our job is to stitch them together. The -# BUILD_LIBVPX variable indicates whether we should be building -# (compiling, linking) the library. The LIPO_LIBVPX variable indicates -# that we're stitching. 
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes) - include $(SRC_PATH_BARE)/vpx/vpx_codec.mk CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS)) CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS)) @@ -140,18 +133,18 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%) INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) endif -CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh -CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h -CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h -CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c +CODEC_SRCS-yes += build/make/version.sh +CODEC_SRCS-yes += build/make/rtcd.pl +CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h +CODEC_SRCS-yes += vpx_ports/mem_ops.h +CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h +CODEC_SRCS-yes += vpx_ports/vpx_once.h +CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm endif -CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com +CODEC_EXPORTS-yes += vpx/exports_com CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec @@ -218,7 +211,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \ --src-path-bare="$(SRC_PATH_BARE)" \ -PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX) +PROJECTS-yes += vpx.$(VCPROJ_SFX) vpx.$(VCPROJ_SFX): vpx_config.asm vpx.$(VCPROJ_SFX): $(RTCD) @@ -226,31 +219,39 @@ vpx.$(VCPROJ_SFX): $(RTCD) endif else LIBVPX_OBJS=$(call objs,$(CODEC_SRCS)) -OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS) -LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a +OBJS-yes += $(LIBVPX_OBJS) +LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) - -BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED)) - SO_VERSION_MAJOR := 2 SO_VERSION_MINOR := 0 SO_VERSION_PATCH := 0 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib +SHARED_LIB_SUF := .dylib EXPORT_FILE := libvpx.syms LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \ libvpx.dylib ) else +ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS)) +LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll +SHARED_LIB_SUF := _dll.a +EXPORT_FILE := libvpx.def +LIBVPX_SO_SYMLINKS := +LIBVPX_SO_IMPLIB := libvpx_dll.a +else LIBVPX_SO := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH) +SHARED_LIB_SUF := .so EXPORT_FILE := libvpx.ver LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \ libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \ libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR)) endif +endif -LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\ - $(notdir $(LIBVPX_SO_SYMLINKS)) +LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\ + $(notdir $(LIBVPX_SO_SYMLINKS)) \ + $(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB)) $(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE) $(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm $(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR) @@ -268,6 +269,19 @@ libvpx.syms: $(call enabled,CODEC_EXPORTS) $(qexec)awk '{print "_"$$2}' $^ >$@ CLEAN-OBJS += libvpx.syms +libvpx.def: $(call enabled,CODEC_EXPORTS) + @echo " [CREATE] 
$@" + $(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@ + $(qexec)echo "DATA MULTIPLE NONSHARED" >> $@ + $(qexec)echo "EXPORTS" >> $@ + $(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@ +CLEAN-OBJS += libvpx.def + +libvpx_dll.a: $(LIBVPX_SO) + @echo " [IMPLIB] $@" + $(qexec)emximp -o $@ $< +CLEAN-OBJS += libvpx_dll.a + define libvpx_symlink_template $(1): $(2) @echo " [LN] $(2) $$@" @@ -283,11 +297,12 @@ $(eval $(call libvpx_symlink_template,\ $(LIBVPX_SO))) -INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS) -INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO) +INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS) +INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO) +INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB)) -LIBS-$(BUILD_LIBVPX) += vpx.pc +LIBS-yes += vpx.pc vpx.pc: config.mk libs.mk @echo " [CREATE] $@" $(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@ @@ -313,9 +328,6 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc CLEAN-OBJS += vpx.pc endif -LIBS-$(LIPO_LIBVPX) += libvpx.a -$(eval $(if $(LIPO_LIBVPX),$(call lipo_lib_template,libvpx.a))) - # # Rule to make assembler configuration file from C configuration file # @@ -354,11 +366,15 @@ LIBVPX_TEST_DATA_PATH ?= . include $(SRC_PATH_BARE)/test/test.mk LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS)) -LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX) +LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX) LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\ $(call enabled,LIBVPX_TEST_DATA)) libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1) +TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX) +TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS)) +TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS))) + libvpx_test_srcs.txt: @echo " [CREATE] $@" @echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@ @@ -422,7 +438,25 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_ PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX) -LIBVPX_TEST_BINS := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS))) +LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN))) + +ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),) +PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX) +test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX) + @echo " [CREATE] $@" + $(qexec)$(GEN_VCPROJ) \ + --exe \ + --target=$(TOOLCHAIN) \ + --name=test_intra_pred_speed \ + -D_VARIADIC_MAX=10 \ + --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \ + --ver=$(CONFIG_VS_VERSION) \ + --src-path-bare="$(SRC_PATH_BARE)" \ + $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ + --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ + -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ + -L. 
-l$(CODEC_LIB) -l$(GTEST_LIB) $^ +endif # TEST_INTRA_PRED_SPEED endif else @@ -433,45 +467,54 @@ ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS)) # Disabling pthreads globally will cause issues on darwin and possibly elsewhere $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0 endif -$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include -OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS) -LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a +GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src +GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include +$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES) +OBJS-yes += $(GTEST_OBJS) +LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS) LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS))) -$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src -$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include -OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS) -BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS) +$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES) +OBJS-yes += $(LIBVPX_TEST_OBJS) +BINS-yes += $(LIBVPX_TEST_BIN) CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) -CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a) -$(foreach bin,$(LIBVPX_TEST_BINS),\ - $(if $(BUILD_LIBVPX),$(eval $(bin): \ - lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\ - $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\ - $(LIBVPX_TEST_OBJS) \ - -L. -lvpx -lgtest $(extralibs) -lm)\ - )))\ - $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\ - -endif +CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a) +TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a +$(LIBVPX_TEST_BIN): $(TEST_LIBS) +$(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \ + $(LIBVPX_TEST_OBJS) \ + -L. -lvpx -lgtest $(extralibs) -lm)) + +ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),) +$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES) +OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS) +BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN) + +$(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS) +$(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \ + $(TEST_INTRA_PRED_SPEED_OBJS) \ + -L. -lvpx -lgtest $(extralibs) -lm)) +endif # TEST_INTRA_PRED_SPEED + +endif # CONFIG_UNIT_TESTS # Install test sources only if codec source is included INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\ $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f)) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS) +INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS) define test_shard_template test:: test_shard.$(1) -test_shard.$(1): $(LIBVPX_TEST_BINS) testdata +test-no-data-check:: test_shard_ndc.$(1) +test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN) @set -e; \ - for t in $(LIBVPX_TEST_BINS); do \ - export GTEST_SHARD_INDEX=$(1); \ - export GTEST_TOTAL_SHARDS=$(2); \ - $$$$t; \ - done + export GTEST_SHARD_INDEX=$(1); \ + export GTEST_TOTAL_SHARDS=$(2); \ + $(LIBVPX_TEST_BIN) +test_shard.$(1): testdata .PHONY: test_shard.$(1) endef @@ -516,15 +559,16 @@ ifeq ($(CONFIG_MSVS),yes) # TODO(tomfinegan): Support running the debug versions of tools? 
TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH)) endif -utiltest: testdata +utiltest utiltest-no-data-check: $(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \ --test-data-path $(LIBVPX_TEST_DATA_PATH) \ --bin-path $(TEST_BIN_PATH) $(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \ --test-data-path $(LIBVPX_TEST_DATA_PATH) \ --bin-path $(TEST_BIN_PATH) +utiltest: testdata else -utiltest: +utiltest utiltest-no-data-check: @echo Unit tests must be enabled to make the utiltest target. endif @@ -542,11 +586,12 @@ ifeq ($(CONFIG_MSVS),yes) # TODO(tomfinegan): Support running the debug versions of tools? EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release endif -exampletest: examples testdata +exampletest exampletest-no-data-check: examples $(qexec)$(SRC_PATH_BARE)/test/examples.sh \ --test-data-path $(LIBVPX_TEST_DATA_PATH) \ --bin-path $(EXAMPLES_BIN_PATH) +exampletest: testdata else -exampletest: +exampletest exampletest-no-data-check: @echo Unit tests must be enabled to make the exampletest target. endif diff --git a/chromium/third_party/libvpx/source/libvpx/md5_utils.c b/chromium/third_party/libvpx/source/libvpx/md5_utils.c index 8fb26e2084b..f4f893a2d67 100644 --- a/chromium/third_party/libvpx/source/libvpx/md5_utils.c +++ b/chromium/third_party/libvpx/source/libvpx/md5_utils.c @@ -24,7 +24,7 @@ #include "md5_utils.h" -void +static void byteSwap(UWORD32 *buf, unsigned words) { md5byte *p; diff --git a/chromium/third_party/libvpx/source/libvpx/rate_hist.c b/chromium/third_party/libvpx/source/libvpx/rate_hist.c index 1cef19bdd64..a77222b1618 100644 --- a/chromium/third_party/libvpx/source/libvpx/rate_hist.c +++ b/chromium/third_party/libvpx/source/libvpx/rate_hist.c @@ -88,6 +88,9 @@ void update_rate_histogram(struct rate_hist *hist, if (now < cfg->rc_buf_initial_sz) return; + if (!cfg->rc_target_bitrate) + return; + then = now; /* Sum the size over the past rc_buf_sz ms */ diff --git a/chromium/third_party/libvpx/source/libvpx/tools_common.c b/chromium/third_party/libvpx/source/libvpx/tools_common.c index e243a91575a..901734e0f34 100644 --- a/chromium/third_party/libvpx/source/libvpx/tools_common.c +++ b/chromium/third_party/libvpx/source/libvpx/tools_common.c @@ -140,7 +140,7 @@ static const VpxInterface vpx_encoders[] = { #endif }; -int get_vpx_encoder_count() { +int get_vpx_encoder_count(void) { return sizeof(vpx_encoders) / sizeof(vpx_encoders[0]); } @@ -170,7 +170,7 @@ static const VpxInterface vpx_decoders[] = { #endif }; -int get_vpx_decoder_count() { +int get_vpx_decoder_count(void) { return sizeof(vpx_decoders) / sizeof(vpx_decoders[0]); } diff --git a/chromium/third_party/libvpx/source/libvpx/tools_common.h b/chromium/third_party/libvpx/source/libvpx/tools_common.h index de6c38f0f5b..adccec88bb3 100644 --- a/chromium/third_party/libvpx/source/libvpx/tools_common.h +++ b/chromium/third_party/libvpx/source/libvpx/tools_common.h @@ -16,6 +16,7 @@ #include "vpx/vpx_codec.h" #include "vpx/vpx_image.h" #include "vpx/vpx_integer.h" +#include "vpx_ports/msvc.h" #if CONFIG_ENCODERS #include "./y4minput.h" @@ -34,7 +35,6 @@ #if CONFIG_OS_SUPPORT #if defined(_MSC_VER) #include <io.h> /* NOLINT */ -#define snprintf _snprintf #define isatty _isatty #define fileno _fileno #else @@ -89,6 +89,7 @@ struct VpxInputContext { enum VideoFileType file_type; uint32_t width; uint32_t height; + struct VpxRational pixel_aspect_ratio; vpx_img_fmt_t fmt; vpx_bit_depth_t bit_depth; int only_i420; @@ -119,7 +120,7 @@ void warn(const char *fmt, ...); void die_codec(vpx_codec_ctx_t *ctx, const char *s) 
VPX_NO_RETURN; /* The tool including this file must define usage_exit() */ -void usage_exit() VPX_NO_RETURN; +void usage_exit(void) VPX_NO_RETURN; #undef VPX_NO_RETURN @@ -131,11 +132,11 @@ typedef struct VpxInterface { vpx_codec_iface_t *(*const codec_interface)(); } VpxInterface; -int get_vpx_encoder_count(); +int get_vpx_encoder_count(void); const VpxInterface *get_vpx_encoder_by_index(int i); const VpxInterface *get_vpx_encoder_by_name(const char *name); -int get_vpx_decoder_count(); +int get_vpx_decoder_count(void); const VpxInterface *get_vpx_decoder_by_index(int i); const VpxInterface *get_vpx_decoder_by_name(const char *name); const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c index b9d875a2ff7..8dfd4ce203e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "alloccommon.h" #include "blockd.h" #include "vpx_mem/vpx_mem.h" #include "onyxc_int.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm deleted file mode 100644 index 39919579f80..00000000000 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm +++ /dev/null @@ -1,154 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_variance16x16_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp8_variance16x16_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - -loop - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r5, [r2, #0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r5, [r2, #4] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r5, [r2, #8] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r5, [r2, #12] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select 
bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - - END - diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm deleted file mode 100644 index 915ee499309..00000000000 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm +++ /dev/null @@ -1,101 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_variance8x8_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp8_variance8x8_armv6| PROC - - push {r4-r10, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #8 ; set loop counter to 8 (=block height) - mov r4, #0 ; initialize sum = 0 - mov r5, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r6, [r0, #0x0] ; load 4 src pixels - ldr r7, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r6, r7 ; calculate difference - pld [r0, r1, lsl #1] - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of negative differences - orr r8, r8, r10 ; differences of all 4 pixels - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r0, #0x4] ; load 4 src pixels - ldr r7, [r2, #0x4] ; load 4 ref pixels - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r6, r7 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of negative differences - orr r8, r8, r10 ; differences of all 4 pixels - - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; subtract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - subs r12, r12, #1 ; next row - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r8, [sp, #32] ; get address of sse - mul r1, r4, r4 ; sum * sum - str r5, [r8] ; store sse - sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6)) - - pop {r4-r10, pc} - - ENDP - - END diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c deleted file mode 100644 index 1b1979073e5..00000000000 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> -#include "vpx_ports/mem.h" - -unsigned int vp8_variance16x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance16x8_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 4; i++) { // variance16x8_neon_loop - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - 
q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance8x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d2u8, d4u8, d6u8; - int16x4_t d22s16, d23s16, d24s16, d25s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint16x8_t q11u16, q12u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { // variance8x16_neon_loop - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - __builtin_prefetch(src_ptr); - - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - __builtin_prefetch(ref_ptr); - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d2u8, d6u8); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = 
vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} - -unsigned int vp8_variance8x8_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - uint32x2_t d0u32, d10u32; - int64x1_t d0s64, d1s64; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int32x4_t q8s32, q9s32, q10s32; - int64x2_t q0s64, q1s64, q5s64; - - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 2; i++) { // variance8x8_neon_loop - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d1u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d3u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - - d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d5u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d7u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d1u8, d5u8); - q13u16 = vsubl_u8(d2u8, d6u8); - q14u16 = vsubl_u8(d3u8, d7u8); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); - q9s32 = vmlal_s16(q9s32, d22s16, d22s16); - q10s32 = vmlal_s16(q10s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); - q9s32 = vmlal_s16(q9s32, d26s16, d26s16); - q10s32 = vmlal_s16(q10s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q10s32 = vaddq_s32(q10s32, q9s32); - q0s64 = vpaddlq_s32(q8s32); - q1s64 = vpaddlq_s32(q10s32); - - d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); - d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), - vreinterpret_s32_s64(d0s64)); - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); - - d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6); - d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); - - return vget_lane_u32(d0u32, 0); -} diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c index 974d3b6532b..3c8ed11f070 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c +++ 
b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c @@ -12,7 +12,7 @@ #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" -static const uint16_t bilinear_taps_coeff[8][2] = { +static const uint8_t bilinear_taps_coeff[8][2] = { {128, 0}, {112, 16}, { 96, 32}, @@ -972,9 +972,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, int pixel_step, unsigned int output_height, unsigned int output_width, - const uint16_t *vpx_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vpx_filter[1]); + const uint8_t *vpx_filter) { + const uint8x8_t f0 = vmov_n_u8(vpx_filter[0]); + const uint8x8_t f1 = vmov_n_u8(vpx_filter[1]); unsigned int i; for (i = 0; i < output_height; ++i) { const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c index 467a509420e..0f293f03d94 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c @@ -9,10 +9,14 @@ */ #include "vpx_config.h" -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vp8/common/variance.h" #include "vp8/common/filter.h" +// TODO(johannkoenig): Move this to vpx_dsp or vp8/encoder +#if CONFIG_VP8_ENCODER + #if HAVE_MEDIA #include "vp8/common/arm/bilinearfilter_arm.h" @@ -40,8 +44,8 @@ unsigned int vp8_sub_pixel_variance8x8_armv6 vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, 8, 8, 8, VFilter); - return vp8_variance8x8_armv6(second_pass, 8, dst_ptr, - dst_pixels_per_line, sse); + return vpx_variance8x8_media(second_pass, 8, dst_ptr, + dst_pixels_per_line, sse); } unsigned int vp8_sub_pixel_variance16x16_armv6 @@ -86,13 +90,13 @@ unsigned int vp8_sub_pixel_variance16x16_armv6 vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, 16, 16, 16, VFilter); - var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr, - dst_pixels_per_line, sse); + var = vpx_variance16x16_media(second_pass, 16, dst_ptr, + dst_pixels_per_line, sse); } return var; } -#endif /* HAVE_MEDIA */ +#endif // HAVE_MEDIA #if HAVE_NEON @@ -129,4 +133,5 @@ unsigned int vp8_sub_pixel_variance16x16_neon return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); } -#endif +#endif // HAVE_NEON +#endif // CONFIG_VP8_ENCODER diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c index fd96c863491..e3392913f63 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c @@ -11,11 +11,11 @@ #include <string.h> -#include "vpx_config.h" +#include "./vp8_rtcd.h" #include "vpx/vpx_integer.h" /* Copy 2 macroblocks to a buffer */ -void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride, +void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride, unsigned char *dst_ptr, int dst_stride, int height) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c index 25266f86827..84c608effaa 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c @@ -10,6 +10,7 @@ #include "filter.h" 
+#include "./vp8_rtcd.h" DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c index d84df334810..4393ced48c8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c @@ -17,6 +17,7 @@ #include "vpx_ports/x86.h" #endif #include "vp8/common/onyxc_int.h" +#include "vp8/common/systemdependent.h" #if CONFIG_MULTITHREAD #if HAVE_UNISTD_H && !defined(__OS2__) diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c index 47af52f04e7..f5403c5aaf7 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp8_rtcd.h" /**************************************************************************** * Notes: diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c index d12dea19364..5c0680f42d4 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c @@ -151,14 +151,14 @@ static void multiframe_quality_enhance_block if (blksize == 16) { - actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8; - act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8; + actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8; + act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8; #ifdef USE_SSD - vp8_variance16x16(y, y_stride, yd, yd_stride, &sse); + vpx_variance16x16(y, y_stride, yd, yd_stride, &sse); sad = (sse + 128)>>8; - vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse); + vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse); usad = (sse + 32)>>6; - vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse); + vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse); vsad = (sse + 32)>>6; #else sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; @@ -168,14 +168,14 @@ static void multiframe_quality_enhance_block } else /* if (blksize == 8) */ { - actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6; - act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6; + actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6; + act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6; #ifdef USE_SSD - vp8_variance8x8(y, y_stride, yd, yd_stride, &sse); + vpx_variance8x8(y, y_stride, yd, yd_stride, &sse); sad = (sse + 32)>>6; - vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse); + vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse); usad = (sse + 8)>>4; - vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse); + vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse); vsad = (sse + 8)>>4; #else sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c index 266431a3240..a4e6ae170c9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c @@ -427,7 +427,7 @@ void vp8_de_noise(VP8_COMMON *cm, } } -double 
vp8_gaussian(double sigma, double mu, double x) +static double gaussian(double sigma, double mu, double x) { return 1 / (sigma * sqrt(2.0 * 3.14159265)) * (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))); @@ -455,7 +455,7 @@ static void fillrd(struct postproc_state *state, int q, int a) for (i = -32; i < 32; i++) { - const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i)); + const int v = (int)(.5 + 256 * gaussian(sigma, 0, i)); if (v) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl index 56b7db7ec33..fed20887f8e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl @@ -215,7 +215,7 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6; $vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2; add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; -# Disable neon while investigating https://code.google.com/p/webm/issues/detail?id=817 +#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817 specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2/; $vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6; $vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2; @@ -233,35 +233,11 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon/; $vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6; add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; -specialize qw/vp8_bilinear_predict4x4 mmx media neon/; +#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892 +specialize qw/vp8_bilinear_predict4x4 mmx media/; $vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6; # -# Whole-pixel Variance -# -add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance4x4 mmx sse2/; -$vp8_variance4x4_sse2=vp8_variance4x4_wmt; - -add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance8x8 mmx sse2 media neon/; -$vp8_variance8x8_sse2=vp8_variance8x8_wmt; -$vp8_variance8x8_media=vp8_variance8x8_armv6; - -add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance8x16 mmx sse2 neon/; -$vp8_variance8x16_sse2=vp8_variance8x16_wmt; - -add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance16x8 mmx sse2 neon/; -$vp8_variance16x8_sse2=vp8_variance16x8_wmt; - -add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_variance16x16 mmx sse2 media neon/; -$vp8_variance16x16_sse2=vp8_variance16x16_wmt; -$vp8_variance16x16_media=vp8_variance16x16_armv6; - -# # Sub-pixel Variance # add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, 
int Refstride, unsigned int *sse"; @@ -269,10 +245,9 @@ specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/; $vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt; add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; -specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/; +specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media/; $vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt; $vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6; -$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon; add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/; @@ -309,31 +284,10 @@ $vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6; if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") { # -# Sum of squares (vector) -# -add_proto qw/unsigned int vp8_get_mb_ss/, "const short *"; -specialize qw/vp8_get_mb_ss mmx sse2/; - -# -# SSE (Sum Squared Error) -# -add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; -specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/; -$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt; - -add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp8_mse16x16 mmx sse2 media neon/; -$vp8_mse16x16_sse2=vp8_mse16x16_wmt; -$vp8_mse16x16_media=vp8_mse16x16_armv6; - -add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; -specialize qw/vp8_get4x4sse_cs mmx neon/; - -# # Block copy # if ($opts{arch} =~ /x86/) { - add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"; + add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n"; specialize qw/vp8_copy32xn sse2 sse3/; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h index 552a28025e6..c6c9f41bf6a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h @@ -29,7 +29,7 @@ typedef unsigned int(*vpx_sad_fn_t)( typedef void (*vp8_copy32xn_fn_t)( const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, + unsigned char *ref_ptr, int ref_stride, int n); @@ -39,6 +39,7 @@ typedef void (*vpx_sad_multi_fn_t)( const unsigned char *ref_array, int ref_stride, unsigned int *sad_array); + typedef void (*vpx_sad_multi_d_fn_t) ( const unsigned char *src_ptr, @@ -48,7 +49,7 @@ typedef void (*vpx_sad_multi_d_fn_t) unsigned int *sad_array ); -typedef unsigned int (*vp8_variance_fn_t) +typedef unsigned int (*vpx_variance_fn_t) ( const unsigned char *src_ptr, int source_stride, @@ -68,37 +69,14 @@ typedef unsigned int (*vp8_subpixvariance_fn_t) unsigned int *sse ); -typedef void (*vp8_ssimpf_fn_t) - ( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long 
*sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr - ); - -typedef unsigned int (*vp8_getmbss_fn_t)(const short *); - -typedef unsigned int (*vp8_get16x16prederror_fn_t) - ( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride - ); - typedef struct variance_vtable { vpx_sad_fn_t sdf; - vp8_variance_fn_t vf; + vpx_variance_fn_t vf; vp8_subpixvariance_fn_t svf; - vp8_variance_fn_t svf_halfpix_h; - vp8_variance_fn_t svf_halfpix_v; - vp8_variance_fn_t svf_halfpix_hv; + vpx_variance_fn_t svf_halfpix_h; + vpx_variance_fn_t svf_halfpix_v; + vpx_variance_fn_t svf_halfpix_hv; vpx_sad_multi_fn_t sdx3f; vpx_sad_multi_fn_t sdx8f; vpx_sad_multi_d_fn_t sdx4df; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c index 773b655efc5..02915a4defd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c @@ -8,43 +8,34 @@ * be found in the AUTHORS file in the root of the source tree. */ - -#include "variance.h" +#include "./vp8_rtcd.h" #include "filter.h" +#include "variance.h" - -unsigned int vp8_get_mb_ss_c -( - const short *src_ptr -) -{ - unsigned int i = 0, sum = 0; - - do - { - sum += (src_ptr[i] * src_ptr[i]); - i++; - } - while (i < 256); - - return sum; +/* This is a bad idea. + * ctz = count trailing zeros */ +static int ctz(int a) { + int b = 0; + while (a != 1) { + a >>= 1; + b++; + } + return b; } - -static void variance( +static unsigned int variance( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, int w, int h, - unsigned int *sse, - int *sum) + unsigned int *sse) { int i, j; - int diff; + int diff, sum; - *sum = 0; + sum = 0; *sse = 0; for (i = 0; i < h; i++) @@ -52,114 +43,17 @@ static void variance( for (j = 0; j < w; j++) { diff = src_ptr[j] - ref_ptr[j]; - *sum += diff; + sum += diff; *sse += diff * diff; } src_ptr += source_stride; ref_ptr += recon_stride; } -} - - -unsigned int vp8_variance16x16_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); -} - -unsigned int vp8_variance8x16_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); + return (*sse - (((unsigned int)sum * sum) >> (int)((ctz(w) + ctz(h))))); } -unsigned int vp8_variance16x8_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); -} - - -unsigned int vp8_variance8x8_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); - *sse = var; - return 
(var - (((unsigned int)avg * avg) >> 6)); -} - -unsigned int vp8_variance4x4_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); -} - - -unsigned int vp8_mse16x16_c( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); - *sse = var; - return var; -} - - /**************************************************************************** * * ROUTINE : filter_block2d_bil_first_pass @@ -303,7 +197,7 @@ unsigned int vp8_sub_pixel_variance4x4_c /* Now filter Verticaly */ var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); - return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 4, dst_ptr, dst_pixels_per_line, 4, 4, sse); } @@ -328,7 +222,7 @@ unsigned int vp8_sub_pixel_variance8x8_c var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); - return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 8, sse); } unsigned int vp8_sub_pixel_variance16x16_c @@ -352,7 +246,7 @@ unsigned int vp8_sub_pixel_variance16x16_c var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); - return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 16, sse); } @@ -392,21 +286,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c( } -unsigned int vp8_sub_pixel_mse16x16_c -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) -{ - vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - unsigned int vp8_sub_pixel_variance16x8_c ( const unsigned char *src_ptr, @@ -428,7 +307,7 @@ unsigned int vp8_sub_pixel_variance16x8_c var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); - return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 8, sse); } unsigned int vp8_sub_pixel_variance8x16_c @@ -454,5 +333,5 @@ unsigned int vp8_sub_pixel_variance8x16_c var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); - return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); + return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 16, sse); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm index 761433c11ea..26de5e86097 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm +++ 
b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm @@ -13,393 +13,6 @@ %define xmm_filter_shift 7 -;unsigned int vp8_get_mb_ss_sse2 -;( -; short *src_ptr -;) -global sym(vp8_get_mb_ss_sse2) PRIVATE -sym(vp8_get_mb_ss_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 1 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - - mov rax, arg(0) ;[src_ptr] - mov rcx, 8 - pxor xmm4, xmm4 - -.NEXTROW: - movdqa xmm0, [rax] - movdqa xmm1, [rax+16] - movdqa xmm2, [rax+32] - movdqa xmm3, [rax+48] - pmaddwd xmm0, xmm0 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - pmaddwd xmm3, xmm3 - - paddd xmm0, xmm1 - paddd xmm2, xmm3 - paddd xmm4, xmm0 - paddd xmm4, xmm2 - - add rax, 0x40 - dec rcx - ja .NEXTROW - - movdqa xmm3,xmm4 - psrldq xmm4,8 - paddd xmm4,xmm3 - movdqa xmm3,xmm4 - psrldq xmm4,4 - paddd xmm4,xmm3 - movq rax,xmm4 - - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp8_get16x16var_sse2 -;( -; unsigned char * src_ptr, -; int source_stride, -; unsigned char * ref_ptr, -; int recon_stride, -; unsigned int * SSE, -; int * Sum -;) -global sym(vp8_get16x16var_sse2) PRIVATE -sym(vp8_get16x16var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;[src_ptr] - mov rdi, arg(2) ;[ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] - - ; Prefetch data - lea rcx, [rax+rax*2] - prefetcht0 [rsi] - prefetcht0 [rsi+rax] - prefetcht0 [rsi+rax*2] - prefetcht0 [rsi+rcx] - lea rbx, [rsi+rax*4] - prefetcht0 [rbx] - prefetcht0 [rbx+rax] - prefetcht0 [rbx+rax*2] - prefetcht0 [rbx+rcx] - - lea rcx, [rdx+rdx*2] - prefetcht0 [rdi] - prefetcht0 [rdi+rdx] - prefetcht0 [rdi+rdx*2] - prefetcht0 [rdi+rcx] - lea rbx, [rdi+rdx*4] - prefetcht0 [rbx] - prefetcht0 [rbx+rdx] - prefetcht0 [rbx+rdx*2] - prefetcht0 [rbx+rcx] - - pxor xmm0, xmm0 ; clear xmm0 for unpack - pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs - - pxor xmm6, xmm6 ; clear xmm6 for accumulating sse - mov rcx, 16 - -.var16loop: - movdqu xmm1, XMMWORD PTR [rsi] - movdqu xmm2, XMMWORD PTR [rdi] - - prefetcht0 [rsi+rax*8] - prefetcht0 [rdi+rdx*8] - - movdqa xmm3, xmm1 - movdqa xmm4, xmm2 - - - punpcklbw xmm1, xmm0 - punpckhbw xmm3, xmm0 - - punpcklbw xmm2, xmm0 - punpckhbw xmm4, xmm0 - - - psubw xmm1, xmm2 - psubw xmm3, xmm4 - - paddw xmm7, xmm1 - pmaddwd xmm1, xmm1 - - paddw xmm7, xmm3 - pmaddwd xmm3, xmm3 - - paddd xmm6, xmm1 - paddd xmm6, xmm3 - - add rsi, rax - add rdi, rdx - - sub rcx, 1 - jnz .var16loop - - - movdqa xmm1, xmm6 - pxor xmm6, xmm6 - - pxor xmm5, xmm5 - punpcklwd xmm6, xmm7 - - punpckhwd xmm5, xmm7 - psrad xmm5, 16 - - psrad xmm6, 16 - paddd xmm6, xmm5 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - movdqa xmm7, xmm6 - - paddd xmm1, xmm2 - punpckldq xmm6, xmm0 - - punpckhdq xmm7, xmm0 - paddd xmm6, xmm7 - - movdqa xmm2, xmm1 - movdqa xmm7, xmm6 - - psrldq xmm1, 8 - psrldq xmm6, 8 - - paddd xmm7, xmm6 - paddd xmm1, xmm2 - - mov rax, arg(5) ;[Sum] - mov rdi, arg(4) ;[SSE] - - movd DWORD PTR [rax], xmm7 - movd DWORD PTR [rdi], xmm1 - - - ; begin epilog - pop rdi - pop rsi - pop rbx - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - - - -;unsigned int vp8_get8x8var_sse2 -;( -; unsigned char * src_ptr, -; int source_stride, -; unsigned char * ref_ptr, -; int recon_stride, -; unsigned int * SSE, -; int * Sum -;) -global sym(vp8_get8x8var_sse2) PRIVATE 
-sym(vp8_get8x8var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - mov rsi, arg(0) ;[src_ptr] - mov rdi, arg(2) ;[ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] - - pxor xmm0, xmm0 ; clear xmm0 for unpack - pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs - - movq xmm1, QWORD PTR [rsi] - movq xmm2, QWORD PTR [rdi] - - punpcklbw xmm1, xmm0 - punpcklbw xmm2, xmm0 - - psubsw xmm1, xmm2 - paddw xmm7, xmm1 - - pmaddwd xmm1, xmm1 - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - movq xmm2, QWORD PTR[rsi + rax * 2] - movq xmm3, QWORD PTR[rdi + rdx * 2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - movq xmm2, QWORD PTR[rsi + rax *2] - movq xmm3, QWORD PTR[rdi + rdx *2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - movq xmm2, QWORD PTR[rsi + rax *2] - movq xmm3, QWORD PTR[rdi + rdx *2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - movdqa xmm6, xmm7 - punpcklwd xmm6, xmm0 - - punpckhwd xmm7, xmm0 - movdqa xmm2, xmm1 - - paddw xmm6, xmm7 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - movdqa xmm7, xmm6 - - paddd xmm1, xmm2 - punpckldq xmm6, xmm0 - - punpckhdq xmm7, xmm0 - paddw xmm6, xmm7 - - movdqa xmm2, xmm1 - movdqa xmm7, xmm6 - - psrldq xmm1, 8 - psrldq xmm6, 8 - - paddw xmm7, xmm6 - paddd xmm1, xmm2 - - mov rax, arg(5) ;[Sum] - mov rdi, arg(4) ;[SSE] - - movq rdx, xmm7 - movsx rcx, dx - - mov dword ptr [rax], ecx - movd DWORD PTR [rdi], xmm1 - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - ;void vp8_filter_block2d_bil_var_sse2 ;( ; unsigned char *ref_ptr, diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c index 73eb90df61f..2a0df640a90 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c @@ -8,19 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp8_rtcd.h" #include "vpx_config.h" #include "vp8/common/variance.h" #include "vpx_ports/mem.h" -extern unsigned int vp8_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); extern void vp8_half_horiz_vert_variance16x_h_sse2 ( const unsigned char *ref_ptr, diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm new file mode 100644 index 00000000000..97f25275df2 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm @@ -0,0 +1,353 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +%define mmx_filter_shift 7 + +;void vp8_filter_block2d_bil4x4_var_mmx +;( +; unsigned char *ref_ptr, +; int ref_pixels_per_line, +; unsigned char *src_ptr, +; int src_pixels_per_line, +; unsigned short *HFilter, +; unsigned short *VFilter, +; int *sum, +; unsigned int *sumsquared +;) +global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE +sym(vp8_filter_block2d_bil4x4_var_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + GET_GOT rbx + push rsi + push rdi + sub rsp, 16 + ; end prolog + + + pxor mm6, mm6 ; + pxor mm7, mm7 ; + + mov rax, arg(4) ;HFilter ; + mov rdx, arg(5) ;VFilter ; + + mov rsi, arg(0) ;ref_ptr ; + mov rdi, arg(2) ;src_ptr ; + + mov rcx, 4 ; + pxor mm0, mm0 ; + + movd mm1, [rsi] ; + movd mm3, [rsi+1] ; + + punpcklbw mm1, mm0 ; + pmullw mm1, [rax] ; + + punpcklbw mm3, mm0 ; + pmullw mm3, [rax+8] ; + + paddw mm1, mm3 ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + movq mm5, mm1 + +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line ; +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; + add rsi, r8 +%endif + +.filter_block2d_bil4x4_var_mmx_loop: + + movd mm1, [rsi] ; + movd mm3, [rsi+1] ; + + punpcklbw mm1, mm0 ; + pmullw mm1, [rax] ; + + punpcklbw mm3, mm0 ; + pmullw mm3, [rax+8] ; + + paddw mm1, mm3 ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + movq mm3, mm5 ; + + movq mm5, mm1 ; + pmullw mm3, [rdx] ; + + pmullw mm1, [rdx+8] ; + paddw mm1, mm3 ; + + + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + psraw mm1, mmx_filter_shift ; + + movd mm3, [rdi] ; + punpcklbw mm3, mm0 ; + + psubw mm1, mm3 ; + paddw mm6, mm1 ; + + pmaddwd mm1, mm1 ; + paddd mm7, mm1 ; + +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line ; + add rdi, dword ptr arg(3) ;src_pixels_per_line ; +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line + movsxd r9, dword ptr arg(3) ;src_pixels_per_line + add rsi, r8 + add rdi, r9 +%endif + sub rcx, 1 ; + jnz .filter_block2d_bil4x4_var_mmx_loop ; + + + pxor mm3, mm3 ; + pxor mm2, mm2 ; + + punpcklwd mm2, mm6 ; + punpckhwd mm3, mm6 ; + + paddd mm2, mm3 ; + movq mm6, mm2 ; + + psrlq mm6, 32 ; + paddd mm2, mm6 ; + + psrad mm2, 16 ; + movq mm4, mm7 ; + + psrlq mm4, 32 ; + paddd mm4, mm7 ; + + mov rdi, arg(6) ;sum + mov rsi, arg(7) ;sumsquared + + movd dword ptr [rdi], mm2 ; + movd dword ptr [rsi], mm4 ; + + + + ; begin 
epilog + add rsp, 16 + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + + + +;void vp8_filter_block2d_bil_var_mmx +;( +; unsigned char *ref_ptr, +; int ref_pixels_per_line, +; unsigned char *src_ptr, +; int src_pixels_per_line, +; unsigned int Height, +; unsigned short *HFilter, +; unsigned short *VFilter, +; int *sum, +; unsigned int *sumsquared +;) +global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE +sym(vp8_filter_block2d_bil_var_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 9 + GET_GOT rbx + push rsi + push rdi + sub rsp, 16 + ; end prolog + + pxor mm6, mm6 ; + pxor mm7, mm7 ; + mov rax, arg(5) ;HFilter ; + + mov rdx, arg(6) ;VFilter ; + mov rsi, arg(0) ;ref_ptr ; + + mov rdi, arg(2) ;src_ptr ; + movsxd rcx, dword ptr arg(4) ;Height ; + + pxor mm0, mm0 ; + movq mm1, [rsi] ; + + movq mm3, [rsi+1] ; + movq mm2, mm1 ; + + movq mm4, mm3 ; + punpcklbw mm1, mm0 ; + + punpckhbw mm2, mm0 ; + pmullw mm1, [rax] ; + + pmullw mm2, [rax] ; + punpcklbw mm3, mm0 ; + + punpckhbw mm4, mm0 ; + pmullw mm3, [rax+8] ; + + pmullw mm4, [rax+8] ; + paddw mm1, mm3 ; + + paddw mm2, mm4 ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; + + psraw mm2, mmx_filter_shift ; + movq mm5, mm1 + + packuswb mm5, mm2 ; +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line + add rsi, r8 +%endif + +.filter_block2d_bil_var_mmx_loop: + + movq mm1, [rsi] ; + movq mm3, [rsi+1] ; + + movq mm2, mm1 ; + movq mm4, mm3 ; + + punpcklbw mm1, mm0 ; + punpckhbw mm2, mm0 ; + + pmullw mm1, [rax] ; + pmullw mm2, [rax] ; + + punpcklbw mm3, mm0 ; + punpckhbw mm4, mm0 ; + + pmullw mm3, [rax+8] ; + pmullw mm4, [rax+8] ; + + paddw mm1, mm3 ; + paddw mm2, mm4 ; + + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + psraw mm1, mmx_filter_shift ; + + paddw mm2, [GLOBAL(mmx_bi_rd)] ; + psraw mm2, mmx_filter_shift ; + + movq mm3, mm5 ; + movq mm4, mm5 ; + + punpcklbw mm3, mm0 ; + punpckhbw mm4, mm0 ; + + movq mm5, mm1 ; + packuswb mm5, mm2 ; + + pmullw mm3, [rdx] ; + pmullw mm4, [rdx] ; + + pmullw mm1, [rdx+8] ; + pmullw mm2, [rdx+8] ; + + paddw mm1, mm3 ; + paddw mm2, mm4 ; + + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; + + psraw mm1, mmx_filter_shift ; + psraw mm2, mmx_filter_shift ; + + movq mm3, [rdi] ; + movq mm4, mm3 ; + + punpcklbw mm3, mm0 ; + punpckhbw mm4, mm0 ; + + psubw mm1, mm3 ; + psubw mm2, mm4 ; + + paddw mm6, mm1 ; + pmaddwd mm1, mm1 ; + + paddw mm6, mm2 ; + pmaddwd mm2, mm2 ; + + paddd mm7, mm1 ; + paddd mm7, mm2 ; + +%if ABI_IS_32BIT + add rsi, dword ptr arg(1) ;ref_pixels_per_line ; + add rdi, dword ptr arg(3) ;src_pixels_per_line ; +%else + movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; + movsxd r9, dword ptr arg(3) ;src_pixels_per_line ; + add rsi, r8 + add rdi, r9 +%endif + sub rcx, 1 ; + jnz .filter_block2d_bil_var_mmx_loop ; + + + pxor mm3, mm3 ; + pxor mm2, mm2 ; + + punpcklwd mm2, mm6 ; + punpckhwd mm3, mm6 ; + + paddd mm2, mm3 ; + movq mm6, mm2 ; + + psrlq mm6, 32 ; + paddd mm2, mm6 ; + + psrad mm2, 16 ; + movq mm4, mm7 ; + + psrlq mm4, 32 ; + paddd mm4, mm7 ; + + mov rdi, arg(7) ;sum + mov rsi, arg(8) ;sumsquared + + movd dword ptr [rdi], mm2 ; + movd dword ptr [rsi], mm4 ; + + ; begin epilog + add rsp, 16 + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +SECTION_RODATA +;short mmx_bi_rd[4] = { 64, 64, 64, 64}; +align 16 +mmx_bi_rd: + times 4 dw 64 diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c 
b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c index 02e02420f46..e594b1e65ee 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp8_rtcd.h" #include "vpx_config.h" #include "vp8/common/variance.h" #include "vpx_ports/mem.h" @@ -34,25 +35,6 @@ extern void filter_block1d_v6_mmx short *filter ); -extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr); -extern unsigned int vp8_get8x8var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -extern unsigned int vp8_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); extern void vp8_filter_block2d_bil4x4_var_mmx ( const unsigned char *ref_ptr, @@ -77,127 +59,6 @@ extern void vp8_filter_block2d_bil_var_mmx unsigned int *sumsquared ); - -unsigned int vp8_variance4x4_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); - -} - -unsigned int vp8_variance8x8_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 6)); - -} - -unsigned int vp8_mse16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3; - - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; - vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - *sse = var; - return var; -} - - -unsigned int vp8_variance16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3, avg; - - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; - vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - avg = sum0 + sum1 + sum2 + sum3; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); -} - -unsigned int vp8_variance16x8_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - 
unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - -} - - -unsigned int vp8_variance8x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 7)); - -} - - unsigned int vp8_sub_pixel_variance4x4_mmx ( const unsigned char *src_ptr, @@ -286,20 +147,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx } -unsigned int vp8_sub_pixel_mse16x16_mmx( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) -{ - vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - unsigned int vp8_sub_pixel_variance16x8_mmx ( const unsigned char *src_ptr, diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c index 1fe127bf2c6..1c15ed88097 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp8_rtcd.h" #include "vpx_config.h" #include "vp8/common/variance.h" #include "vpx_ports/mem.h" @@ -30,38 +31,6 @@ extern void vp8_filter_block2d_bil4x4_var_mmx unsigned int *sumsquared ); -extern unsigned int vp8_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -unsigned int vp8_get_mb_ss_sse2 -( - const short *src_ptr -); -unsigned int vp8_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -unsigned int vp8_get8x8var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); void vp8_filter_block2d_bil_var_sse2 ( const unsigned char *ref_ptr, @@ -135,115 +104,6 @@ void vp8_half_vert_variance16x_h_sse2 unsigned int *sumsquared ); -unsigned int vp8_variance4x4_wmt( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); - -} - -unsigned int vp8_variance8x8_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int var; - int avg; - - vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 6)); - -} - - -unsigned int vp8_variance16x16_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0; - int sum0; - - - vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - *sse = sse0; - return (sse0 - (((unsigned int)sum0 * sum0) >> 8)); -} -unsigned int vp8_mse16x16_wmt( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - - unsigned int sse0; - int sum0; - vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - *sse = sse0; - return sse0; - -} - - -unsigned int vp8_variance16x8_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - -} - -unsigned int vp8_variance8x16_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) -{ - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; - vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); - -} - unsigned int vp8_sub_pixel_variance4x4_wmt ( const unsigned char *src_ptr, @@ -378,20 +238,6 @@ unsigned int 
vp8_sub_pixel_variance16x16_wmt return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); } -unsigned int vp8_sub_pixel_mse16x16_wmt( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) -{ - vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - unsigned int vp8_sub_pixel_variance16x8_wmt ( const unsigned char *src_ptr, diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c index d7b8c76dc26..9015fcbb496 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c @@ -259,7 +259,7 @@ static int swap_frame_buffers (VP8_COMMON *cm) return err; } -int check_fragments_for_errors(VP8D_COMP *pbi) +static int check_fragments_for_errors(VP8D_COMP *pbi) { if (!pbi->ec_active && pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0) diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm deleted file mode 100644 index 000805d4fed..00000000000 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm +++ /dev/null @@ -1,138 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_mse16x16_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -; -;note: Based on vp8_variance16x16_armv6. In this function, sum is never used. -; So, we can remove this part of calculation. 
- -|vp8_mse16x16_armv6| PROC - - push {r4-r9, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #16 ; set loop counter to 16 (=block height) - mov r4, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r5, [r0, #0x0] ; load 4 src pixels - ldr r6, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r5, r6 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0x4] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r2, #0x4] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - ldr r5, [r0, #0x8] ; load 4 src pixels - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r6, [r2, #0x8] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0xc] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r6, [r2, #0xc] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - subs r12, r12, #1 ; next row - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; 
return stuff - ldr r1, [sp, #28] ; get address of sse - mov r0, r4 ; return sse - str r4, [r1] ; store sse - - pop {r4-r9, pc} - - ENDP - - END diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c deleted file mode 100644 index f806809df5b..00000000000 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -unsigned int vp8_mse16x16_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - int64x1_t d0s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; - - q7s32 = vdupq_n_s32(0); - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); - - for (i = 0; i < 8; i++) { // mse16x16_neon_loop - q0u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; - q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q7s32 = vmlal_s16(q7s32, d22s16, d22s16); - q8s32 = vmlal_s16(q8s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q7s32 = vmlal_s16(q7s32, d26s16, d26s16); - q8s32 = vmlal_s16(q8s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q10s32 = vaddq_s32(q7s32, q9s32); - - q1s64 = vpaddlq_s32(q10s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0); - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); -} - -unsigned int vp8_get4x4sse_cs_neon( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride) { - int16x4_t d22s16, d24s16, d26s16, d28s16; - int64x1_t d0s64; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; - - d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d4u8 
= vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d1u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d5u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - d3u8 = vld1_u8(src_ptr); - src_ptr += source_stride; - d7u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; - - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d1u8, d5u8); - q13u16 = vsubl_u8(d2u8, d6u8); - q14u16 = vsubl_u8(d3u8, d7u8); - - d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16)); - d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16)); - d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16)); - d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16)); - - q7s32 = vmull_s16(d22s16, d22s16); - q8s32 = vmull_s16(d24s16, d24s16); - q9s32 = vmull_s16(d26s16, d26s16); - q10s32 = vmull_s16(d28s16, d28s16); - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q9s32 = vaddq_s32(q7s32, q9s32); - - q1s64 = vpaddlq_s32(q9s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); -} diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c index 091554a5d50..0c7198d5d3a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c @@ -11,6 +11,8 @@ #include <math.h> +#include "./vp8_rtcd.h" + void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { int i; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c index 378e902c6a4..d381d8ddf45 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c @@ -11,6 +11,7 @@ #include "vpx_config.h" #include "vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "encodemb.h" #include "encodemv.h" #include "vp8/common/common.h" @@ -90,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) * lambda using a non-linear combination (e.g., the smallest, or second * smallest, etc.). 
*/ - act = vp8_variance16x16(x->src.y_buffer, + act = vpx_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0, &sse); act = act<<4; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c index cfa4cb927f6..e2de5eecbc4 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c @@ -11,6 +11,7 @@ #include "vpx_config.h" #include "vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "quantize.h" #include "vp8/common/reconintra4x4.h" #include "encodemb.h" @@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) } } - intra_pred_var = vp8_get_mb_ss(x->src_diff); + intra_pred_var = vpx_get_mb_ss(x->src_diff); return intra_pred_var; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c index 977b0b0321e..4e234ccd58b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c @@ -19,8 +19,6 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); -extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); - static THREAD_FUNCTION thread_loopfilter(void *p_data) { VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c index 75c1362610f..3deb4abb337 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c @@ -12,6 +12,7 @@ #include <limits.h> #include <stdio.h> +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "block.h" #include "onyx_int.h" @@ -34,8 +35,6 @@ /* #define OUTPUT_FPF 1 */ extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); -extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); -extern void vp8_alloc_compressor_data(VP8_COMP *cpi); #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] extern int vp8_kf_boost_qadjustment[QINDEX_RANGE]; @@ -424,14 +423,14 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, /* Set up pointers for this macro block raw buffer */ raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + d->offset); - vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride, - (unsigned int *)(raw_motion_err)); + vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride, + (unsigned int *)(raw_motion_err)); /* Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset ); - vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, - (unsigned int *)(best_motion_err)); + vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, + (unsigned int *)(best_motion_err)); } static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, @@ -455,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, int new_mv_mode_penalty = 256; /* override the default variance function to use MSE */ - v_fn_ptr.vf = vp8_mse16x16; + v_fn_ptr.vf = vpx_mse16x16; /* Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; @@ -1329,8 +1328,6 @@ static int 
estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta return Q; } -extern void vp8_new_framerate(VP8_COMP *cpi, double framerate); - void vp8_init_second_pass(VP8_COMP *cpi) { FIRSTPASS_STATS this_frame; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c index c61563c56f7..ad0e9308dc1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c @@ -10,6 +10,7 @@ #include "vp8/common/blockd.h" +#include "modecosts.h" #include "onyx_int.h" #include "treewriter.h" #include "vp8/common/entropymode.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h index 9281551c8d5..9871bfffdf9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h @@ -16,7 +16,9 @@ extern "C" { #endif -void vp8_init_mode_costs(VP8_COMP *x); +struct VP8_COMP; + +void vp8_init_mode_costs(struct VP8_COMP *x); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c index 5b452312ed2..40e29e191af 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c @@ -587,7 +587,8 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) // Turn-off under certain conditions (i.e., away from key frame, and if // we are at good quality (low Q) and most of the blocks were skipped-encoded // in previous frame. - if (Q >= 100) { + int qp_thresh = (cpi->oxcf.screen_content_mode == 2) ? 
80 : 100; + if (Q >= qp_thresh) { cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 10; } else if (cpi->frames_since_key > 250 && @@ -2011,6 +2012,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->source_alt_ref_active = 0; cpi->common.refresh_alt_ref_frame = 0; + cpi->force_maxqp = 0; + cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; #if CONFIG_INTERNAL_STATS cpi->b_calculate_ssimg = 0; @@ -2128,7 +2131,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) #endif cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16; - cpi->fn_ptr[BLOCK_16X16].vf = vp8_variance16x16; + cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16; cpi->fn_ptr[BLOCK_16X16].svf = vp8_sub_pixel_variance16x16; cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vp8_variance_halfpixvar16x16_h; cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vp8_variance_halfpixvar16x16_v; @@ -2138,7 +2141,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d; cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8; - cpi->fn_ptr[BLOCK_16X8].vf = vp8_variance16x8; + cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8; cpi->fn_ptr[BLOCK_16X8].svf = vp8_sub_pixel_variance16x8; cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL; @@ -2148,7 +2151,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d; cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16; - cpi->fn_ptr[BLOCK_8X16].vf = vp8_variance8x16; + cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16; cpi->fn_ptr[BLOCK_8X16].svf = vp8_sub_pixel_variance8x16; cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL; @@ -2158,7 +2161,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d; cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8; - cpi->fn_ptr[BLOCK_8X8].vf = vp8_variance8x8; + cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8; cpi->fn_ptr[BLOCK_8X8].svf = vp8_sub_pixel_variance8x8; cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL; @@ -2168,7 +2171,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d; cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4; - cpi->fn_ptr[BLOCK_4X4].vf = vp8_variance4x4; + cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4; cpi->fn_ptr[BLOCK_4X4].svf = vp8_sub_pixel_variance4x4; cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL; cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL; @@ -2555,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, { unsigned int sse; - vp8_mse16x16(orig + col, orig_stride, + vpx_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); total_sse += sse; @@ -3381,7 +3384,7 @@ static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source, int index = block_index_row + (j >> 4); if (cpi->consec_zero_last[index] >= min_consec_zero_last) { unsigned int sse; - Total += vp8_mse16x16(src + j, + Total += vpx_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); @@ -3445,7 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) { int index = block_index_row + (j >> 4); if (cpi->consec_zero_last[index] >= min_consec_zero_last) { unsigned int sse; - const unsigned int var = vp8_variance16x16(src + j, + const unsigned int var = vpx_variance16x16(src + j, ystride, dst + j, ystride, @@ -3455,7 +3458,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) { // is small (to avoid effects 
from lighting change). if ((sse - var) < 128) { unsigned int sse2; - const unsigned int act = vp8_variance16x16(src + j, + const unsigned int act = vpx_variance16x16(src + j, ystride, const_source, 0, @@ -4184,7 +4187,10 @@ static void encode_frame_to_data_rate */ if (cpi->cyclic_refresh_mode_enabled) { - if (cpi->current_layer==0) + // Special case for screen_content_mode with golden frame updates. + int disable_cr_gf = (cpi->oxcf.screen_content_mode == 2 && + cm->refresh_golden_frame); + if (cpi->current_layer == 0 && cpi->force_maxqp == 0 && !disable_cr_gf) cyclic_background_refresh(cpi, Q, 0); else disable_segmentation(cpi); @@ -4406,6 +4412,11 @@ static void encode_frame_to_data_rate /* transform / motion compensation build reconstruction frame */ vp8_encode_frame(cpi); + if (cpi->oxcf.screen_content_mode == 2) { + if (vp8_drop_encodedframe_overshoot(cpi, Q)) + return; + } + cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0; #endif @@ -5982,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); + Total += vpx_mse16x16(src + j, source->y_stride, + dst + j, dest->y_stride, &sse); } src += 16 * source->y_stride; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h index 82d7453902c..c48e2f4478b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h @@ -526,6 +526,8 @@ typedef struct VP8_COMP // Measure of average squared difference between source and denoised signal. int mse_source_denoised; + int force_maxqp; + #if CONFIG_MULTITHREAD /* multithread data */ int * mt_current_mb_col; @@ -714,6 +716,11 @@ typedef struct VP8_COMP } rd_costs; } VP8_COMP; +void vp8_alloc_compressor_data(VP8_COMP *cpi); +int vp8_reverse_trans(int x); +void vp8_new_framerate(VP8_COMP *cpi, double framerate); +void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); + void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c index c4c0e7e9e23..053bf119aa9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c @@ -11,6 +11,7 @@ #include <limits.h> #include "vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "onyx_int.h" #include "modecosts.h" #include "encodeintra.h" @@ -29,8 +30,6 @@ #include "denoising.h" #endif -extern int VP8_UVSSE(MACROBLOCK *x); - #ifdef SPEEDSTATS extern unsigned int cnt_pm; #endif @@ -38,8 +37,6 @@ extern unsigned int cnt_pm; extern const int vp8_ref_frame_order[MAX_MODES]; extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; -extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); - // Fixed point implementation of a skin color classifier. Skin color // is model by a Gaussian distribution in the CbCr color space. 
// See ../../test/skin_color_detector_test.cc where the reference @@ -219,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb, } - -unsigned int vp8_get4x4sse_cs_c -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride -) -{ - int distortion = 0; - int r, c; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int diff = src_ptr[c] - ref_ptr[c]; - distortion += diff * diff; - } - - src_ptr += source_stride; - ref_ptr += recon_stride; - } - - return distortion; -} - static int get_prediction_error(BLOCK *be, BLOCKD *b) { unsigned char *sptr; @@ -253,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b) sptr = (*(be->base_src) + be->src); dptr = b->predictor; - return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16); + return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16); } @@ -1041,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, else { rate2 += rate; - distortion2 = vp8_variance16x16( + distortion2 = vpx_variance16x16( *(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); @@ -1070,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, xd->dst.y_stride, xd->predictor, 16); - distortion2 = vp8_variance16x16 + distortion2 = vpx_variance16x16 (*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; @@ -1551,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) xd->dst.y_stride, xd->predictor, 16); - distortion = vp8_variance16x16 + distortion = vpx_variance16x16 (*(b->base_src), b->src_stride, xd->predictor, 16, &sse); rate = x->mbmode_cost[xd->frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c index 890053dcfdc..875b37f6841 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c @@ -9,6 +9,7 @@ */ +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vp8/common/onyxc_int.h" #include "onyx_int.h" @@ -83,7 +84,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += vp8_mse16x16(src + j, source->y_stride, + Total += vpx_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c index 25d7a4998cb..e8796a1fcfb 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c @@ -1215,6 +1215,11 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { int Q = cpi->active_worst_quality; + if (cpi->force_maxqp == 1) { + cpi->active_worst_quality = cpi->worst_quality; + return cpi->worst_quality; + } + /* Reset Zbin OQ value */ cpi->mb.zbin_over_quant = 0; @@ -1559,3 +1564,46 @@ int vp8_pick_frame_size(VP8_COMP *cpi) } return 1; } +// If this just encoded frame (mcomp/transform/quant, but before loopfilter and +// pack_bitstream) has large overshoot, and was not being encoded close to the +// max QP, then drop this frame and force next frame to be encoded at max QP. 
+// Condition this on 1 pass CBR with screen content mode and frame dropper off. +// TODO(marpan): Should do this exit condition during the encode_frame +// (i.e., halfway during the encoding of the frame) to save cycles. +int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { + if (cpi->pass == 0 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && + cpi->drop_frames_allowed == 0 && + cpi->common.frame_type != KEY_FRAME) { + // Note: the "projected_frame_size" from encode_frame() only gives estimate + // of mode/motion vector rate (in non-rd mode): so below we only require + // that projected_frame_size is somewhat greater than per-frame-bandwidth, + // but add additional condition with high threshold on prediction residual. + + // QP threshold: only allow dropping if we are not close to qp_max. + int thresh_qp = 3 * cpi->worst_quality >> 2; + // Rate threshold, in bytes. + int thresh_rate = 2 * (cpi->av_per_frame_bandwidth >> 3); + // Threshold for the average (over all macroblocks) of the pixel-sum + // residual error over 16x16 block. Should add QP dependence on threshold? + int thresh_pred_err_mb = (256 << 4); + int pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs); + if (Q < thresh_qp && + cpi->projected_frame_size > thresh_rate && + pred_err_mb > thresh_pred_err_mb) { + // Drop this frame: advance frame counters, and set force_maxqp flag. + cpi->common.current_video_frame++; + cpi->frames_since_key++; + // Flag to indicate we will force next frame to be encoded at max QP. + cpi->force_maxqp = 1; + return 1; + } else { + cpi->force_maxqp = 0; + return 0; + } + cpi->force_maxqp = 0; + return 0; + } + cpi->force_maxqp = 0; + return 0; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h index 829697f391f..703de9ff550 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h @@ -30,6 +30,8 @@ extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_ /* return of 0 means drop frame */ extern int vp8_pick_frame_size(VP8_COMP *cpi); +extern int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q); + #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c index 9ccd85eb93f..17194f0d449 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c @@ -15,6 +15,7 @@ #include <assert.h> #include "vpx_config.h" #include "vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "tokenize.h" #include "treewriter.h" #include "onyx_int.h" @@ -507,9 +508,9 @@ int VP8_UVSSE(MACROBLOCK *x) } else { - vp8_variance8x8(uptr, pre_stride, + vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); - vp8_variance8x8(vptr, pre_stride, + vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } @@ -1783,7 +1784,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4], if(threshold < x->encode_breakout) threshold = x->encode_breakout; - var = vp8_variance16x16 + var = vpx_variance16x16 (*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h index e0da35e203c..b4fcd10b61e 100644 --- 
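A worked example of the thresholds in vp8_drop_encodedframe_overshoot above: with worst_quality at its maximum of 63, thresh_qp = (3 * 63) >> 2 = 47, so a drop is only considered when the frame was encoded at a Q below 47; thresh_rate = 2 * (av_per_frame_bandwidth >> 3) is roughly one quarter of the per-frame budget; and thresh_pred_err_mb = 256 << 4 = 4096, i.e. 16 per pixel when spread over the 256 pixels of a 16x16 macroblock. Only when all three conditions hold is the frame dropped and force_maxqp set, which the vp8_regulate_q change above then turns into encoding the next frame at worst_quality.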
a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h @@ -136,6 +136,9 @@ extern void vp8_mv_pred int near_sadidx[] ); void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]); +int VP8_UVSSE(MACROBLOCK *x); +int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); +void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c index 448217ff412..14282db8016 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c @@ -17,7 +17,7 @@ #include <intrin.h> #pragma intrinsic(_BitScanReverse) static int bsr(int mask) { - int eob; + unsigned long eob; _BitScanReverse(&eob, mask); eob++; if (mask == 0) diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk index b4c814075c7..236d8a5d803 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk @@ -86,8 +86,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm -VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_mmx.c -VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_impl_mmx.asm +VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp8_variance_mmx.c +VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp8_variance_impl_mmx.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/copy_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm @@ -96,7 +96,7 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm -VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_sse2.c +VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp8_variance_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_impl_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/copy_sse3.asm VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm @@ -145,8 +145,6 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/intra4x4_predict_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c -VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance8x8_armv6$(ASM) -VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance16x16_armv6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6$(ASM) VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM) @@ -168,7 +166,6 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/reconintra_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) 
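The vp8_ -> vpx_ renames in the hunks above (vpx_variance16x16, vpx_variance8x8, vpx_mse16x16) switch VP8 over to the shared vpx_dsp kernels; the deleted vp8_get4x4sse_cs_c shows the 4x4 case of the same computation. A minimal scalar sketch of the general pattern, assuming the usual vpx_dsp semantics (the _ref name is illustrative, not a function from the tree):

#include <stdint.h>

/* Sum of squared differences plus the difference sum, as computed by the
 * vpx_dsp variance/MSE helpers. vpx_mse16x16 reports sse directly;
 * vpx_variance16x16 reports sse - (sum * sum) / 256, i.e. SSE minus the
 * squared-mean term. */
static uint32_t sse_sum_ref(const uint8_t *src, int src_stride,
                            const uint8_t *ref, int ref_stride,
                            int w, int h, int *sum) {
  int r, c;
  uint32_t sse = 0;
  *sum = 0;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int diff = src[c] - ref[c];
      *sum += diff;
      sse += (uint32_t)(diff * diff);
    }
    src += src_stride;
    ref += ref_stride;
  }
  return sse;
}

Separately, the quantize_ssse3.c change above is a type fix: MSVC declares the intrinsic as unsigned char _BitScanReverse(unsigned long *Index, unsigned long Mask), so the index passed by address has to be an unsigned long rather than an int.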
+= common/arm/neon/shortidct4x4llm_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance_neon.c $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl)) diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c index af9cc7320b9..8697377892e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c @@ -135,7 +135,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */ RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */ RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); - RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); + RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); @@ -199,7 +199,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); RANGE_CHECK(vp8_cfg, cq_level, 0, 63); - RANGE_CHECK_BOOL(vp8_cfg, screen_content_mode); + RANGE_CHECK_HI(vp8_cfg, screen_content_mode, 2); if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q)) RANGE_CHECK(vp8_cfg, cq_level, cfg->rc_min_quantizer, cfg->rc_max_quantizer); @@ -478,8 +478,6 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, return res; } -int vp8_reverse_trans(int); - static vpx_codec_err_t get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk b/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk index 05003017982..0b0f6a70a12 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk @@ -18,7 +18,6 @@ VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c #File list for media # encoder VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_short_fdct4x4_armv6$(ASM) -VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM) VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM) #File list for neon @@ -27,5 +26,4 @@ VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon.c VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon.c VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon.c -VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_mse16x16_neon.c VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c index 3c8c6a9348d..0233877dd38 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c @@ -9,6 +9,8 @@ */ #include <arm_neon.h> + +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" void vp9_idct16x16_1_add_neon( diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c 
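The RANGE_CHECK_HI change in vp8_cx_iface.c above widens screen_content_mode from a boolean to the range 0-2, with 2 selecting the more aggressive screen-content behaviour added by this patch (overshoot-triggered frame drops and the cyclic-refresh/golden-frame special case). A minimal usage sketch, assuming the VP8E_SET_SCREEN_CONTENT_MODE control from vp8cx.h (not part of this hunk):

  /* after vpx_codec_enc_init(&codec, vpx_codec_vp8_cx(), &cfg, 0) */
  vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 2);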
b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c index d0e4b4f4014..0ce45f2bfa8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c @@ -9,10 +9,12 @@ */ #include <arm_neon.h> -#include "vp9/common/vp9_idct.h" #include "./vpx_config.h" +#include "vpx_ports/mem.h" +#include "vp9/common/vp9_idct.h" + static INLINE void LD_16x8( uint8_t *d, int d_stride, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c index 7c8a930b645..f0457358e6c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c @@ -9,6 +9,8 @@ */ #include <arm_neon.h> + +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" void vp9_idct4x4_1_add_neon( diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c index 24c29fb77f6..5369697c7d1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c @@ -9,6 +9,8 @@ */ #include <arm_neon.h> + +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" void vp9_idct8x8_1_add_neon( diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c index d0beaa7208f..92706bf2c69 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c @@ -8,466 +8,815 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include <stddef.h> #include <arm_neon.h> -void vp9_v_predictor_4x4_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +//------------------------------------------------------------------------------ +// DC 4x4 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. 
+static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x8_t A = vld1_u8(above); // top row + const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top + const uint16x4_t p1 = vpadd_u16(p0, p0); + sum_top = vcombine_u16(p1, p1); + } + + if (do_left) { + const uint8x8_t L = vld1_u8(left); // left border + const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left + const uint16x4_t p1 = vpadd_u16(p0, p0); + sum_left = vcombine_u16(p1, p1); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 3); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 2); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 2); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x8_t dc = vdup_lane_u8(dc0, 0); int i; - uint32x2_t d0u32 = vdup_n_u32(0); - (void)left; + for (i = 0; i < 4; ++i) { + vst1_lane_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc), 0); + } + } +} + +void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_4x4(dst, stride, above, left, 1, 1); +} + +void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + dc_4x4(dst, stride, NULL, left, 0, 1); +} + +void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + dc_4x4(dst, stride, above, NULL, 1, 0); +} - d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0); - for (i = 0; i < 4; i++, dst += y_stride) - vst1_lane_u32((uint32_t *)dst, d0u32, 0); - return; +void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + dc_4x4(dst, stride, NULL, NULL, 0, 0); } -void vp9_v_predictor_8x8_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { +//------------------------------------------------------------------------------ +// DC 8x8 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. 
+static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x8_t A = vld1_u8(above); // top row + const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top + const uint16x4_t p1 = vpadd_u16(p0, p0); + const uint16x4_t p2 = vpadd_u16(p1, p1); + sum_top = vcombine_u16(p2, p2); + } + + if (do_left) { + const uint8x8_t L = vld1_u8(left); // left border + const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left + const uint16x4_t p1 = vpadd_u16(p0, p0); + const uint16x4_t p2 = vpadd_u16(p1, p1); + sum_left = vcombine_u16(p2, p2); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 4); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 3); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 3); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x8_t dc = vdup_lane_u8(dc0, 0); int i; - uint8x8_t d0u8 = vdup_n_u8(0); - (void)left; + for (i = 0; i < 8; ++i) { + vst1_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc)); + } + } +} - d0u8 = vld1_u8(above); - for (i = 0; i < 8; i++, dst += y_stride) - vst1_u8(dst, d0u8); - return; +void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_8x8(dst, stride, above, left, 1, 1); } -void vp9_v_predictor_16x16_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { +void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + dc_8x8(dst, stride, NULL, left, 0, 1); +} + +void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + dc_8x8(dst, stride, above, NULL, 1, 0); +} + +void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + dc_8x8(dst, stride, NULL, NULL, 0, 0); +} + +//------------------------------------------------------------------------------ +// DC 16x16 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. 
+static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x16_t A = vld1q_u8(above); // top row + const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top + const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); + const uint16x4_t p2 = vpadd_u16(p1, p1); + const uint16x4_t p3 = vpadd_u16(p2, p2); + sum_top = vcombine_u16(p3, p3); + } + + if (do_left) { + const uint8x16_t L = vld1q_u8(left); // left row + const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left + const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); + const uint16x4_t p2 = vpadd_u16(p1, p1); + const uint16x4_t p3 = vpadd_u16(p2, p2); + sum_left = vcombine_u16(p3, p3); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 5); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 4); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 4); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x16_t dc = vdupq_lane_u8(dc0, 0); int i; - uint8x16_t q0u8 = vdupq_n_u8(0); - (void)left; + for (i = 0; i < 16; ++i) { + vst1q_u8(dst + i * stride, dc); + } + } +} + +void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_16x16(dst, stride, above, left, 1, 1); +} + +void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + dc_16x16(dst, stride, NULL, left, 0, 1); +} - q0u8 = vld1q_u8(above); - for (i = 0; i < 16; i++, dst += y_stride) - vst1q_u8(dst, q0u8); - return; +void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)left; + dc_16x16(dst, stride, above, NULL, 1, 0); } -void vp9_v_predictor_32x32_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { +void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + (void)left; + dc_16x16(dst, stride, NULL, NULL, 0, 0); +} + +//------------------------------------------------------------------------------ +// DC 32x32 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. 
+static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x16_t A0 = vld1q_u8(above); // top row + const uint8x16_t A1 = vld1q_u8(above + 16); + const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top + const uint16x8_t p1 = vpaddlq_u8(A1); + const uint16x8_t p2 = vaddq_u16(p0, p1); + const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); + const uint16x4_t p4 = vpadd_u16(p3, p3); + const uint16x4_t p5 = vpadd_u16(p4, p4); + sum_top = vcombine_u16(p5, p5); + } + + if (do_left) { + const uint8x16_t L0 = vld1q_u8(left); // left row + const uint8x16_t L1 = vld1q_u8(left + 16); + const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left + const uint16x8_t p1 = vpaddlq_u8(L1); + const uint16x8_t p2 = vaddq_u16(p0, p1); + const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); + const uint16x4_t p4 = vpadd_u16(p3, p3); + const uint16x4_t p5 = vpadd_u16(p4, p4); + sum_left = vcombine_u16(p5, p5); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 6); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 5); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 5); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x16_t dc = vdupq_lane_u8(dc0, 0); int i; - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)left; - - q0u8 = vld1q_u8(above); - q1u8 = vld1q_u8(above + 16); - for (i = 0; i < 32; i++, dst += y_stride) { - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); + for (i = 0; i < 32; ++i) { + vst1q_u8(dst + i * stride, dc); + vst1q_u8(dst + i * stride + 16, dc); } - return; + } } -void vp9_h_predictor_4x4_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d1u32 = vdup_n_u32(0); - (void)above; +void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_32x32(dst, stride, above, left, 1, 1); +} - d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0); +void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + dc_32x32(dst, stride, NULL, left, 0, 1); +} - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - return; +void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)left; + dc_32x32(dst, stride, above, NULL, 1, 0); } -void vp9_h_predictor_8x8_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint64x1_t d1u64 = vdup_n_u64(0); - (void)above; +void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + (void)left; + dc_32x32(dst, stride, NULL, NULL, 0, 0); +} - 
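The dc_4x4, dc_8x8, dc_16x16 and dc_32x32 helpers above all follow the same pattern: sum whichever of the above and left borders are available, take a rounded average (vrshrn_n_u16 by log2 of the number of summed pixels), and fall back to 0x80 when neither border is present. A minimal scalar sketch of that pattern (illustrative; the _ref name is not a function from the tree):

#include <stddef.h>
#include <stdint.h>

/* Scalar equivalent of the NEON dc_NxN helpers above. bs is 4, 8, 16 or 32;
 * have_above/have_left mirror the do_above/do_left parameters. */
static void dc_predictor_ref(uint8_t *dst, ptrdiff_t stride, int bs,
                             const uint8_t *above, const uint8_t *left,
                             int have_above, int have_left) {
  int i, r, c, sum = 0, count = 0, dc = 0x80;
  if (have_above) { for (i = 0; i < bs; ++i) sum += above[i]; count += bs; }
  if (have_left)  { for (i = 0; i < bs; ++i) sum += left[i];  count += bs; }
  if (count) dc = (sum + (count >> 1)) / count;  /* rounded average, as vrshrn */
  for (r = 0; r < bs; ++r, dst += stride)
    for (c = 0; c < bs; ++c) dst[c] = (uint8_t)dc;
}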
d1u64 = vld1_u64((const uint64_t *)left); +// ----------------------------------------------------------------------------- - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6); - vst1_u8(dst, d0u8); - dst += y_stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7); +void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(above)); // top row + const uint64x1_t A1 = vshr_n_u64(A0, 8); + const uint64x1_t A2 = vshr_n_u64(A0, 16); + const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0); + const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1); + const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2); + const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00); + const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0); + const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); + const uint32x2_t r0 = vreinterpret_u32_u8(avg2); + const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); + const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); + const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); + (void)left; + vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); + vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); + vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); + vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); + dst[3 * stride + 3] = above[7]; +} + +void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 }; + static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 }; + const uint8x8_t sh_12345677 = vld1_u8(shuffle1); + const uint8x8_t sh_23456777 = vld1_u8(shuffle2); + const uint8x8_t A0 = vld1_u8(above); // top row + const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677); + const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777); + const uint8x8_t avg1 = vhadd_u8(A0, A2); + uint8x8_t row = vrhadd_u8(avg1, A1); + int i; + (void)left; + for (i = 0; i < 7; ++i) { + vst1_u8(dst + i * stride, row); + row = vtbl1_u8(row, sh_12345677); + } + vst1_u8(dst + i * stride, row); +} + +void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x16_t A0 = vld1q_u8(above); // top row + const uint8x16_t above_right = vld1q_dup_u8(above + 15); + const uint8x16_t A1 = vextq_u8(A0, above_right, 1); + const uint8x16_t A2 = vextq_u8(A0, above_right, 2); + const uint8x16_t avg1 = vhaddq_u8(A0, A2); + uint8x16_t row = vrhaddq_u8(avg1, A1); + int i; + (void)left; + for (i = 0; i < 15; ++i) { + vst1q_u8(dst + i * stride, row); + row = vextq_u8(row, above_right, 1); + } + vst1q_u8(dst + i * stride, row); +} + +// ----------------------------------------------------------------------------- + +void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x8_t XABCD_u8 = vld1_u8(above - 
1); + const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8); + const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32); + const uint32x2_t zero = vdup_n_u32(0); + const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0); + const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL); + const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8)); + const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC); + const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8)); + const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16)); + const uint8_t D = vget_lane_u8(XABCD_u8, 4); + const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6); + const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC); + const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8); + const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_); + const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); + const uint32x2_t r3 = vreinterpret_u32_u8(avg2); + const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); + const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); + const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); + vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); + vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); + vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); + vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); +} + +#if !HAVE_NEON_ASM + +void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint32x2_t d0u32 = vdup_n_u32(0); + (void)left; + + d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0); + for (i = 0; i < 4; i++, dst += stride) + vst1_lane_u32((uint32_t *)dst, d0u32, 0); +} + +void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x8_t d0u8 = vdup_n_u8(0); + (void)left; + + d0u8 = vld1_u8(above); + for (i = 0; i < 8; i++, dst += stride) vst1_u8(dst, d0u8); - return; } -void vp9_h_predictor_16x16_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)above; +void vp9_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x16_t q0u8 = vdupq_n_u8(0); + (void)left; + + q0u8 = vld1q_u8(above); + for (i = 0; i < 16; i++, dst += stride) + vst1q_u8(dst, q0u8); +} + +void vp9_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)left; + + q0u8 = vld1q_u8(above); + q1u8 = vld1q_u8(above + 16); + for (i = 0; i < 32; i++, dst += stride) { + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q1u8); + } +} + +void vp9_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + uint8x8_t d0u8 = vdup_n_u8(0); + uint32x2_t d1u32 = vdup_n_u32(0); + (void)above; + + d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0); + + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + 
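The d45 kernels above vectorise the usual three-tap diagonal filter; written out in scalar form for the 4x4 case (the 8x8 and 16x16 versions apply the same filter, replicating the right-edge pixel past the end of the above row; the _ref name is illustrative):

/* Scalar form of the filter the d45 NEON kernels compute (4x4 case).
 * above[] holds 2 * 4 = 8 pixels; the bottom-right pixel copies above[7],
 * matching the explicit store in vp9_d45_predictor_4x4_neon above. */
static void d45_4x4_ref(uint8_t *dst, ptrdiff_t stride, const uint8_t *above) {
  int r, c;
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c)
      dst[r * stride + c] =
          (r + c + 2 < 8)
              ? (uint8_t)((above[r + c] + 2 * above[r + c + 1] +
                           above[r + c + 2] + 2) >> 2)
              : above[7];
}

In the NEON code the (a + 2b + c + 2) >> 2 rounding is obtained from a halving add of a and c (vhadd) followed by a rounding halving add with b (vrhadd), which gives the same result for 8-bit inputs.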
d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); +} + +void vp9_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + uint8x8_t d0u8 = vdup_n_u8(0); + uint64x1_t d1u64 = vdup_n_u64(0); + (void)above; + + d1u64 = vld1_u64((const uint64_t *)left); + + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7); + vst1_u8(dst, d0u8); +} + +void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j; + uint8x8_t d2u8 = vdup_n_u8(0); + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)above; + + q1u8 = vld1q_u8(left); + d2u8 = vget_low_u8(q1u8); + for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { + q0u8 = vdupq_lane_u8(d2u8, 0); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 1); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 2); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 3); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 4); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 5); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 6); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 7); + vst1q_u8(dst, q0u8); + dst += stride; + } +} + +void vp9_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint8x8_t d2u8 = vdup_n_u8(0); + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)above; + for (k = 0; k < 2; k++, left += 16) { q1u8 = vld1q_u8(left); d2u8 = vget_low_u8(q1u8); for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - vst1q_u8(dst, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - dst += y_stride; - } - return; -} - -void vp9_h_predictor_32x32_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j, k; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)above; - - for (k = 0; k < 2; k++, left += 16) { - q1u8 = vld1q_u8(left); - d2u8 = vget_low_u8(q1u8); - for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = 
vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += y_stride; - } + q0u8 = vdupq_lane_u8(d2u8, 0); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 1); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 2); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 3); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 4); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 5); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 6); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 7); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; } - return; + } } -void vp9_tm_predictor_4x4_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int i; - uint16x8_t q1u16, q3u16; - int16x8_t q1s16; - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d2u32 = vdup_n_u32(0); - - d0u8 = vdup_n_u8(above[-1]); - d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0); - q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8); - for (i = 0; i < 4; i++, dst += y_stride) { - q1u16 = vdupq_n_u16((uint16_t)left[i]); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16), - vreinterpretq_s16_u16(q3u16)); - d0u8 = vqmovun_s16(q1s16); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - } - return; -} - -void vp9_tm_predictor_8x8_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j; - uint16x8_t q0u16, q3u16, q10u16; - int16x8_t q0s16; - uint16x4_t d20u16; - uint8x8_t d0u8, d2u8, d30u8; - - d0u8 = vdup_n_u8(above[-1]); - d30u8 = vld1_u8(left); - d2u8 = vld1_u8(above); - q10u16 = vmovl_u8(d30u8); - q3u16 = vsubl_u8(d2u8, d0u8); +void vp9_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint16x8_t q1u16, q3u16; + int16x8_t q1s16; + uint8x8_t d0u8 = vdup_n_u8(0); + uint32x2_t d2u32 = vdup_n_u32(0); + + d0u8 = vld1_dup_u8(above - 1); + d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0); + q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8); + for (i = 0; i < 4; i++, dst += stride) { + q1u16 = vdupq_n_u16((uint16_t)left[i]); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16), + vreinterpretq_s16_u16(q3u16)); + d0u8 = vqmovun_s16(q1s16); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + } +} + +void vp9_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j; + uint16x8_t q0u16, q3u16, q10u16; + int16x8_t q0s16; + uint16x4_t d20u16; + uint8x8_t d0u8, d2u8, d30u8; + + d0u8 = vld1_dup_u8(above - 1); + d30u8 = vld1_u8(left); + d2u8 = 
vld1_u8(above); + q10u16 = vmovl_u8(d30u8); + q3u16 = vsubl_u8(d2u8, d0u8); + d20u16 = vget_low_u16(q10u16); + for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { + q0u16 = vdupq_lane_u16(d20u16, 0); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 1); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 2); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 3); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + } +} + +void vp9_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16; + uint8x16_t q0u8, q1u8; + int16x8_t q0s16, q1s16, q8s16, q11s16; + uint16x4_t d20u16; + uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8; + + q0u8 = vld1q_dup_u8(above - 1); + q1u8 = vld1q_u8(above); + q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); + q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); + for (k = 0; k < 2; k++, left += 8) { + d18u8 = vld1_u8(left); + q10u16 = vmovl_u8(d18u8); d20u16 = vget_low_u16(q10u16); for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; - q0u16 = vdupq_lane_u16(d20u16, 1); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; - q0u16 = vdupq_lane_u16(d20u16, 2); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; - q0u16 = vdupq_lane_u16(d20u16, 3); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += y_stride; - } - return; -} - -void vp9_tm_predictor_16x16_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16; - uint8x16_t q0u8, q1u8; - int16x8_t q0s16, q1s16, q8s16, q11s16; - uint16x4_t d20u16; - uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8; - - q0u8 = vdupq_n_u8(above[-1]); - q1u8 = vld1q_u8(above); - q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); - for (k = 0; k < 2; k++, left += 8) { - d18u8 = vld1_u8(left); - q10u16 = vmovl_u8(d18u8); - d20u16 = vget_low_u16(q10u16); - for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q8u16 = vdupq_lane_u16(d20u16, 1); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - 
vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += y_stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += y_stride; - - q0u16 = vdupq_lane_u16(d20u16, 2); - q8u16 = vdupq_lane_u16(d20u16, 3); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += y_stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += y_stride; - } + q0u16 = vdupq_lane_u16(d20u16, 0); + q8u16 = vdupq_lane_u16(d20u16, 1); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q2u16)); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q3u16)); + q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q2u16)); + q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q3u16)); + d2u8 = vqmovun_s16(q1s16); + d3u8 = vqmovun_s16(q0s16); + d22u8 = vqmovun_s16(q11s16); + d23u8 = vqmovun_s16(q8s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); + dst += stride; + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d20u16, 2); + q8u16 = vdupq_lane_u16(d20u16, 3); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q2u16)); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q3u16)); + q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q2u16)); + q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q3u16)); + d2u8 = vqmovun_s16(q1s16); + d3u8 = vqmovun_s16(q0s16); + d22u8 = vqmovun_s16(q11s16); + d23u8 = vqmovun_s16(q8s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); + dst += stride; + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); + dst += stride; } - return; -} - -void vp9_tm_predictor_32x32_neon( - uint8_t *dst, - ptrdiff_t y_stride, - const uint8_t *above, - const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16; - uint8x16_t q0u8, q1u8, q2u8; - int16x8_t q12s16, q13s16, q14s16, q15s16; - uint16x4_t d6u16; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8; - - q0u8 = vdupq_n_u8(above[-1]); - q1u8 = vld1q_u8(above); - q2u8 = vld1q_u8(above + 16); - q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); 
- q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8)); - q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8)); - for (k = 0; k < 4; k++, left += 8) { - d26u8 = vld1_u8(left); - q3u16 = vmovl_u8(d26u8); - d6u16 = vget_low_u16(q3u16); - for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) { - q0u16 = vdupq_lane_u16(d6u16, 0); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - - q0u16 = vdupq_lane_u16(d6u16, 1); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - - q0u16 = vdupq_lane_u16(d6u16, 2); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - - q0u16 = vdupq_lane_u16(d6u16, 3); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += y_stride; - } + } +} + +void vp9_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16; + uint8x16_t q0u8, q1u8, q2u8; + int16x8_t q12s16, q13s16, q14s16, q15s16; + uint16x4_t d6u16; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8; + + q0u8 = vld1q_dup_u8(above - 1); + q1u8 = vld1q_u8(above); + q2u8 = vld1q_u8(above + 16); + 
q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); + q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); + q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8)); + q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8)); + for (k = 0; k < 4; k++, left += 8) { + d26u8 = vld1_u8(left); + q3u16 = vmovl_u8(d26u8); + d6u16 = vget_low_u16(q3u16); + for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) { + q0u16 = vdupq_lane_u16(d6u16, 0); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 1); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 2); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 3); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; } - return; + } } +#endif // !HAVE_NEON_ASM diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm index dc9856fa887..14f574a50e1 100644 --- 
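All four TM (TrueMotion) kernels above implement the same per-pixel rule, widened to 16 bits so the intermediate sum can leave the 8-bit range before the saturating narrow (vqmovun); in scalar form (illustrative _ref name):

/* Scalar form of the TM predictor computed by the NEON kernels above:
 * each output pixel is left[r] + above[c] - above[-1], clamped to 0..255. */
static void tm_predictor_ref(uint8_t *dst, ptrdiff_t stride, int bs,
                             const uint8_t *above, const uint8_t *left) {
  const int top_left = above[-1];
  int r, c;
  for (r = 0; r < bs; ++r, dst += stride) {
    for (c = 0; c < bs; ++c) {
      const int v = left[r] + above[c] - top_left;
      dst[c] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
    }
  }
}

The switch from vdupq_n_u8(above[-1]) to vld1q_dup_u8(above - 1) only changes how that top-left byte is loaded. In the assembly file below, the matching change from ldrb/vdup to vld1.8 {d0[]} leaves only d0 holding the value, which is why the paired vsubl.u8 instructions there now subtract d0 instead of d1.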
a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm @@ -298,8 +298,7 @@ loop_h |vp9_tm_predictor_4x4_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 d0, r12 + vld1.u8 {d0[]}, [r12] ; Load above 4 pixels vld1.32 {d2[0]}, [r2] @@ -309,10 +308,10 @@ loop_h ; Load left row by row and compute left + (above - ytop_left) ; 1st row and 2nd row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3]! + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 @@ -321,10 +320,10 @@ loop_h vst1.32 {d1[0]}, [r0], r1 ; 3rd row and 4th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3] + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 @@ -345,8 +344,7 @@ loop_h |vp9_tm_predictor_8x8_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 d0, r12 + vld1.8 {d0[]}, [r12] ; preload 8 left vld1.8 {d30}, [r3] @@ -418,8 +416,7 @@ loop_h |vp9_tm_predictor_16x16_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 q0, r12 + vld1.8 {d0[]}, [r12] ; Load above 8 pixels vld1.8 {q1}, [r2] @@ -429,7 +426,7 @@ loop_h ; Compute above - ytop_left vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d1 + vsubl.u8 q3, d3, d0 vmovl.u8 q10, d18 @@ -512,8 +509,7 @@ loop_16x16_neon |vp9_tm_predictor_32x32_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 q0, r12 + vld1.8 {d0[]}, [r12] ; Load above 32 pixels vld1.8 {q1}, [r2]! 
@@ -524,9 +520,9 @@ loop_16x16_neon ; Compute above - ytop_left vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d1 + vsubl.u8 q9, d3, d0 vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d1 + vsubl.u8 q11, d5, d0 vmovl.u8 q3, d26 diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c index 19c582fd109..202d9138199 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c @@ -13,6 +13,7 @@ #include "./vpx_config.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c index 132d88ce5f7..7ceebb6d88c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c @@ -12,6 +12,7 @@ #include "./vpx_config.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c index 1990348b83a..280190a39ba 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c @@ -13,6 +13,7 @@ #include "./vpx_config.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c index fc44ffa311d..04d226663f9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c @@ -13,6 +13,7 @@ #include "./vpx_config.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h index 008cf8cacd9..675db654ab9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h @@ -14,6 +14,7 @@ #include <stdlib.h> #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_onyxc_int.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c new file mode 100644 index 00000000000..89364cb95cb --- /dev/null +++ 
b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c @@ -0,0 +1,782 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_convolve_msa.h" + +static void common_hz_8t_and_aver_dst_4x4_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 dst0, dst1, dst2, dst3, res2, res3; + v16u8 mask0, mask1, mask2, mask3; + v8i16 filt, res0, res1; + + mask0 = LD_UB(&mc_filt_mask_arr[16]); + src -= 3; + + /* rearranging filter */ + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); + HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, + filt0, filt1, filt2, filt3, res0, res1); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + SRARI_H2_SH(res0, res1, FILTER_BITS); + SAT_SH2_SH(res0, res1, 7); + PCKEV_B2_UB(res0, res0, res1, res1, res2, res3); + ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); + XORI_B2_128_UB(res2, res3); + AVER_UB2_UB(res2, dst0, res3, dst2, res2, res3); + ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); +} + +static void common_hz_8t_and_aver_dst_4x8_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, res0, res1, res2, res3; + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v8i16 filt, vec0, vec1, vec2, vec3; + + mask0 = LD_UB(&mc_filt_mask_arr[16]); + src -= 3; + + /* rearranging filter */ + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); + src += (4 * src_stride); + LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); + HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, + filt0, filt1, filt2, filt3, vec0, vec1); + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); + HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, + filt0, filt1, filt2, filt3, vec2, vec3); + SRARI_H4_SH(vec0, vec1, vec2, vec3, FILTER_BITS); + SAT_SH4_SH(vec0, vec1, vec2, vec3, 7); + PCKEV_B4_UB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, res0, res1, res2, + res3); + ILVR_D2_UB(res1, res0, res3, res2, res0, res2); + XORI_B2_128_UB(res0, res2); + ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2, dst4, + dst6); + ILVR_D2_UB(dst2, dst0, dst6, dst4, dst0, dst4); + AVER_UB2_UB(res0, dst0, res2, dst4, res0, res2); + ST4x8_UB(res0, res2, dst, dst_stride); +} + +static void common_hz_8t_and_aver_dst_4w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + if (4 == height) { + 
common_hz_8t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); + } else if (8 == height) { + common_hz_8t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); + } +} + +static void common_hz_8t_and_aver_dst_8w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + int32_t loop_cnt; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, dst0, dst1, dst2, dst3; + v8i16 filt, out0, out1, out2, out3; + + mask0 = LD_UB(&mc_filt_mask_arr[0]); + src -= 3; + + /* rearranging filter */ + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); + src += (4 * src_stride); + HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, + mask3, filt0, filt1, filt2, filt3, out0, out1, + out2, out3); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3, + dst, dst_stride); + dst += (4 * dst_stride); + } +} + +static void common_hz_8t_and_aver_dst_16w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + int32_t loop_cnt; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, dst0, dst1; + v8i16 filt, out0, out1, out2, out3; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; + + mask0 = LD_UB(&mc_filt_mask_arr[0]); + src -= 3; + + /* rearranging filter */ + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + for (loop_cnt = height >> 1; loop_cnt--;) { + LD_SB2(src, src_stride, src0, src2); + LD_SB2(src + 8, src_stride, src1, src3); + src += (2 * src_stride); + + XORI_B4_128_SB(src0, src1, src2, src3); + VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12); + VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13); + VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, + vec14); + VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, + vec15); + DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, + vec9, vec10, vec11); + DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1, + vec2, vec3); + DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, + vec9, vec10, vec11); + ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, + out2, out3); + LD_UB2(dst, dst_stride, dst0, dst1); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + PCKEV_XORI128_AVG_ST_UB(out1, out0, dst0, dst); + dst += dst_stride; + PCKEV_XORI128_AVG_ST_UB(out3, out2, dst1, dst); + dst += dst_stride; + } +} + +static void common_hz_8t_and_aver_dst_32w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, 
filt3; + v16u8 dst1, dst2, mask0, mask1, mask2, mask3; + v8i16 filt, out0, out1, out2, out3; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; + + mask0 = LD_UB(&mc_filt_mask_arr[0]); + src -= 3; + + /* rearranging filter */ + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + for (loop_cnt = height; loop_cnt--;) { + src0 = LD_SB(src); + src2 = LD_SB(src + 16); + src3 = LD_SB(src + 24); + src1 = __msa_sldi_b(src2, src0, 8); + src += src_stride; + + XORI_B4_128_SB(src0, src1, src2, src3); + VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12); + VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13); + VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, + vec14); + VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, + vec15); + DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, + vec9, vec10, vec11); + DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1, + vec2, vec3); + DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, + vec9, vec10, vec11); + ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, + out2, out3); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + LD_UB2(dst, 16, dst1, dst2); + PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, dst); + PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, dst + 16); + dst += dst_stride; + } +} + +static void common_hz_8t_and_aver_dst_64w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt, cnt; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 dst1, dst2, mask0, mask1, mask2, mask3; + v8i16 filt, out0, out1, out2, out3; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; + + mask0 = LD_UB(&mc_filt_mask_arr[0]); + src -= 3; + + /* rearranging filter */ + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + for (loop_cnt = height; loop_cnt--;) { + for (cnt = 0; cnt < 2; ++cnt) { + src0 = LD_SB(&src[cnt << 5]); + src2 = LD_SB(&src[16 + (cnt << 5)]); + src3 = LD_SB(&src[24 + (cnt << 5)]); + src1 = __msa_sldi_b(src2, src0, 8); + + XORI_B4_128_SB(src0, src1, src2, src3); + VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, + vec12); + VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, + vec13); + VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, + vec14); + VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, + vec15); + DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, + vec1, vec2, vec3); + DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, + vec9, vec10, vec11); + DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, + vec1, vec2, vec3); + DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, + vec9, vec10, vec11); + ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, + out2, out3); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, 
out2, out3, 7); + LD_UB2(&dst[cnt << 5], 16, dst1, dst2); + PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, &dst[cnt << 5]); + PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, &dst[16 + (cnt << 5)]); + } + + src += src_stride; + dst += dst_stride; + } +} + +static void common_hz_2t_and_aver_dst_4x4_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16i8 src0, src1, src2, src3, mask; + v16u8 filt0, dst0, dst1, dst2, dst3, vec0, vec1, res0, res1; + v8u16 vec2, vec3, const255, filt; + + mask = LD_SB(&mc_filt_mask_arr[16]); + + /* rearranging filter */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + const255 = (v8u16)__msa_ldi_h(255); + + LD_SB4(src, src_stride, src0, src1, src2, src3); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); + SRARI_H2_UH(vec2, vec3, FILTER_BITS); + MIN_UH2_UH(vec2, vec3, const255); + PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1); + ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); + AVER_UB2_UB(res0, dst0, res1, dst2, res0, res1); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); +} + +static void common_hz_2t_and_aver_dst_4x8_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; + v16u8 filt0, vec0, vec1, vec2, vec3, res0, res1, res2, res3; + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v8u16 vec4, vec5, vec6, vec7, const255, filt; + + mask = LD_SB(&mc_filt_mask_arr[16]); + + /* rearranging filter */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + const255 = (v8u16)__msa_ldi_h(255); + + LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); + LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); + VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); + VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5, + vec6, vec7); + SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); + MIN_UH4_UH(vec4, vec5, vec6, vec7, const255); + PCKEV_B4_UB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2, + res3); + ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2, dst4, + dst6); + AVER_UB4_UB(res0, dst0, res1, dst2, res2, dst4, res3, dst6, res0, res1, res2, + res3); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); + dst += (4 * dst_stride); + ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); +} + +static void common_hz_2t_and_aver_dst_4w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + if (4 == height) { + common_hz_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); + } else if (8 == height) { + common_hz_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); + } +} + +static void common_hz_2t_and_aver_dst_8x4_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16i8 src0, src1, src2, src3, mask; + v16u8 filt0, dst0, dst1, dst2, dst3; + v8u16 vec0, vec1, vec2, vec3, const255, filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + const255 = (v8u16)__msa_ldi_h(255); + + LD_SB4(src, src_stride, src0, src1, src2, src3); + VSHF_B2_UH(src0, 
src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, + dst, dst_stride); +} + +static void common_hz_2t_and_aver_dst_8x8mult_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + v16i8 src0, src1, src2, src3, mask; + v16u8 filt0, dst0, dst1, dst2, dst3; + v8u16 vec0, vec1, vec2, vec3, const255, filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + const255 = (v8u16)__msa_ldi_h(255); + + LD_SB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + LD_SB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, + dst, dst_stride); + dst += (4 * dst_stride); + + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, + dst, dst_stride); + dst += (4 * dst_stride); + + if (16 == height) { + LD_SB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + LD_SB4(src, src_stride, src0, src1, src2, src3); + PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, + dst, dst_stride); + dst += (4 * dst_stride); + + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, + dst, dst_stride); + } +} + +static void common_hz_2t_and_aver_dst_8w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + if (4 == height) { + common_hz_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter); + } else { + common_hz_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride, + filter, height); + } +} + +static void 
common_hz_2t_and_aver_dst_16w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; + v16u8 filt0, dst0, dst1, dst2, dst3; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 res0, res1, res2, res3, res4, res5, res6, res7, const255, filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + const255 = (v8u16)__msa_ldi_h(255); + + LD_SB4(src, src_stride, src0, src2, src4, src6); + LD_SB4(src + 8, src_stride, src1, src3, src5, src7); + src += (4 * src_stride); + + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, + res2, res3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, + res6, res7); + SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); + SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + MIN_UH4_UH(res0, res1, res2, res3, const255); + MIN_UH4_UH(res4, res5, res6, res7, const255); + PCKEV_AVG_ST_UB(res1, res0, dst0, dst); + dst += dst_stride; + PCKEV_AVG_ST_UB(res3, res2, dst1, dst); + dst += dst_stride; + PCKEV_AVG_ST_UB(res5, res4, dst2, dst); + dst += dst_stride; + PCKEV_AVG_ST_UB(res7, res6, dst3, dst); + dst += dst_stride; + + for (loop_cnt = (height >> 2) - 1; loop_cnt--;) { + LD_SB4(src, src_stride, src0, src2, src4, src6); + LD_SB4(src + 8, src_stride, src1, src3, src5, src7); + src += (4 * src_stride); + + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, + res2, res3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, + res6, res7); + SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); + SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + MIN_UH4_UH(res0, res1, res2, res3, const255); + MIN_UH4_UH(res4, res5, res6, res7, const255); + PCKEV_AVG_ST_UB(res1, res0, dst0, dst); + dst += dst_stride; + PCKEV_AVG_ST_UB(res3, res2, dst1, dst); + dst += dst_stride; + PCKEV_AVG_ST_UB(res5, res4, dst2, dst); + dst += dst_stride; + PCKEV_AVG_ST_UB(res7, res6, dst3, dst); + dst += dst_stride; + } +} + +static void common_hz_2t_and_aver_dst_32w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; + v16u8 filt0, dst0, dst1, dst2, dst3; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 res0, res1, res2, res3, res4, res5, res6, res7, const255, filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + const255 = (v8u16)__msa_ldi_h(255); + + for (loop_cnt = (height >> 1); loop_cnt--;) { + src0 = LD_SB(src); + src2 = LD_SB(src + 16); + src3 = LD_SB(src + 24); + src1 = __msa_sldi_b(src2, src0, 8); + 
src += src_stride; + src4 = LD_SB(src); + src6 = LD_SB(src + 16); + src7 = LD_SB(src + 24); + src5 = __msa_sldi_b(src6, src4, 8); + src += src_stride; + + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, + res2, res3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, + res6, res7); + SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); + SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); + MIN_UH4_UH(res0, res1, res2, res3, const255); + MIN_UH4_UH(res4, res5, res6, res7, const255); + LD_UB2(dst, 16, dst0, dst1); + PCKEV_AVG_ST_UB(res1, res0, dst0, dst); + PCKEV_AVG_ST_UB(res3, res2, dst1, (dst + 16)); + dst += dst_stride; + LD_UB2(dst, 16, dst2, dst3); + PCKEV_AVG_ST_UB(res5, res4, dst2, dst); + PCKEV_AVG_ST_UB(res7, res6, dst3, (dst + 16)); + dst += dst_stride; + } +} + +static void common_hz_2t_and_aver_dst_64w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; + v16u8 filt0, dst0, dst1, dst2, dst3; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 out0, out1, out2, out3, out4, out5, out6, out7, const255, filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + const255 = (v8u16)__msa_ldi_h(255); + + for (loop_cnt = height; loop_cnt--;) { + LD_SB4(src, 16, src0, src2, src4, src6); + src7 = LD_SB(src + 56); + SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8); + src += src_stride; + + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, + out2, out3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, + out6, out7); + SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); + SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); + LD_UB4(dst, 16, dst0, dst1, dst2, dst3); + MIN_UH4_UH(out0, out1, out2, out3, const255); + MIN_UH4_UH(out4, out5, out6, out7, const255); + PCKEV_AVG_ST_UB(out1, out0, dst0, dst); + PCKEV_AVG_ST_UB(out3, out2, dst1, dst + 16); + PCKEV_AVG_ST_UB(out5, out4, dst2, dst + 32); + PCKEV_AVG_ST_UB(out7, out6, dst3, dst + 48); + dst += dst_stride; + } +} + +void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + int8_t cnt, filt_hor[8]; + + if (16 != x_step_q4) { + vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + return; + } + + if (((const int32_t *)filter_x)[1] == 0x800000) { + vp9_convolve_avg(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + return; + } + + for (cnt = 0; cnt < 8; ++cnt) { + filt_hor[cnt] = filter_x[cnt]; + } + + if (((const int32_t *)filter_x)[0] == 0) { + switch (w) { + case 4: + common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, 
+ dst, (int32_t)dst_stride, + &filt_hor[3], h); + break; + case 8: + common_hz_2t_and_aver_dst_8w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], h); + break; + case 16: + common_hz_2t_and_aver_dst_16w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], h); + break; + case 32: + common_hz_2t_and_aver_dst_32w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], h); + break; + case 64: + common_hz_2t_and_aver_dst_64w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], h); + break; + default: + vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + break; + } + } else { + switch (w) { + case 4: + common_hz_8t_and_aver_dst_4w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, h); + break; + case 8: + common_hz_8t_and_aver_dst_8w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, h); + break; + case 16: + common_hz_8t_and_aver_dst_16w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, h); + break; + case 32: + common_hz_8t_and_aver_dst_32w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, h); + break; + case 64: + common_hz_8t_and_aver_dst_64w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, h); + break; + default: + vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + break; + } + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_msa.c new file mode 100644 index 00000000000..e9f3a9dc3f2 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_msa.c @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_convolve_msa.h" + +static void common_hv_8ht_8vt_and_aver_dst_4w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; + v16u8 dst0, dst1, dst2, dst3, mask0, mask1, mask2, mask3, tmp0, tmp1; + v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; + v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; + v8i16 hz_out7, hz_out8, hz_out9, res0, res1, vec0, vec1, vec2, vec3, vec4; + v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3; + + mask0 = LD_UB(&mc_filt_mask_arr[16]); + src -= (3 + 3 * src_stride); + + /* rearranging filter */ + filt = LD_SH(filter_horiz); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); + src += (7 * src_stride); + + hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8); + + filt = LD_SH(filter_vert); + SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); + + ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + vec2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src7, src8, src9, src10); + XORI_B4_128_SB(src7, src8, src9, src10); + src += (4 * src_stride); + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8); + vec3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); + res0 = FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + hz_out8 = (v8i16)__msa_sldi_b((v16i8)hz_out9, (v16i8)hz_out7, 8); + vec4 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); + res1 = FILT_8TAP_DPADD_S_H(vec1, vec2, vec3, vec4, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); + + SRARI_H2_SH(res0, res1, FILTER_BITS); + SAT_SH2_SH(res0, res1, 7); + PCKEV_B2_UB(res0, res0, res1, res1, tmp0, tmp1); + XORI_B2_128_UB(tmp0, tmp1); + AVER_UB2_UB(tmp0, dst0, tmp1, dst2, tmp0, tmp1); + ST4x4_UB(tmp0, tmp1, 0, 1, 0, 1, dst, dst_stride); + dst += (4 * dst_stride); + + hz_out5 = hz_out9; + vec0 = vec2; + vec1 = vec3; + vec2 = vec4; + } +} + +static void common_hv_8ht_8vt_and_aver_dst_8w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; + v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; + v8i16 filt, 
filt_vt0, filt_vt1, filt_vt2, filt_vt3; + v16u8 dst0, dst1, dst2, dst3, mask0, mask1, mask2, mask3; + v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; + v8i16 hz_out7, hz_out8, hz_out9, hz_out10, tmp0, tmp1, tmp2, tmp3; + v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9; + + mask0 = LD_UB(&mc_filt_mask_arr[0]); + src -= (3 + 3 * src_stride); + + /* rearranging filter */ + filt = LD_SH(filter_horiz); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); + + mask1 = mask0 + 2; + mask2 = mask0 + 4; + mask3 = mask0 + 6; + + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + src += (7 * src_stride); + + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); + hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + + filt = LD_SH(filter_vert); + SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); + + ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); + ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4); + ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src7, src8, src9, src10); + XORI_B4_128_SB(src7, src8, src9, src10); + src += (4 * src_stride); + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + + hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); + tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7); + tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out8 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); + tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out9 = (v8i16)__msa_ilvev_b((v16i8)hz_out10, (v16i8)hz_out9); + tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); + CONVERT_UB_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst2, dst3, + dst, dst_stride); + dst += (4 * dst_stride); + + hz_out6 = hz_out10; + out0 = out2; + out1 = out3; + out2 = out8; + out4 = out6; + out5 = out7; + out6 = out9; + } +} + +static void common_hv_8ht_8vt_and_aver_dst_16w_msa(const uint8_t *src, + int32_t 
src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + int32_t multiple8_cnt; + for (multiple8_cnt = 2; multiple8_cnt--;) { + common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert, height); + src += 8; + dst += 8; + } +} + +static void common_hv_8ht_8vt_and_aver_dst_32w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + int32_t multiple8_cnt; + for (multiple8_cnt = 4; multiple8_cnt--;) { + common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert, height); + src += 8; + dst += 8; + } +} + +static void common_hv_8ht_8vt_and_aver_dst_64w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + int32_t multiple8_cnt; + for (multiple8_cnt = 8; multiple8_cnt--;) { + common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert, height); + src += 8; + dst += 8; + } +} + +static void common_hv_2ht_2vt_and_aver_dst_4x4_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert) { + v16i8 src0, src1, src2, src3, src4, mask; + v16u8 filt_hz, filt_vt, vec0, vec1; + v16u8 dst0, dst1, dst2, dst3, res0, res1; + v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1, filt; + + mask = LD_SB(&mc_filt_mask_arr[16]); + + /* rearranging filter */ + filt = LD_UH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); + + filt = LD_UH(filter_vert); + filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); + + LD_SB5(src, src_stride, src0, src1, src2, src3, src4); + + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); + hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); + hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1); + AVER_UB2_UB(res0, dst0, res1, dst2, res0, res1); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); +} + +static void common_hv_2ht_2vt_and_aver_dst_4x8_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert) { + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask; + v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3, res0, res1, res2, res3; + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; + v8u16 hz_out7, hz_out8, tmp0, tmp1, tmp2, tmp3; + v8i16 filt; + + mask = LD_SB(&mc_filt_mask_arr[16]); + + /* rearranging filter */ + filt = LD_SH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h(filt, 0); + + filt = LD_SH(filter_vert); + filt_vt = (v16u8)__msa_splati_h(filt, 0); + + LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); + src += (8 * src_stride); + src8 = LD_SB(src); + + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, 
FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); + hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, FILTER_BITS); + hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, FILTER_BITS); + hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS); + SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1, + hz_out3, hz_out5, 8); + hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6); + + LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); + ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2, + dst4, dst6); + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt, + tmp0, tmp1, tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_B4_UB(tmp0, tmp0, tmp1, tmp1, tmp2, tmp2, tmp3, tmp3, res0, res1, + res2, res3); + AVER_UB4_UB(res0, dst0, res1, dst2, res2, dst4, res3, dst6, res0, res1, + res2, res3); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); + dst += (4 * dst_stride); + ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); +} + +static void common_hv_2ht_2vt_and_aver_dst_4w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + if (4 == height) { + common_hv_2ht_2vt_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert); + } else if (8 == height) { + common_hv_2ht_2vt_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert); + } +} + +static void common_hv_2ht_2vt_and_aver_dst_8x4_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert) { + v16i8 src0, src1, src2, src3, src4, mask; + v16u8 filt_hz, filt_vt, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; + v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; + v8i16 filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_SH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h(filt, 0); + + filt = LD_SH(filter_vert); + filt_vt = (v16u8)__msa_splati_h(filt, 0); + + LD_SB5(src, src_stride, src0, src1, src2, src3, src4); + src += (5 * src_stride); + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); + hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp0 = __msa_dotp_u_h(vec0, filt_vt); + + hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); + vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp1 = __msa_dotp_u_h(vec1, filt_vt); + + hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); + vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp2 = __msa_dotp_u_h(vec2, filt_vt); + + hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp3 = __msa_dotp_u_h(vec3, filt_vt); + + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3, + dst, dst_stride); +} + +static void common_hv_2ht_2vt_and_aver_dst_8x8mult_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t 
dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, mask; + v16u8 filt_hz, filt_vt, vec0, dst0, dst1, dst2, dst3; + v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; + v8i16 filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_SH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h(filt, 0); + + filt = LD_SH(filter_vert); + filt_vt = (v16u8)__msa_splati_h(filt, 0); + + src0 = LD_SB(src); + src += src_stride; + + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src1, src2, src3, src4); + src += (4 * src_stride); + + hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp0 = __msa_dotp_u_h(vec0, filt_vt); + + hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp1 = __msa_dotp_u_h(vec0, filt_vt); + + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + + hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp2 = __msa_dotp_u_h(vec0, filt_vt); + + hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp3 = __msa_dotp_u_h(vec0, filt_vt); + + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3, + dst, dst_stride); + dst += (4 * dst_stride); + } +} + +static void common_hv_2ht_2vt_and_aver_dst_8w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + if (4 == height) { + common_hv_2ht_2vt_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert); + } else { + common_hv_2ht_2vt_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert, + height); + } +} + +static void common_hv_2ht_2vt_and_aver_dst_16w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; + v16u8 filt_hz, filt_vt, vec0, vec1, dst0, dst1, dst2, dst3; + v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1; + v8i16 filt; + + mask = LD_SB(&mc_filt_mask_arr[0]); + + /* rearranging filter */ + filt = LD_SH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h(filt, 0); + + filt = LD_SH(filter_vert); + filt_vt = (v16u8)__msa_splati_h(filt, 0); + + LD_SB2(src, 8, src0, src1); + src += src_stride; + + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src0, src2, src4, src6); + LD_SB4(src + 8, src_stride, src1, src3, src5, src7); + src += (4 * src_stride); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + + hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); + hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); + 
SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); + dst += dst_stride; + + hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst1, dst); + dst += dst_stride; + + hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst); + dst += dst_stride; + + hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst3, dst); + dst += dst_stride; + } +} + +static void common_hv_2ht_2vt_and_aver_dst_32w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + int32_t multiple8_cnt; + for (multiple8_cnt = 2; multiple8_cnt--;) { + common_hv_2ht_2vt_and_aver_dst_16w_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert, height); + src += 16; + dst += 16; + } +} + +static void common_hv_2ht_2vt_and_aver_dst_64w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter_horiz, + int8_t *filter_vert, + int32_t height) { + int32_t multiple8_cnt; + for (multiple8_cnt = 4; multiple8_cnt--;) { + common_hv_2ht_2vt_and_aver_dst_16w_msa(src, src_stride, dst, dst_stride, + filter_horiz, filter_vert, height); + src += 16; + dst += 16; + } +} + +void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + int8_t cnt, filt_hor[8], filt_ver[8]; + + if (16 != x_step_q4 || 16 != y_step_q4) { + vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + return; + } + + if (((const int32_t *)filter_x)[1] == 0x800000 && + ((const int32_t *)filter_y)[1] == 0x800000) { + vp9_convolve_avg(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + return; + } + + for (cnt = 0; cnt < 8; ++cnt) { + filt_hor[cnt] = filter_x[cnt]; + filt_ver[cnt] = filter_y[cnt]; + } + + if (((const int32_t *)filter_x)[0] == 0 && + ((const int32_t *)filter_y)[0] == 0) { + switch (w) { + case 4: + common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], &filt_ver[3], h); + break; + case 8: + common_hv_2ht_2vt_and_aver_dst_8w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], &filt_ver[3], h); + break; + case 16: + common_hv_2ht_2vt_and_aver_dst_16w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], &filt_ver[3], h); + break; + case 32: + common_hv_2ht_2vt_and_aver_dst_32w_msa(src, (int32_t)src_stride, + dst, 
(int32_t)dst_stride, + &filt_hor[3], &filt_ver[3], h); + break; + case 64: + common_hv_2ht_2vt_and_aver_dst_64w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_hor[3], &filt_ver[3], h); + break; + default: + vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + break; + } + } else if (((const int32_t *)filter_x)[0] == 0 || + ((const int32_t *)filter_y)[0] == 0) { + vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + } else { + switch (w) { + case 4: + common_hv_8ht_8vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, filt_ver, h); + break; + case 8: + common_hv_8ht_8vt_and_aver_dst_8w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, filt_ver, h); + break; + case 16: + common_hv_8ht_8vt_and_aver_dst_16w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, filt_ver, h); + break; + case 32: + common_hv_8ht_8vt_and_aver_dst_32w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, filt_ver, h); + break; + case 64: + common_hv_8ht_8vt_and_aver_dst_64w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_hor, filt_ver, h); + break; + default: + vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + break; + } + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c new file mode 100644 index 00000000000..85dfd30c7de --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c @@ -0,0 +1,753 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_convolve_msa.h" + +static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; + v16u8 dst0, dst1, dst2, dst3, out; + v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; + v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776; + v16i8 src10998, filt0, filt1, filt2, filt3; + v8i16 filt, out10, out32; + + src -= (3 * src_stride); + + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + src += (7 * src_stride); + + ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, + src54_r, src21_r); + ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); + ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110, + src4332, src6554); + XORI_B3_128_SB(src2110, src4332, src6554); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src7, src8, src9, src10); + src += (4 * src_stride); + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, + src87_r, src98_r, src109_r); + ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998); + XORI_B2_128_SB(src8776, src10998); + out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0, + filt1, filt2, filt3); + out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0, + filt1, filt2, filt3); + SRARI_H2_SH(out10, out32, FILTER_BITS); + SAT_SH2_SH(out10, out32, 7); + out = PCKEV_XORI128_UB(out10, out32); + ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); + + dst0 = (v16u8)__msa_ilvr_d((v2i64)dst2, (v2i64)dst0); + out = __msa_aver_u_b(out, dst0); + + ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); + dst += (4 * dst_stride); + + src2110 = src6554; + src4332 = src8776; + src6554 = src10998; + src6 = src10; + } +} + +static void common_vt_8t_and_aver_dst_8w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; + v16u8 dst0, dst1, dst2, dst3; + v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; + v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3; + v8i16 filt, out0, out1, out2, out3; + + src -= (3 * src_stride); + + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + src += (7 * src_stride); + + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); + ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, + src54_r, src21_r); + ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src7, src8, src9, src10); + src += (4 * src_stride); + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + XORI_B4_128_SB(src7, src8, src9, src10); + ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, + src87_r, src98_r, src109_r); + out0 = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, + filt1, filt2, filt3); + out1 = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, + filt1, filt2, filt3); + out2 = 
FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, + filt1, filt2, filt3); + out3 = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, + filt1, filt2, filt3); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3, + dst, dst_stride); + dst += (4 * dst_stride); + + src10_r = src54_r; + src32_r = src76_r; + src54_r = src98_r; + src21_r = src65_r; + src43_r = src87_r; + src65_r = src109_r; + src6 = src10; + } +} + +static void common_vt_8t_and_aver_dst_16w_mult_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height, + int32_t width) { + const uint8_t *src_tmp; + uint8_t *dst_tmp; + uint32_t loop_cnt, cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; + v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; + v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l; + v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l; + v16i8 filt0, filt1, filt2, filt3; + v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3; + v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l, filt; + + src -= (3 * src_stride); + + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + for (cnt = (width >> 4); cnt--;) { + src_tmp = src; + dst_tmp = dst; + + LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6); + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); + src_tmp += (7 * src_stride); + + ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, + src54_r, src21_r); + ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); + ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l, + src54_l, src21_l); + ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src_tmp, src_stride, src7, src8, src9, src10); + src_tmp += (4 * src_stride); + + LD_UB4(dst_tmp, dst_stride, dst0, dst1, dst2, dst3); + XORI_B4_128_SB(src7, src8, src9, src10); + ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, + src87_r, src98_r, src109_r); + ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l, + src87_l, src98_l, src109_l); + out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, + filt1, filt2, filt3); + out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, + filt1, filt2, filt3); + out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, + filt1, filt2, filt3); + out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, + filt1, filt2, filt3); + out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0, + filt1, filt2, filt3); + out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0, + filt1, filt2, filt3); + out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0, + filt1, filt2, filt3); + out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0, + filt1, filt2, filt3); + SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); + SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS); + SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); + SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); + PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, + out3_r, tmp0, tmp1, tmp2, tmp3); + XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); + AVER_UB4_UB(tmp0, dst0, 
tmp1, dst1, tmp2, dst2, tmp3, dst3, dst0, dst1, + dst2, dst3); + ST_UB4(dst0, dst1, dst2, dst3, dst_tmp, dst_stride); + dst_tmp += (4 * dst_stride); + + src10_r = src54_r; + src32_r = src76_r; + src54_r = src98_r; + src21_r = src65_r; + src43_r = src87_r; + src65_r = src109_r; + src10_l = src54_l; + src32_l = src76_l; + src54_l = src98_l; + src21_l = src65_l; + src43_l = src87_l; + src65_l = src109_l; + src6 = src10; + } + + src += 16; + dst += 16; + } +} + +static void common_vt_8t_and_aver_dst_16w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride, + filter, height, 16); +} + +static void common_vt_8t_and_aver_dst_32w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride, + filter, height, 32); +} + +static void common_vt_8t_and_aver_dst_64w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride, + filter, height, 64); +} + +static void common_vt_2t_and_aver_dst_4x4_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16i8 src0, src1, src2, src3, src4; + v16u8 dst0, dst1, dst2, dst3, out, filt0, src2110, src4332; + v16i8 src10_r, src32_r, src21_r, src43_r; + v8i16 filt; + v8u16 tmp0, tmp1; + + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); + + LD_SB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + + src4 = LD_SB(src); + src += src_stride; + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1); + dst0 = (v16u8)__msa_ilvr_d((v2i64)dst1, (v2i64)dst0); + ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, + src32_r, src43_r); + ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); + DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + + out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); + out = __msa_aver_u_b(out, dst0); + + ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); +} + +static void common_vt_2t_and_aver_dst_4x8_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src87_r; + v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r; + v16u8 src2110, src4332, src6554, src8776, filt0; + v8u16 tmp0, tmp1, tmp2, tmp3; + v8i16 filt; + + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); + + LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); + src += (8 * src_stride); + src8 = LD_SB(src); + + LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); + ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst1, + dst2, dst3); + ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1); + ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, + src32_r, src43_r); + ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, + src76_r, src87_r); + ILVR_D4_UB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, + src87_r, src76_r, src2110, src4332, src6554, 
src8776); + DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0, + tmp0, tmp1, tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); + AVER_UB2_UB(src2110, dst0, src4332, dst1, src2110, src4332); + ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); + dst += (4 * dst_stride); + ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst, dst_stride); +} + +static void common_vt_2t_and_aver_dst_4w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + if (4 == height) { + common_vt_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); + } else if (8 == height) { + common_vt_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); + } +} + +static void common_vt_2t_and_aver_dst_8x4_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter) { + v16u8 src0, src1, src2, src3, src4; + v16u8 dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3, filt0; + v8u16 tmp0, tmp1, tmp2, tmp3; + v8i16 filt; + + /* rearranging filter_y */ + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); + + LD_UB5(src, src_stride, src0, src1, src2, src3, src4); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); + ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, + tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3, + dst, dst_stride); +} + +static void common_vt_2t_and_aver_dst_8x8mult_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; + v16u8 dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; + v8u16 tmp0, tmp1, tmp2, tmp3; + v8i16 filt; + + /* rearranging filter_y */ + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); + + src0 = LD_UB(src); + src += src_stride; + + for (loop_cnt = (height >> 3); loop_cnt--;) { + LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); + src += (8 * src_stride); + LD_UB8(dst, dst_stride, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8); + + ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, + vec2, vec3); + ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, + vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, + tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_AVG_ST8x4_UB(tmp0, dst1, tmp1, dst2, tmp2, dst3, tmp3, dst4, + dst, dst_stride); + dst += (4 * dst_stride); + + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1, + tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_AVG_ST8x4_UB(tmp0, dst5, tmp1, dst6, tmp2, dst7, tmp3, dst8, + dst, dst_stride); + dst += (4 * dst_stride); + + src0 = src8; + } +} + +static void common_vt_2t_and_aver_dst_8w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + if (4 == height) { + 
common_vt_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter); + } else { + common_vt_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride, + filter, height); + } +} + +static void common_vt_2t_and_aver_dst_16w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2, dst3, filt0; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 tmp0, tmp1, tmp2, tmp3, filt; + + /* rearranging filter_y */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + src0 = LD_UB(src); + src += src_stride; + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_UB4(src, src_stride, src1, src2, src3, src4); + src += (4 * src_stride); + + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); + ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); + dst += dst_stride; + + ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); + ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst); + dst += dst_stride; + + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst); + dst += dst_stride; + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst); + dst += dst_stride; + + src0 = src4; + } +} + +static void common_vt_2t_and_aver_dst_32w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9; + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; + v8u16 tmp0, tmp1, tmp2, tmp3, filt; + + /* rearranging filter_y */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + LD_UB2(src, 16, src0, src5); + src += src_stride; + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_UB4(src, src_stride, src1, src2, src3, src4); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); + ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); + + LD_UB4(src + 16, src_stride, src6, src7, src8, src9); + LD_UB4(dst + 16, dst_stride, dst4, dst5, dst6, dst7); + src += (4 * src_stride); + + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); + + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride); + + ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); + ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst + 2 * dst_stride); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); 
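/*
 * A scalar sketch of what the DOTP_UB*_UH -> SRARI_H*_UH -> SAT_UH*_UH ->
 * __msa_aver_u_b sequence in the 2-tap vertical "and_aver_dst" kernels above
 * computes for a single output pixel, assuming FILTER_BITS == 7 and the two
 * non-zero bilinear taps passed in via &filt_ver[3].  The helper name and
 * signature are illustrative only, not part of libvpx or of this change.
 */
#include <stdint.h>

#define SKETCH_FILTER_BITS 7

static uint8_t vt_2t_aver_dst_pixel_sketch(uint8_t above, uint8_t below,
                                           uint8_t f0, uint8_t f1,
                                           uint8_t dst) {
  /* 2-tap vertical filter: weighted sum of two vertically adjacent pixels
   * (DOTP_UB*_UH, sixteen lanes at a time in the vector code). */
  int32_t sum = above * f0 + below * f1;
  /* SRARI: arithmetic shift right by FILTER_BITS with round-to-nearest. */
  int32_t filtered =
      (sum + (1 << (SKETCH_FILTER_BITS - 1))) >> SKETCH_FILTER_BITS;
  /* SAT_UH ... 7: saturate to the unsigned 8-bit range. */
  if (filtered > 255) filtered = 255;
  /* aver_u_b: round-to-nearest average with the existing dst pixel. */
  return (uint8_t)((filtered + dst + 1) >> 1);
}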
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst + 3 * dst_stride); + + ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2); + ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 16); + + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 16 + dst_stride); + + ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6); + ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst6, dst + 16 + 2 * dst_stride); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_AVG_ST_UB(tmp3, tmp2, dst7, dst + 16 + 3 * dst_stride); + dst += (4 * dst_stride); + + src0 = src4; + src5 = src9; + } +} + +static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src, + int32_t src_stride, + uint8_t *dst, + int32_t dst_stride, + int8_t *filter, + int32_t height) { + uint32_t loop_cnt; + v16u8 src0, src1, src2, src3, src4, src5; + v16u8 src6, src7, src8, src9, src10, src11, filt0; + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v8u16 filt; + + /* rearranging filter_y */ + filt = LD_UH(filter); + filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + + LD_UB4(src, 16, src0, src3, src6, src9); + src += src_stride; + + for (loop_cnt = (height >> 1); loop_cnt--;) { + LD_UB2(src, src_stride, src1, src2); + LD_UB2(dst, dst_stride, dst0, dst1); + LD_UB2(src + 16, src_stride, src4, src5); + LD_UB2(dst + 16, dst_stride, dst2, dst3); + LD_UB2(src + 32, src_stride, src7, src8); + LD_UB2(dst + 32, dst_stride, dst4, dst5); + LD_UB2(src + 48, src_stride, src10, src11); + LD_UB2(dst + 48, dst_stride, dst6, dst7); + src += (2 * src_stride); + + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); + ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); + + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride); + + ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6); + ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); + SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); + SAT_UH2_UH(tmp4, tmp5, 7); + PCKEV_AVG_ST_UB(tmp5, tmp4, dst2, dst + 16); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); + SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); + SAT_UH2_UH(tmp6, tmp7, 7); + PCKEV_AVG_ST_UB(tmp7, tmp6, dst3, dst + 16 + dst_stride); + + ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2); + ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 32); + + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 32 + dst_stride); + + ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6); + ILVL_B2_UB(src10, src9, 
src11, src10, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); + SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); + SAT_UH2_UH(tmp4, tmp5, 7); + PCKEV_AVG_ST_UB(tmp5, tmp4, dst6, (dst + 48)); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); + SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); + SAT_UH2_UH(tmp6, tmp7, 7); + PCKEV_AVG_ST_UB(tmp7, tmp6, dst7, dst + 48 + dst_stride); + dst += (2 * dst_stride); + + src0 = src2; + src3 = src5; + src6 = src8; + src9 = src11; + } +} + +void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + int8_t cnt, filt_ver[8]; + + if (16 != y_step_q4) { + vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + return; + } + + if (((const int32_t *)filter_y)[1] == 0x800000) { + vp9_convolve_avg(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + return; + } + + for (cnt = 0; cnt < 8; ++cnt) { + filt_ver[cnt] = filter_y[cnt]; + } + + if (((const int32_t *)filter_y)[0] == 0) { + switch (w) { + case 4: + common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_ver[3], h); + break; + case 8: + common_vt_2t_and_aver_dst_8w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_ver[3], h); + break; + case 16: + common_vt_2t_and_aver_dst_16w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_ver[3], h); + break; + case 32: + common_vt_2t_and_aver_dst_32w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_ver[3], h); + break; + case 64: + common_vt_2t_and_aver_dst_64w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + &filt_ver[3], h); + break; + default: + vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + break; + } + } else { + switch (w) { + case 4: + common_vt_8t_and_aver_dst_4w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_ver, h); + break; + case 8: + common_vt_8t_and_aver_dst_8w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_ver, h); + break; + case 16: + common_vt_8t_and_aver_dst_16w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_ver, h); + + break; + case 32: + common_vt_8t_and_aver_dst_32w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_ver, h); + break; + case 64: + common_vt_8t_and_aver_dst_64w_msa(src, (int32_t)src_stride, + dst, (int32_t)dst_stride, + filt_ver, h); + break; + default: + vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); + break; + } + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c index e2247435e88..f175bf9b663 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c @@ -14,37 +14,29 @@ static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { - v16i8 filt0, filt1, filt2, filt3; - v16i8 src0, src1, src2, src3; - v16u8 mask0, mask1, mask2, mask3; + v16u8 mask0, mask1, mask2, mask3, out; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v8i16 filt, out0, 
out1; - mask0 = LOAD_UB(&mc_filt_mask_arr[16]); - + mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= 3; /* rearranging filter */ - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1); - - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - - PCKEV_2B_XORI128_STORE_4_BYTES_4(out0, out1, dst, dst_stride); + SRARI_H2_SH(out0, out1, FILTER_BITS); + SAT_SH2_SH(out0, out1, 7); + out = PCKEV_XORI128_UB(out0, out1); + ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride, @@ -52,47 +44,36 @@ static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride, int8_t *filter) { v16i8 filt0, filt1, filt2, filt3; v16i8 src0, src1, src2, src3; - v16u8 mask0, mask1, mask2, mask3; + v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; - mask0 = LOAD_UB(&mc_filt_mask_arr[16]); - + mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= 3; /* rearranging filter */ - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1); - - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out2, out3); - - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7); - out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7); - - PCKEV_2B_XORI128_STORE_4_BYTES_4(out0, out1, dst, dst_stride); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + out = PCKEV_XORI128_UB(out0, out1); + ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); - PCKEV_2B_XORI128_STORE_4_BYTES_4(out2, out3, dst, dst_stride); + out = PCKEV_XORI128_UB(out2, out3); + ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride, @@ -108,82 +89,64 @@ static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride, static void common_hz_8t_8x4_msa(const 
uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { - v16i8 filt0, filt1, filt2, filt3; - v16i8 src0, src1, src2, src3; - v16u8 mask0, mask1, mask2, mask3; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1; v8i16 filt, out0, out1, out2, out3; - mask0 = LOAD_UB(&mc_filt_mask_arr[0]); - + mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); - - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7); - out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7); - - PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + tmp0 = PCKEV_XORI128_UB(out0, out1); + tmp1 = PCKEV_XORI128_UB(out2, out3); + ST8x4_UB(tmp0, tmp1, dst, dst_stride); } static void common_hz_8t_8x8mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; - v16i8 filt0, filt1, filt2, filt3; - v16i8 src0, src1, src2, src3; - v16u8 mask0, mask1, mask2, mask3; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1; v8i16 filt, out0, out1, out2, out3; - mask0 = LOAD_UB(&mc_filt_mask_arr[0]); - + mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); + LD_SB4(src, src_stride, src0, src1, src2, src3); + XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); - - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7); - out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7); - - PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + tmp0 = PCKEV_XORI128_UB(out0, out1); + tmp1 = PCKEV_XORI128_UB(out2, out3); + ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); } } @@ -202,48 +165,36 @@ static void 
common_hz_8t_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; - v16i8 src0, src1, src2, src3; - v16i8 filt0, filt1, filt2, filt3; - v16u8 mask0, mask1, mask2, mask3; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; - mask0 = LOAD_UB(&mc_filt_mask_arr[0]); - + mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = (height >> 1); loop_cnt--;) { - src0 = LOAD_SB(src); - src1 = LOAD_SB(src + 8); - src += src_stride; - src2 = LOAD_SB(src); - src3 = LOAD_SB(src + 8); - src += src_stride; - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - + LD_SB2(src, src_stride, src0, src2); + LD_SB2(src + 8, src_stride, src1, src3); + XORI_B4_128_SB(src0, src1, src2, src3); + src += (2 * src_stride); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); - - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7); - out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7); - - PCKEV_B_XORI128_STORE_VEC(out1, out0, dst); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + out = PCKEV_XORI128_UB(out0, out1); + ST_UB(out, dst); dst += dst_stride; - PCKEV_B_XORI128_STORE_VEC(out3, out2, dst); + out = PCKEV_XORI128_UB(out2, out3); + ST_UB(out, dst); dst += dst_stride; } } @@ -252,68 +203,56 @@ static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; - v16i8 src0, src1, src2, src3; - v16i8 filt0, filt1, filt2, filt3; - v16u8 mask0, mask1, mask2, mask3; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; - mask0 = LOAD_UB(&mc_filt_mask_arr[0]); - + mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = (height >> 1); loop_cnt--;) { - src0 = LOAD_SB(src); - src2 = LOAD_SB(src + 16); - src3 = LOAD_SB(src + 24); - src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8); + src0 = LD_SB(src); + src2 = LD_SB(src + 16); + src3 = LD_SB(src + 24); + src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - + XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = 
SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7); - out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7); - - src0 = LOAD_SB(src); - src2 = LOAD_SB(src + 16); - src3 = LOAD_SB(src + 24); - src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8); + src0 = LD_SB(src); + src2 = LD_SB(src + 16); + src3 = LD_SB(src + 24); + src1 = __msa_sldi_b(src2, src0, 8); + src += src_stride; - PCKEV_B_XORI128_STORE_VEC(out1, out0, dst); - PCKEV_B_XORI128_STORE_VEC(out3, out2, (dst + 16)); + out = PCKEV_XORI128_UB(out0, out1); + ST_UB(out, dst); + out = PCKEV_XORI128_UB(out2, out3); + ST_UB(out, dst + 16); dst += dst_stride; - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - + XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); - - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7); - out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7); - - PCKEV_B_XORI128_STORE_VEC(out1, out0, dst); - PCKEV_B_XORI128_STORE_VEC(out3, out2, (dst + 16)); - - src += src_stride; + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + out = PCKEV_XORI128_UB(out0, out1); + ST_UB(out, dst); + out = PCKEV_XORI128_UB(out2, out3); + ST_UB(out, dst + 16); dst += dst_stride; } } @@ -321,50 +260,55 @@ static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride, static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { - uint32_t loop_cnt, cnt; - v16i8 src0, src1, src2, src3; - v16i8 filt0, filt1, filt2, filt3; - v16u8 mask0, mask1, mask2, mask3; + int32_t loop_cnt; + v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; + v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; - mask0 = LOAD_UB(&mc_filt_mask_arr[0]); - + mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = height; loop_cnt--;) { - for (cnt = 0; cnt < 2; ++cnt) { - src0 = LOAD_SB(&src[cnt << 5]); - src2 = LOAD_SB(&src[16 + (cnt << 5)]); - src3 = LOAD_SB(&src[24 + (cnt << 5)]); - src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8); - - XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128); - - HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, - mask3, filt0, filt1, filt2, filt3, out0, out1, - out2, out3); - - out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7); - out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7); - out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7); - out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7); - - PCKEV_B_XORI128_STORE_VEC(out1, out0, &dst[cnt << 5]); - PCKEV_B_XORI128_STORE_VEC(out3, out2, &dst[16 + (cnt << 5)]); - } + src0 = LD_SB(src); + src2 = LD_SB(src + 16); + src3 = LD_SB(src + 24); + src1 = __msa_sldi_b(src2, src0, 8); + XORI_B4_128_SB(src0, src1, src2, src3); + HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, + mask3, filt0, filt1, filt2, filt3, out0, out1, + 
out2, out3); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + out = PCKEV_XORI128_UB(out0, out1); + ST_UB(out, dst); + out = PCKEV_XORI128_UB(out2, out3); + ST_UB(out, dst + 16); + + src0 = LD_SB(src + 32); + src2 = LD_SB(src + 48); + src3 = LD_SB(src + 56); + src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; + + XORI_B4_128_SB(src0, src1, src2, src3); + HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, + mask3, filt0, filt1, filt2, filt3, out0, out1, + out2, out3); + SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); + SAT_SH4_SH(out0, out1, out2, out3, 7); + out = PCKEV_XORI128_UB(out0, out1); + ST_UB(out, dst + 32); + out = PCKEV_XORI128_UB(out2, out3); + ST_UB(out, dst + 48); dst += dst_stride; } } @@ -372,124 +316,55 @@ static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride, static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { - uint32_t out0, out1, out2, out3; v16i8 src0, src1, src2, src3, mask; - v16u8 vec0, vec1, filt0; - v16i8 res0, res1; + v16u8 filt0, vec0, vec1, res0, res1; v8u16 vec2, vec3, filt, const255; - mask = LOAD_SB(&mc_filt_mask_arr[16]); + mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); - - const255 = (v8u16)__msa_ldi_h(255); - - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); - - vec0 = (v16u8)__msa_vshf_b(mask, src1, src0); - vec1 = (v16u8)__msa_vshf_b(mask, src3, src2); - - vec2 = __msa_dotp_u_h(vec0, filt0); - vec3 = __msa_dotp_u_h(vec1, filt0); - - vec2 = (v8u16)__msa_srari_h((v8i16)vec2, FILTER_BITS); - vec3 = (v8u16)__msa_srari_h((v8i16)vec3, FILTER_BITS); - - vec2 = __msa_min_u_h(vec2, const255); - vec3 = __msa_min_u_h(vec3, const255); - - res0 = __msa_pckev_b((v16i8)vec2, (v16i8)vec2); - res1 = __msa_pckev_b((v16i8)vec3, (v16i8)vec3); - - out0 = __msa_copy_u_w((v4i32)res0, 0); - out1 = __msa_copy_u_w((v4i32)res0, 1); - out2 = __msa_copy_u_w((v4i32)res1, 0); - out3 = __msa_copy_u_w((v4i32)res1, 1); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); + filt = LD_UH(filter); + filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); + + const255 = (v8u16) __msa_ldi_h(255); + + LD_SB4(src, src_stride, src0, src1, src2, src3); + VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); + SRARI_H2_UH(vec2, vec3, FILTER_BITS); + MIN_UH2_UH(vec2, vec3, const255); + PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); } static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { - uint32_t out0, out1, out2, out3; - v16u8 filt0; + v16u8 vec0, vec1, vec2, vec3, filt0; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; - v16u8 vec0, vec1, vec2, vec3; - v8u16 vec4, vec5, vec6, vec7; v16i8 res0, res1, res2, res3; - v8u16 filt, const255; + v8u16 vec4, vec5, vec6, vec7, filt, const255; - mask = LOAD_SB(&mc_filt_mask_arr[16]); + mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); - - const255 = (v8u16)__msa_ldi_h(255); - - LOAD_8VECS_SB(src, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); - - vec0 = (v16u8)__msa_vshf_b(mask, src1, src0); - vec1 = 
(v16u8)__msa_vshf_b(mask, src3, src2); - vec2 = (v16u8)__msa_vshf_b(mask, src5, src4); - vec3 = (v16u8)__msa_vshf_b(mask, src7, src6); - - vec4 = __msa_dotp_u_h(vec0, filt0); - vec5 = __msa_dotp_u_h(vec1, filt0); - vec6 = __msa_dotp_u_h(vec2, filt0); - vec7 = __msa_dotp_u_h(vec3, filt0); - - vec4 = (v8u16)__msa_srari_h((v8i16)vec4, FILTER_BITS); - vec5 = (v8u16)__msa_srari_h((v8i16)vec5, FILTER_BITS); - vec6 = (v8u16)__msa_srari_h((v8i16)vec6, FILTER_BITS); - vec7 = (v8u16)__msa_srari_h((v8i16)vec7, FILTER_BITS); - - vec4 = __msa_min_u_h(vec4, const255); - vec5 = __msa_min_u_h(vec5, const255); - vec6 = __msa_min_u_h(vec6, const255); - vec7 = __msa_min_u_h(vec7, const255); - - res0 = __msa_pckev_b((v16i8)vec4, (v16i8)vec4); - res1 = __msa_pckev_b((v16i8)vec5, (v16i8)vec5); - res2 = __msa_pckev_b((v16i8)vec6, (v16i8)vec6); - res3 = __msa_pckev_b((v16i8)vec7, (v16i8)vec7); - - out0 = __msa_copy_u_w((v4i32)res0, 0); - out1 = __msa_copy_u_w((v4i32)res0, 1); - out2 = __msa_copy_u_w((v4i32)res1, 0); - out3 = __msa_copy_u_w((v4i32)res1, 1); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); - dst += dst_stride; - - out0 = __msa_copy_u_w((v4i32)res2, 0); - out1 = __msa_copy_u_w((v4i32)res2, 1); - out2 = __msa_copy_u_w((v4i32)res3, 0); - out3 = __msa_copy_u_w((v4i32)res3, 1); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); + filt = LD_UH(filter); + filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); + + const255 = (v8u16) __msa_ldi_h(255); + + LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); + VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); + VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5, + vec6, vec7); + SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); + MIN_UH4_UH(vec4, vec5, vec6, vec7, const255); + PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, + res2, res3); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); + dst += (4 * dst_stride); + ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); } static void common_hz_2t_4w_msa(const uint8_t *src, int32_t src_stride, @@ -507,149 +382,93 @@ static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride, int8_t *filter) { v16u8 filt0; v16i8 src0, src1, src2, src3, mask; - v8u16 vec0, vec1, vec2, vec3; - v8u16 out0, out1, out2, out3; - v8u16 const255, filt; + v8u16 vec0, vec1, vec2, vec3, const255, filt; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); - - const255 = (v8u16)__msa_ldi_h(255); - - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); - - vec0 = (v8u16)__msa_vshf_b(mask, src0, src0); - vec1 = (v8u16)__msa_vshf_b(mask, src1, src1); - vec2 = (v8u16)__msa_vshf_b(mask, src2, src2); - vec3 = (v8u16)__msa_vshf_b(mask, src3, src3); - - vec0 = __msa_dotp_u_h((v16u8)vec0, filt0); - vec1 = __msa_dotp_u_h((v16u8)vec1, filt0); - vec2 = __msa_dotp_u_h((v16u8)vec2, filt0); - vec3 = __msa_dotp_u_h((v16u8)vec3, filt0); - - SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS); - - out0 = __msa_min_u_h(vec0, const255); - out1 = __msa_min_u_h(vec1, const255); - out2 = __msa_min_u_h(vec2, const255); - out3 = 
__msa_min_u_h(vec3, const255); - - PCKEV_B_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride); + filt = LD_UH(filter); + filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); + + const255 = (v8u16) __msa_ldi_h(255); + + LD_SB4(src, src_stride, src0, src1, src2, src3); + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1); + ST8x4_UB(src0, src1, dst, dst_stride); } static void common_hz_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { v16u8 filt0; - v16i8 src0, src1, src2, src3, mask; - v8u16 vec0, vec1, vec2, vec3; - v8u16 filt, const255; + v16i8 src0, src1, src2, src3, mask, out0, out1; + v8u16 vec0, vec1, vec2, vec3, filt, const255; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_UH(filter); + filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); - const255 = (v8u16)__msa_ldi_h(255); + const255 = (v8u16) __msa_ldi_h(255); - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); + LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - vec0 = (v8u16)__msa_vshf_b(mask, src0, src0); - vec1 = (v8u16)__msa_vshf_b(mask, src1, src1); - vec2 = (v8u16)__msa_vshf_b(mask, src2, src2); - vec3 = (v8u16)__msa_vshf_b(mask, src3, src3); - - vec0 = __msa_dotp_u_h((v16u8)vec0, filt0); - vec1 = __msa_dotp_u_h((v16u8)vec1, filt0); - vec2 = __msa_dotp_u_h((v16u8)vec2, filt0); - vec3 = __msa_dotp_u_h((v16u8)vec3, filt0); + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); - SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS); - - vec0 = __msa_min_u_h(vec0, const255); - vec1 = __msa_min_u_h(vec1, const255); - vec2 = __msa_min_u_h(vec2, const255); - vec3 = __msa_min_u_h(vec3, const255); - - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); + LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride); + PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); - vec0 = (v8u16)__msa_vshf_b(mask, src0, src0); - vec1 = (v8u16)__msa_vshf_b(mask, src1, src1); - vec2 = (v8u16)__msa_vshf_b(mask, src2, src2); - vec3 = (v8u16)__msa_vshf_b(mask, src3, src3); - - vec0 = __msa_dotp_u_h((v16u8)vec0, filt0); - vec1 = __msa_dotp_u_h((v16u8)vec1, filt0); - vec2 = __msa_dotp_u_h((v16u8)vec2, filt0); - vec3 = __msa_dotp_u_h((v16u8)vec3, filt0); - - SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS); - - vec0 = __msa_min_u_h(vec0, const255); - vec1 = __msa_min_u_h(vec1, const255); - vec2 = __msa_min_u_h(vec2, const255); - vec3 = __msa_min_u_h(vec3, const255); - - PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride); + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, 
vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); if (16 == height) { - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); + LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - vec0 = (v8u16)__msa_vshf_b(mask, src0, src0); - vec1 = (v8u16)__msa_vshf_b(mask, src1, src1); - vec2 = (v8u16)__msa_vshf_b(mask, src2, src2); - vec3 = (v8u16)__msa_vshf_b(mask, src3, src3); - - vec0 = __msa_dotp_u_h((v16u8)vec0, filt0); - vec1 = __msa_dotp_u_h((v16u8)vec1, filt0); - vec2 = __msa_dotp_u_h((v16u8)vec2, filt0); - vec3 = __msa_dotp_u_h((v16u8)vec3, filt0); - - SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, - vec0, vec1, vec2, vec3, FILTER_BITS); - - vec0 = __msa_min_u_h(vec0, const255); - vec1 = __msa_min_u_h(vec1, const255); - vec2 = __msa_min_u_h(vec2, const255); - vec3 = __msa_min_u_h(vec3, const255); - - LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride); - dst += (4 * dst_stride); - - vec0 = (v8u16)__msa_vshf_b(mask, src0, src0); - vec1 = (v8u16)__msa_vshf_b(mask, src1, src1); - vec2 = (v8u16)__msa_vshf_b(mask, src2, src2); - vec3 = (v8u16)__msa_vshf_b(mask, src3, src3); - - vec0 = __msa_dotp_u_h((v16u8)vec0, filt0); - vec1 = __msa_dotp_u_h((v16u8)vec1, filt0); - vec2 = __msa_dotp_u_h((v16u8)vec2, filt0); - vec3 = __msa_dotp_u_h((v16u8)vec3, filt0); - - SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, - vec0, vec1, vec2, vec3, FILTER_BITS); - - vec0 = __msa_min_u_h(vec0, const255); - vec1 = __msa_min_u_h(vec1, const255); - vec2 = __msa_min_u_h(vec2, const255); - vec3 = __msa_min_u_h(vec3, const255); - - PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride); + PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); + + VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, + vec2, vec3); + SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); + MIN_UH4_UH(vec0, vec1, vec2, vec3, const255); + PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); + ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride); } } @@ -668,136 +487,68 @@ static void common_hz_2t_16w_msa(const uint8_t *src, int32_t src_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; - v16u8 filt0; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v8u16 out0, out1, out2, out3, out4, out5, out6, out7; - v8u16 filt, const255; + v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt, const255; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); loop_cnt = (height >> 2) - 1; /* rearranging filter */ - filt = LOAD_UH(filter); - filt0 = 
(v16u8)__msa_splati_h((v8i16)filt, 0); - - const255 = (v8u16)__msa_ldi_h(255); - - src0 = LOAD_SB(src); - src1 = LOAD_SB(src + 8); - src += src_stride; - src2 = LOAD_SB(src); - src3 = LOAD_SB(src + 8); - src += src_stride; - src4 = LOAD_SB(src); - src5 = LOAD_SB(src + 8); - src += src_stride; - src6 = LOAD_SB(src); - src7 = LOAD_SB(src + 8); - src += src_stride; - - vec0 = (v16u8)__msa_vshf_b(mask, src0, src0); - vec1 = (v16u8)__msa_vshf_b(mask, src1, src1); - vec2 = (v16u8)__msa_vshf_b(mask, src2, src2); - vec3 = (v16u8)__msa_vshf_b(mask, src3, src3); - vec4 = (v16u8)__msa_vshf_b(mask, src4, src4); - vec5 = (v16u8)__msa_vshf_b(mask, src5, src5); - vec6 = (v16u8)__msa_vshf_b(mask, src6, src6); - vec7 = (v16u8)__msa_vshf_b(mask, src7, src7); - - out0 = __msa_dotp_u_h(vec0, filt0); - out1 = __msa_dotp_u_h(vec1, filt0); - out2 = __msa_dotp_u_h(vec2, filt0); - out3 = __msa_dotp_u_h(vec3, filt0); - out4 = __msa_dotp_u_h(vec4, filt0); - out5 = __msa_dotp_u_h(vec5, filt0); - out6 = __msa_dotp_u_h(vec6, filt0); - out7 = __msa_dotp_u_h(vec7, filt0); - - out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS); - out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS); - out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS); - out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS); - out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS); - out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS); - out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS); - out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS); - - out0 = __msa_min_u_h(out0, const255); - out1 = __msa_min_u_h(out1, const255); - out2 = __msa_min_u_h(out2, const255); - out3 = __msa_min_u_h(out3, const255); - out4 = __msa_min_u_h(out4, const255); - out5 = __msa_min_u_h(out5, const255); - out6 = __msa_min_u_h(out6, const255); - out7 = __msa_min_u_h(out7, const255); - - PCKEV_B_STORE_VEC(out1, out0, dst); + filt = LD_UH(filter); + filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); + + const255 = (v8u16) __msa_ldi_h(255); + + LD_SB4(src, src_stride, src0, src2, src4, src6); + LD_SB4(src + 8, src_stride, src1, src3, src5, src7); + src += (4 * src_stride); + + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, + out2, out3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, + out6, out7); + SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); + SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); + MIN_UH4_UH(out0, out1, out2, out3, const255); + MIN_UH4_UH(out4, out5, out6, out7, const255); + PCKEV_ST_SB(out0, out1, dst); dst += dst_stride; - PCKEV_B_STORE_VEC(out3, out2, dst); + PCKEV_ST_SB(out2, out3, dst); dst += dst_stride; - PCKEV_B_STORE_VEC(out5, out4, dst); + PCKEV_ST_SB(out4, out5, dst); dst += dst_stride; - PCKEV_B_STORE_VEC(out7, out6, dst); + PCKEV_ST_SB(out6, out7, dst); dst += dst_stride; for (; loop_cnt--;) { - src0 = LOAD_SB(src); - src1 = LOAD_SB(src + 8); - src += src_stride; - src2 = LOAD_SB(src); - src3 = LOAD_SB(src + 8); - src += src_stride; - src4 = LOAD_SB(src); - src5 = LOAD_SB(src + 8); - src += src_stride; - src6 = LOAD_SB(src); - src7 = LOAD_SB(src + 8); - src += src_stride; + LD_SB4(src, src_stride, src0, src2, src4, src6); + LD_SB4(src + 8, src_stride, src1, src3, src5, src7); + src += (4 * src_stride); - vec0 = 
(v16u8)__msa_vshf_b(mask, src0, src0); - vec1 = (v16u8)__msa_vshf_b(mask, src1, src1); - vec2 = (v16u8)__msa_vshf_b(mask, src2, src2); - vec3 = (v16u8)__msa_vshf_b(mask, src3, src3); - vec4 = (v16u8)__msa_vshf_b(mask, src4, src4); - vec5 = (v16u8)__msa_vshf_b(mask, src5, src5); - vec6 = (v16u8)__msa_vshf_b(mask, src6, src6); - vec7 = (v16u8)__msa_vshf_b(mask, src7, src7); - - out0 = __msa_dotp_u_h(vec0, filt0); - out1 = __msa_dotp_u_h(vec1, filt0); - out2 = __msa_dotp_u_h(vec2, filt0); - out3 = __msa_dotp_u_h(vec3, filt0); - out4 = __msa_dotp_u_h(vec4, filt0); - out5 = __msa_dotp_u_h(vec5, filt0); - out6 = __msa_dotp_u_h(vec6, filt0); - out7 = __msa_dotp_u_h(vec7, filt0); - - out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS); - out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS); - out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS); - out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS); - out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS); - out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS); - out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS); - out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS); - - out0 = __msa_min_u_h(out0, const255); - out1 = __msa_min_u_h(out1, const255); - out2 = __msa_min_u_h(out2, const255); - out3 = __msa_min_u_h(out3, const255); - out4 = __msa_min_u_h(out4, const255); - out5 = __msa_min_u_h(out5, const255); - out6 = __msa_min_u_h(out6, const255); - out7 = __msa_min_u_h(out7, const255); - - PCKEV_B_STORE_VEC(out1, out0, dst); + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, + out2, out3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, + out6, out7); + SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); + SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); + MIN_UH4_UH(out0, out1, out2, out3, const255); + MIN_UH4_UH(out4, out5, out6, out7, const255); + PCKEV_ST_SB(out0, out1, dst); dst += dst_stride; - PCKEV_B_STORE_VEC(out3, out2, dst); + PCKEV_ST_SB(out2, out3, dst); dst += dst_stride; - PCKEV_B_STORE_VEC(out5, out4, dst); + PCKEV_ST_SB(out4, out5, dst); dst += dst_stride; - PCKEV_B_STORE_VEC(out7, out6, dst); + PCKEV_ST_SB(out6, out7, dst); dst += dst_stride; } } @@ -807,72 +558,46 @@ static void common_hz_2t_32w_msa(const uint8_t *src, int32_t src_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; - v16u8 filt0; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v8u16 out0, out1, out2, out3, out4, out5, out6, out7; - v8u16 filt, const255; + v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt, const255; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_UH(filter); + filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); - const255 = (v8u16)__msa_ldi_h(255); + const255 = (v8u16) __msa_ldi_h(255); for (loop_cnt = height >> 1; loop_cnt--;) { - src0 = LOAD_SB(src); - src2 = LOAD_SB(src + 16); - src3 = LOAD_SB(src + 24); - src1 = __msa_sld_b(src2, src0, 8); + src0 = LD_SB(src); + src2 = LD_SB(src + 16); + src3 = LD_SB(src + 24); + src1 = __msa_sldi_b(src2, src0, 8); 
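/*
 * A scalar sketch of one row of the 2-tap horizontal filter used by the
 * wide-block kernels in this hunk: __msa_sldi_b splices bytes 8..23 of the
 * row out of two adjacent 16-byte loads, and the VSHF/DOTP/SRARI/MIN/PCKEV
 * sequence then blends each pair of horizontally adjacent pixels.  Per pixel
 * this is bilinear interpolation rounded by FILTER_BITS and clamped to 255
 * (no dst averaging on this path).  The helper is illustrative only and
 * assumes FILTER_BITS == 7; it is not part of libvpx or of this change.
 */
#include <stdint.h>

static void hz_2t_row_sketch(const uint8_t *src, uint8_t *dst, int width,
                             uint8_t f0, uint8_t f1) {
  int x;
  for (x = 0; x < width; ++x) {
    int sum = src[x] * f0 + src[x + 1] * f1;    /* DOTP_UB*_UH          */
    int out = (sum + (1 << 6)) >> 7;            /* SRARI_H*_UH, 7 bits  */
    dst[x] = (uint8_t)(out > 255 ? 255 : out);  /* MIN_UH*_UH, const255 */
  }
}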
src += src_stride; - src4 = LOAD_SB(src); - src6 = LOAD_SB(src + 16); - src7 = LOAD_SB(src + 24); - src5 = __msa_sld_b(src6, src4, 8); + src4 = LD_SB(src); + src6 = LD_SB(src + 16); + src7 = LD_SB(src + 24); + src5 = __msa_sldi_b(src6, src4, 8); src += src_stride; - vec0 = (v16u8)__msa_vshf_b(mask, src0, src0); - vec1 = (v16u8)__msa_vshf_b(mask, src1, src1); - vec2 = (v16u8)__msa_vshf_b(mask, src2, src2); - vec3 = (v16u8)__msa_vshf_b(mask, src3, src3); - vec4 = (v16u8)__msa_vshf_b(mask, src4, src4); - vec5 = (v16u8)__msa_vshf_b(mask, src5, src5); - vec6 = (v16u8)__msa_vshf_b(mask, src6, src6); - vec7 = (v16u8)__msa_vshf_b(mask, src7, src7); - - out0 = __msa_dotp_u_h(vec0, filt0); - out1 = __msa_dotp_u_h(vec1, filt0); - out2 = __msa_dotp_u_h(vec2, filt0); - out3 = __msa_dotp_u_h(vec3, filt0); - out4 = __msa_dotp_u_h(vec4, filt0); - out5 = __msa_dotp_u_h(vec5, filt0); - out6 = __msa_dotp_u_h(vec6, filt0); - out7 = __msa_dotp_u_h(vec7, filt0); - - out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS); - out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS); - out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS); - out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS); - out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS); - out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS); - out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS); - out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS); - - out0 = __msa_min_u_h(out0, const255); - out1 = __msa_min_u_h(out1, const255); - out2 = __msa_min_u_h(out2, const255); - out3 = __msa_min_u_h(out3, const255); - out4 = __msa_min_u_h(out4, const255); - out5 = __msa_min_u_h(out5, const255); - out6 = __msa_min_u_h(out6, const255); - out7 = __msa_min_u_h(out7, const255); - - PCKEV_B_STORE_VEC(out1, out0, dst); - PCKEV_B_STORE_VEC(out3, out2, dst + 16); + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, + out2, out3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, + out6, out7); + SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); + SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); + MIN_UH4_UH(out0, out1, out2, out3, const255); + MIN_UH4_UH(out4, out5, out6, out7, const255); + PCKEV_ST_SB(out0, out1, dst); + PCKEV_ST_SB(out2, out3, dst + 16); dst += dst_stride; - PCKEV_B_STORE_VEC(out5, out4, dst); - PCKEV_B_STORE_VEC(out7, out6, dst + 16); + PCKEV_ST_SB(out4, out5, dst); + PCKEV_ST_SB(out6, out7, dst + 16); dst += dst_stride; } } @@ -882,70 +607,42 @@ static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; - v16u8 filt0; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v8u16 out0, out1, out2, out3, out4, out5, out6, out7; - v8u16 filt, const255; + v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt, const255; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_UH(filter); + filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); - const255 = (v8u16)__msa_ldi_h(255); + const255 = (v8u16) __msa_ldi_h(255); for (loop_cnt = 
height; loop_cnt--;) { - src0 = LOAD_SB(src); - src2 = LOAD_SB(src + 16); - src4 = LOAD_SB(src + 32); - src6 = LOAD_SB(src + 48); - src7 = LOAD_SB(src + 56); - src1 = __msa_sld_b(src2, src0, 8); - src3 = __msa_sld_b(src4, src2, 8); - src5 = __msa_sld_b(src6, src4, 8); + src0 = LD_SB(src); + src2 = LD_SB(src + 16); + src4 = LD_SB(src + 32); + src6 = LD_SB(src + 48); + src7 = LD_SB(src + 56); + SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8); src += src_stride; - vec0 = (v16u8)__msa_vshf_b(mask, src0, src0); - vec1 = (v16u8)__msa_vshf_b(mask, src1, src1); - vec2 = (v16u8)__msa_vshf_b(mask, src2, src2); - vec3 = (v16u8)__msa_vshf_b(mask, src3, src3); - vec4 = (v16u8)__msa_vshf_b(mask, src4, src4); - vec5 = (v16u8)__msa_vshf_b(mask, src5, src5); - vec6 = (v16u8)__msa_vshf_b(mask, src6, src6); - vec7 = (v16u8)__msa_vshf_b(mask, src7, src7); - - out0 = __msa_dotp_u_h(vec0, filt0); - out1 = __msa_dotp_u_h(vec1, filt0); - out2 = __msa_dotp_u_h(vec2, filt0); - out3 = __msa_dotp_u_h(vec3, filt0); - out4 = __msa_dotp_u_h(vec4, filt0); - out5 = __msa_dotp_u_h(vec5, filt0); - out6 = __msa_dotp_u_h(vec6, filt0); - out7 = __msa_dotp_u_h(vec7, filt0); - - out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS); - out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS); - out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS); - out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS); - out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS); - out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS); - out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS); - out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS); - - out0 = __msa_min_u_h(out0, const255); - out1 = __msa_min_u_h(out1, const255); - out2 = __msa_min_u_h(out2, const255); - out3 = __msa_min_u_h(out3, const255); - out4 = __msa_min_u_h(out4, const255); - out5 = __msa_min_u_h(out5, const255); - out6 = __msa_min_u_h(out6, const255); - out7 = __msa_min_u_h(out7, const255); - - PCKEV_B_STORE_VEC(out1, out0, dst); - PCKEV_B_STORE_VEC(out3, out2, dst + 16); - PCKEV_B_STORE_VEC(out5, out4, dst + 32); - PCKEV_B_STORE_VEC(out7, out6, dst + 48); + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); + VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, + out2, out3); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, + out6, out7); + SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); + SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); + MIN_UH4_UH(out0, out1, out2, out3, const255); + MIN_UH4_UH(out4, out5, out6, out7, const255); + PCKEV_ST_SB(out0, out1, dst); + PCKEV_ST_SB(out2, out3, dst + 16); + PCKEV_ST_SB(out4, out5, dst + 32); + PCKEV_ST_SB(out6, out7, dst + 48); dst += dst_stride; } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c index d0c374648c9..b1279d97ce5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c @@ -26,93 +26,68 @@ static void common_hv_8ht_8vt_4w_msa(const uint8_t *src, int32_t src_stride, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; - v16i8 filt_horiz0, 
filt_horiz1, filt_horiz2, filt_horiz3; - v16u8 mask0, mask1, mask2, mask3; - v8i16 filt_horiz; - v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4; - v8i16 horiz_out5, horiz_out6, horiz_out7, horiz_out8, horiz_out9; - v8i16 tmp0, tmp1, out0, out1, out2, out3, out4; - v8i16 filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3; - - mask0 = LOAD_UB(&mc_filt_mask_arr[16]); + v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; + v16u8 mask0, mask1, mask2, mask3, out; + v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; + v8i16 hz_out7, hz_out8, hz_out9, tmp0, tmp1, out0, out1, out2, out3, out4; + v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3; + mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= (3 + 3 * src_stride); /* rearranging filter */ - filt_horiz = LOAD_SH(filter_horiz); - filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0); - filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1); - filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2); - filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3); + filt = LD_SH(filter_horiz); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; - LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); - XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6, - src0, src1, src2, src3, src4, src5, src6, 128); - - horiz_out0 = HORIZ_8TAP_FILT_2VECS(src0, src1, mask0, mask1, mask2, mask3, - filt_horiz0, filt_horiz1, filt_horiz2, - filt_horiz3); - horiz_out2 = HORIZ_8TAP_FILT_2VECS(src2, src3, mask0, mask1, mask2, mask3, - filt_horiz0, filt_horiz1, filt_horiz2, - filt_horiz3); - horiz_out4 = HORIZ_8TAP_FILT_2VECS(src4, src5, mask0, mask1, mask2, mask3, - filt_horiz0, filt_horiz1, filt_horiz2, - filt_horiz3); - horiz_out5 = HORIZ_8TAP_FILT_2VECS(src5, src6, mask0, mask1, mask2, mask3, - filt_horiz0, filt_horiz1, filt_horiz2, - filt_horiz3); - horiz_out1 = (v8i16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8); - horiz_out3 = (v8i16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8); - - filt = LOAD_SH(filter_vert); - filt_vert0 = __msa_splati_h(filt, 0); - filt_vert1 = __msa_splati_h(filt, 1); - filt_vert2 = __msa_splati_h(filt, 2); - filt_vert3 = __msa_splati_h(filt, 3); - - out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2); - out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4); - - for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10); - src += (4 * src_stride); - - XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128); + hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8); - horiz_out7 = HORIZ_8TAP_FILT_2VECS(src7, src8, mask0, mask1, mask2, mask3, - filt_horiz0, filt_horiz1, filt_horiz2, - filt_horiz3); - horiz_out6 = (v8i16)__msa_sldi_b((v16i8)horiz_out7, (v16i8)horiz_out5, 
8); + filt = LD_SH(filter_vert); + SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); - out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6); + ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); + out2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); - tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1, - filt_vert2, filt_vert3); - - horiz_out9 = HORIZ_8TAP_FILT_2VECS(src9, src10, mask0, mask1, mask2, mask3, - filt_horiz0, filt_horiz1, filt_horiz2, - filt_horiz3); - horiz_out8 = (v8i16)__msa_sldi_b((v16i8)horiz_out9, (v16i8)horiz_out7, 8); - - out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8); - - tmp1 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vert0, filt_vert1, - filt_vert2, filt_vert3); - tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7); + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src7, src8, src9, src10); + XORI_B4_128_SB(src7, src8, src9, src10); + src += (4 * src_stride); - PCKEV_2B_XORI128_STORE_4_BYTES_4(tmp0, tmp1, dst, dst_stride); + hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8); + out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); + tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + hz_out8 = (v8i16)__msa_sldi_b((v16i8)hz_out9, (v16i8)hz_out7, 8); + out4 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); + tmp1 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + SRARI_H2_SH(tmp0, tmp1, FILTER_BITS); + SAT_SH2_SH(tmp0, tmp1, 7); + out = PCKEV_XORI128_UB(tmp0, tmp1); + ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); - horiz_out5 = horiz_out9; - + hz_out5 = hz_out9; out0 = out2; out1 = out3; out2 = out4; @@ -125,108 +100,87 @@ static void common_hv_8ht_8vt_8w_msa(const uint8_t *src, int32_t src_stride, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; - v16i8 filt_horiz0, filt_horiz1, filt_horiz2, filt_horiz3; - v8i16 filt_horiz, filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3; - v16u8 mask0, mask1, mask2, mask3; - v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3; - v8i16 horiz_out4, horiz_out5, horiz_out6, horiz_out7; - v8i16 horiz_out8, horiz_out9, horiz_out10; + v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; + v16u8 mask0, mask1, mask2, mask3, vec0, vec1; + v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3; + v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; + v8i16 hz_out7, hz_out8, hz_out9, hz_out10, tmp0, tmp1, tmp2, tmp3; v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9; - v8i16 tmp0, tmp1, tmp2, tmp3; - - mask0 = LOAD_UB(&mc_filt_mask_arr[0]); + mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= (3 + 3 * src_stride); /* rearranging filter */ - filt_horiz = LOAD_SH(filter_horiz); - filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0); - filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1); - filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2); - filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3); + filt = LD_SH(filter_horiz); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); mask1 = mask0 + 2; mask2 = mask0 + 4; 
mask3 = mask0 + 6; - LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); - XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6, - src0, src1, src2, src3, src4, src5, src6, 128); - - horiz_out0 = HORIZ_8TAP_FILT(src0, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - horiz_out1 = HORIZ_8TAP_FILT(src1, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - horiz_out2 = HORIZ_8TAP_FILT(src2, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - horiz_out3 = HORIZ_8TAP_FILT(src3, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - horiz_out4 = HORIZ_8TAP_FILT(src4, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - horiz_out5 = HORIZ_8TAP_FILT(src5, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - horiz_out6 = HORIZ_8TAP_FILT(src6, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - - filt = LOAD_SH(filter_vert); - filt_vert0 = __msa_splati_h(filt, 0); - filt_vert1 = __msa_splati_h(filt, 1); - filt_vert2 = __msa_splati_h(filt, 2); - filt_vert3 = __msa_splati_h(filt, 3); - - out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2); - out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4); - out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out1); - out5 = (v8i16)__msa_ilvev_b((v16i8)horiz_out4, (v16i8)horiz_out3); - out6 = (v8i16)__msa_ilvev_b((v16i8)horiz_out6, (v16i8)horiz_out5); + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); + hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0, + filt_hz1, filt_hz2, filt_hz3); + + filt = LD_SH(filter_vert); + SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); + + ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); + ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4); + ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6); for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10); + LD_SB4(src, src_stride, src7, src8, src9, src10); src += (4 * src_stride); - XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128); - - horiz_out7 = HORIZ_8TAP_FILT(src7, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - - out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6); - tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1, - filt_vert2, filt_vert3); - tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7); - - horiz_out8 = HORIZ_8TAP_FILT(src8, mask0, mask1, mask2, 
mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - - out7 = (v8i16)__msa_ilvev_b((v16i8)horiz_out8, (v16i8)horiz_out7); - tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vert0, filt_vert1, - filt_vert2, filt_vert3); - tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7); - - horiz_out9 = HORIZ_8TAP_FILT(src9, mask0, mask1, mask2, mask3, filt_horiz0, - filt_horiz1, filt_horiz2, filt_horiz3); - - out8 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8); - tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vert0, filt_vert1, - filt_vert2, filt_vert3); - tmp2 = SRARI_SATURATE_SIGNED_H(tmp2, FILTER_BITS, 7); - - horiz_out10 = HORIZ_8TAP_FILT(src10, mask0, mask1, mask2, mask3, - filt_horiz0, filt_horiz1, filt_horiz2, - filt_horiz3); - - out9 = (v8i16)__msa_ilvev_b((v16i8)horiz_out10, (v16i8)horiz_out9); - tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vert0, filt_vert1, - filt_vert2, filt_vert3); - tmp3 = SRARI_SATURATE_SIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_4_XORI128_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); + XORI_B4_128_SB(src7, src8, src9, src10); + + hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); + tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7); + tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out8 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); + tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + + hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3, + filt_hz0, filt_hz1, filt_hz2, filt_hz3); + out9 = (v8i16)__msa_ilvev_b((v16i8)hz_out10, (v16i8)hz_out9); + tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1, + filt_vt2, filt_vt3); + SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); + vec0 = PCKEV_XORI128_UB(tmp0, tmp1); + vec1 = PCKEV_XORI128_UB(tmp2, tmp3); + ST8x4_UB(vec0, vec1, dst, dst_stride); dst += (4 * dst_stride); - horiz_out6 = horiz_out10; - + hz_out6 = hz_out10; out0 = out2; out1 = out3; out2 = out8; @@ -279,175 +233,89 @@ static void common_hv_2ht_2vt_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { - uint32_t out0, out1, out2, out3; v16i8 src0, src1, src2, src3, src4, mask; - v16u8 res0, res1, horiz_vec; - v16u8 filt_vert, filt_horiz, vec0, vec1; - v8u16 filt, tmp0, tmp1; - v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4; + v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1; + v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1; - mask = LOAD_SB(&mc_filt_mask_arr[16]); + mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ - filt = LOAD_UH(filter_horiz); - filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0); - - filt = LOAD_UH(filter_vert); - filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0); - - LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - 
horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2); - horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4); - horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7); - - horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8); - horiz_out3 = (v8u16)__msa_pckod_d((v2i64)horiz_out4, (v2i64)horiz_out2); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2); - - tmp0 = __msa_dotp_u_h(vec0, filt_vert); - tmp1 = __msa_dotp_u_h(vec1, filt_vert); - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - res0 = (v16u8)__msa_pckev_b((v16i8)tmp0, (v16i8)tmp0); - res1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp1); - - out0 = __msa_copy_u_w((v4i32)res0, 0); - out1 = __msa_copy_u_w((v4i32)res0, 1); - out2 = __msa_copy_u_w((v4i32)res1, 0); - out3 = __msa_copy_u_w((v4i32)res1, 1); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); + filt = LD_UH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); + + filt = LD_UH(filter_vert); + filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); + + LD_SB5(src, src_stride, src0, src1, src2, src3, src4); + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); + hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); + hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); + + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); } static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { - uint32_t out0, out1, out2, out3; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask; - v16u8 filt_horiz, filt_vert, horiz_vec; - v16u8 vec0, vec1, vec2, vec3; - v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3; - v8u16 vec4, vec5, vec6, vec7, filt; - v8u16 horiz_out4, horiz_out5, horiz_out6, horiz_out7, horiz_out8; v16i8 res0, res1, res2, res3; + v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; + v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; + v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt; - mask = LOAD_SB(&mc_filt_mask_arr[16]); + mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ - filt = LOAD_UH(filter_horiz); - filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_UH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); - filt = LOAD_UH(filter_vert); - filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_UH(filter_vert); + filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); - LOAD_8VECS_SB(src, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * 
src_stride); - src8 = LOAD_SB(src); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2); - horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src4); - horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src6); - horiz_out6 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out6 = SRARI_SATURATE_UNSIGNED_H(horiz_out6, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src8, src8); - horiz_out8 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out8 = SRARI_SATURATE_UNSIGNED_H(horiz_out8, FILTER_BITS, 7); - - horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8); - horiz_out3 = (v8u16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8); - horiz_out5 = (v8u16)__msa_sldi_b((v16i8)horiz_out6, (v16i8)horiz_out4, 8); - horiz_out7 = (v8u16)__msa_pckod_d((v2i64)horiz_out8, (v2i64)horiz_out6); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2); - vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4); - vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6); - - vec4 = __msa_dotp_u_h(vec0, filt_vert); - vec5 = __msa_dotp_u_h(vec1, filt_vert); - vec6 = __msa_dotp_u_h(vec2, filt_vert); - vec7 = __msa_dotp_u_h(vec3, filt_vert); - - vec4 = SRARI_SATURATE_UNSIGNED_H(vec4, FILTER_BITS, 7); - vec5 = SRARI_SATURATE_UNSIGNED_H(vec5, FILTER_BITS, 7); - vec6 = SRARI_SATURATE_UNSIGNED_H(vec6, FILTER_BITS, 7); - vec7 = SRARI_SATURATE_UNSIGNED_H(vec7, FILTER_BITS, 7); - - res0 = __msa_pckev_b((v16i8)vec4, (v16i8)vec4); - res1 = __msa_pckev_b((v16i8)vec5, (v16i8)vec5); - res2 = __msa_pckev_b((v16i8)vec6, (v16i8)vec6); - res3 = __msa_pckev_b((v16i8)vec7, (v16i8)vec7); - - out0 = __msa_copy_u_w((v4i32)res0, 0); - out1 = __msa_copy_u_w((v4i32)res0, 1); - out2 = __msa_copy_u_w((v4i32)res1, 0); - out3 = __msa_copy_u_w((v4i32)res1, 1); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); - dst += dst_stride; - - out0 = __msa_copy_u_w((v4i32)res2, 0); - out1 = __msa_copy_u_w((v4i32)res2, 1); - out2 = __msa_copy_u_w((v4i32)res3, 0); - out3 = __msa_copy_u_w((v4i32)res3, 1); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); + src8 = LD_SB(src); + + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); + hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, FILTER_BITS); + hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, FILTER_BITS); + hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS); + SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1, + hz_out3, hz_out5, 8); + hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6); + + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, 
filt_vt, + vec4, vec5, vec6, vec7); + SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); + SAT_UH4_UH(vec4, vec5, vec6, vec7, 7); + PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, + res2, res3); + ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); + dst += (4 * dst_stride); + ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); } static void common_hv_2ht_2vt_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, - int8_t *filter_horiz, - int8_t *filter_vert, + int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { if (4 == height) { - common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride, - filter_horiz, filter_vert); + common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride, filter_horiz, + filter_vert); } else if (8 == height) { - common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride, - filter_horiz, filter_vert); + common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride, filter_horiz, + filter_vert); } } @@ -455,63 +323,43 @@ static void common_hv_2ht_2vt_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { - v16i8 src0, src1, src2, src3, src4, mask; - v16u8 filt_horiz, filt_vert, horiz_vec; - v16u8 vec0, vec1, vec2, vec3; - v8u16 horiz_out0, horiz_out1; - v8u16 tmp0, tmp1, tmp2, tmp3; + v16i8 src0, src1, src2, src3, src4, mask, out0, out1; + v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; + v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; v8i16 filt; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ - filt = LOAD_SH(filter_horiz); - filt_horiz = (v16u8)__msa_splati_h(filt, 0); - - filt = LOAD_SH(filter_vert); - filt_vert = (v16u8)__msa_splati_h(filt, 0); - - LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4); - src += (5 * src_stride); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1); - horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7); + filt = LD_SH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h(filt, 0); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp0 = __msa_dotp_u_h(vec0, filt_vert); + filt = LD_SH(filter_vert); + filt_vt = (v16u8)__msa_splati_h(filt, 0); - horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); + LD_SB5(src, src_stride, src0, src1, src2, src3, src4); - vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp1 = __msa_dotp_u_h(vec1, filt_vert); + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); + hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp0 = __msa_dotp_u_h(vec0, filt_vt); - horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3); - horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7); + hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); + vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp1 = __msa_dotp_u_h(vec1, filt_vt); - vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp2 = __msa_dotp_u_h(vec2, filt_vert); + hz_out1 
= HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); + vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp2 = __msa_dotp_u_h(vec2, filt_vt); - horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); + hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp3 = __msa_dotp_u_h(vec3, filt_vt); - vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp3 = __msa_dotp_u_h(vec3, filt_vert); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); } static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src, @@ -522,106 +370,76 @@ static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; - v16i8 src0, src1, src2, src3, src4, mask; - v16u8 filt_horiz, filt_vert, vec0, horiz_vec; - v8u16 horiz_out0, horiz_out1; - v8u16 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; + v16i8 src0, src1, src2, src3, src4, mask, out0, out1; + v16u8 filt_hz, filt_vt, vec0; + v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; v8i16 filt; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ - filt = LOAD_SH(filter_horiz); - filt_horiz = (v16u8)__msa_splati_h(filt, 0); + filt = LD_SH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h(filt, 0); - filt = LOAD_SH(filter_vert); - filt_vert = (v16u8)__msa_splati_h(filt, 0); + filt = LD_SH(filter_vert); + filt_vt = (v16u8)__msa_splati_h(filt, 0); - src0 = LOAD_SB(src); + src0 = LD_SB(src); src += src_stride; - horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 3); loop_cnt--;) { - LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4); + LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); - horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1); - horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp1 = __msa_dotp_u_h(vec0, filt_vert); + hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp1 = __msa_dotp_u_h(vec0, filt_vt); - horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); + hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp2 = __msa_dotp_u_h(vec0, filt_vt); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp2 = (v8u16)__msa_dotp_u_h(vec0, filt_vert); + SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); + 
SAT_UH2_UH(tmp1, tmp2, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); + hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp3 = __msa_dotp_u_h(vec0, filt_vt); - horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3); - horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp3 = __msa_dotp_u_h(vec0, filt_vert); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); - - LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4); + hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp4 = __msa_dotp_u_h(vec0, filt_vt); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp4 = __msa_dotp_u_h(vec0, filt_vert); - - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7); - - PCKEV_B_STORE_8_BYTES_4(tmp1, tmp2, tmp3, tmp4, dst, dst_stride); + SRARI_H2_UH(tmp3, tmp4, FILTER_BITS); + SAT_UH2_UH(tmp3, tmp4, 7); + PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); - horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1); - horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp5 = __msa_dotp_u_h(vec0, filt_vert); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp6 = __msa_dotp_u_h(vec0, filt_vert); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3); - horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7); + hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp5 = __msa_dotp_u_h(vec0, filt_vt); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp7 = __msa_dotp_u_h(vec0, filt_vert); + hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp6 = __msa_dotp_u_h(vec0, filt_vt); - horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4); - horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7); + hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); + tmp7 = __msa_dotp_u_h(vec0, filt_vt); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp8 = __msa_dotp_u_h(vec0, filt_vert); + hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); + tmp8 = __msa_dotp_u_h(vec0, filt_vt); - tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7); - tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 
7); - tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7); - tmp8 = SRARI_SATURATE_UNSIGNED_H(tmp8, FILTER_BITS, 7); - - PCKEV_B_STORE_8_BYTES_4(tmp5, tmp6, tmp7, tmp8, dst, dst_stride); + SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, FILTER_BITS); + SAT_UH4_UH(tmp5, tmp6, tmp7, tmp8, 7); + PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); } } @@ -645,108 +463,64 @@ static void common_hv_2ht_2vt_16w_msa(const uint8_t *src, int32_t src_stride, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; - v16u8 filt_horiz, filt_vert, vec0, horiz_vec; - v8u16 horiz_vec0, horiz_vec1, tmp1, tmp2; - v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3; + v16u8 filt_hz, filt_vt, vec0, vec1; + v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3; v8i16 filt; - mask = LOAD_SB(&mc_filt_mask_arr[0]); + mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ - filt = LOAD_SH(filter_horiz); - filt_horiz = (v16u8)__msa_splati_h(filt, 0); - - filt = LOAD_SH(filter_vert); - filt_vert = (v16u8)__msa_splati_h(filt, 0); - - src0 = LOAD_SB(src); - src1 = LOAD_SB(src + 8); + filt = LD_SH(filter_horiz); + filt_hz = (v16u8)__msa_splati_h(filt, 0); - horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0); - horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1); - horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7); + filt = LD_SH(filter_vert); + filt_vt = (v16u8)__msa_splati_h(filt, 0); + LD_SB2(src, 8, src0, src1); src += src_stride; + hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_SB(src, src_stride, src0, src2, src4, src6); - LOAD_4VECS_SB(src + 8, src_stride, src1, src3, src5, src7); + LD_SB4(src, src_stride, src0, src2, src4, src6); + LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); - horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0); - horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1); - horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp1 = __msa_dotp_u_h(vec0, filt_vert); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2); - tmp2 = __msa_dotp_u_h(vec0, filt_vert); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp2, tmp1, dst); + hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); + hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); + SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); + SAT_UH2_UH(tmp1, tmp2, 7); + PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; - horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2); - horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3); - horiz_vec1 = 
__msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp1 = __msa_dotp_u_h(vec0, filt_vert); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3); - tmp2 = __msa_dotp_u_h(vec0, filt_vert); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp2, tmp1, dst); + hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); + SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); + SAT_UH2_UH(tmp1, tmp2, 7); + PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; - horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4); - horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src5); - horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0); - tmp1 = __msa_dotp_u_h(vec0, filt_vert); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2); - tmp2 = __msa_dotp_u_h(vec0, filt_vert); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp2, tmp1, dst); + hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); + hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); + SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); + SAT_UH2_UH(tmp1, tmp2, 7); + PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; - horiz_vec = (v16u8)__msa_vshf_b(mask, src6, src6); - horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7); - - horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src7); - horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz); - horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7); - - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1); - tmp1 = __msa_dotp_u_h(vec0, filt_vert); - vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3); - tmp2 = __msa_dotp_u_h(vec0, filt_vert); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp2, tmp1, dst); + hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS); + hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS); + ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); + DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); + SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); + SAT_UH2_UH(tmp1, tmp2, 7); + PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c index 6b71ec1c0e4..e9ec2507a0a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c @@ -16,58 +16,48 @@ 
static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; - v16i8 src10_r, src32_r, src54_r, src76_r, src98_r; - v16i8 src21_r, src43_r, src65_r, src87_r, src109_r; - v16i8 src2110, src4332, src6554, src8776, src10998; + v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; + v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776; + v16i8 src10998, filt0, filt1, filt2, filt3; + v16u8 out; v8i16 filt, out10, out32; - v16i8 filt0, filt1, filt2, filt3; src -= (3 * src_stride); - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); - LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); - ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5, - src1, src3, src5, src2, src4, src6, - src10_r, src32_r, src54_r, src21_r, src43_r, src65_r); - - ILVR_D_3VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r, - src6554, src65_r, src54_r); - - XORI_B_3VECS_SB(src2110, src4332, src6554, src2110, src4332, src6554, 128); + ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, + src54_r, src21_r); + ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); + ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110, + src4332, src6554); + XORI_B3_128_SB(src2110, src4332, src6554); for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10); + LD_SB4(src, src_stride, src7, src8, src9, src10); src += (4 * src_stride); - ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10, - src76_r, src87_r, src98_r, src109_r); - - ILVR_D_2VECS_SB(src8776, src87_r, src76_r, src10998, src109_r, src98_r); - - XORI_B_2VECS_SB(src8776, src10998, src8776, src10998, 128); - - out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, - filt0, filt1, filt2, filt3); - out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, - filt0, filt1, filt2, filt3); - - out10 = SRARI_SATURATE_SIGNED_H(out10, FILTER_BITS, 7); - out32 = SRARI_SATURATE_SIGNED_H(out32, FILTER_BITS, 7); - - PCKEV_2B_XORI128_STORE_4_BYTES_4(out10, out32, dst, dst_stride); + ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, + src87_r, src98_r, src109_r); + ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998); + XORI_B2_128_SB(src8776, src10998); + out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0, + filt1, filt2, filt3); + out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0, + filt1, filt2, filt3); + SRARI_H2_SH(out10, out32, FILTER_BITS); + SAT_SH2_SH(out10, out32, 7); + out = PCKEV_XORI128_UB(out10, out32); + ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); src2110 = src6554; src4332 = src8776; src6554 = src10998; - src6 = src10; } } @@ -77,54 +67,115 @@ static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; - v16i8 src10_r, src32_r, src54_r, src76_r, src98_r; - v16i8 src21_r, src43_r, src65_r, src87_r, src109_r; - v16i8 filt0, 
filt1, filt2, filt3; + v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; + v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3; + v16u8 tmp0, tmp1; v8i16 filt, out0_r, out1_r, out2_r, out3_r; src -= (3 * src_stride); - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); - LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); - - XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6, - src0, src1, src2, src3, src4, src5, src6, 128); - - ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5, - src1, src3, src5, src2, src4, src6, - src10_r, src32_r, src54_r, src21_r, src43_r, src65_r); + ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, + src54_r, src21_r); + ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10); + LD_SB4(src, src_stride, src7, src8, src9, src10); + XORI_B4_128_SB(src7, src8, src9, src10); src += (4 * src_stride); - XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128); + ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, + src87_r, src98_r, src109_r); + out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, + filt1, filt2, filt3); + out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, + filt1, filt2, filt3); + out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, + filt1, filt2, filt3); + out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, + filt1, filt2, filt3); + SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); + SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); + tmp0 = PCKEV_XORI128_UB(out0_r, out1_r); + tmp1 = PCKEV_XORI128_UB(out2_r, out3_r); + ST8x4_UB(tmp0, tmp1, dst, dst_stride); + dst += (4 * dst_stride); - ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10, - src76_r, src87_r, src98_r, src109_r); + src10_r = src54_r; + src32_r = src76_r; + src54_r = src98_r; + src21_r = src65_r; + src43_r = src87_r; + src65_r = src109_r; + src6 = src10; + } +} - out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, - filt0, filt1, filt2, filt3); - out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, - filt0, filt1, filt2, filt3); - out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, - filt0, filt1, filt2, filt3); - out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, - filt0, filt1, filt2, filt3); +static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + int8_t *filter, int32_t height) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; + v16i8 filt0, filt1, filt2, filt3; + v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; + v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l; + v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l; + v16u8 tmp0, tmp1, tmp2, tmp3; + v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l; + + src -= (3 * src_stride); - out0_r = SRARI_SATURATE_SIGNED_H(out0_r, 
FILTER_BITS, 7); - out1_r = SRARI_SATURATE_SIGNED_H(out1_r, FILTER_BITS, 7); - out2_r = SRARI_SATURATE_SIGNED_H(out2_r, FILTER_BITS, 7); - out3_r = SRARI_SATURATE_SIGNED_H(out3_r, FILTER_BITS, 7); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); + + LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); + src += (7 * src_stride); + ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, + src54_r, src21_r); + ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); + ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l, + src54_l, src21_l); + ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l); + + for (loop_cnt = (height >> 2); loop_cnt--;) { + LD_SB4(src, src_stride, src7, src8, src9, src10); + XORI_B4_128_SB(src7, src8, src9, src10); + src += (4 * src_stride); - PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0_r, out1_r, out2_r, out3_r, - dst, dst_stride); + ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, + src87_r, src98_r, src109_r); + ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l, + src87_l, src98_l, src109_l); + out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, + filt1, filt2, filt3); + out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, + filt1, filt2, filt3); + out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, + filt1, filt2, filt3); + out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, + filt1, filt2, filt3); + out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0, + filt1, filt2, filt3); + out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0, + filt1, filt2, filt3); + out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0, + filt1, filt2, filt3); + out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0, + filt1, filt2, filt3); + SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); + SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS); + SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); + SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); + PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r, + tmp0, tmp1, tmp2, tmp3); + XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); + ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); dst += (4 * dst_stride); src10_r = src54_r; @@ -133,7 +184,12 @@ static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride, src21_r = src65_r; src43_r = src87_r; src65_r = src109_r; - + src10_l = src54_l; + src32_l = src76_l; + src54_l = src98_l; + src21_l = src65_l; + src43_l = src87_l; + src65_l = src109_l; src6 = src10; } } @@ -147,89 +203,63 @@ static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride, uint32_t loop_cnt, cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 filt0, filt1, filt2, filt3; - v16i8 src10_r, src32_r, src54_r, src76_r, src98_r; - v16i8 src21_r, src43_r, src65_r, src87_r, src109_r; - v16i8 src10_l, src32_l, src54_l, src76_l, src98_l; - v16i8 src21_l, src43_l, src65_l, src87_l, src109_l; - v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l; - v8i16 filt; + v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; + v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l; + v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l; v16u8 tmp0, tmp1, tmp2, tmp3; + v8i16 filt, out0_r, 
out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l; src -= (3 * src_stride); - filt = LOAD_SH(filter); - filt0 = (v16i8)__msa_splati_h(filt, 0); - filt1 = (v16i8)__msa_splati_h(filt, 1); - filt2 = (v16i8)__msa_splati_h(filt, 2); - filt3 = (v16i8)__msa_splati_h(filt, 3); + filt = LD_SH(filter); + SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); for (cnt = (width >> 4); cnt--;) { src_tmp = src; dst_tmp = dst; - LOAD_7VECS_SB(src_tmp, src_stride, - src0, src1, src2, src3, src4, src5, src6); + LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6); + XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src_tmp += (7 * src_stride); - - XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6, - src0, src1, src2, src3, src4, src5, src6, 128); - - ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5, - src1, src3, src5, src2, src4, src6, - src10_r, src32_r, src54_r, src21_r, src43_r, src65_r); - - ILVL_B_6VECS_SB(src0, src2, src4, src1, src3, src5, - src1, src3, src5, src2, src4, src6, - src10_l, src32_l, src54_l, src21_l, src43_l, src65_l); + ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, + src32_r, src54_r, src21_r); + ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); + ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, + src32_l, src54_l, src21_l); + ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l); for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_SB(src_tmp, src_stride, src7, src8, src9, src10); + LD_SB4(src_tmp, src_stride, src7, src8, src9, src10); + XORI_B4_128_SB(src7, src8, src9, src10); src_tmp += (4 * src_stride); - - XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128); - - ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10, - src76_r, src87_r, src98_r, src109_r); - - ILVL_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10, - src76_l, src87_l, src98_l, src109_l); - - out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, - filt0, filt1, filt2, filt3); - out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, - filt0, filt1, filt2, filt3); - out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, - filt0, filt1, filt2, filt3); - out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, - filt0, filt1, filt2, filt3); - - out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, - filt0, filt1, filt2, filt3); - out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, - filt0, filt1, filt2, filt3); - out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, - filt0, filt1, filt2, filt3); - out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, - filt0, filt1, filt2, filt3); - - out0_r = SRARI_SATURATE_SIGNED_H(out0_r, FILTER_BITS, 7); - out1_r = SRARI_SATURATE_SIGNED_H(out1_r, FILTER_BITS, 7); - out2_r = SRARI_SATURATE_SIGNED_H(out2_r, FILTER_BITS, 7); - out3_r = SRARI_SATURATE_SIGNED_H(out3_r, FILTER_BITS, 7); - out0_l = SRARI_SATURATE_SIGNED_H(out0_l, FILTER_BITS, 7); - out1_l = SRARI_SATURATE_SIGNED_H(out1_l, FILTER_BITS, 7); - out2_l = SRARI_SATURATE_SIGNED_H(out2_l, FILTER_BITS, 7); - out3_l = SRARI_SATURATE_SIGNED_H(out3_l, FILTER_BITS, 7); - - out0_r = (v8i16)__msa_pckev_b((v16i8)out0_l, (v16i8)out0_r); - out1_r = (v8i16)__msa_pckev_b((v16i8)out1_l, (v16i8)out1_r); - out2_r = (v8i16)__msa_pckev_b((v16i8)out2_l, (v16i8)out2_r); - out3_r = (v8i16)__msa_pckev_b((v16i8)out3_l, (v16i8)out3_r); - - XORI_B_4VECS_UB(out0_r, out1_r, out2_r, out3_r, - tmp0, tmp1, tmp2, tmp3, 128); - - 
STORE_4VECS_UB(dst_tmp, dst_stride, tmp0, tmp1, tmp2, tmp3); + ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, + src87_r, src98_r, src109_r); + ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l, + src87_l, src98_l, src109_l); + out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, + filt1, filt2, filt3); + out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, + filt1, filt2, filt3); + out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, + filt1, filt2, filt3); + out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, + filt1, filt2, filt3); + out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0, + filt1, filt2, filt3); + out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0, + filt1, filt2, filt3); + out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0, + filt1, filt2, filt3); + out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0, + filt1, filt2, filt3); + SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); + SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS); + SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); + SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); + PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, + out3_r, tmp0, tmp1, tmp2, tmp3); + XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); + ST_UB4(tmp0, tmp1, tmp2, tmp3, dst_tmp, dst_stride); dst_tmp += (4 * dst_stride); src10_r = src54_r; @@ -238,14 +268,12 @@ static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride, src21_r = src65_r; src43_r = src87_r; src65_r = src109_r; - src10_l = src54_l; src32_l = src76_l; src54_l = src98_l; src21_l = src65_l; src43_l = src87_l; src65_l = src109_l; - src6 = src10; } @@ -254,134 +282,77 @@ static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride, } } -static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, - int8_t *filter, int32_t height) { - common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, - filter, height, 16); -} - static void common_vt_8t_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { - common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, - filter, height, 32); + common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height, + 32); } static void common_vt_8t_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { - common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, - filter, height, 64); + common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height, + 64); } static void common_vt_2t_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { - uint32_t out0, out1, out2, out3; v16i8 src0, src1, src2, src3, src4; v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332; - v16i8 filt0; - v8u16 filt; + v16u8 filt0; + v8i16 filt; + v8u16 tmp0, tmp1; - filt = LOAD_UH(filter); - filt0 = (v16i8)__msa_splati_h((v8i16)filt, 0); + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); - LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4); + LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); - ILVR_B_4VECS_SB(src0, src1, src2, src3, src1, src2, src3, src4, - src10_r, src21_r, src32_r, src43_r); - - ILVR_D_2VECS_SB(src2110, src21_r, src10_r, 
src4332, src43_r, src32_r); - - src2110 = (v16i8)__msa_dotp_u_h((v16u8)src2110, (v16u8)filt0); - src4332 = (v16i8)__msa_dotp_u_h((v16u8)src4332, (v16u8)filt0); - - src2110 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src2110, FILTER_BITS, 7); - src4332 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src4332, FILTER_BITS, 7); - - src2110 = (v16i8)__msa_pckev_b((v16i8)src4332, (v16i8)src2110); - - out0 = __msa_copy_u_w((v4i32)src2110, 0); - out1 = __msa_copy_u_w((v4i32)src2110, 1); - out2 = __msa_copy_u_w((v4i32)src2110, 2); - out3 = __msa_copy_u_w((v4i32)src2110, 3); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); + ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, + src32_r, src43_r); + ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); + DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); + ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); } static void common_vt_2t_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { - uint32_t out0, out1, out2, out3, out4, out5, out6, out7; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r; v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776; - v16i8 filt0; - v8u16 filt; + v8u16 tmp0, tmp1, tmp2, tmp3; + v16u8 filt0; + v8i16 filt; - filt = LOAD_UH(filter); - filt0 = (v16i8)__msa_splati_h((v8i16)filt, 0); + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); - LOAD_8VECS_SB(src, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); - src8 = LOAD_SB(src); + src8 = LD_SB(src); src += src_stride; - ILVR_B_8VECS_SB(src0, src1, src2, src3, src4, src5, src6, src7, - src1, src2, src3, src4, src5, src6, src7, src8, - src10_r, src21_r, src32_r, src43_r, - src54_r, src65_r, src76_r, src87_r); - - ILVR_D_4VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r, - src6554, src65_r, src54_r, src8776, src87_r, src76_r); - - src2110 = (v16i8)__msa_dotp_u_h((v16u8)src2110, (v16u8)filt0); - src4332 = (v16i8)__msa_dotp_u_h((v16u8)src4332, (v16u8)filt0); - src6554 = (v16i8)__msa_dotp_u_h((v16u8)src6554, (v16u8)filt0); - src8776 = (v16i8)__msa_dotp_u_h((v16u8)src8776, (v16u8)filt0); - - src2110 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src2110, FILTER_BITS, 7); - src4332 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src4332, FILTER_BITS, 7); - src6554 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src6554, FILTER_BITS, 7); - src8776 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src8776, FILTER_BITS, 7); - - src2110 = (v16i8)__msa_pckev_b((v16i8)src4332, (v16i8)src2110); - src4332 = (v16i8)__msa_pckev_b((v16i8)src8776, (v16i8)src6554); - - out0 = __msa_copy_u_w((v4i32)src2110, 0); - out1 = __msa_copy_u_w((v4i32)src2110, 1); - out2 = __msa_copy_u_w((v4i32)src2110, 2); - out3 = __msa_copy_u_w((v4i32)src2110, 3); - out4 = __msa_copy_u_w((v4i32)src4332, 0); - out5 = __msa_copy_u_w((v4i32)src4332, 1); - out6 = __msa_copy_u_w((v4i32)src4332, 2); - out7 = __msa_copy_u_w((v4i32)src4332, 3); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); - dst += dst_stride; - STORE_WORD(dst, out4); - dst += 
dst_stride; - STORE_WORD(dst, out5); - dst += dst_stride; - STORE_WORD(dst, out6); - dst += dst_stride; - STORE_WORD(dst, out7); + ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, + src32_r, src43_r); + ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, + src76_r, src87_r); + ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, + src87_r, src76_r, src2110, src4332, src6554, src8776); + DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0, + tmp0, tmp1, tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); + ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); + ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride); } static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride, @@ -397,32 +368,24 @@ static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride, static void common_vt_2t_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { - v16u8 src0, src1, src2, src3, src4; - v16u8 vec0, vec1, vec2, vec3, filt0; + v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0; + v16i8 out0, out1; v8u16 tmp0, tmp1, tmp2, tmp3; - v8u16 filt; + v8i16 filt; /* rearranging filter_y */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); - - LOAD_5VECS_UB(src, src_stride, src0, src1, src2, src3, src4); - - ILVR_B_2VECS_UB(src0, src1, src1, src2, vec0, vec1); - ILVR_B_2VECS_UB(src2, src3, src3, src4, vec2, vec3); - - /* filter calc */ - tmp0 = __msa_dotp_u_h(vec0, filt0); - tmp1 = __msa_dotp_u_h(vec1, filt0); - tmp2 = __msa_dotp_u_h(vec2, filt0); - tmp3 = __msa_dotp_u_h(vec3, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); + + LD_UB5(src, src_stride, src0, src1, src2, src3, src4); + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); + ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, + tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); } static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride, @@ -431,51 +394,39 @@ static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride, uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; + v16i8 out0, out1; v8u16 tmp0, tmp1, tmp2, tmp3; - v8u16 filt; + v8i16 filt; /* rearranging filter_y */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); - src0 = LOAD_UB(src); + src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 3); loop_cnt--;) { - LOAD_8VECS_UB(src, src_stride, - src1, src2, src3, src4, src5, src6, src7, src8); + LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); src += (8 * src_stride); - ILVR_B_4VECS_UB(src0, src1, src2, src3, src1, src2, src3, src4, - vec0, 
vec1, vec2, vec3); - - ILVR_B_4VECS_UB(src4, src5, src6, src7, src5, src6, src7, src8, - vec4, vec5, vec6, vec7); - - tmp0 = __msa_dotp_u_h(vec0, filt0); - tmp1 = __msa_dotp_u_h(vec1, filt0); - tmp2 = __msa_dotp_u_h(vec2, filt0); - tmp3 = __msa_dotp_u_h(vec3, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); + ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, + vec2, vec3); + ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, + vec6, vec7); + DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, + tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); - tmp0 = __msa_dotp_u_h(vec4, filt0); - tmp1 = __msa_dotp_u_h(vec5, filt0); - tmp2 = __msa_dotp_u_h(vec6, filt0); - tmp3 = __msa_dotp_u_h(vec7, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); + DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1, + tmp2, tmp3); + SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); + SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); + PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); + ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); src0 = src8; @@ -499,57 +450,45 @@ static void common_vt_2t_16w_msa(const uint8_t *src, int32_t src_stride, v16u8 src0, src1, src2, src3, src4; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; - v8u16 filt; + v8i16 filt; /* rearranging filter_y */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); - src0 = LOAD_UB(src); + src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_UB(src, src_stride, src1, src2, src3, src4); + LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); - ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2); - - tmp0 = __msa_dotp_u_h(vec0, filt0); - tmp1 = __msa_dotp_u_h(vec1, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst); + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); + ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst); dst += dst_stride; - ILV_B_LRLR_UB(src2, src3, src3, src4, vec5, vec4, vec7, vec6); - - tmp2 = __msa_dotp_u_h(vec2, filt0); - tmp3 = __msa_dotp_u_h(vec3, filt0); - - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst); + ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); + ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + 
SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst); dst += dst_stride; - tmp0 = __msa_dotp_u_h(vec4, filt0); - tmp1 = __msa_dotp_u_h(vec5, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst); dst += dst_stride; - tmp2 = __msa_dotp_u_h(vec6, filt0); - tmp3 = __msa_dotp_u_h(vec7, filt0); - - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst); + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst); dst += dst_stride; src0 = src4; @@ -563,93 +502,68 @@ static void common_vt_2t_32w_msa(const uint8_t *src, int32_t src_stride, v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; - v8u16 filt; + v8i16 filt; /* rearranging filter_y */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); - src0 = LOAD_UB(src); - src5 = LOAD_UB(src + 16); + src0 = LD_UB(src); + src5 = LD_UB(src + 16); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { - LOAD_4VECS_UB(src, src_stride, src1, src2, src3, src4); + LD_UB4(src, src_stride, src1, src2, src3, src4); + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); + ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); - ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2); - - LOAD_4VECS_UB(src + 16, src_stride, src6, src7, src8, src9); + LD_UB4(src + 16, src_stride, src6, src7, src8, src9); src += (4 * src_stride); - tmp0 = __msa_dotp_u_h(vec0, filt0); - tmp1 = __msa_dotp_u_h(vec1, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst); - - tmp2 = __msa_dotp_u_h(vec2, filt0); - tmp3 = __msa_dotp_u_h(vec3, filt0); - - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst + dst_stride); - - ILV_B_LRLR_UB(src2, src3, src3, src4, vec5, vec4, vec7, vec6); - - tmp0 = __msa_dotp_u_h(vec4, filt0); - tmp1 = __msa_dotp_u_h(vec5, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 2 * dst_stride); - - tmp2 = __msa_dotp_u_h(vec6, filt0); - tmp3 = __msa_dotp_u_h(vec7, filt0); - - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 3 * dst_stride); - - ILV_B_LRLR_UB(src5, src6, src6, src7, vec1, vec0, vec3, vec2); - - tmp0 = __msa_dotp_u_h(vec0, filt0); - tmp1 = __msa_dotp_u_h(vec1, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 16); - - tmp2 = __msa_dotp_u_h(vec2, filt0); - tmp3 = __msa_dotp_u_h(vec3, filt0); - - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 16 + dst_stride); 
- - ILV_B_LRLR_UB(src7, src8, src8, src9, vec5, vec4, vec7, vec6); - - tmp0 = __msa_dotp_u_h(vec4, filt0); - tmp1 = __msa_dotp_u_h(vec5, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 16 + 2 * dst_stride); - - tmp2 = __msa_dotp_u_h(vec6, filt0); - tmp3 = __msa_dotp_u_h(vec7, filt0); - - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 16 + 3 * dst_stride); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst); + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride); + + ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); + ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst + 2 * dst_stride); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst + 3 * dst_stride); + + ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2); + ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst + 16); + + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst + 16 + dst_stride); + + ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6); + ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst + 16 + 2 * dst_stride); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst + 16 + 3 * dst_stride); dst += (4 * dst_stride); src0 = src4; @@ -661,97 +575,72 @@ static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - v16u8 src8, src9, src10, src11; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; + v16u8 src11, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - v8u16 filt; + v8i16 filt; /* rearranging filter_y */ - filt = LOAD_UH(filter); - filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); + filt = LD_SH(filter); + filt0 = (v16u8)__msa_splati_h(filt, 0); - LOAD_4VECS_UB(src, 16, src0, src3, src6, src9); + LD_UB4(src, 16, src0, src3, src6, src9); src += src_stride; for (loop_cnt = (height >> 1); loop_cnt--;) { - LOAD_2VECS_UB(src, src_stride, src1, src2); - LOAD_2VECS_UB(src + 16, src_stride, src4, src5); - LOAD_2VECS_UB(src + 32, src_stride, src7, src8); - LOAD_2VECS_UB(src + 48, src_stride, src10, src11); + LD_UB2(src, src_stride, src1, src2); + LD_UB2(src + 16, src_stride, src4, src5); + LD_UB2(src + 32, src_stride, src7, src8); + LD_UB2(src + 48, src_stride, src10, src11); src += (2 * 
src_stride); - ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2); - - tmp0 = __msa_dotp_u_h(vec0, filt0); - tmp1 = __msa_dotp_u_h(vec1, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst); - - tmp2 = __msa_dotp_u_h(vec2, filt0); - tmp3 = __msa_dotp_u_h(vec3, filt0); - - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst + dst_stride); - - ILV_B_LRLR_UB(src3, src4, src4, src5, vec5, vec4, vec7, vec6); - - tmp4 = __msa_dotp_u_h(vec4, filt0); - tmp5 = __msa_dotp_u_h(vec5, filt0); - - tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7); - tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp5, tmp4, dst + 16); - - tmp6 = __msa_dotp_u_h(vec6, filt0); - tmp7 = __msa_dotp_u_h(vec7, filt0); - - tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7); - tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp7, tmp6, dst + 16 + dst_stride); - - ILV_B_LRLR_UB(src6, src7, src7, src8, vec1, vec0, vec3, vec2); - - tmp0 = __msa_dotp_u_h(vec0, filt0); - tmp1 = __msa_dotp_u_h(vec1, filt0); - - tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7); - tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 32); - - tmp2 = __msa_dotp_u_h(vec2, filt0); - tmp3 = __msa_dotp_u_h(vec3, filt0); - - tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7); - tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 32 + dst_stride); - - ILV_B_LRLR_UB(src9, src10, src10, src11, vec5, vec4, vec7, vec6); - - tmp4 = __msa_dotp_u_h(vec4, filt0); - tmp5 = __msa_dotp_u_h(vec5, filt0); - - tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7); - tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp5, tmp4, dst + 48); - - tmp6 = __msa_dotp_u_h(vec6, filt0); - tmp7 = __msa_dotp_u_h(vec7, filt0); - - tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7); - tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7); - - PCKEV_B_STORE_VEC(tmp7, tmp6, dst + 48 + dst_stride); + ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); + ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst); + + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride); + + ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6); + ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); + SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); + SAT_UH2_UH(tmp4, tmp5, 7); + PCKEV_ST_SB(tmp4, tmp5, dst + 16); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); + SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); + SAT_UH2_UH(tmp6, tmp7, 7); + PCKEV_ST_SB(tmp6, tmp7, dst + 16 + dst_stride); + + ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2); + ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3); + DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); + SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); + SAT_UH2_UH(tmp0, tmp1, 7); + PCKEV_ST_SB(tmp0, tmp1, dst + 32); + + DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); + SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); + SAT_UH2_UH(tmp2, tmp3, 7); + PCKEV_ST_SB(tmp2, tmp3, dst + 32 + dst_stride); + + 
ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6); + ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7); + DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); + SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); + SAT_UH2_UH(tmp4, tmp5, 7); + PCKEV_ST_SB(tmp4, tmp5, dst + 48); + + DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); + SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); + SAT_UH2_UH(tmp6, tmp7, 7); + PCKEV_ST_SB(tmp6, tmp7, dst + 48 + dst_stride); dst += (2 * dst_stride); src0 = src2; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c index 72b8ab71397..eb8776078b2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c @@ -19,46 +19,35 @@ static void avg_width4_msa(const uint8_t *src, int32_t src_stride, if (0 == (height % 4)) { for (cnt = (height / 4); cnt--;) { - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); + LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - LOAD_4VECS_UB(dst, dst_stride, dst0, dst1, dst2, dst3); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); - dst0 = __msa_aver_u_b(src0, dst0); - dst1 = __msa_aver_u_b(src1, dst1); - dst2 = __msa_aver_u_b(src2, dst2); - dst3 = __msa_aver_u_b(src3, dst3); + AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, + dst0, dst1, dst2, dst3); out0 = __msa_copy_u_w((v4i32)dst0, 0); out1 = __msa_copy_u_w((v4i32)dst1, 0); out2 = __msa_copy_u_w((v4i32)dst2, 0); out3 = __msa_copy_u_w((v4i32)dst3, 0); - - STORE_WORD(dst, out0); - dst += dst_stride; - STORE_WORD(dst, out1); - dst += dst_stride; - STORE_WORD(dst, out2); - dst += dst_stride; - STORE_WORD(dst, out3); - dst += dst_stride; + SW4(out0, out1, out2, out3, dst, dst_stride); + dst += (4 * dst_stride); } } else if (0 == (height % 2)) { for (cnt = (height / 2); cnt--;) { - LOAD_2VECS_UB(src, src_stride, src0, src1); + LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); - LOAD_2VECS_UB(dst, dst_stride, dst0, dst1); + LD_UB2(dst, dst_stride, dst0, dst1); - dst0 = __msa_aver_u_b(src0, dst0); - dst1 = __msa_aver_u_b(src1, dst1); + AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1); out0 = __msa_copy_u_w((v4i32)dst0, 0); out1 = __msa_copy_u_w((v4i32)dst1, 0); - - STORE_WORD(dst, out0); + SW(out0, dst); dst += dst_stride; - STORE_WORD(dst, out1); + SW(out1, dst); dst += dst_stride; } } @@ -72,29 +61,19 @@ static void avg_width8_msa(const uint8_t *src, int32_t src_stride, v16u8 dst0, dst1, dst2, dst3; for (cnt = (height / 4); cnt--;) { - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); + LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); - LOAD_4VECS_UB(dst, dst_stride, dst0, dst1, dst2, dst3); - - dst0 = __msa_aver_u_b(src0, dst0); - dst1 = __msa_aver_u_b(src1, dst1); - dst2 = __msa_aver_u_b(src2, dst2); - dst3 = __msa_aver_u_b(src3, dst3); + AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, + dst0, dst1, dst2, dst3); out0 = __msa_copy_u_d((v2i64)dst0, 0); out1 = __msa_copy_u_d((v2i64)dst1, 0); out2 = __msa_copy_u_d((v2i64)dst2, 0); out3 = __msa_copy_u_d((v2i64)dst3, 0); - - STORE_DWORD(dst, out0); - dst += dst_stride; - STORE_DWORD(dst, out1); - dst += dst_stride; - STORE_DWORD(dst, out2); - dst += dst_stride; - STORE_DWORD(dst, out3); - dst += dst_stride; + SD4(out0, out1, out2, out3, dst, dst_stride); + dst 
+= (4 * dst_stride); } } @@ -105,24 +84,15 @@ static void avg_width16_msa(const uint8_t *src, int32_t src_stride, v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; for (cnt = (height / 8); cnt--;) { - LOAD_8VECS_UB(src, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); + LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); - LOAD_8VECS_UB(dst, dst_stride, - dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); - - dst0 = __msa_aver_u_b(src0, dst0); - dst1 = __msa_aver_u_b(src1, dst1); - dst2 = __msa_aver_u_b(src2, dst2); - dst3 = __msa_aver_u_b(src3, dst3); - dst4 = __msa_aver_u_b(src4, dst4); - dst5 = __msa_aver_u_b(src5, dst5); - dst6 = __msa_aver_u_b(src6, dst6); - dst7 = __msa_aver_u_b(src7, dst7); - - STORE_8VECS_UB(dst, dst_stride, - dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); + AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, + dst0, dst1, dst2, dst3); + AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, + dst4, dst5, dst6, dst7); + ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride); dst += (8 * dst_stride); } } @@ -137,99 +107,34 @@ static void avg_width32_msa(const uint8_t *src, int32_t src_stride, v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; for (cnt = (height / 8); cnt--;) { - src0 = LOAD_UB(src); - src1 = LOAD_UB(src + 16); - src += src_stride; - src2 = LOAD_UB(src); - src3 = LOAD_UB(src + 16); - src += src_stride; - src4 = LOAD_UB(src); - src5 = LOAD_UB(src + 16); - src += src_stride; - src6 = LOAD_UB(src); - src7 = LOAD_UB(src + 16); - src += src_stride; - - dst0 = LOAD_UB(dst_dup); - dst1 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - dst2 = LOAD_UB(dst_dup); - dst3 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - dst4 = LOAD_UB(dst_dup); - dst5 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - dst6 = LOAD_UB(dst_dup); - dst7 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - - src8 = LOAD_UB(src); - src9 = LOAD_UB(src + 16); - src += src_stride; - src10 = LOAD_UB(src); - src11 = LOAD_UB(src + 16); - src += src_stride; - src12 = LOAD_UB(src); - src13 = LOAD_UB(src + 16); - src += src_stride; - src14 = LOAD_UB(src); - src15 = LOAD_UB(src + 16); - src += src_stride; - - dst8 = LOAD_UB(dst_dup); - dst9 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - dst10 = LOAD_UB(dst_dup); - dst11 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - dst12 = LOAD_UB(dst_dup); - dst13 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - dst14 = LOAD_UB(dst_dup); - dst15 = LOAD_UB(dst_dup + 16); - dst_dup += dst_stride; - - dst0 = __msa_aver_u_b(src0, dst0); - dst1 = __msa_aver_u_b(src1, dst1); - dst2 = __msa_aver_u_b(src2, dst2); - dst3 = __msa_aver_u_b(src3, dst3); - dst4 = __msa_aver_u_b(src4, dst4); - dst5 = __msa_aver_u_b(src5, dst5); - dst6 = __msa_aver_u_b(src6, dst6); - dst7 = __msa_aver_u_b(src7, dst7); - dst8 = __msa_aver_u_b(src8, dst8); - dst9 = __msa_aver_u_b(src9, dst9); - dst10 = __msa_aver_u_b(src10, dst10); - dst11 = __msa_aver_u_b(src11, dst11); - dst12 = __msa_aver_u_b(src12, dst12); - dst13 = __msa_aver_u_b(src13, dst13); - dst14 = __msa_aver_u_b(src14, dst14); - dst15 = __msa_aver_u_b(src15, dst15); - - STORE_UB(dst0, dst); - STORE_UB(dst1, dst + 16); - dst += dst_stride; - STORE_UB(dst2, dst); - STORE_UB(dst3, dst + 16); - dst += dst_stride; - STORE_UB(dst4, dst); - STORE_UB(dst5, dst + 16); - dst += dst_stride; - STORE_UB(dst6, dst); - STORE_UB(dst7, dst + 16); - dst += 
dst_stride; - STORE_UB(dst8, dst); - STORE_UB(dst9, dst + 16); - dst += dst_stride; - STORE_UB(dst10, dst); - STORE_UB(dst11, dst + 16); - dst += dst_stride; - STORE_UB(dst12, dst); - STORE_UB(dst13, dst + 16); - dst += dst_stride; - STORE_UB(dst14, dst); - STORE_UB(dst15, dst + 16); - dst += dst_stride; + LD_UB4(src, src_stride, src0, src2, src4, src6); + LD_UB4(src + 16, src_stride, src1, src3, src5, src7); + src += (4 * src_stride); + LD_UB4(dst_dup, dst_stride, dst0, dst2, dst4, dst6); + LD_UB4(dst_dup + 16, dst_stride, dst1, dst3, dst5, dst7); + dst_dup += (4 * dst_stride); + LD_UB4(src, src_stride, src8, src10, src12, src14); + LD_UB4(src + 16, src_stride, src9, src11, src13, src15); + src += (4 * src_stride); + LD_UB4(dst_dup, dst_stride, dst8, dst10, dst12, dst14); + LD_UB4(dst_dup + 16, dst_stride, dst9, dst11, dst13, dst15); + dst_dup += (4 * dst_stride); + + AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, + dst0, dst1, dst2, dst3); + AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, + dst4, dst5, dst6, dst7); + AVER_UB4_UB(src8, dst8, src9, dst9, src10, dst10, src11, dst11, + dst8, dst9, dst10, dst11); + AVER_UB4_UB(src12, dst12, src13, dst13, src14, dst14, src15, dst15, + dst12, dst13, dst14, dst15); + + ST_UB4(dst0, dst2, dst4, dst6, dst, dst_stride); + ST_UB4(dst1, dst3, dst5, dst7, dst + 16, dst_stride); + dst += (4 * dst_stride); + ST_UB4(dst8, dst10, dst12, dst14, dst, dst_stride); + ST_UB4(dst9, dst11, dst13, dst15, dst + 16, dst_stride); + dst += (4 * dst_stride); } } @@ -243,48 +148,40 @@ static void avg_width64_msa(const uint8_t *src, int32_t src_stride, v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; for (cnt = (height / 4); cnt--;) { - LOAD_4VECS_UB(src, 16, src0, src1, src2, src3); + LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; - LOAD_4VECS_UB(src, 16, src4, src5, src6, src7); + LD_UB4(src, 16, src4, src5, src6, src7); src += src_stride; - LOAD_4VECS_UB(src, 16, src8, src9, src10, src11); + LD_UB4(src, 16, src8, src9, src10, src11); src += src_stride; - LOAD_4VECS_UB(src, 16, src12, src13, src14, src15); + LD_UB4(src, 16, src12, src13, src14, src15); src += src_stride; - LOAD_4VECS_UB(dst_dup, 16, dst0, dst1, dst2, dst3); + LD_UB4(dst_dup, 16, dst0, dst1, dst2, dst3); dst_dup += dst_stride; - LOAD_4VECS_UB(dst_dup, 16, dst4, dst5, dst6, dst7); + LD_UB4(dst_dup, 16, dst4, dst5, dst6, dst7); dst_dup += dst_stride; - LOAD_4VECS_UB(dst_dup, 16, dst8, dst9, dst10, dst11); + LD_UB4(dst_dup, 16, dst8, dst9, dst10, dst11); dst_dup += dst_stride; - LOAD_4VECS_UB(dst_dup, 16, dst12, dst13, dst14, dst15); + LD_UB4(dst_dup, 16, dst12, dst13, dst14, dst15); dst_dup += dst_stride; - dst0 = __msa_aver_u_b(src0, dst0); - dst1 = __msa_aver_u_b(src1, dst1); - dst2 = __msa_aver_u_b(src2, dst2); - dst3 = __msa_aver_u_b(src3, dst3); - dst4 = __msa_aver_u_b(src4, dst4); - dst5 = __msa_aver_u_b(src5, dst5); - dst6 = __msa_aver_u_b(src6, dst6); - dst7 = __msa_aver_u_b(src7, dst7); - dst8 = __msa_aver_u_b(src8, dst8); - dst9 = __msa_aver_u_b(src9, dst9); - dst10 = __msa_aver_u_b(src10, dst10); - dst11 = __msa_aver_u_b(src11, dst11); - dst12 = __msa_aver_u_b(src12, dst12); - dst13 = __msa_aver_u_b(src13, dst13); - dst14 = __msa_aver_u_b(src14, dst14); - dst15 = __msa_aver_u_b(src15, dst15); + AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, + dst0, dst1, dst2, dst3); + AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, + dst4, dst5, dst6, dst7); + AVER_UB4_UB(src8, dst8, src9, dst9, src10, dst10, src11, dst11, + dst8, dst9, 
dst10, dst11); + AVER_UB4_UB(src12, dst12, src13, dst13, src14, dst14, src15, dst15, + dst12, dst13, dst14, dst15); - STORE_4VECS_UB(dst, 16, dst0, dst1, dst2, dst3); + ST_UB4(dst0, dst1, dst2, dst3, dst, 16); dst += dst_stride; - STORE_4VECS_UB(dst, 16, dst4, dst5, dst6, dst7); + ST_UB4(dst4, dst5, dst6, dst7, dst, 16); dst += dst_stride; - STORE_4VECS_UB(dst, 16, dst8, dst9, dst10, dst11); + ST_UB4(dst8, dst9, dst10, dst11, dst, 16); dst += dst_stride; - STORE_4VECS_UB(dst, 16, dst12, dst13, dst14, dst15); + ST_UB4(dst12, dst13, dst14, dst15, dst, 16); dst += dst_stride; } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c index 064ba762fa0..7a292c5ce16 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c @@ -12,16 +12,14 @@ #include "vp9/common/mips/msa/vp9_macros_msa.h" static void copy_width8_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, - int32_t height) { + uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; uint64_t out0, out1, out2, out3, out4, out5, out6, out7; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; if (0 == height % 12) { for (cnt = (height / 12); cnt--;) { - LOAD_8VECS_UB(src, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); @@ -33,44 +31,24 @@ static void copy_width8_msa(const uint8_t *src, int32_t src_stride, out6 = __msa_copy_u_d((v2i64)src6, 0); out7 = __msa_copy_u_d((v2i64)src7, 0); - STORE_DWORD(dst, out0); - dst += dst_stride; - STORE_DWORD(dst, out1); - dst += dst_stride; - STORE_DWORD(dst, out2); - dst += dst_stride; - STORE_DWORD(dst, out3); - dst += dst_stride; - STORE_DWORD(dst, out4); - dst += dst_stride; - STORE_DWORD(dst, out5); - dst += dst_stride; - STORE_DWORD(dst, out6); - dst += dst_stride; - STORE_DWORD(dst, out7); - dst += dst_stride; + SD4(out0, out1, out2, out3, dst, dst_stride); + dst += (4 * dst_stride); + SD4(out4, out5, out6, out7, dst, dst_stride); + dst += (4 * dst_stride); - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); + LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); out2 = __msa_copy_u_d((v2i64)src2, 0); out3 = __msa_copy_u_d((v2i64)src3, 0); - - STORE_DWORD(dst, out0); - dst += dst_stride; - STORE_DWORD(dst, out1); - dst += dst_stride; - STORE_DWORD(dst, out2); - dst += dst_stride; - STORE_DWORD(dst, out3); - dst += dst_stride; + SD4(out0, out1, out2, out3, dst, dst_stride); + dst += (4 * dst_stride); } } else if (0 == height % 8) { for (cnt = height >> 3; cnt--;) { - LOAD_8VECS_UB(src, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); @@ -82,53 +60,33 @@ static void copy_width8_msa(const uint8_t *src, int32_t src_stride, out6 = __msa_copy_u_d((v2i64)src6, 0); out7 = __msa_copy_u_d((v2i64)src7, 0); - STORE_DWORD(dst, out0); - dst += dst_stride; - STORE_DWORD(dst, out1); - dst += dst_stride; - STORE_DWORD(dst, out2); - dst += dst_stride; - STORE_DWORD(dst, out3); - dst += dst_stride; - STORE_DWORD(dst, 
out4); - dst += dst_stride; - STORE_DWORD(dst, out5); - dst += dst_stride; - STORE_DWORD(dst, out6); - dst += dst_stride; - STORE_DWORD(dst, out7); - dst += dst_stride; + SD4(out0, out1, out2, out3, dst, dst_stride); + dst += (4 * dst_stride); + SD4(out4, out5, out6, out7, dst, dst_stride); + dst += (4 * dst_stride); } } else if (0 == height % 4) { for (cnt = (height / 4); cnt--;) { - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); + LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); out2 = __msa_copy_u_d((v2i64)src2, 0); out3 = __msa_copy_u_d((v2i64)src3, 0); - STORE_DWORD(dst, out0); - dst += dst_stride; - STORE_DWORD(dst, out1); - dst += dst_stride; - STORE_DWORD(dst, out2); - dst += dst_stride; - STORE_DWORD(dst, out3); - dst += dst_stride; + SD4(out0, out1, out2, out3, dst, dst_stride); + dst += (4 * dst_stride); } } else if (0 == height % 2) { for (cnt = (height / 2); cnt--;) { - LOAD_2VECS_UB(src, src_stride, src0, src1); + LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); - out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); - STORE_DWORD(dst, out0); + SD(out0, dst); dst += dst_stride; - STORE_DWORD(dst, out1); + SD(out1, dst); dst += dst_stride; } } @@ -147,12 +105,12 @@ static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, dst_tmp = dst; for (loop_cnt = (height >> 3); loop_cnt--;) { - LOAD_8VECS_UB(src_tmp, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + LD_UB8(src_tmp, src_stride, + src0, src1, src2, src3, src4, src5, src6, src7); src_tmp += (8 * src_stride); - STORE_8VECS_UB(dst_tmp, dst_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, + dst_tmp, dst_stride); dst_tmp += (8 * dst_stride); } @@ -162,90 +120,79 @@ static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, } static void copy_width16_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, - int32_t height) { + uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; if (0 == height % 12) { for (cnt = (height / 12); cnt--;) { - LOAD_8VECS_UB(src, src_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); - - STORE_8VECS_UB(dst, dst_stride, - src0, src1, src2, src3, src4, src5, src6, src7); + ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride); dst += (8 * dst_stride); - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); + LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - - STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); dst += (4 * dst_stride); } } else if (0 == height % 8) { copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); } else if (0 == height % 4) { for (cnt = (height >> 2); cnt--;) { - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); + LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); dst += (4 * dst_stride); } } } static void copy_width32_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, - int32_t height) { + uint8_t *dst, int32_t dst_stride, int32_t height) { 
int32_t cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; if (0 == height % 12) { for (cnt = (height / 12); cnt--;) { - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); - LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7); + LD_UB4(src, src_stride, src0, src1, src2, src3); + LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); - - STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3); - STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); - LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7); + LD_UB4(src, src_stride, src0, src1, src2, src3); + LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); - - STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3); - STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); - LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7); + LD_UB4(src, src_stride, src0, src1, src2, src3); + LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); - - STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3); - STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); } } else if (0 == height % 8) { copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32); } else if (0 == height % 4) { for (cnt = (height >> 2); cnt--;) { - LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3); - LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7); + LD_UB4(src, src_stride, src0, src1, src2, src3); + LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); - - STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3); - STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); } } } static void copy_width64_msa(const uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride, - int32_t height) { + uint8_t *dst, int32_t dst_stride, int32_t height) { copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64); } @@ -264,8 +211,8 @@ void vp9_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride, uint32_t cnt, tmp; /* 1 word storage */ for (cnt = h; cnt--;) { - tmp = LOAD_WORD(src); - STORE_WORD(dst, tmp); + tmp = LW(src); + SW(tmp, dst); src += src_stride; dst += dst_stride; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h index b109a4014f8..40fe94d3be5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h @@ -16,142 +16,104 @@ extern const uint8_t mc_filt_mask_arr[16 * 3]; -#define HORIZ_8TAP_FILT(src, mask0, mask1, mask2, mask3, \ - filt_h0, filt_h1, filt_h2, filt_h3) ({ \ - v8i16 vec0, vec1, vec2, vec3, horiz_out; \ - \ - vec0 = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src), (v16i8)(src)); \ - 
vec0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)(filt_h0)); \ - vec1 = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src), (v16i8)(src)); \ - vec0 = __msa_dpadd_s_h(vec0, (v16i8)(filt_h1), (v16i8)vec1); \ - vec2 = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src), (v16i8)(src)); \ - vec2 = __msa_dotp_s_h((v16i8)vec2, (v16i8)(filt_h2)); \ - vec3 = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src), (v16i8)(src)); \ - vec2 = __msa_dpadd_s_h(vec2, (v16i8)(filt_h3), (v16i8)vec3); \ - vec0 = __msa_adds_s_h(vec0, vec2); \ - horiz_out = SRARI_SATURATE_SIGNED_H(vec0, FILTER_BITS, 7); \ - \ - horiz_out; \ +#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, \ + filt0, filt1, filt2, filt3) ({ \ + v8i16 tmp0, tmp1; \ + \ + tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \ + tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1); \ + tmp1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \ + tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)vec3, (v16i8)filt3); \ + tmp0 = __msa_adds_s_h(tmp0, tmp1); \ + \ + tmp0; \ }) -#define HORIZ_8TAP_FILT_2VECS(src0, src1, mask0, mask1, mask2, mask3, \ - filt_h0, filt_h1, filt_h2, filt_h3) ({ \ - v8i16 vec0, vec1, vec2, vec3, horiz_out; \ - \ - vec0 = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src0)); \ - vec0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)(filt_h0)); \ - vec1 = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src0)); \ - vec0 = __msa_dpadd_s_h(vec0, (v16i8)(filt_h1), (v16i8)vec1); \ - vec2 = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src0)); \ - vec2 = __msa_dotp_s_h((v16i8)vec2, (v16i8)(filt_h2)); \ - vec3 = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src0)); \ - vec2 = __msa_dpadd_s_h(vec2, ((v16i8)filt_h3), (v16i8)vec3); \ - vec0 = __msa_adds_s_h(vec0, vec2); \ - horiz_out = (v8i16)SRARI_SATURATE_SIGNED_H(vec0, FILTER_BITS, 7); \ - \ - horiz_out; \ +#define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, \ + filt_h0, filt_h1, filt_h2, filt_h3) ({ \ + v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ + v8i16 hz_out_m; \ + \ + VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3, \ + vec0_m, vec1_m, vec2_m, vec3_m); \ + hz_out_m = FILT_8TAP_DPADD_S_H(vec0_m, vec1_m, vec2_m, vec3_m, \ + filt_h0, filt_h1, filt_h2, filt_h3); \ + \ + hz_out_m = __msa_srari_h(hz_out_m, FILTER_BITS); \ + hz_out_m = __msa_sat_s_h(hz_out_m, 7); \ + \ + hz_out_m; \ }) -#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, \ - filt0, filt1, filt2, filt3) ({ \ - v8i16 tmp0, tmp1; \ - \ - tmp0 = __msa_dotp_s_h((v16i8)(vec0), (v16i8)(filt0)); \ - tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)(vec1), (v16i8)(filt1)); \ - tmp1 = __msa_dotp_s_h((v16i8)(vec2), (v16i8)(filt2)); \ - tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)(vec3), ((v16i8)filt3)); \ - tmp0 = __msa_adds_s_h(tmp0, tmp1); \ - \ - tmp0; \ -}) +#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, \ + mask0, mask1, mask2, mask3, \ + filt0, filt1, filt2, filt3, \ + out0, out1) { \ + v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ + v8i16 res0_m, res1_m, res2_m, res3_m; \ + \ + VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ + DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m); \ + VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ + DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m); \ + VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \ + DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m); \ + VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m); \ + DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, 
res3_m); \ + ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1); \ +} + +#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, \ + mask0, mask1, mask2, mask3, \ + filt0, filt1, filt2, filt3, \ + out0, out1, out2, out3) { \ + v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ + v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m; \ + \ + VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \ + DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ + res0_m, res1_m, res2_m, res3_m); \ + VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m); \ + DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2, \ + res4_m, res5_m, res6_m, res7_m); \ + VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m); \ + DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \ + res0_m, res1_m, res2_m, res3_m); \ + VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m); \ + VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m); \ + DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \ + res4_m, res5_m, res6_m, res7_m); \ + ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m, \ + res7_m, out0, out1, out2, out3); \ +} + +#define PCKEV_XORI128_AVG_ST_UB(in0, in1, dst, pdst) { \ + v16u8 tmp_m; \ + \ + tmp_m = PCKEV_XORI128_UB(in1, in0); \ + tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \ + ST_UB(tmp_m, (pdst)); \ +} -#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, \ - mask0, mask1, mask2, mask3, \ - filt0, filt1, filt2, filt3, \ - out0, out1) { \ - v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ - v8i16 res0_m, res1_m, res2_m, res3_m; \ - \ - vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src0)); \ - vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src3), (v16i8)(src2)); \ - \ - res0_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt0)); \ - res1_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt0)); \ - \ - vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src0)); \ - vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src3), (v16i8)(src2)); \ - \ - res0_m = __msa_dpadd_s_h(res0_m, (filt1), (v16i8)vec2_m); \ - res1_m = __msa_dpadd_s_h(res1_m, (filt1), (v16i8)vec3_m); \ - \ - vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src0)); \ - vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src3), (v16i8)(src2)); \ - \ - res2_m = __msa_dotp_s_h((v16i8)(filt2), (v16i8)vec4_m); \ - res3_m = __msa_dotp_s_h((v16i8)(filt2), (v16i8)vec5_m); \ - \ - vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src0)); \ - vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src3), (v16i8)(src2)); \ - \ - res2_m = __msa_dpadd_s_h(res2_m, (v16i8)(filt3), (v16i8)vec6_m); \ - res3_m = __msa_dpadd_s_h(res3_m, (v16i8)(filt3), (v16i8)vec7_m); \ - \ - out0 = __msa_adds_s_h(res0_m, res2_m); \ - out1 = __msa_adds_s_h(res1_m, res3_m); \ +#define PCKEV_AVG_ST_UB(in0, in1, dst, pdst) { \ + v16u8 tmp_m; \ + \ + tmp_m = (v16u8)__msa_pckev_b((v16i8)in0, (v16i8)in1); \ + tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \ + ST_UB(tmp_m, (pdst)); \ } -#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, \ - mask0, mask1, mask2, mask3, \ - filt0, filt1, filt2, 
filt3, \ - out0, out1, out2, out3) { \ - v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \ - v8i16 vec4_m, vec5_m, vec6_m, vec7_m; \ - v8i16 res0_m, res1_m, res2_m, res3_m; \ - v8i16 res4_m, res5_m, res6_m, res7_m; \ - \ - vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src0), (v16i8)(src0)); \ - vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src1)); \ - vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src2), (v16i8)(src2)); \ - vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src3), (v16i8)(src3)); \ - \ - res0_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt0)); \ - res1_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt0)); \ - res2_m = __msa_dotp_s_h((v16i8)vec2_m, (v16i8)(filt0)); \ - res3_m = __msa_dotp_s_h((v16i8)vec3_m, (v16i8)(filt0)); \ - \ - vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src0), (v16i8)(src0)); \ - vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src1)); \ - vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src2), (v16i8)(src2)); \ - vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src3), (v16i8)(src3)); \ - \ - res4_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt2)); \ - res5_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt2)); \ - res6_m = __msa_dotp_s_h((v16i8)vec2_m, (v16i8)(filt2)); \ - res7_m = __msa_dotp_s_h((v16i8)vec3_m, (v16i8)(filt2)); \ - \ - vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src0), (v16i8)(src0)); \ - vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src1)); \ - vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src2), (v16i8)(src2)); \ - vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src3), (v16i8)(src3)); \ - \ - res0_m = __msa_dpadd_s_h(res0_m, (v16i8)(filt1), (v16i8)vec4_m); \ - res1_m = __msa_dpadd_s_h(res1_m, (v16i8)(filt1), (v16i8)vec5_m); \ - res2_m = __msa_dpadd_s_h(res2_m, (v16i8)(filt1), (v16i8)vec6_m); \ - res3_m = __msa_dpadd_s_h(res3_m, (v16i8)(filt1), (v16i8)vec7_m); \ - \ - vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src0), (v16i8)(src0)); \ - vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src1)); \ - vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src2), (v16i8)(src2)); \ - vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src3), (v16i8)(src3)); \ - \ - res4_m = __msa_dpadd_s_h(res4_m, (v16i8)(filt3), (v16i8)vec4_m); \ - res5_m = __msa_dpadd_s_h(res5_m, (v16i8)(filt3), (v16i8)vec5_m); \ - res6_m = __msa_dpadd_s_h(res6_m, (v16i8)(filt3), (v16i8)vec6_m); \ - res7_m = __msa_dpadd_s_h(res7_m, (v16i8)(filt3), (v16i8)vec7_m); \ - \ - out0 = __msa_adds_s_h(res0_m, res4_m); \ - out1 = __msa_adds_s_h(res1_m, res5_m); \ - out2 = __msa_adds_s_h(res2_m, res6_m); \ - out3 = __msa_adds_s_h(res3_m, res7_m); \ +#define PCKEV_AVG_ST8x4_UB(in1, dst0, in2, dst1, in3, dst2, in4, dst3, \ + pdst, stride) { \ + v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + uint8_t *pdst_m = (uint8_t *)(pdst); \ + \ + PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m); \ + PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \ + AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \ + ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \ } #endif /* VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_ */ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c index e5c0eaa32f3..dd7ca35d3bd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c +++ 
b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c @@ -9,265 +9,7 @@ */ #include <assert.h> - -#include "vpx_ports/mem.h" -#include "vp9/common/vp9_idct.h" -#include "vp9/common/mips/msa/vp9_macros_msa.h" - -#define SET_COSPI_PAIR(c0_h, c1_h) ({ \ - v8i16 out0, r0_m, r1_m; \ - \ - r0_m = __msa_fill_h(c0_h); \ - r1_m = __msa_fill_h(c1_h); \ - out0 = __msa_ilvev_h(r1_m, r0_m); \ - \ - out0; \ -}) - -#define DOTP_CONST_PAIR(reg0, reg1, const0, const1, out0, out1) { \ - v8i16 k0_m = __msa_fill_h(const0); \ - v8i16 s0_m, s1_m, s2_m, s3_m; \ - \ - s0_m = __msa_fill_h(const1); \ - k0_m = __msa_ilvev_h(s0_m, k0_m); \ - \ - s0_m = __msa_ilvl_h(-reg1, reg0); \ - s1_m = __msa_ilvr_h(-reg1, reg0); \ - s2_m = __msa_ilvl_h(reg0, reg1); \ - s3_m = __msa_ilvr_h(reg0, reg1); \ - s1_m = (v8i16)__msa_dotp_s_w(s1_m, k0_m); \ - s0_m = (v8i16)__msa_dotp_s_w(s0_m, k0_m); \ - s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \ - s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \ - out0 = __msa_pckev_h(s0_m, s1_m); \ - \ - s1_m = (v8i16)__msa_dotp_s_w(s3_m, k0_m); \ - s0_m = (v8i16)__msa_dotp_s_w(s2_m, k0_m); \ - s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \ - s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \ - out1 = __msa_pckev_h(s0_m, s1_m); \ -} - -#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1) { \ - v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \ - v8i16 madd_s0_m, madd_s1_m; \ - \ - ILV_H_LR_SH(m0, m1, madd_s1_m, madd_s0_m); \ - \ - DOTP_S_W_4VECS_SW(madd_s0_m, c0, madd_s1_m, c0, \ - madd_s0_m, c1, madd_s1_m, c1, \ - madd0_m, madd1_m, madd2_m, madd3_m); \ - \ - SRARI_W_4VECS_SW(madd0_m, madd1_m, madd2_m, madd3_m, \ - madd0_m, madd1_m, madd2_m, madd3_m, \ - DCT_CONST_BITS); \ - \ - PCKEV_H_2VECS_SH(madd1_m, madd0_m, madd3_m, madd2_m, \ - res0, res1); \ -} - -#define VP9_MADD_BF(inp0, inp1, inp2, inp3, \ - cst0, cst1, cst2, cst3, \ - out0, out1, out2, out3) { \ - v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ - v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ - v4i32 m4_m, m5_m; \ - \ - ILV_H_LRLR_SH(inp0, inp1, inp2, inp3, \ - madd_s1_m, madd_s0_m, madd_s3_m, madd_s2_m); \ - \ - DOTP_S_W_4VECS_SW(madd_s0_m, cst0, madd_s1_m, cst0, \ - madd_s2_m, cst2, madd_s3_m, cst2, \ - tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ - \ - m4_m = tmp0_m + tmp2_m; \ - m5_m = tmp1_m + tmp3_m; \ - tmp3_m = tmp1_m - tmp3_m; \ - tmp2_m = tmp0_m - tmp2_m; \ - \ - SRARI_W_4VECS_SW(m4_m, m5_m, tmp2_m, tmp3_m, \ - m4_m, m5_m, tmp2_m, tmp3_m, \ - DCT_CONST_BITS); \ - \ - PCKEV_H_2VECS_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \ - \ - DOTP_S_W_4VECS_SW(madd_s0_m, cst1, madd_s1_m, cst1, \ - madd_s2_m, cst3, madd_s3_m, cst3, \ - tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ - \ - m4_m = tmp0_m + tmp2_m; \ - m5_m = tmp1_m + tmp3_m; \ - tmp3_m = tmp1_m - tmp3_m; \ - tmp2_m = tmp0_m - tmp2_m; \ - \ - SRARI_W_4VECS_SW(m4_m, m5_m, tmp2_m, tmp3_m, \ - m4_m, m5_m, tmp2_m, tmp3_m, \ - DCT_CONST_BITS); \ - \ - PCKEV_H_2VECS_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ -} - -#define TRANSPOSE8x8_H1(in0, in1, in2, in3, \ - in4, in5, in6, in7, \ - out0, out1, out2, out3, \ - out4, out5, out6, out7) { \ - v8i16 loc0_m, loc1_m; \ - v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ - v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ - \ - loc0_m = __msa_ilvr_h((in6), (in4)); \ - loc1_m = __msa_ilvr_h((in7), (in5)); \ - tmp0_m = __msa_ilvr_h(loc1_m, loc0_m); \ - tmp1_m = __msa_ilvl_h(loc1_m, loc0_m); \ - \ - loc0_m = __msa_ilvl_h((in6), (in4)); \ - loc1_m = __msa_ilvl_h((in7), (in5)); \ - tmp2_m = __msa_ilvr_h(loc1_m, 
loc0_m); \ - tmp3_m = __msa_ilvl_h(loc1_m, loc0_m); \ - \ - loc0_m = __msa_ilvr_h((in2), (in0)); \ - loc1_m = __msa_ilvr_h((in3), (in1)); \ - tmp4_m = __msa_ilvr_h(loc1_m, loc0_m); \ - tmp5_m = __msa_ilvl_h(loc1_m, loc0_m); \ - \ - loc0_m = __msa_ilvl_h((in2), (in0)); \ - loc1_m = __msa_ilvl_h((in3), (in1)); \ - tmp6_m = __msa_ilvr_h(loc1_m, loc0_m); \ - tmp7_m = __msa_ilvl_h(loc1_m, loc0_m); \ - \ - out0 = (v8i16)__msa_pckev_d((v2i64)tmp0_m, (v2i64)tmp4_m); \ - out1 = (v8i16)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \ - out2 = (v8i16)__msa_pckev_d((v2i64)tmp1_m, (v2i64)tmp5_m); \ - out3 = (v8i16)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \ - out4 = (v8i16)__msa_pckev_d((v2i64)tmp2_m, (v2i64)tmp6_m); \ - out5 = (v8i16)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \ - out6 = (v8i16)__msa_pckev_d((v2i64)tmp3_m, (v2i64)tmp7_m); \ - out7 = (v8i16)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \ -} - -#define VP9_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, \ - r8, r9, r10, r11, r12, r13, r14, r15, \ - out0, out1, out2, out3, out4, out5, out6, out7, \ - out8, out9, out10, out11, \ - out12, out13, out14, out15) { \ - v8i16 g0_m, g1_m, g2_m, g3_m, g4_m, g5_m, g6_m, g7_m; \ - v8i16 g8_m, g9_m, g10_m, g11_m, g12_m, g13_m, g14_m, g15_m; \ - v8i16 h0_m, h1_m, h2_m, h3_m, h4_m, h5_m, h6_m, h7_m; \ - v8i16 h8_m, h9_m, h10_m, h11_m; \ - v8i16 k0_m, k1_m, k2_m, k3_m; \ - \ - /* stage 1 */ \ - k0_m = SET_COSPI_PAIR(cospi_1_64, cospi_31_64); \ - k1_m = SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); \ - k2_m = SET_COSPI_PAIR(cospi_17_64, cospi_15_64); \ - k3_m = SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); \ - VP9_MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, \ - g0_m, g1_m, g2_m, g3_m); \ - \ - k0_m = SET_COSPI_PAIR(cospi_5_64, cospi_27_64); \ - k1_m = SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); \ - k2_m = SET_COSPI_PAIR(cospi_21_64, cospi_11_64); \ - k3_m = SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); \ - VP9_MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, \ - g4_m, g5_m, g6_m, g7_m); \ - \ - k0_m = SET_COSPI_PAIR(cospi_9_64, cospi_23_64); \ - k1_m = SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); \ - k2_m = SET_COSPI_PAIR(cospi_25_64, cospi_7_64); \ - k3_m = SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); \ - VP9_MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, \ - g8_m, g9_m, g10_m, g11_m); \ - \ - k0_m = SET_COSPI_PAIR(cospi_13_64, cospi_19_64); \ - k1_m = SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); \ - k2_m = SET_COSPI_PAIR(cospi_29_64, cospi_3_64); \ - k3_m = SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); \ - VP9_MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, \ - g12_m, g13_m, g14_m, g15_m); \ - \ - /* stage 2 */ \ - k0_m = SET_COSPI_PAIR(cospi_4_64, cospi_28_64); \ - k1_m = SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); \ - k2_m = SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); \ - VP9_MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, \ - h0_m, h1_m, h2_m, h3_m); \ - \ - k0_m = SET_COSPI_PAIR(cospi_12_64, cospi_20_64); \ - k1_m = SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); \ - k2_m = SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); \ - VP9_MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, \ - h4_m, h5_m, h6_m, h7_m); \ - \ - BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10); \ - \ - BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, \ - h8_m, h9_m, h10_m, h11_m, h6_m, h4_m, h2_m, h0_m); \ - \ - /* stage 3 */ \ - BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m); \ - \ - k0_m = SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ - k1_m = SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ - k2_m = 
SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); \ - VP9_MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, \ - out4, out6, out5, out7); \ - VP9_MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, \ - out12, out14, out13, out15); \ - \ - /* stage 4 */ \ - k0_m = SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \ - k1_m = SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); \ - k2_m = SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \ - k3_m = SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); \ - VP9_MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \ - VP9_MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \ - VP9_MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \ - VP9_MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \ -} - -#define VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4(dest, dest_stride, \ - in0, in1, in2, in3) { \ - uint64_t out0_m, out1_m, out2_m, out3_m; \ - v8i16 res0_m, res1_m, res2_m, res3_m; \ - v16u8 dest0_m, dest1_m, dest2_m, dest3_m; \ - v16i8 tmp0_m, tmp1_m; \ - v16i8 zero_m = { 0 }; \ - uint8_t *dst_m = (uint8_t *)(dest); \ - \ - LOAD_4VECS_UB(dst_m, (dest_stride), \ - dest0_m, dest1_m, dest2_m, dest3_m); \ - \ - res0_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest0_m); \ - res1_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest1_m); \ - res2_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest2_m); \ - res3_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest3_m); \ - \ - res0_m += (v8i16)(in0); \ - res1_m += (v8i16)(in1); \ - res2_m += (v8i16)(in2); \ - res3_m += (v8i16)(in3); \ - \ - res0_m = CLIP_UNSIGNED_CHAR_H(res0_m); \ - res1_m = CLIP_UNSIGNED_CHAR_H(res1_m); \ - res2_m = CLIP_UNSIGNED_CHAR_H(res2_m); \ - res3_m = CLIP_UNSIGNED_CHAR_H(res3_m); \ - \ - tmp0_m = __msa_pckev_b((v16i8)res1_m, (v16i8)res0_m); \ - tmp1_m = __msa_pckev_b((v16i8)res3_m, (v16i8)res2_m); \ - \ - out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \ - out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \ - out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \ - out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \ - \ - STORE_DWORD(dst_m, out0_m); \ - dst_m += (dest_stride); \ - STORE_DWORD(dst_m, out1_m); \ - dst_m += (dest_stride); \ - STORE_DWORD(dst_m, out2_m); \ - dst_m += (dest_stride); \ - STORE_DWORD(dst_m, out3_m); \ -} +#include "vp9/common/mips/msa/vp9_idct_msa.h" void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { v8i16 loc0, loc1, loc2, loc3; @@ -275,63 +17,38 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15; v8i16 tmp5, tmp6, tmp7; - /* load left top 8x8 */ - LOAD_8VECS_SH(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - - /* load right top 8x8 */ - LOAD_8VECS_SH((input + 8), 16, - reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); - - /* transpose block */ - TRANSPOSE8x8_H1(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, - reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - - /* transpose block */ - TRANSPOSE8x8_H1(reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15, - reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); - - DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); - DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); - - loc0 = reg2 + reg10; - reg2 = reg2 - reg10; - loc1 = reg14 + reg6; - reg14 = reg14 - reg6; - - DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); - DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); - DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); - - reg14 = reg8 - reg12; - reg2 = reg8 + reg12; - reg10 = reg0 - reg4; - reg6 = reg0 + reg4; - - 
reg0 = reg2 - loc1; - reg2 = reg2 + loc1; - reg12 = reg14 - loc0; - reg14 = reg14 + loc0; - reg4 = reg6 - loc3; - reg6 = reg6 + loc3; - reg8 = reg10 - loc2; - reg10 = reg10 + loc2; + LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + input += 8; + LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); + + TRANSPOSE8x8_SH_SH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, + reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + TRANSPOSE8x8_SH_SH(reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15, + reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); + VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); + VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); + BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); + VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); + VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); + VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); + BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); + SUB4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg0, reg12, reg4, + reg8); + ADD4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg2, reg14, reg6, + reg10); /* stage 2 */ - DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); - DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); + VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); + VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); reg9 = reg1 - loc2; reg1 = reg1 + loc2; reg7 = reg15 - loc3; reg15 = reg15 + loc3; - DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); - DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); - - reg13 = loc0 + reg5; - reg5 = loc0 - reg5; - reg3 = loc1 + reg11; - reg11 = loc1 - reg11; + VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); + VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); + BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5); loc1 = reg15 + reg3; reg3 = reg15 - reg3; @@ -346,8 +63,8 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { tmp7 = loc1; reg0 = loc2; - DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); - DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); + VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); + VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); loc0 = reg9 + reg5; reg5 = reg9 - reg5; @@ -360,21 +77,15 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { loc2 = reg4 - loc0; tmp5 = loc1; - DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); + VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); + BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); - loc0 = reg8 + reg5; - loc1 = reg8 - reg5; - reg4 = reg10 + reg11; - reg9 = reg10 - reg11; reg10 = loc0; reg11 = loc1; - DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); + VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); + BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); - reg8 = reg12 + reg3; - reg5 = reg12 - reg3; - reg6 = reg14 + reg13; - reg7 = reg14 - reg13; reg13 = loc2; /* Transpose and store the output */ @@ -383,49 +94,36 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { reg3 = tmp7; /* transpose block */ - TRANSPOSE8x8_H1(reg0, reg2, 
reg4, reg6, reg8, reg10, reg12, reg14, - reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14); - - STORE_8VECS_SH(output, 16, reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14); + TRANSPOSE8x8_SH_SH(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14, + reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14); + ST_SH8(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14, output, 16); /* transpose block */ - TRANSPOSE8x8_H1(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, - reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15); - - STORE_8VECS_SH((output + 8), 16, - reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15); + TRANSPOSE8x8_SH_SH(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, + reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15); + ST_SH8(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, (output + 8), 16); } -void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, - int32_t dest_stride) { +void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, + int32_t dst_stride) { v8i16 loc0, loc1, loc2, loc3; v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14; v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15; v8i16 tmp5, tmp6, tmp7; /* load up 8x8 */ - LOAD_8VECS_SH(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - + LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + input += 8 * 16; /* load bottom 8x8 */ - LOAD_8VECS_SH((input + 8 * 16), 16, - reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); + LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); - DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); - DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); - - loc0 = reg2 + reg10; - reg2 = reg2 - reg10; - loc1 = reg14 + reg6; - reg14 = reg14 - reg6; - - DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); - DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); - DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); - - reg14 = reg8 - reg12; - reg2 = reg8 + reg12; - reg10 = reg0 - reg4; - reg6 = reg0 + reg4; + VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); + VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); + BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); + VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); + VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); + VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); + BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); reg0 = reg2 - loc1; reg2 = reg2 + loc1; @@ -437,21 +135,17 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, reg10 = reg10 + loc2; /* stage 2 */ - DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); - DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); + VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); + VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); reg9 = reg1 - loc2; reg1 = reg1 + loc2; reg7 = reg15 - loc3; reg15 = reg15 + loc3; - DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); - DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); - - reg13 = loc0 + reg5; - reg5 = loc0 - reg5; - reg3 = loc1 + reg11; - reg11 = loc1 - reg11; + VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); + VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); + BUTTERFLY_4(loc0, loc1, reg11, reg5, 
reg13, reg3, reg11, reg5); loc1 = reg15 + reg3; reg3 = reg15 - reg3; @@ -466,8 +160,8 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, tmp7 = loc1; reg0 = loc2; - DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); - DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); + VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); + VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); loc0 = reg9 + reg5; reg5 = reg9 - reg5; @@ -480,21 +174,14 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, loc2 = reg4 - loc0; tmp5 = loc1; - DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); + VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); + BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); - loc0 = reg8 + reg5; - loc1 = reg8 - reg5; - reg4 = reg10 + reg11; - reg9 = reg10 - reg11; reg10 = loc0; reg11 = loc1; - DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); - - reg8 = reg12 + reg3; - reg5 = reg12 - reg3; - reg6 = reg14 + reg13; - reg7 = reg14 - reg13; + VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); + BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); reg13 = loc2; /* Transpose and store the output */ @@ -502,22 +189,21 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, reg14 = tmp6; reg3 = tmp7; - SRARI_H_4VECS_SH(reg0, reg2, reg4, reg6, reg0, reg2, reg4, reg6, 6); - VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4(dest, dest_stride, - reg0, reg2, reg4, reg6); - SRARI_H_4VECS_SH(reg8, reg10, reg12, reg14, reg8, reg10, reg12, reg14, 6); - VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4((dest + (4 * dest_stride)), - dest_stride, reg8, reg10, reg12, reg14); - SRARI_H_4VECS_SH(reg3, reg13, reg11, reg5, reg3, reg13, reg11, reg5, 6); - VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4((dest + (8 * dest_stride)), - dest_stride, reg3, reg13, reg11, reg5); - SRARI_H_4VECS_SH(reg7, reg9, reg1, reg15, reg7, reg9, reg1, reg15, 6); - VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4((dest + (12 * dest_stride)), - dest_stride, reg7, reg9, reg1, reg15); + SRARI_H4_SH(reg0, reg2, reg4, reg6, 6); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6); + dst += (4 * dst_stride); + SRARI_H4_SH(reg8, reg10, reg12, reg14, 6); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14); + dst += (4 * dst_stride); + SRARI_H4_SH(reg3, reg13, reg11, reg5, 6); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5); + dst += (4 * dst_stride); + SRARI_H4_SH(reg7, reg9, reg1, reg15, 6); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15); } -void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dest, - int32_t dest_stride) { +void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); int16_t *out = out_arr; @@ -531,13 +217,13 @@ void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dest + (i << 3)), - dest_stride); + vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), + dst_stride); } } -void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dest, - int32_t dest_stride) { +void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { uint8_t i; DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); int16_t *out = 
out_arr; @@ -570,64 +256,38 @@ void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dest + (i << 3)), - dest_stride); + vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), + dst_stride); } } -void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dest, - int32_t dest_stride) { +void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { uint8_t i; - int32_t const1; int16_t out; - v8i16 const2, res0, res1, res2, res3, res4, res5, res6, res7; - v16u8 dest0, dest1, dest2, dest3; - v16u8 tmp0, tmp1, tmp2, tmp3; - v16i8 zero = { 0 }; - - out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - const1 = ROUND_POWER_OF_TWO(out, 6); - - const2 = __msa_fill_h(const1); - - for (i = 0; i < 4; ++i) { - LOAD_4VECS_UB(dest, dest_stride, dest0, dest1, dest2, dest3); - - res0 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest0); - res1 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest1); - res2 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest2); - res3 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest3); - res4 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest0); - res5 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest1); - res6 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest2); - res7 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest3); - - res0 += const2; - res1 += const2; - res2 += const2; - res3 += const2; - res4 += const2; - res5 += const2; - res6 += const2; - res7 += const2; - - res0 = CLIP_UNSIGNED_CHAR_H(res0); - res1 = CLIP_UNSIGNED_CHAR_H(res1); - res2 = CLIP_UNSIGNED_CHAR_H(res2); - res3 = CLIP_UNSIGNED_CHAR_H(res3); - res4 = CLIP_UNSIGNED_CHAR_H(res4); - res5 = CLIP_UNSIGNED_CHAR_H(res5); - res6 = CLIP_UNSIGNED_CHAR_H(res6); - res7 = CLIP_UNSIGNED_CHAR_H(res7); - - tmp0 = (v16u8)__msa_pckev_b((v16i8)res4, (v16i8)res0); - tmp1 = (v16u8)__msa_pckev_b((v16i8)res5, (v16i8)res1); - tmp2 = (v16u8)__msa_pckev_b((v16i8)res6, (v16i8)res2); - tmp3 = (v16u8)__msa_pckev_b((v16i8)res7, (v16i8)res3); - - STORE_4VECS_UB(dest, dest_stride, tmp0, tmp1, tmp2, tmp3); - dest += (4 * dest_stride); + v8i16 vec, res0, res1, res2, res3, res4, res5, res6, res7; + v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3; + + out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); + out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); + out = ROUND_POWER_OF_TWO(out, 6); + + vec = __msa_fill_h(out); + + for (i = 4; i--;) { + LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); + UNPCK_UB_SH(dst0, res0, res4); + UNPCK_UB_SH(dst1, res1, res5); + UNPCK_UB_SH(dst2, res2, res6); + UNPCK_UB_SH(dst3, res3, res7); + ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3); + ADD4(res4, vec, res5, vec, res6, vec, res7, vec, res4, res5, res6, res7); + CLIP_SH4_0_255(res0, res1, res2, res3); + CLIP_SH4_0_255(res4, res5, res6, res7); + PCKEV_B4_UB(res4, res0, res5, res1, res6, res2, res7, res3, + tmp0, tmp1, tmp2, tmp3); + ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); + dst += (4 * dst_stride); } } @@ -636,15 +296,12 @@ static void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) { v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; /* load input data */ - LOAD_16VECS_SH(input, 8, - l0, l8, l1, l9, l2, l10, l3, l11, - l4, l12, l5, l13, l6, l14, l7, l15); - - TRANSPOSE8x8_H_SH(l0, l1, l2, l3, l4, l5, l6, l7, - l0, l1, l2, l3, l4, l5, l6, l7); - - TRANSPOSE8x8_H_SH(l8, l9, l10, l11, l12, l13, l14, l15, 
- l8, l9, l10, l11, l12, l13, l14, l15); + LD_SH16(input, 8, + l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, l7, l15); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, + l0, l1, l2, l3, l4, l5, l6, l7); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, + l8, l9, l10, l11, l12, l13, l14, l15); /* ADST in horizontal */ VP9_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7, @@ -657,19 +314,16 @@ static void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) { l13 = -r13; l15 = -r1; - TRANSPOSE8x8_H_SH(r0, l1, r12, l3, r6, r14, r10, r2, - l0, l1, l2, l3, l4, l5, l6, l7); - - STORE_8VECS_SH(output, 16, l0, l1, l2, l3, l4, l5, l6, l7); - - TRANSPOSE8x8_H_SH(r3, r11, r15, r7, r5, l13, r9, l15, - l8, l9, l10, l11, l12, l13, l14, l15); - - STORE_8VECS_SH((output + 8), 16, l8, l9, l10, l11, l12, l13, l14, l15); + TRANSPOSE8x8_SH_SH(r0, l1, r12, l3, r6, r14, r10, r2, + l0, l1, l2, l3, l4, l5, l6, l7); + ST_SH8(l0, l1, l2, l3, l4, l5, l6, l7, output, 16); + TRANSPOSE8x8_SH_SH(r3, r11, r15, r7, r5, l13, r9, l15, + l8, l9, l10, l11, l12, l13, l14, l15); + ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16); } -static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, - int32_t dest_stride) { +static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, + int32_t dst_stride) { v8i16 v0, v2, v4, v6, k0, k1, k2, k3; v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 out0, out1, out2, out3, out4, out5, out6, out7; @@ -678,210 +332,163 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11; v8i16 res0, res1, res2, res3, res4, res5, res6, res7; v8i16 res8, res9, res10, res11, res12, res13, res14, res15; - v16u8 dest0, dest1, dest2, dest3, dest4, dest5, dest6, dest7; - v16u8 dest8, dest9, dest10, dest11, dest12, dest13, dest14, dest15; + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; v16i8 zero = { 0 }; - r0 = LOAD_SH(input + 0 * 16); - r3 = LOAD_SH(input + 3 * 16); - r4 = LOAD_SH(input + 4 * 16); - r7 = LOAD_SH(input + 7 * 16); - r8 = LOAD_SH(input + 8 * 16); - r11 = LOAD_SH(input + 11 * 16); - r12 = LOAD_SH(input + 12 * 16); - r15 = LOAD_SH(input + 15 * 16); + r0 = LD_SH(input + 0 * 16); + r3 = LD_SH(input + 3 * 16); + r4 = LD_SH(input + 4 * 16); + r7 = LD_SH(input + 7 * 16); + r8 = LD_SH(input + 8 * 16); + r11 = LD_SH(input + 11 * 16); + r12 = LD_SH(input + 12 * 16); + r15 = LD_SH(input + 15 * 16); /* stage 1 */ - k0 = SET_COSPI_PAIR(cospi_1_64, cospi_31_64); - k1 = SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); - k2 = SET_COSPI_PAIR(cospi_17_64, cospi_15_64); - k3 = SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); + k0 = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64); + k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); + k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); + k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); - - k0 = SET_COSPI_PAIR(cospi_9_64, cospi_23_64); - k1 = SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); - k2 = SET_COSPI_PAIR(cospi_25_64, cospi_7_64); - k3 = SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); + k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); + k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); + k2 = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); + k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); - BUTTERFLY_4(g0, 
g2, g10, g8, h8, h9, v2, v0); - - k0 = SET_COSPI_PAIR(cospi_4_64, cospi_28_64); - k1 = SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); - k2 = SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); + k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); + k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); + k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); - r1 = LOAD_SH(input + 1 * 16); - r2 = LOAD_SH(input + 2 * 16); - r5 = LOAD_SH(input + 5 * 16); - r6 = LOAD_SH(input + 6 * 16); - r9 = LOAD_SH(input + 9 * 16); - r10 = LOAD_SH(input + 10 * 16); - r13 = LOAD_SH(input + 13 * 16); - r14 = LOAD_SH(input + 14 * 16); - - k0 = SET_COSPI_PAIR(cospi_5_64, cospi_27_64); - k1 = SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); - k2 = SET_COSPI_PAIR(cospi_21_64, cospi_11_64); - k3 = SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); + r1 = LD_SH(input + 1 * 16); + r2 = LD_SH(input + 2 * 16); + r5 = LD_SH(input + 5 * 16); + r6 = LD_SH(input + 6 * 16); + r9 = LD_SH(input + 9 * 16); + r10 = LD_SH(input + 10 * 16); + r13 = LD_SH(input + 13 * 16); + r14 = LD_SH(input + 14 * 16); + + k0 = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); + k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); + k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); + k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7); - - k0 = SET_COSPI_PAIR(cospi_13_64, cospi_19_64); - k1 = SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); - k2 = SET_COSPI_PAIR(cospi_29_64, cospi_3_64); - k3 = SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); + k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); + k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); + k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); + k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15); - BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4); - BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10); out1 = -out1; - out0 = __msa_srari_h(out0, 6); - out1 = __msa_srari_h(out1, 6); - dest0 = LOAD_UB(dest + 0 * dest_stride); - dest1 = LOAD_UB(dest + 15 * dest_stride); - res0 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest0); - res1 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest1); - res0 += out0; - res1 += out1; - res0 = CLIP_UNSIGNED_CHAR_H(res0); - res1 = CLIP_UNSIGNED_CHAR_H(res1); - res0 = (v8i16)__msa_pckev_b((v16i8)res0, (v16i8)res0); - res1 = (v8i16)__msa_pckev_b((v16i8)res1, (v16i8)res1); - STORE_DWORD(dest, __msa_copy_u_d((v2i64)res0, 0)); - STORE_DWORD(dest + 15 * dest_stride, __msa_copy_u_d((v2i64)res1, 0)); - - k0 = SET_COSPI_PAIR(cospi_12_64, cospi_20_64); - k1 = SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); - k2 = SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); + SRARI_H2_SH(out0, out1, 6); + dst0 = LD_UB(dst + 0 * dst_stride); + dst1 = LD_UB(dst + 15 * dst_stride); + ILVR_B2_SH(zero, dst0, zero, dst1, res0, res1); + ADD2(res0, out0, res1, out1, res0, res1); + CLIP_SH2_0_255(res0, res1); + PCKEV_B2_SH(res0, res0, res1, res1, res0, res1); + ST8x1_UB(res0, dst); + ST8x1_UB(res1, dst + 15 * dst_stride); + + k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); + k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); + k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); - BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); out8 = -out8; - out8 = __msa_srari_h(out8, 6); - out9 = __msa_srari_h(out9, 6); - dest8 = LOAD_UB(dest + 1 * dest_stride); - dest9 = LOAD_UB(dest + 14 * dest_stride); - res8 = 
(v8i16)__msa_ilvr_b(zero, (v16i8)dest8); - res9 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest9); - res8 += out8; - res9 += out9; - res8 = CLIP_UNSIGNED_CHAR_H(res8); - res9 = CLIP_UNSIGNED_CHAR_H(res9); - res8 = (v8i16)__msa_pckev_b((v16i8)res8, (v16i8)res8); - res9 = (v8i16)__msa_pckev_b((v16i8)res9, (v16i8)res9); - STORE_DWORD(dest + dest_stride, __msa_copy_u_d((v2i64)res8, 0)); - STORE_DWORD(dest + 14 * dest_stride, __msa_copy_u_d((v2i64)res9, 0)); - - k0 = SET_COSPI_PAIR(cospi_8_64, cospi_24_64); - k1 = SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); - k2 = SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); + SRARI_H2_SH(out8, out9, 6); + dst8 = LD_UB(dst + 1 * dst_stride); + dst9 = LD_UB(dst + 14 * dst_stride); + ILVR_B2_SH(zero, dst8, zero, dst9, res8, res9); + ADD2(res8, out8, res9, out9, res8, res9); + CLIP_SH2_0_255(res8, res9); + PCKEV_B2_SH(res8, res8, res9, res9, res8, res9); + ST8x1_UB(res8, dst + dst_stride); + ST8x1_UB(res9, dst + 14 * dst_stride); + + k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); + k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); + k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); VP9_MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7); out4 = -out4; - out4 = __msa_srari_h(out4, 6); - out5 = __msa_srari_h(out5, 6); - dest4 = LOAD_UB(dest + 3 * dest_stride); - dest5 = LOAD_UB(dest + 12 * dest_stride); - res4 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest4); - res5 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest5); - res4 += out4; - res5 += out5; - res4 = CLIP_UNSIGNED_CHAR_H(res4); - res5 = CLIP_UNSIGNED_CHAR_H(res5); - res4 = (v8i16)__msa_pckev_b((v16i8)res4, (v16i8)res4); - res5 = (v8i16)__msa_pckev_b((v16i8)res5, (v16i8)res5); - STORE_DWORD(dest + 3 * dest_stride, __msa_copy_u_d((v2i64)res4, 0)); - STORE_DWORD(dest + 12 * dest_stride, __msa_copy_u_d((v2i64)res5, 0)); + SRARI_H2_SH(out4, out5, 6); + dst4 = LD_UB(dst + 3 * dst_stride); + dst5 = LD_UB(dst + 12 * dst_stride); + ILVR_B2_SH(zero, dst4, zero, dst5, res4, res5); + ADD2(res4, out4, res5, out5, res4, res5); + CLIP_SH2_0_255(res4, res5); + PCKEV_B2_SH(res4, res4, res5, res5, res4, res5); + ST8x1_UB(res4, dst + 3 * dst_stride); + ST8x1_UB(res5, dst + 12 * dst_stride); VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); out13 = -out13; - out12 = __msa_srari_h(out12, 6); - out13 = __msa_srari_h(out13, 6); - dest12 = LOAD_UB(dest + 2 * dest_stride); - dest13 = LOAD_UB(dest + 13 * dest_stride); - res12 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest12); - res13 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest13); - res12 += out12; - res13 += out13; - res12 = CLIP_UNSIGNED_CHAR_H(res12); - res13 = CLIP_UNSIGNED_CHAR_H(res13); - res12 = (v8i16)__msa_pckev_b((v16i8)res12, (v16i8)res12); - res13 = (v8i16)__msa_pckev_b((v16i8)res13, (v16i8)res13); - STORE_DWORD(dest + 2 * dest_stride, __msa_copy_u_d((v2i64)res12, 0)); - STORE_DWORD(dest + 13 * dest_stride, __msa_copy_u_d((v2i64)res13, 0)); - - k0 = SET_COSPI_PAIR(cospi_16_64, cospi_16_64); - k3 = SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); + SRARI_H2_SH(out12, out13, 6); + dst12 = LD_UB(dst + 2 * dst_stride); + dst13 = LD_UB(dst + 13 * dst_stride); + ILVR_B2_SH(zero, dst12, zero, dst13, res12, res13); + ADD2(res12, out12, res13, out13, res12, res13); + CLIP_SH2_0_255(res12, res13); + PCKEV_B2_SH(res12, res12, res13, res13, res12, res13); + ST8x1_UB(res12, dst + 2 * dst_stride); + ST8x1_UB(res13, dst + 13 * dst_stride); + + k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); + k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7); 
- out6 = __msa_srari_h(out6, 6); - out7 = __msa_srari_h(out7, 6); - dest6 = LOAD_UB(dest + 4 * dest_stride); - dest7 = LOAD_UB(dest + 11 * dest_stride); - res6 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest6); - res7 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest7); - res6 += out6; - res7 += out7; - res6 = CLIP_UNSIGNED_CHAR_H(res6); - res7 = CLIP_UNSIGNED_CHAR_H(res7); - res6 = (v8i16)__msa_pckev_b((v16i8)res6, (v16i8)res6); - res7 = (v8i16)__msa_pckev_b((v16i8)res7, (v16i8)res7); - STORE_DWORD(dest + 4 * dest_stride, __msa_copy_u_d((v2i64)res6, 0)); - STORE_DWORD(dest + 11 * dest_stride, __msa_copy_u_d((v2i64)res7, 0)); + SRARI_H2_SH(out6, out7, 6); + dst6 = LD_UB(dst + 4 * dst_stride); + dst7 = LD_UB(dst + 11 * dst_stride); + ILVR_B2_SH(zero, dst6, zero, dst7, res6, res7); + ADD2(res6, out6, res7, out7, res6, res7); + CLIP_SH2_0_255(res6, res7); + PCKEV_B2_SH(res6, res6, res7, res7, res6, res7); + ST8x1_UB(res6, dst + 4 * dst_stride); + ST8x1_UB(res7, dst + 11 * dst_stride); VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11); - out10 = __msa_srari_h(out10, 6); - out11 = __msa_srari_h(out11, 6); - dest10 = LOAD_UB(dest + 6 * dest_stride); - dest11 = LOAD_UB(dest + 9 * dest_stride); - res10 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest10); - res11 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest11); - res10 += out10; - res11 += out11; - res10 = CLIP_UNSIGNED_CHAR_H(res10); - res11 = CLIP_UNSIGNED_CHAR_H(res11); - res10 = (v8i16)__msa_pckev_b((v16i8)res10, (v16i8)res10); - res11 = (v8i16)__msa_pckev_b((v16i8)res11, (v16i8)res11); - STORE_DWORD(dest + 6 * dest_stride, __msa_copy_u_d((v2i64)res10, 0)); - STORE_DWORD(dest + 9 * dest_stride, __msa_copy_u_d((v2i64)res11, 0)); - - k1 = SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); - k2 = SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); + SRARI_H2_SH(out10, out11, 6); + dst10 = LD_UB(dst + 6 * dst_stride); + dst11 = LD_UB(dst + 9 * dst_stride); + ILVR_B2_SH(zero, dst10, zero, dst11, res10, res11); + ADD2(res10, out10, res11, out11, res10, res11); + CLIP_SH2_0_255(res10, res11); + PCKEV_B2_SH(res10, res10, res11, res11, res10, res11); + ST8x1_UB(res10, dst + 6 * dst_stride); + ST8x1_UB(res11, dst + 9 * dst_stride); + + k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); + k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3); - out2 = __msa_srari_h(out2, 6); - out3 = __msa_srari_h(out3, 6); - dest2 = LOAD_UB(dest + 7 * dest_stride); - dest3 = LOAD_UB(dest + 8 * dest_stride); - res2 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest2); - res3 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest3); - res2 += out2; - res3 += out3; - res2 = CLIP_UNSIGNED_CHAR_H(res2); - res3 = CLIP_UNSIGNED_CHAR_H(res3); - res2 = (v8i16)__msa_pckev_b((v16i8)res2, (v16i8)res2); - res3 = (v8i16)__msa_pckev_b((v16i8)res3, (v16i8)res3); - STORE_DWORD(dest + 7 * dest_stride, __msa_copy_u_d((v2i64)res2, 0)); - STORE_DWORD(dest + 8 * dest_stride, __msa_copy_u_d((v2i64)res3, 0)); + SRARI_H2_SH(out2, out3, 6); + dst2 = LD_UB(dst + 7 * dst_stride); + dst3 = LD_UB(dst + 8 * dst_stride); + ILVR_B2_SH(zero, dst2, zero, dst3, res2, res3); + ADD2(res2, out2, res3, out3, res2, res3); + CLIP_SH2_0_255(res2, res3); + PCKEV_B2_SH(res2, res2, res3, res3, res2, res3); + ST8x1_UB(res2, dst + 7 * dst_stride); + ST8x1_UB(res3, dst + 8 * dst_stride); VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15); - out14 = __msa_srari_h(out14, 6); - out15 = __msa_srari_h(out15, 6); - dest14 = LOAD_UB(dest + 5 * dest_stride); - dest15 = LOAD_UB(dest + 10 * dest_stride); - res14 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest14); 
- res15 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest15); - res14 += out14; - res15 += out15; - res14 = CLIP_UNSIGNED_CHAR_H(res14); - res15 = CLIP_UNSIGNED_CHAR_H(res15); - res14 = (v8i16)__msa_pckev_b((v16i8)res14, (v16i8)res14); - res15 = (v8i16)__msa_pckev_b((v16i8)res15, (v16i8)res15); - STORE_DWORD(dest + 5 * dest_stride, __msa_copy_u_d((v2i64)res14, 0)); - STORE_DWORD(dest + 10 * dest_stride, __msa_copy_u_d((v2i64)res15, 0)); + SRARI_H2_SH(out14, out15, 6); + dst14 = LD_UB(dst + 5 * dst_stride); + dst15 = LD_UB(dst + 10 * dst_stride); + ILVR_B2_SH(zero, dst14, zero, dst15, res14, res15); + ADD2(res14, out14, res15, out15, res14, res15); + CLIP_SH2_0_255(res14, res15); + PCKEV_B2_SH(res14, res14, res15, res15, res14, res15); + ST8x1_UB(res14, dst + 5 * dst_stride); + ST8x1_UB(res15, dst + 10 * dst_stride); } -void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest, - int32_t dest_stride, int32_t tx_type) { +void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride, int32_t tx_type) { int32_t i; DECLARE_ALIGNED(32, int16_t, out[16 * 16]); int16_t *out_ptr = &out[0]; @@ -897,8 +504,8 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), - (dest + (i << 3)), dest_stride); + vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); } break; case ADST_DCT: @@ -911,7 +518,7 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 2; ++i) { vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)), - (dest + (i << 3)), dest_stride); + (dst + (i << 3)), dst_stride); } break; case DCT_ADST: @@ -924,8 +531,8 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ - vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), - (dest + (i << 3)), dest_stride); + vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); } break; case ADST_ADST: @@ -938,7 +545,7 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 2; ++i) { vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)), - (dest + (i << 3)), dest_stride); + (dst + (i << 3)), dst_stride); } break; default: diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c index f576b50ea07..77d53a4c35f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c @@ -8,108 +8,34 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "vpx_ports/mem.h" -#include "vp9/common/vp9_idct.h" -#include "vp9/common/mips/msa/vp9_macros_msa.h" - -#define DOTP_CONST_PAIR(reg0, reg1, const0, const1, out0, out1) { \ - v8i16 k0_m = __msa_fill_h(const0); \ - v8i16 s0_m, s1_m, s2_m, s3_m; \ - \ - s0_m = __msa_fill_h(const1); \ - k0_m = __msa_ilvev_h(s0_m, k0_m); \ - \ - s0_m = __msa_ilvl_h(-reg1, reg0); \ - s1_m = __msa_ilvr_h(-reg1, reg0); \ - s2_m = __msa_ilvl_h(reg0, reg1); \ - s3_m = __msa_ilvr_h(reg0, reg1); \ - s1_m = (v8i16)__msa_dotp_s_w(s1_m, k0_m); \ - s0_m = (v8i16)__msa_dotp_s_w(s0_m, k0_m); \ - s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \ - s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \ - out0 = __msa_pckev_h(s0_m, s1_m); \ - \ - s1_m = (v8i16)__msa_dotp_s_w(s3_m, k0_m); \ - s0_m = (v8i16)__msa_dotp_s_w(s2_m, k0_m); \ - s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \ - s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \ - out1 = __msa_pckev_h(s0_m, s1_m); \ -} - -#define VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS(dest, dest_stride, \ - in0, in1, in2, in3) { \ - uint64_t out0_m, out1_m, out2_m, out3_m; \ - v8i16 res0_m, res1_m, res2_m, res3_m; \ - v16u8 dest0_m, dest1_m, dest2_m, dest3_m; \ - v16i8 tmp0_m, tmp1_m; \ - v16i8 zero_m = { 0 }; \ - uint8_t *dst_m = (uint8_t *)(dest); \ - \ - dest0_m = LOAD_UB(dst_m); \ - dest1_m = LOAD_UB(dst_m + 4 * dest_stride); \ - dest2_m = LOAD_UB(dst_m + 8 * dest_stride); \ - dest3_m = LOAD_UB(dst_m + 12 * dest_stride); \ - \ - res0_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest0_m); \ - res1_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest1_m); \ - res2_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest2_m); \ - res3_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest3_m); \ - \ - res0_m += (v8i16)(in0); \ - res1_m += (v8i16)(in1); \ - res2_m += (v8i16)(in2); \ - res3_m += (v8i16)(in3); \ - \ - res0_m = CLIP_UNSIGNED_CHAR_H(res0_m); \ - res1_m = CLIP_UNSIGNED_CHAR_H(res1_m); \ - res2_m = CLIP_UNSIGNED_CHAR_H(res2_m); \ - res3_m = CLIP_UNSIGNED_CHAR_H(res3_m); \ - \ - tmp0_m = __msa_pckev_b((v16i8)res1_m, (v16i8)res0_m); \ - tmp1_m = __msa_pckev_b((v16i8)res3_m, (v16i8)res2_m); \ - \ - out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \ - out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \ - out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \ - out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \ - \ - STORE_DWORD(dst_m, out0_m); \ - dst_m += (4 * dest_stride); \ - STORE_DWORD(dst_m, out1_m); \ - dst_m += (4 * dest_stride); \ - STORE_DWORD(dst_m, out2_m); \ - dst_m += (4 * dest_stride); \ - STORE_DWORD(dst_m, out3_m); \ -} +#include "vp9/common/mips/msa/vp9_idct_msa.h" static void vp9_idct32x8_row_transpose_store(const int16_t *input, int16_t *tmp_buf) { - v8i16 m0, m1, m2, m3, m4, m5, m6, m7; - v8i16 n0, n1, n2, n3, n4, n5, n6, n7; + v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; /* 1st & 2nd 8x8 */ - LOAD_8VECS_SH(input, 32, m0, n0, m1, n1, m2, n2, m3, n3); - LOAD_8VECS_SH((input + 8), 32, m4, n4, m5, n5, m6, n6, m7, n7); - TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3, - m0, n0, m1, n1, m2, n2, m3, n3); - TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7, - m4, n4, m5, n5, m6, n6, m7, n7); - STORE_4VECS_SH((tmp_buf), 8, m0, n0, m1, n1); - STORE_4VECS_SH((tmp_buf + 4 * 8), 8, m2, n2, m3, n3); - STORE_4VECS_SH((tmp_buf + 8 * 8), 8, m4, n4, m5, n5); - STORE_4VECS_SH((tmp_buf + 12 * 8), 8, m6, n6, m7, n7); + LD_SH8(input, 32, m0, n0, m1, n1, m2, n2, m3, n3); + LD_SH8((input + 8), 32, m4, n4, m5, n5, m6, n6, m7, n7); + TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, 
m3, n3, + m0, n0, m1, n1, m2, n2, m3, n3); + TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7, + m4, n4, m5, n5, m6, n6, m7, n7); + ST_SH8(m0, n0, m1, n1, m2, n2, m3, n3, (tmp_buf), 8); + ST_SH4(m4, n4, m5, n5, (tmp_buf + 8 * 8), 8); + ST_SH4(m6, n6, m7, n7, (tmp_buf + 12 * 8), 8); /* 3rd & 4th 8x8 */ - LOAD_8VECS_SH((input + 16), 32, m0, n0, m1, n1, m2, n2, m3, n3); - LOAD_8VECS_SH((input + 24), 32, m4, n4, m5, n5, m6, n6, m7, n7); - TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3, - m0, n0, m1, n1, m2, n2, m3, n3); - TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7, - m4, n4, m5, n5, m6, n6, m7, n7); - STORE_4VECS_SH((tmp_buf + 16 * 8), 8, m0, n0, m1, n1); - STORE_4VECS_SH((tmp_buf + 20 * 8), 8, m2, n2, m3, n3); - STORE_4VECS_SH((tmp_buf + 24 * 8), 8, m4, n4, m5, n5); - STORE_4VECS_SH((tmp_buf + 28 * 8), 8, m6, n6, m7, n7); + LD_SH8((input + 16), 32, m0, n0, m1, n1, m2, n2, m3, n3); + LD_SH8((input + 24), 32, m4, n4, m5, n5, m6, n6, m7, n7); + TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3, + m0, n0, m1, n1, m2, n2, m3, n3); + TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7, + m4, n4, m5, n5, m6, n6, m7, n7); + ST_SH4(m0, n0, m1, n1, (tmp_buf + 16 * 8), 8); + ST_SH4(m2, n2, m3, n3, (tmp_buf + 20 * 8), 8); + ST_SH4(m4, n4, m5, n5, (tmp_buf + 24 * 8), 8); + ST_SH4(m6, n6, m7, n7, (tmp_buf + 28 * 8), 8); } static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf, @@ -119,46 +45,28 @@ static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf, v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7; /* Even stage 1 */ - LOAD_8VECS_SH(tmp_buf, 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + LD_SH8(tmp_buf, 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); - DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); - - vec0 = reg1 - reg5; - vec1 = reg1 + reg5; - vec2 = reg7 - reg3; - vec3 = reg7 + reg3; - - DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + VP9_DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); + VP9_DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); + BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0); + VP9_DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); loc1 = vec3; loc0 = vec1; - DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); - DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); - - vec0 = reg4 - reg6; - vec1 = reg4 + reg6; - vec2 = reg0 - reg2; - vec3 = reg0 + reg2; - - stp4 = vec0 - loc0; - stp3 = vec0 + loc0; - stp7 = vec1 - loc1; - stp0 = vec1 + loc1; - stp5 = vec2 - loc2; - stp2 = vec2 + loc2; - stp6 = vec3 - loc3; - stp1 = vec3 + loc3; + VP9_DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); + VP9_DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); + BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0); + BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4); + BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5); /* Even stage 2 */ - LOAD_8VECS_SH((tmp_buf + 16), 32, - reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - - DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); - DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); - DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); - DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); + LD_SH8((tmp_buf + 16), 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + VP9_DOTP_CONST_PAIR(reg0, reg7, 
cospi_30_64, cospi_2_64, reg0, reg7); + VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); + VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); + VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); vec0 = reg0 + reg4; reg0 = reg0 - reg4; @@ -176,54 +84,42 @@ static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf, reg4 = reg5 - vec1; reg5 = reg5 + vec1; - DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); - DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); + VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); + VP9_DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); vec0 = reg0 - reg6; reg0 = reg0 + reg6; vec1 = reg7 - reg1; reg7 = reg7 + reg1; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); - DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); + VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); /* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */ - loc0 = stp0 - reg5; - loc1 = stp0 + reg5; - loc2 = stp1 - reg7; - loc3 = stp1 + reg7; - STORE_SH(loc0, (tmp_eve_buf + 15 * 8)); - STORE_SH(loc1, (tmp_eve_buf)); - STORE_SH(loc2, (tmp_eve_buf + 14 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 8)); - - loc0 = stp2 - reg1; - loc1 = stp2 + reg1; - loc2 = stp3 - reg4; - loc3 = stp3 + reg4; - STORE_SH(loc0, (tmp_eve_buf + 13 * 8)); - STORE_SH(loc1, (tmp_eve_buf + 2 * 8)); - STORE_SH(loc2, (tmp_eve_buf + 12 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 3 * 8)); + BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0); + ST_SH(loc0, (tmp_eve_buf + 15 * 8)); + ST_SH(loc1, (tmp_eve_buf)); + ST_SH(loc2, (tmp_eve_buf + 14 * 8)); + ST_SH(loc3, (tmp_eve_buf + 8)); + + BUTTERFLY_4(stp2, stp3, reg4, reg1, loc1, loc3, loc2, loc0); + ST_SH(loc0, (tmp_eve_buf + 13 * 8)); + ST_SH(loc1, (tmp_eve_buf + 2 * 8)); + ST_SH(loc2, (tmp_eve_buf + 12 * 8)); + ST_SH(loc3, (tmp_eve_buf + 3 * 8)); /* Store 8 */ - loc0 = stp4 - reg3; - loc1 = stp4 + reg3; - loc2 = stp5 - reg6; - loc3 = stp5 + reg6; - STORE_SH(loc0, (tmp_eve_buf + 11 * 8)); - STORE_SH(loc1, (tmp_eve_buf + 4 * 8)); - STORE_SH(loc2, (tmp_eve_buf + 10 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 5 * 8)); - - loc0 = stp6 - reg0; - loc1 = stp6 + reg0; - loc2 = stp7 - reg2; - loc3 = stp7 + reg2; - STORE_SH(loc0, (tmp_eve_buf + 9 * 8)); - STORE_SH(loc1, (tmp_eve_buf + 6 * 8)); - STORE_SH(loc2, (tmp_eve_buf + 8 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 7 * 8)); + BUTTERFLY_4(stp4, stp5, reg6, reg3, loc1, loc3, loc2, loc0); + ST_SH(loc0, (tmp_eve_buf + 11 * 8)); + ST_SH(loc1, (tmp_eve_buf + 4 * 8)); + ST_SH(loc2, (tmp_eve_buf + 10 * 8)); + ST_SH(loc3, (tmp_eve_buf + 5 * 8)); + + BUTTERFLY_4(stp6, stp7, reg2, reg0, loc1, loc3, loc2, loc0); + ST_SH(loc0, (tmp_eve_buf + 9 * 8)); + ST_SH(loc1, (tmp_eve_buf + 6 * 8)); + ST_SH(loc2, (tmp_eve_buf + 8 * 8)); + ST_SH(loc3, (tmp_eve_buf + 7 * 8)); } static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, @@ -232,19 +128,19 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; /* Odd stage 1 */ - reg0 = LOAD_SH(tmp_buf + 8); - reg1 = LOAD_SH(tmp_buf + 7 * 8); - reg2 = LOAD_SH(tmp_buf + 9 * 8); - reg3 = LOAD_SH(tmp_buf + 15 * 8); - reg4 = LOAD_SH(tmp_buf + 17 * 8); - reg5 = LOAD_SH(tmp_buf + 23 * 8); - reg6 = LOAD_SH(tmp_buf + 25 * 8); - reg7 = LOAD_SH(tmp_buf + 31 * 8); - - DOTP_CONST_PAIR(reg0, reg7, 
cospi_31_64, cospi_1_64, reg0, reg7); - DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); - DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); - DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); + reg0 = LD_SH(tmp_buf + 8); + reg1 = LD_SH(tmp_buf + 7 * 8); + reg2 = LD_SH(tmp_buf + 9 * 8); + reg3 = LD_SH(tmp_buf + 15 * 8); + reg4 = LD_SH(tmp_buf + 17 * 8); + reg5 = LD_SH(tmp_buf + 23 * 8); + reg6 = LD_SH(tmp_buf + 25 * 8); + reg7 = LD_SH(tmp_buf + 31 * 8); + + VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); + VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); + VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); + VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); vec0 = reg0 + reg3; reg0 = reg0 - reg3; @@ -257,262 +153,192 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, reg5 = vec0; /* 4 Stores */ - vec0 = reg5 + reg4; - vec1 = reg3 + reg2; - STORE_SH(vec0, (tmp_odd_buf + 4 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 5 * 8)); + ADD2(reg5, reg4, reg3, reg2, vec0, vec1); + ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8); - vec0 = reg5 - reg4; - vec1 = reg3 - reg2; - DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); - STORE_SH(vec0, (tmp_odd_buf)); - STORE_SH(vec1, (tmp_odd_buf + 8)); + SUB2(reg5, reg4, reg3, reg2, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); + ST_SH2(vec0, vec1, (tmp_odd_buf), 8); /* 4 Stores */ - DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); - DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); - - vec0 = reg0 + reg1; - vec2 = reg7 - reg6; - vec1 = reg7 + reg6; - vec3 = reg0 - reg1; - STORE_SH(vec0, (tmp_odd_buf + 6 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 7 * 8)); + VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); + VP9_DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); + BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3); + ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8); - DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); - STORE_SH(vec2, (tmp_odd_buf + 2 * 8)); - STORE_SH(vec3, (tmp_odd_buf + 3 * 8)); + VP9_DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); + ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8); /* Odd stage 2 */ - /* 8 loads */ - reg0 = LOAD_SH(tmp_buf + 3 * 8); - reg1 = LOAD_SH(tmp_buf + 5 * 8); - reg2 = LOAD_SH(tmp_buf + 11 * 8); - reg3 = LOAD_SH(tmp_buf + 13 * 8); - reg4 = LOAD_SH(tmp_buf + 19 * 8); - reg5 = LOAD_SH(tmp_buf + 21 * 8); - reg6 = LOAD_SH(tmp_buf + 27 * 8); - reg7 = LOAD_SH(tmp_buf + 29 * 8); - - DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); - DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); - DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); - DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); + reg0 = LD_SH(tmp_buf + 3 * 8); + reg1 = LD_SH(tmp_buf + 5 * 8); + reg2 = LD_SH(tmp_buf + 11 * 8); + reg3 = LD_SH(tmp_buf + 13 * 8); + reg4 = LD_SH(tmp_buf + 19 * 8); + reg5 = LD_SH(tmp_buf + 21 * 8); + reg6 = LD_SH(tmp_buf + 27 * 8); + reg7 = LD_SH(tmp_buf + 29 * 8); + + VP9_DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); + VP9_DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); + VP9_DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); + VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); /* 4 Stores */ - vec0 = reg1 - 
reg2; - vec1 = reg6 - reg5; - vec2 = reg0 - reg3; - vec3 = reg7 - reg4; - DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); - DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); - - vec2 = loc2 - loc0; - vec3 = loc3 - loc1; - vec0 = loc2 + loc0; - vec1 = loc3 + loc1; - STORE_SH(vec0, (tmp_odd_buf + 12 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 15 * 8)); + SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, + vec0, vec1, vec2, vec3); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); + VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); - DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); + BUTTERFLY_4(loc3, loc2, loc0, loc1, vec1, vec0, vec2, vec3); + ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8); - STORE_SH(vec0, (tmp_odd_buf + 10 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 11 * 8)); + VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); + ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8); /* 4 Stores */ - vec0 = reg0 + reg3; - vec1 = reg1 + reg2; - vec2 = reg6 + reg5; - vec3 = reg7 + reg4; - reg0 = vec0 + vec1; - reg1 = vec3 + vec2; - reg2 = vec0 - vec1; - reg3 = vec3 - vec2; - STORE_SH(reg0, (tmp_odd_buf + 13 * 8)); - STORE_SH(reg1, (tmp_odd_buf + 14 * 8)); - - DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); + ADD4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, + vec1, vec2, vec0, vec3); + BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2); + ST_SH(reg0, (tmp_odd_buf + 13 * 8)); + ST_SH(reg1, (tmp_odd_buf + 14 * 8)); - STORE_SH(reg0, (tmp_odd_buf + 8 * 8)); - STORE_SH(reg1, (tmp_odd_buf + 9 * 8)); + VP9_DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); + ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8); /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */ /* Load 8 & Store 8 */ - reg0 = LOAD_SH(tmp_odd_buf); - reg1 = LOAD_SH(tmp_odd_buf + 1 * 8); - reg2 = LOAD_SH(tmp_odd_buf + 2 * 8); - reg3 = LOAD_SH(tmp_odd_buf + 3 * 8); - reg4 = LOAD_SH(tmp_odd_buf + 8 * 8); - reg5 = LOAD_SH(tmp_odd_buf + 9 * 8); - reg6 = LOAD_SH(tmp_odd_buf + 10 * 8); - reg7 = LOAD_SH(tmp_odd_buf + 11 * 8); - - loc0 = reg0 + reg4; - loc1 = reg1 + reg5; - loc2 = reg2 + reg6; - loc3 = reg3 + reg7; - STORE_SH(loc0, (tmp_odd_buf)); - STORE_SH(loc1, (tmp_odd_buf + 1 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 2 * 8)); - STORE_SH(loc3, (tmp_odd_buf + 3 * 8)); - - vec0 = reg0 - reg4; - vec1 = reg1 - reg5; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); - - vec0 = reg2 - reg6; - vec1 = reg3 - reg7; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); - - STORE_SH(loc0, (tmp_odd_buf + 8 * 8)); - STORE_SH(loc1, (tmp_odd_buf + 9 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 10 * 8)); - STORE_SH(loc3, (tmp_odd_buf + 11 * 8)); + LD_SH4(tmp_odd_buf, 8, reg0, reg1, reg2, reg3); + LD_SH4((tmp_odd_buf + 8 * 8), 8, reg4, reg5, reg6, reg7); + + ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, + loc0, loc1, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8); + + SUB2(reg0, reg4, reg1, reg5, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); + + SUB2(reg2, reg6, reg3, reg7, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8); /* Load 8 & Store 8 */ - reg1 = LOAD_SH(tmp_odd_buf + 4 * 8); - reg2 = LOAD_SH(tmp_odd_buf + 5 * 8); - reg0 = LOAD_SH(tmp_odd_buf + 6 * 8); - reg3 = LOAD_SH(tmp_odd_buf + 7 * 8); - reg4 = 
LOAD_SH(tmp_odd_buf + 12 * 8); - reg5 = LOAD_SH(tmp_odd_buf + 13 * 8); - reg6 = LOAD_SH(tmp_odd_buf + 14 * 8); - reg7 = LOAD_SH(tmp_odd_buf + 15 * 8); - - loc0 = reg0 + reg4; - loc1 = reg1 + reg5; - loc2 = reg2 + reg6; - loc3 = reg3 + reg7; - STORE_SH(loc0, (tmp_odd_buf + 4 * 8)); - STORE_SH(loc1, (tmp_odd_buf + 5 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 6 * 8)); - STORE_SH(loc3, (tmp_odd_buf + 7 * 8)); - - vec0 = reg0 - reg4; - vec1 = reg3 - reg7; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); - - vec0 = reg1 - reg5; - vec1 = reg2 - reg6; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); - - STORE_SH(loc0, (tmp_odd_buf + 12 * 8)); - STORE_SH(loc1, (tmp_odd_buf + 13 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 14 * 8)); - STORE_SH(loc3, (tmp_odd_buf + 15 * 8)); + LD_SH4((tmp_odd_buf + 4 * 8), 8, reg1, reg2, reg0, reg3); + LD_SH4((tmp_odd_buf + 12 * 8), 8, reg4, reg5, reg6, reg7); + + ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, + loc0, loc1, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8); + + SUB2(reg0, reg4, reg3, reg7, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); + + SUB2(reg1, reg5, reg2, reg6, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } static void vp9_idct_butterfly_transpose_store(int16_t *tmp_buf, int16_t *tmp_eve_buf, int16_t *tmp_odd_buf, - int16_t *dest) { + int16_t *dst) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; - v8i16 m0, m1, m2, m3, m4, m5, m6, m7; - v8i16 n0, n1, n2, n3, n4, n5, n6, n7; + v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; /* FINAL BUTTERFLY : Dependency on Even & Odd */ - /* Total: 32 loads, 32 stores */ - vec0 = LOAD_SH(tmp_odd_buf); - vec1 = LOAD_SH(tmp_odd_buf + 9 * 8); - vec2 = LOAD_SH(tmp_odd_buf + 14 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 6 * 8); - loc0 = LOAD_SH(tmp_eve_buf); - loc1 = LOAD_SH(tmp_eve_buf + 8 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 4 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 12 * 8); - - m0 = (loc0 + vec3); - STORE_SH((loc0 - vec3), (tmp_buf + 31 * 8)); - STORE_SH((loc1 - vec2), (tmp_buf + 23 * 8)); - m4 = (loc1 + vec2); - STORE_SH((loc2 - vec1), (tmp_buf + 27 * 8)); - m2 = (loc2 + vec1); - STORE_SH((loc3 - vec0), (tmp_buf + 19 * 8)); - m6 = (loc3 + vec0); + vec0 = LD_SH(tmp_odd_buf); + vec1 = LD_SH(tmp_odd_buf + 9 * 8); + vec2 = LD_SH(tmp_odd_buf + 14 * 8); + vec3 = LD_SH(tmp_odd_buf + 6 * 8); + loc0 = LD_SH(tmp_eve_buf); + loc1 = LD_SH(tmp_eve_buf + 8 * 8); + loc2 = LD_SH(tmp_eve_buf + 4 * 8); + loc3 = LD_SH(tmp_eve_buf + 12 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6); + + ST_SH((loc0 - vec3), (tmp_buf + 31 * 8)); + ST_SH((loc1 - vec2), (tmp_buf + 23 * 8)); + ST_SH((loc2 - vec1), (tmp_buf + 27 * 8)); + ST_SH((loc3 - vec0), (tmp_buf + 19 * 8)); /* Load 8 & Store 8 */ - vec0 = LOAD_SH(tmp_odd_buf + 4 * 8); - vec1 = LOAD_SH(tmp_odd_buf + 13 * 8); - vec2 = LOAD_SH(tmp_odd_buf + 10 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 3 * 8); - loc0 = LOAD_SH(tmp_eve_buf + 2 * 8); - loc1 = LOAD_SH(tmp_eve_buf + 10 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 6 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 14 * 8); - - m1 = (loc0 + vec3); - STORE_SH((loc0 - vec3), (tmp_buf + 29 * 8)); - STORE_SH((loc1 - vec2), (tmp_buf + 21 * 8)); - m5 = (loc1 + vec2); - STORE_SH((loc2 - vec1), (tmp_buf + 25 * 8)); - m3 = (loc2 + vec1); - STORE_SH((loc3 - vec0), (tmp_buf + 17 * 8)); - m7 = (loc3 + vec0); + vec0 = 
LD_SH(tmp_odd_buf + 4 * 8); + vec1 = LD_SH(tmp_odd_buf + 13 * 8); + vec2 = LD_SH(tmp_odd_buf + 10 * 8); + vec3 = LD_SH(tmp_odd_buf + 3 * 8); + loc0 = LD_SH(tmp_eve_buf + 2 * 8); + loc1 = LD_SH(tmp_eve_buf + 10 * 8); + loc2 = LD_SH(tmp_eve_buf + 6 * 8); + loc3 = LD_SH(tmp_eve_buf + 14 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7); + + ST_SH((loc0 - vec3), (tmp_buf + 29 * 8)); + ST_SH((loc1 - vec2), (tmp_buf + 21 * 8)); + ST_SH((loc2 - vec1), (tmp_buf + 25 * 8)); + ST_SH((loc3 - vec0), (tmp_buf + 17 * 8)); /* Load 8 & Store 8 */ - vec0 = LOAD_SH(tmp_odd_buf + 2 * 8); - vec1 = LOAD_SH(tmp_odd_buf + 11 * 8); - vec2 = LOAD_SH(tmp_odd_buf + 12 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 7 * 8); - loc0 = LOAD_SH(tmp_eve_buf + 1 * 8); - loc1 = LOAD_SH(tmp_eve_buf + 9 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 5 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 13 * 8); - - n0 = (loc0 + vec3); - STORE_SH((loc0 - vec3), (tmp_buf + 30 * 8)); - STORE_SH((loc1 - vec2), (tmp_buf + 22 * 8)); - n4 = (loc1 + vec2); - STORE_SH((loc2 - vec1), (tmp_buf + 26 * 8)); - n2 = (loc2 + vec1); - STORE_SH((loc3 - vec0), (tmp_buf + 18 * 8)); - n6 = (loc3 + vec0); + vec0 = LD_SH(tmp_odd_buf + 2 * 8); + vec1 = LD_SH(tmp_odd_buf + 11 * 8); + vec2 = LD_SH(tmp_odd_buf + 12 * 8); + vec3 = LD_SH(tmp_odd_buf + 7 * 8); + loc0 = LD_SH(tmp_eve_buf + 1 * 8); + loc1 = LD_SH(tmp_eve_buf + 9 * 8); + loc2 = LD_SH(tmp_eve_buf + 5 * 8); + loc3 = LD_SH(tmp_eve_buf + 13 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6); + + ST_SH((loc0 - vec3), (tmp_buf + 30 * 8)); + ST_SH((loc1 - vec2), (tmp_buf + 22 * 8)); + ST_SH((loc2 - vec1), (tmp_buf + 26 * 8)); + ST_SH((loc3 - vec0), (tmp_buf + 18 * 8)); /* Load 8 & Store 8 */ - vec0 = LOAD_SH(tmp_odd_buf + 5 * 8); - vec1 = LOAD_SH(tmp_odd_buf + 15 * 8); - vec2 = LOAD_SH(tmp_odd_buf + 8 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 1 * 8); - loc0 = LOAD_SH(tmp_eve_buf + 3 * 8); - loc1 = LOAD_SH(tmp_eve_buf + 11 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 7 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 15 * 8); - - n1 = (loc0 + vec3); - STORE_SH((loc0 - vec3), (tmp_buf + 28 * 8)); - STORE_SH((loc1 - vec2), (tmp_buf + 20 * 8)); - n5 = (loc1 + vec2); - STORE_SH((loc2 - vec1), (tmp_buf + 24 * 8)); - n3 = (loc2 + vec1); - STORE_SH((loc3 - vec0), (tmp_buf + 16 * 8)); - n7 = (loc3 + vec0); + vec0 = LD_SH(tmp_odd_buf + 5 * 8); + vec1 = LD_SH(tmp_odd_buf + 15 * 8); + vec2 = LD_SH(tmp_odd_buf + 8 * 8); + vec3 = LD_SH(tmp_odd_buf + 1 * 8); + loc0 = LD_SH(tmp_eve_buf + 3 * 8); + loc1 = LD_SH(tmp_eve_buf + 11 * 8); + loc2 = LD_SH(tmp_eve_buf + 7 * 8); + loc3 = LD_SH(tmp_eve_buf + 15 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7); + + ST_SH((loc0 - vec3), (tmp_buf + 28 * 8)); + ST_SH((loc1 - vec2), (tmp_buf + 20 * 8)); + ST_SH((loc2 - vec1), (tmp_buf + 24 * 8)); + ST_SH((loc3 - vec0), (tmp_buf + 16 * 8)); /* Transpose : 16 vectors */ /* 1st & 2nd 8x8 */ - TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3, - m0, n0, m1, n1, m2, n2, m3, n3); - STORE_4VECS_SH((dest + 0), 32, m0, n0, m1, n1); - STORE_4VECS_SH((dest + 4 * 32), 32, m2, n2, m3, n3); + TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3, + m0, n0, m1, n1, m2, n2, m3, n3); + ST_SH4(m0, n0, m1, n1, (dst + 0), 32); + ST_SH4(m2, n2, m3, n3, (dst + 4 * 32), 32); - TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7, - m4, n4, m5, n5, m6, n6, m7, n7); - STORE_4VECS_SH((dest + 8), 32, m4, n4, m5, n5); - STORE_4VECS_SH((dest + 8 + 4 * 32), 32, m6, n6, m7, n7); + TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7, + m4, n4, 
m5, n5, m6, n6, m7, n7); + ST_SH4(m4, n4, m5, n5, (dst + 8), 32); + ST_SH4(m6, n6, m7, n7, (dst + 8 + 4 * 32), 32); /* 3rd & 4th 8x8 */ - LOAD_8VECS_SH((tmp_buf + 8 * 16), 8, m0, n0, m1, n1, m2, n2, m3, n3); - LOAD_8VECS_SH((tmp_buf + 12 * 16), 8, m4, n4, m5, n5, m6, n6, m7, n7); - TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3, - m0, n0, m1, n1, m2, n2, m3, n3); - STORE_4VECS_SH((dest + 16), 32, m0, n0, m1, n1); - STORE_4VECS_SH((dest + 16 + 4 * 32), 32, m2, n2, m3, n3); - - TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7, - m4, n4, m5, n5, m6, n6, m7, n7); - STORE_4VECS_SH((dest + 24), 32, m4, n4, m5, n5); - STORE_4VECS_SH((dest + 24 + 4 * 32), 32, m6, n6, m7, n7); + LD_SH8((tmp_buf + 8 * 16), 8, m0, n0, m1, n1, m2, n2, m3, n3); + LD_SH8((tmp_buf + 12 * 16), 8, m4, n4, m5, n5, m6, n6, m7, n7); + TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3, + m0, n0, m1, n1, m2, n2, m3, n3); + ST_SH4(m0, n0, m1, n1, (dst + 16), 32); + ST_SH4(m2, n2, m3, n3, (dst + 16 + 4 * 32), 32); + + TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7, + m4, n4, m5, n5, m6, n6, m7, n7); + ST_SH4(m4, n4, m5, n5, (dst + 24), 32); + ST_SH4(m6, n6, m7, n7, (dst + 24 + 4 * 32), 32); } static void vp9_idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) { @@ -521,11 +347,8 @@ static void vp9_idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) { DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]); vp9_idct32x8_row_transpose_store(input, &tmp_buf[0]); - vp9_idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]); - vp9_idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]); - vp9_idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0], &tmp_odd_buf[0], output); } @@ -537,48 +360,31 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf, v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7; /* Even stage 1 */ - LOAD_8VECS_SH(tmp_buf, (4 * 32), - reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + tmp_buf += (2 * 32); - DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); - DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); - - vec0 = reg1 - reg5; - vec1 = reg1 + reg5; - vec2 = reg7 - reg3; - vec3 = reg7 + reg3; - - DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + VP9_DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); + VP9_DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); + BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0); + VP9_DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); loc1 = vec3; loc0 = vec1; - DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); - DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); - - vec0 = reg4 - reg6; - vec1 = reg4 + reg6; - vec2 = reg0 - reg2; - vec3 = reg0 + reg2; - - stp4 = vec0 - loc0; - stp3 = vec0 + loc0; - stp7 = vec1 - loc1; - stp0 = vec1 + loc1; - stp5 = vec2 - loc2; - stp2 = vec2 + loc2; - stp6 = vec3 - loc3; - stp1 = vec3 + loc3; + VP9_DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); + VP9_DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); + BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0); + BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4); + BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5); /* Even stage 2 */ /* Load 8 */ - LOAD_8VECS_SH((tmp_buf + 2 * 32), (4 * 32), - reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); + LD_SH8(tmp_buf, (4 * 32), reg0, reg1, 
reg2, reg3, reg4, reg5, reg6, reg7); - DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); - DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); - DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); - DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); + VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); + VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); + VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); + VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); vec0 = reg0 + reg4; reg0 = reg0 - reg4; @@ -596,55 +402,35 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf, reg4 = reg5 - vec1; reg5 = reg5 + vec1; - DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); - DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); + VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); + VP9_DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); vec0 = reg0 - reg6; reg0 = reg0 + reg6; vec1 = reg7 - reg1; reg7 = reg7 + reg1; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); - DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); + VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); /* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */ /* Store 8 */ - loc0 = stp0 - reg5; - loc1 = stp0 + reg5; - loc2 = stp1 - reg7; - loc3 = stp1 + reg7; - STORE_SH(loc0, (tmp_eve_buf + 15 * 8)); - STORE_SH(loc1, (tmp_eve_buf)); - STORE_SH(loc2, (tmp_eve_buf + 14 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 1 * 8)); - - loc0 = stp2 - reg1; - loc1 = stp2 + reg1; - loc2 = stp3 - reg4; - loc3 = stp3 + reg4; - STORE_SH(loc0, (tmp_eve_buf + 13 * 8)); - STORE_SH(loc1, (tmp_eve_buf + 2 * 8)); - STORE_SH(loc2, (tmp_eve_buf + 12 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 3 * 8)); + BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0); + ST_SH2(loc1, loc3, tmp_eve_buf, 8); + ST_SH2(loc2, loc0, (tmp_eve_buf + 14 * 8), 8); + + BUTTERFLY_4(stp2, stp3, reg4, reg1, loc1, loc3, loc2, loc0); + ST_SH2(loc1, loc3, (tmp_eve_buf + 2 * 8), 8); + ST_SH2(loc2, loc0, (tmp_eve_buf + 12 * 8), 8); /* Store 8 */ - loc0 = stp4 - reg3; - loc1 = stp4 + reg3; - loc2 = stp5 - reg6; - loc3 = stp5 + reg6; - STORE_SH(loc0, (tmp_eve_buf + 11 * 8)); - STORE_SH(loc1, (tmp_eve_buf + 4 * 8)); - STORE_SH(loc2, (tmp_eve_buf + 10 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 5 * 8)); - - loc0 = stp6 - reg0; - loc1 = stp6 + reg0; - loc2 = stp7 - reg2; - loc3 = stp7 + reg2; - STORE_SH(loc0, (tmp_eve_buf + 9 * 8)); - STORE_SH(loc1, (tmp_eve_buf + 6 * 8)); - STORE_SH(loc2, (tmp_eve_buf + 8 * 8)); - STORE_SH(loc3, (tmp_eve_buf + 7 * 8)); + BUTTERFLY_4(stp4, stp5, reg6, reg3, loc1, loc3, loc2, loc0); + ST_SH2(loc1, loc3, (tmp_eve_buf + 4 * 8), 8); + ST_SH2(loc2, loc0, (tmp_eve_buf + 10 * 8), 8); + + BUTTERFLY_4(stp6, stp7, reg2, reg0, loc1, loc3, loc2, loc0); + ST_SH2(loc1, loc3, (tmp_eve_buf + 6 * 8), 8); + ST_SH2(loc2, loc0, (tmp_eve_buf + 8 * 8), 8); } static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, @@ -653,19 +439,19 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; /* Odd stage 1 */ - reg0 = LOAD_SH(tmp_buf + 32); - reg1 = LOAD_SH(tmp_buf + 7 * 32); - reg2 = LOAD_SH(tmp_buf + 9 * 32); - reg3 = LOAD_SH(tmp_buf + 15 * 32); - reg4 = LOAD_SH(tmp_buf 
+ 17 * 32); - reg5 = LOAD_SH(tmp_buf + 23 * 32); - reg6 = LOAD_SH(tmp_buf + 25 * 32); - reg7 = LOAD_SH(tmp_buf + 31 * 32); - - DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); - DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); - DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); - DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); + reg0 = LD_SH(tmp_buf + 32); + reg1 = LD_SH(tmp_buf + 7 * 32); + reg2 = LD_SH(tmp_buf + 9 * 32); + reg3 = LD_SH(tmp_buf + 15 * 32); + reg4 = LD_SH(tmp_buf + 17 * 32); + reg5 = LD_SH(tmp_buf + 23 * 32); + reg6 = LD_SH(tmp_buf + 25 * 32); + reg7 = LD_SH(tmp_buf + 31 * 32); + + VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); + VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); + VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); + VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); vec0 = reg0 + reg3; reg0 = reg0 - reg3; @@ -678,278 +464,182 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, reg5 = vec0; /* 4 Stores */ - vec0 = reg5 + reg4; - vec1 = reg3 + reg2; - STORE_SH(vec0, (tmp_odd_buf + 4 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 5 * 8)); - - vec0 = reg5 - reg4; - vec1 = reg3 - reg2; - DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); - STORE_SH(vec0, (tmp_odd_buf)); - STORE_SH(vec1, (tmp_odd_buf + 1 * 8)); + ADD2(reg5, reg4, reg3, reg2, vec0, vec1); + ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8); + SUB2(reg5, reg4, reg3, reg2, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); + ST_SH2(vec0, vec1, tmp_odd_buf, 8); /* 4 Stores */ - DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); - DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); - - vec0 = reg0 + reg1; - vec2 = reg7 - reg6; - vec1 = reg7 + reg6; - vec3 = reg0 - reg1; - STORE_SH(vec0, (tmp_odd_buf + 6 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 7 * 8)); - - DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); - STORE_SH(vec2, (tmp_odd_buf + 2 * 8)); - STORE_SH(vec3, (tmp_odd_buf + 3 * 8)); + VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); + VP9_DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); + BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3); + ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8); + VP9_DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); + ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8); /* Odd stage 2 */ /* 8 loads */ - reg0 = LOAD_SH(tmp_buf + 3 * 32); - reg1 = LOAD_SH(tmp_buf + 5 * 32); - reg2 = LOAD_SH(tmp_buf + 11 * 32); - reg3 = LOAD_SH(tmp_buf + 13 * 32); - reg4 = LOAD_SH(tmp_buf + 19 * 32); - reg5 = LOAD_SH(tmp_buf + 21 * 32); - reg6 = LOAD_SH(tmp_buf + 27 * 32); - reg7 = LOAD_SH(tmp_buf + 29 * 32); - - DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); - DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); - DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); - DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); + reg0 = LD_SH(tmp_buf + 3 * 32); + reg1 = LD_SH(tmp_buf + 5 * 32); + reg2 = LD_SH(tmp_buf + 11 * 32); + reg3 = LD_SH(tmp_buf + 13 * 32); + reg4 = LD_SH(tmp_buf + 19 * 32); + reg5 = LD_SH(tmp_buf + 21 * 32); + reg6 = LD_SH(tmp_buf + 27 * 32); + reg7 = LD_SH(tmp_buf + 29 * 32); + + VP9_DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); + VP9_DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, 
reg5); + VP9_DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); + VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); /* 4 Stores */ - vec0 = reg1 - reg2; - vec1 = reg6 - reg5; - vec2 = reg0 - reg3; - vec3 = reg7 - reg4; - DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); - DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); + SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, vec0, vec1, vec2, vec3); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); + VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); + BUTTERFLY_4(loc2, loc3, loc1, loc0, vec0, vec1, vec3, vec2); + ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8); + VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); + ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8); - vec2 = loc2 - loc0; - vec3 = loc3 - loc1; - vec0 = loc2 + loc0; - vec1 = loc3 + loc1; - STORE_SH(vec0, (tmp_odd_buf + 12 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 15 * 8)); - - DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); + /* 4 Stores */ + ADD4(reg0, reg3, reg1, reg2, reg5, reg6, reg4, reg7, vec0, vec1, vec2, vec3); + BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2); + ST_SH2(reg0, reg1, (tmp_odd_buf + 13 * 8), 8); + VP9_DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); + ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8); - STORE_SH(vec0, (tmp_odd_buf + 10 * 8)); - STORE_SH(vec1, (tmp_odd_buf + 11 * 8)); + /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */ + /* Load 8 & Store 8 */ + LD_SH4(tmp_odd_buf, 8, reg0, reg1, reg2, reg3); + LD_SH4((tmp_odd_buf + 8 * 8), 8, reg4, reg5, reg6, reg7); - /* 4 Stores */ - vec0 = reg0 + reg3; - vec1 = reg1 + reg2; - vec2 = reg6 + reg5; - vec3 = reg7 + reg4; - reg0 = vec0 + vec1; - reg1 = vec3 + vec2; - reg2 = vec0 - vec1; - reg3 = vec3 - vec2; - STORE_SH(reg0, (tmp_odd_buf + 13 * 8)); - STORE_SH(reg1, (tmp_odd_buf + 14 * 8)); + ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8); - DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); + SUB2(reg0, reg4, reg1, reg5, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); - STORE_SH(reg0, (tmp_odd_buf + 8 * 8)); - STORE_SH(reg1, (tmp_odd_buf + 9 * 8)); + SUB2(reg2, reg6, reg3, reg7, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8); - /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */ /* Load 8 & Store 8 */ - reg0 = LOAD_SH(tmp_odd_buf); - reg1 = LOAD_SH(tmp_odd_buf + 1 * 8); - reg2 = LOAD_SH(tmp_odd_buf + 2 * 8); - reg3 = LOAD_SH(tmp_odd_buf + 3 * 8); - reg4 = LOAD_SH(tmp_odd_buf + 8 * 8); - reg5 = LOAD_SH(tmp_odd_buf + 9 * 8); - reg6 = LOAD_SH(tmp_odd_buf + 10 * 8); - reg7 = LOAD_SH(tmp_odd_buf + 11 * 8); - - loc0 = reg0 + reg4; - loc1 = reg1 + reg5; - loc2 = reg2 + reg6; - loc3 = reg3 + reg7; - STORE_SH(loc0, (tmp_odd_buf)); - STORE_SH(loc1, (tmp_odd_buf + 1 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 2 * 8)); - STORE_SH(loc3, (tmp_odd_buf + 3 * 8)); - - vec0 = reg0 - reg4; - vec1 = reg1 - reg5; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); - - vec0 = reg2 - reg6; - vec1 = reg3 - reg7; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); - - STORE_SH(loc0, (tmp_odd_buf + 8 * 8)); - STORE_SH(loc1, (tmp_odd_buf + 9 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 10 * 
8)); - STORE_SH(loc3, (tmp_odd_buf + 11 * 8)); + LD_SH4((tmp_odd_buf + 4 * 8), 8, reg1, reg2, reg0, reg3); + LD_SH4((tmp_odd_buf + 12 * 8), 8, reg4, reg5, reg6, reg7); - /* Load 8 & Store 8 */ - reg1 = LOAD_SH(tmp_odd_buf + 4 * 8); - reg2 = LOAD_SH(tmp_odd_buf + 5 * 8); - reg0 = LOAD_SH(tmp_odd_buf + 6 * 8); - reg3 = LOAD_SH(tmp_odd_buf + 7 * 8); - reg4 = LOAD_SH(tmp_odd_buf + 12 * 8); - reg5 = LOAD_SH(tmp_odd_buf + 13 * 8); - reg6 = LOAD_SH(tmp_odd_buf + 14 * 8); - reg7 = LOAD_SH(tmp_odd_buf + 15 * 8); - - loc0 = reg0 + reg4; - loc1 = reg1 + reg5; - loc2 = reg2 + reg6; - loc3 = reg3 + reg7; - STORE_SH(loc0, (tmp_odd_buf + 4 * 8)); - STORE_SH(loc1, (tmp_odd_buf + 5 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 6 * 8)); - STORE_SH(loc3, (tmp_odd_buf + 7 * 8)); - - vec0 = reg0 - reg4; - vec1 = reg3 - reg7; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); - - vec0 = reg1 - reg5; - vec1 = reg2 - reg6; - DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); - - STORE_SH(loc0, (tmp_odd_buf + 12 * 8)); - STORE_SH(loc1, (tmp_odd_buf + 13 * 8)); - STORE_SH(loc2, (tmp_odd_buf + 14 * 8)); - STORE_SH(loc3, (tmp_odd_buf + 15 * 8)); + ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8); + + SUB2(reg0, reg4, reg3, reg7, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); + + SUB2(reg1, reg5, reg2, reg6, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } static void vp9_idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf, int16_t *tmp_odd_buf, - uint8_t *dest, - int32_t dest_stride) { + uint8_t *dst, + int32_t dst_stride) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; - v8i16 m0, m1, m2, m3, m4, m5, m6, m7; - v8i16 n0, n1, n2, n3, n4, n5, n6, n7; + v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; /* FINAL BUTTERFLY : Dependency on Even & Odd */ - vec0 = LOAD_SH(tmp_odd_buf); - vec1 = LOAD_SH(tmp_odd_buf + 9 * 8); - vec2 = LOAD_SH(tmp_odd_buf + 14 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 6 * 8); - loc0 = LOAD_SH(tmp_eve_buf); - loc1 = LOAD_SH(tmp_eve_buf + 8 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 4 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 12 * 8); - - m0 = (loc0 + vec3); - m4 = (loc1 + vec2); - m2 = (loc2 + vec1); - m6 = (loc3 + vec0); - SRARI_H_4VECS_SH(m0, m2, m4, m6, m0, m2, m4, m6, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS(dest, dest_stride, m0, m2, m4, m6); - - m6 = (loc0 - vec3); - m2 = (loc1 - vec2); - m4 = (loc2 - vec1); - m0 = (loc3 - vec0); - SRARI_H_4VECS_SH(m0, m2, m4, m6, m0, m2, m4, m6, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 19 * dest_stride), - dest_stride, m0, m2, m4, m6); + vec0 = LD_SH(tmp_odd_buf); + vec1 = LD_SH(tmp_odd_buf + 9 * 8); + vec2 = LD_SH(tmp_odd_buf + 14 * 8); + vec3 = LD_SH(tmp_odd_buf + 6 * 8); + loc0 = LD_SH(tmp_eve_buf); + loc1 = LD_SH(tmp_eve_buf + 8 * 8); + loc2 = LD_SH(tmp_eve_buf + 4 * 8); + loc3 = LD_SH(tmp_eve_buf + 12 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6); + SRARI_H4_SH(m0, m2, m4, m6, 6); + VP9_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6); + + SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m6, m2, m4, m0); + SRARI_H4_SH(m0, m2, m4, m6, 6); + VP9_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride), + m0, m2, m4, m6); /* Load 8 & Store 8 */ - vec0 = LOAD_SH(tmp_odd_buf + 4 * 8); - vec1 = LOAD_SH(tmp_odd_buf + 13 * 8); - vec2 = 
LOAD_SH(tmp_odd_buf + 10 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 3 * 8); - loc0 = LOAD_SH(tmp_eve_buf + 2 * 8); - loc1 = LOAD_SH(tmp_eve_buf + 10 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 6 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 14 * 8); - - m1 = (loc0 + vec3); - m5 = (loc1 + vec2); - m3 = (loc2 + vec1); - m7 = (loc3 + vec0); - SRARI_H_4VECS_SH(m1, m3, m5, m7, m1, m3, m5, m7, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 2 * dest_stride), - dest_stride, m1, m3, m5, m7); - - m7 = (loc0 - vec3); - m3 = (loc1 - vec2); - m5 = (loc2 - vec1); - m1 = (loc3 - vec0); - SRARI_H_4VECS_SH(m1, m3, m5, m7, m1, m3, m5, m7, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 17 * dest_stride), - dest_stride, m1, m3, m5, m7); + vec0 = LD_SH(tmp_odd_buf + 4 * 8); + vec1 = LD_SH(tmp_odd_buf + 13 * 8); + vec2 = LD_SH(tmp_odd_buf + 10 * 8); + vec3 = LD_SH(tmp_odd_buf + 3 * 8); + loc0 = LD_SH(tmp_eve_buf + 2 * 8); + loc1 = LD_SH(tmp_eve_buf + 10 * 8); + loc2 = LD_SH(tmp_eve_buf + 6 * 8); + loc3 = LD_SH(tmp_eve_buf + 14 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7); + SRARI_H4_SH(m1, m3, m5, m7, 6); + VP9_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride), + m1, m3, m5, m7); + + SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m7, m3, m5, m1); + SRARI_H4_SH(m1, m3, m5, m7, 6); + VP9_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride), + m1, m3, m5, m7); /* Load 8 & Store 8 */ - vec0 = LOAD_SH(tmp_odd_buf + 2 * 8); - vec1 = LOAD_SH(tmp_odd_buf + 11 * 8); - vec2 = LOAD_SH(tmp_odd_buf + 12 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 7 * 8); - loc0 = LOAD_SH(tmp_eve_buf + 1 * 8); - loc1 = LOAD_SH(tmp_eve_buf + 9 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 5 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 13 * 8); - - n0 = (loc0 + vec3); - n4 = (loc1 + vec2); - n2 = (loc2 + vec1); - n6 = (loc3 + vec0); - SRARI_H_4VECS_SH(n0, n2, n4, n6, n0, n2, n4, n6, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 1 * dest_stride), - dest_stride, n0, n2, n4, n6); - - n6 = (loc0 - vec3); - n2 = (loc1 - vec2); - n4 = (loc2 - vec1); - n0 = (loc3 - vec0); - SRARI_H_4VECS_SH(n0, n2, n4, n6, n0, n2, n4, n6, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 18 * dest_stride), - dest_stride, n0, n2, n4, n6); + vec0 = LD_SH(tmp_odd_buf + 2 * 8); + vec1 = LD_SH(tmp_odd_buf + 11 * 8); + vec2 = LD_SH(tmp_odd_buf + 12 * 8); + vec3 = LD_SH(tmp_odd_buf + 7 * 8); + loc0 = LD_SH(tmp_eve_buf + 1 * 8); + loc1 = LD_SH(tmp_eve_buf + 9 * 8); + loc2 = LD_SH(tmp_eve_buf + 5 * 8); + loc3 = LD_SH(tmp_eve_buf + 13 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6); + SRARI_H4_SH(n0, n2, n4, n6, 6); + VP9_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride), + n0, n2, n4, n6); + + SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n6, n2, n4, n0); + SRARI_H4_SH(n0, n2, n4, n6, 6); + VP9_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride), + n0, n2, n4, n6); /* Load 8 & Store 8 */ - vec0 = LOAD_SH(tmp_odd_buf + 5 * 8); - vec1 = LOAD_SH(tmp_odd_buf + 15 * 8); - vec2 = LOAD_SH(tmp_odd_buf + 8 * 8); - vec3 = LOAD_SH(tmp_odd_buf + 1 * 8); - loc0 = LOAD_SH(tmp_eve_buf + 3 * 8); - loc1 = LOAD_SH(tmp_eve_buf + 11 * 8); - loc2 = LOAD_SH(tmp_eve_buf + 7 * 8); - loc3 = LOAD_SH(tmp_eve_buf + 15 * 8); - - n1 = (loc0 + vec3); - n5 = (loc1 + vec2); - n3 = (loc2 + vec1); - n7 = (loc3 + vec0); - SRARI_H_4VECS_SH(n1, n3, n5, n7, n1, n3, n5, n7, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 3 * dest_stride), - dest_stride, n1, n3, n5, n7); - - n7 = (loc0 - vec3); - n3 = (loc1 - vec2); - n5 = (loc2 - vec1); 
- n1 = (loc3 - vec0); - SRARI_H_4VECS_SH(n1, n3, n5, n7, n1, n3, n5, n7, 6); - VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 16 * dest_stride), - dest_stride, n1, n3, n5, n7); + vec0 = LD_SH(tmp_odd_buf + 5 * 8); + vec1 = LD_SH(tmp_odd_buf + 15 * 8); + vec2 = LD_SH(tmp_odd_buf + 8 * 8); + vec3 = LD_SH(tmp_odd_buf + 1 * 8); + loc0 = LD_SH(tmp_eve_buf + 3 * 8); + loc1 = LD_SH(tmp_eve_buf + 11 * 8); + loc2 = LD_SH(tmp_eve_buf + 7 * 8); + loc3 = LD_SH(tmp_eve_buf + 15 * 8); + + ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7); + SRARI_H4_SH(n1, n3, n5, n7, 6); + VP9_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride), + n1, n3, n5, n7); + + SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n7, n3, n5, n1); + SRARI_H4_SH(n1, n3, n5, n7, 6); + VP9_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * dst_stride), + n1, n3, n5, n7); } -static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dest, - int32_t dest_stride) { +static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, + int32_t dst_stride) { DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]); DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]); vp9_idct8x32_column_even_process_store(input, &tmp_eve_buf[0]); - vp9_idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]); - vp9_idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0], - dest, dest_stride); + dst, dst_stride); } -void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dest, - int32_t dest_stride) { +void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]); int16_t *out_ptr = out_arr; @@ -963,13 +653,13 @@ void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 4; ++i) { /* process 8 * 32 block */ - vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dest + (i << 3)), - dest_stride); + vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); } } -void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dest, - int32_t dest_stride) { +void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]); int16_t *out_ptr = out_arr; @@ -1008,70 +698,42 @@ void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dest, /* transform columns */ for (i = 0; i < 4; ++i) { /* process 8 * 32 block */ - vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dest + (i << 3)), - dest_stride); + vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), + dst_stride); } } -void vp9_idct32x32_1_add_msa(const int16_t *input, uint8_t *dest, - int32_t dest_stride) { - int32_t i, const1; - v8i16 const2; +void vp9_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + int32_t i; int16_t out; - v8i16 res0, res1, res2, res3, res4, res5, res6, res7; - v16u8 dest0, dest1, dest2, dest3; - v16u8 tmp0, tmp1, tmp2, tmp3; - v16i8 zero = { 0 }; - - out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - const1 = ROUND_POWER_OF_TWO(out, 6); - - const2 = __msa_fill_h(const1); - - for (i = 0; i < 16; ++i) { - dest0 = LOAD_UB(dest); - dest1 = LOAD_UB(dest + 16); - dest2 = LOAD_UB(dest + dest_stride); - dest3 = LOAD_UB(dest + dest_stride + 16); - - res0 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest0); - res1 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest1); - res2 = (v8i16)__msa_ilvr_b(zero, 
(v16i8)dest2); - res3 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest3); - res4 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest0); - res5 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest1); - res6 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest2); - res7 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest3); - - res0 += const2; - res1 += const2; - res2 += const2; - res3 += const2; - res4 += const2; - res5 += const2; - res6 += const2; - res7 += const2; - - res0 = CLIP_UNSIGNED_CHAR_H(res0); - res1 = CLIP_UNSIGNED_CHAR_H(res1); - res2 = CLIP_UNSIGNED_CHAR_H(res2); - res3 = CLIP_UNSIGNED_CHAR_H(res3); - res4 = CLIP_UNSIGNED_CHAR_H(res4); - res5 = CLIP_UNSIGNED_CHAR_H(res5); - res6 = CLIP_UNSIGNED_CHAR_H(res6); - res7 = CLIP_UNSIGNED_CHAR_H(res7); - - tmp0 = (v16u8)__msa_pckev_b((v16i8)res4, (v16i8)res0); - tmp1 = (v16u8)__msa_pckev_b((v16i8)res5, (v16i8)res1); - tmp2 = (v16u8)__msa_pckev_b((v16i8)res6, (v16i8)res2); - tmp3 = (v16u8)__msa_pckev_b((v16i8)res7, (v16i8)res3); - - STORE_UB(tmp0, dest); - STORE_UB(tmp1, dest + 16); - dest += dest_stride; - STORE_UB(tmp2, dest); - STORE_UB(tmp3, dest + 16); - dest += dest_stride; + v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3; + v8i16 res0, res1, res2, res3, res4, res5, res6, res7, vec; + + out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); + out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); + out = ROUND_POWER_OF_TWO(out, 6); + + vec = __msa_fill_h(out); + + for (i = 16; i--;) { + LD_UB2(dst, 16, dst0, dst1); + LD_UB2(dst + dst_stride, 16, dst2, dst3); + + UNPCK_UB_SH(dst0, res0, res4); + UNPCK_UB_SH(dst1, res1, res5); + UNPCK_UB_SH(dst2, res2, res6); + UNPCK_UB_SH(dst3, res3, res7); + ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3); + ADD4(res4, vec, res5, vec, res6, vec, res7, vec, res4, res5, res6, res7); + CLIP_SH4_0_255(res0, res1, res2, res3); + CLIP_SH4_0_255(res4, res5, res6, res7); + PCKEV_B4_UB(res4, res0, res5, res1, res6, res2, res7, res3, + tmp0, tmp1, tmp2, tmp3); + + ST_UB2(tmp0, tmp1, dst, 16); + dst += dst_stride; + ST_UB2(tmp2, tmp3, dst, 16); + dst += dst_stride; } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c new file mode 100644 index 00000000000..a3df1a4d320 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> +#include "vp9/common/mips/msa/vp9_idct_msa.h" + +void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + v8i16 in0, in1, in2, in3; + v4i32 in0_r, in1_r, in2_r, in3_r, in4_r; + + /* load vector elements of 4x4 block */ + LD4x4_SH(input, in0, in2, in3, in1); + TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); + UNPCK_R_SH_SW(in0, in0_r); + UNPCK_R_SH_SW(in2, in2_r); + UNPCK_R_SH_SW(in3, in3_r); + UNPCK_R_SH_SW(in1, in1_r); + SRA_4V(in0_r, in1_r, in2_r, in3_r, UNIT_QUANT_SHIFT); + + in0_r += in2_r; + in3_r -= in1_r; + in4_r = (in0_r - in3_r) >> 1; + in1_r = in4_r - in1_r; + in2_r = in4_r - in2_r; + in0_r -= in1_r; + in3_r += in2_r; + + TRANSPOSE4x4_SW_SW(in0_r, in1_r, in2_r, in3_r, in0_r, in1_r, in2_r, in3_r); + + in0_r += in1_r; + in2_r -= in3_r; + in4_r = (in0_r - in2_r) >> 1; + in3_r = in4_r - in3_r; + in1_r = in4_r - in1_r; + in0_r -= in3_r; + in2_r += in1_r; + + PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r, + in0, in1, in2, in3); + ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride); +} + +void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + int16_t a1, e1; + v8i16 in1, in0 = { 0 }; + + a1 = input[0] >> UNIT_QUANT_SHIFT; + e1 = a1 >> 1; + a1 -= e1; + + in0 = __msa_insert_h(in0, 0, a1); + in0 = __msa_insert_h(in0, 1, e1); + in0 = __msa_insert_h(in0, 2, e1); + in0 = __msa_insert_h(in0, 3, e1); + + in1 = in0 >> 1; + in0 -= in1; + + ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride); +} + +void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + v8i16 in0, in1, in2, in3; + + /* load vector elements of 4x4 block */ + LD4x4_SH(input, in0, in1, in2, in3); + /* rows */ + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); + /* columns */ + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); + /* rounding (add 2^3, divide by 2^4) */ + SRARI_H4_SH(in0, in1, in2, in3, 4); + ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); +} + +void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + int16_t out; + v8i16 vec; + + out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); + out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); + out = ROUND_POWER_OF_TWO(out, 4); + vec = __msa_fill_h(out); + + ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride); +} + +void vp9_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride, int32_t tx_type) { + v8i16 in0, in1, in2, in3; + + /* load vector elements of 4x4 block */ + LD4x4_SH(input, in0, in1, in2, in3); + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + + switch (tx_type) { + case DCT_DCT: + /* DCT in horizontal */ + VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); + /* DCT in vertical */ + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); + break; + case ADST_DCT: + /* DCT in horizontal */ + VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); + /* ADST in vertical */ + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); + break; + case DCT_ADST: + /* ADST in horizontal */ + VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); + /* DCT in vertical */ + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, 
in2, in3); + break; + case ADST_ADST: + /* ADST in horizontal */ + VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); + /* ADST in vertical */ + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); + break; + default: + assert(0); + break; + } + + /* final rounding (add 2^3, divide by 2^4) and shift */ + SRARI_H4_SH(in0, in1, in2, in3, 4); + /* add block and store 4x4 */ + ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c new file mode 100644 index 00000000000..a4a9c212401 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include "vp9/common/mips/msa/vp9_idct_msa.h" + +void vp9_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + + /* load vector elements of 8x8 block */ + LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); + + /* rows transform */ + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* 1D idct8x8 */ + VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* columns transform */ + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* 1D idct8x8 */ + VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* final rounding (add 2^4, divide by 2^5) and shift */ + SRARI_H4_SH(in0, in1, in2, in3, 5); + SRARI_H4_SH(in4, in5, in6, in7, 5); + /* add block and store 8x8 */ + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); + dst += (4 * dst_stride); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); +} + +void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3; + v4i32 tmp0, tmp1, tmp2, tmp3; + v8i16 zero = { 0 }; + + /* load vector elements of 8x8 block */ + LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + + /* stage1 */ + ILVL_H2_SH(in3, in0, in2, in1, s0, s1); + k0 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); + k1 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); + k2 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); + k3 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); + DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3); + SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS); + PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1); + PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3); + BUTTERFLY_4(s0, s1, s3, s2, s4, s7, s6, s5); + + /* stage2 */ + ILVR_H2_SH(in3, in1, in2, in0, s1, s0); + k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); + k1 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); + k2 = 
VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); + k3 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); + DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3); + SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS); + PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1); + PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3); + BUTTERFLY_4(s0, s1, s2, s3, m0, m1, m2, m3); + + /* stage3 */ + s0 = __msa_ilvr_h(s6, s5); + + k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); + DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1); + SRARI_W2_SW(tmp0, tmp1, DCT_CONST_BITS); + PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3); + + /* stage4 */ + BUTTERFLY_8(m0, m1, m2, m3, s4, s2, s3, s7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + + /* final rounding (add 2^4, divide by 2^5) and shift */ + SRARI_H4_SH(in0, in1, in2, in3, 5); + SRARI_H4_SH(in4, in5, in6, in7, 5); + + /* add block and store 8x8 */ + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); + dst += (4 * dst_stride); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); +} + +void vp9_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride) { + int16_t out; + int32_t val; + v8i16 vec; + + out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); + out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); + val = ROUND_POWER_OF_TWO(out, 5); + vec = __msa_fill_h(val); + + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec); + dst += (4 * dst_stride); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec); +} + +void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst, + int32_t dst_stride, int32_t tx_type) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + + /* load vector elements of 8x8 block */ + LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); + + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + + switch (tx_type) { + case DCT_DCT: + /* DCT in horizontal */ + VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* DCT in vertical */ + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + break; + case ADST_DCT: + /* DCT in horizontal */ + VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* ADST in vertical */ + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + break; + case DCT_ADST: + /* ADST in horizontal */ + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* DCT in vertical */ + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + break; + case ADST_ADST: + /* ADST in horizontal */ + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + /* ADST in vertical */ + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, 
+ in0, in1, in2, in3, in4, in5, in6, in7); + break; + default: + assert(0); + break; + } + + /* final rounding (add 2^4, divide by 2^5) and shift */ + SRARI_H4_SH(in0, in1, in2, in3, 5); + SRARI_H4_SH(in4, in5, in6, in7, 5); + + /* add block and store 8x8 */ + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); + dst += (4 * dst_stride); + VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct_msa.h new file mode 100644 index 00000000000..60e27fc1107 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct_msa.h @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_MIPS_MSA_VP9_IDCT_MSA_H_ +#define VP9_COMMON_MIPS_MSA_VP9_IDCT_MSA_H_ + +#include "vpx_ports/mem.h" +#include "vp9/common/vp9_idct.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +#define VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) { \ + v8i16 k0_m = __msa_fill_h(cnst0); \ + v4i32 s0_m, s1_m, s2_m, s3_m; \ + \ + s0_m = (v4i32)__msa_fill_h(cnst1); \ + k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \ + \ + ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \ + ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \ + DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \ + SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ + out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ + \ + DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \ + SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ + out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ +} + +#define VP9_DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, \ + dst0, dst1, dst2, dst3) { \ + v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \ + v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \ + \ + DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \ + tp0_m, tp2_m, tp3_m, tp4_m); \ + DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \ + tp5_m, tp6_m, tp7_m, tp8_m); \ + BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \ + BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \ + SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \ + SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \ + PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \ + dst0, dst1, dst2, dst3); \ +} + +#define VP9_DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) ({ \ + v8i16 dst_m; \ + v4i32 tp0_m, tp1_m; \ + \ + DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \ + SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \ + dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \ + \ + dst_m; \ +}) + +#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \ + v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \ + v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \ + cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \ + v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, \ + -cospi_16_64, cospi_24_64, -cospi_24_64, 0, 0 }; \ + \ + SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \ + 
cnst2_m = -cnst0_m; \ + ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ + SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \ + cnst4_m = -cnst2_m; \ + ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ + \ + ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \ + ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \ + VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst1_m, cnst2_m, cnst3_m, in7, in0, \ + in4, in3); \ + \ + SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \ + cnst2_m = -cnst0_m; \ + ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ + SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \ + cnst4_m = -cnst2_m; \ + ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ + \ + ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ + ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ + \ + VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst1_m, cnst2_m, cnst3_m, in5, in2, \ + in6, in1); \ + BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \ + out7 = -s0_m; \ + out0 = s1_m; \ + \ + SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, \ + cnst0_m, cnst1_m, cnst2_m, cnst3_m); \ + \ + ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \ + cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + cnst1_m = cnst0_m; \ + \ + ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \ + ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ + VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst2_m, cnst3_m, cnst1_m, out1, out6, \ + s0_m, s1_m); \ + \ + SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + \ + ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ + ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \ + out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ + out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ + out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ + \ + out1 = -out1; \ + out3 = -out3; \ + out5 = -out5; \ +} + +#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1) { \ + v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \ + v8i16 madd_s0_m, madd_s1_m; \ + \ + ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \ + c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m); \ + SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \ +} + +#define VP9_MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \ + out0, out1, out2, out3) { \ + v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ + v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \ + \ + ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \ + ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ + cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ + m4_m, m5_m, tmp3_m, tmp2_m); \ + SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ + cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ + m4_m, m5_m, tmp3_m, tmp2_m); \ + SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ +} + +#define VP9_SET_COSPI_PAIR(c0_h, c1_h) ({ \ + v8i16 out0_m, r0_m, r1_m; \ + \ + r0_m = __msa_fill_h(c0_h); \ + r1_m = 
__msa_fill_h(c1_h); \ + out0_m = __msa_ilvev_h(r1_m, r0_m); \ + \ + out0_m; \ +}) + +#define VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3) { \ + uint8_t *dst_m = (uint8_t *) (dst); \ + v16u8 dst0_m, dst1_m, dst2_m, dst3_m; \ + v16i8 tmp0_m, tmp1_m; \ + v16i8 zero_m = { 0 }; \ + v8i16 res0_m, res1_m, res2_m, res3_m; \ + \ + LD_UB4(dst_m, dst_stride, dst0_m, dst1_m, dst2_m, dst3_m); \ + ILVR_B4_SH(zero_m, dst0_m, zero_m, dst1_m, zero_m, dst2_m, \ + zero_m, dst3_m, res0_m, res1_m, res2_m, res3_m); \ + ADD4(res0_m, in0, res1_m, in1, res2_m, in2, res3_m, in3, \ + res0_m, res1_m, res2_m, res3_m); \ + CLIP_SH4_0_255(res0_m, res1_m, res2_m, res3_m); \ + PCKEV_B2_SB(res1_m, res0_m, res3_m, res2_m, tmp0_m, tmp1_m); \ + ST8x4_UB(tmp0_m, tmp1_m, dst_m, dst_stride); \ +} + +#define VP9_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3) { \ + v8i16 c0_m, c1_m, c2_m, c3_m; \ + v8i16 step0_m, step1_m; \ + v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + c0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \ + c1_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \ + step0_m = __msa_ilvr_h(in2, in0); \ + DOTP_SH2_SW(step0_m, step0_m, c0_m, c1_m, tmp0_m, tmp1_m); \ + \ + c2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ + c3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ + step1_m = __msa_ilvr_h(in3, in1); \ + DOTP_SH2_SW(step1_m, step1_m, c2_m, c3_m, tmp2_m, tmp3_m); \ + SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + \ + PCKEV_H2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp0_m, tmp2_m); \ + SLDI_B2_0_SW(tmp0_m, tmp2_m, tmp1_m, tmp3_m, 8); \ + BUTTERFLY_4((v8i16)tmp0_m, (v8i16)tmp1_m, \ + (v8i16)tmp2_m, (v8i16)tmp3_m, \ + out0, out1, out2, out3); \ +} + +#define VP9_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3) { \ + v8i16 res0_m, res1_m, c0_m, c1_m; \ + v8i16 k1_m, k2_m, k3_m, k4_m; \ + v8i16 zero_m = { 0 }; \ + v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v4i32 int0_m, int1_m, int2_m, int3_m; \ + v8i16 mask_m = { sinpi_1_9, sinpi_2_9, sinpi_3_9, \ + sinpi_4_9, -sinpi_1_9, -sinpi_2_9, -sinpi_3_9, \ + -sinpi_4_9 }; \ + \ + SPLATI_H4_SH(mask_m, 3, 0, 1, 2, c0_m, c1_m, k1_m, k2_m); \ + ILVEV_H2_SH(c0_m, c1_m, k1_m, k2_m, c0_m, c1_m); \ + ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \ + DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp2_m, tmp1_m); \ + int0_m = tmp2_m + tmp1_m; \ + \ + SPLATI_H2_SH(mask_m, 4, 7, k4_m, k3_m); \ + ILVEV_H2_SH(k4_m, k1_m, k3_m, k2_m, c0_m, c1_m); \ + DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \ + int1_m = tmp0_m + tmp1_m; \ + \ + c0_m = __msa_splati_h(mask_m, 6); \ + ILVL_H2_SH(k2_m, c0_m, zero_m, k2_m, c0_m, c1_m); \ + ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \ + DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \ + int2_m = tmp0_m + tmp1_m; \ + \ + c0_m = __msa_splati_h(mask_m, 6); \ + c0_m = __msa_ilvev_h(c0_m, k1_m); \ + \ + res0_m = __msa_ilvr_h((in1), (in3)); \ + tmp0_m = __msa_dotp_s_w(res0_m, c0_m); \ + int3_m = tmp2_m + tmp0_m; \ + \ + res0_m = __msa_ilvr_h((in2), (in3)); \ + c1_m = __msa_ilvev_h(k4_m, k3_m); \ + \ + tmp2_m = __msa_dotp_s_w(res0_m, c1_m); \ + res1_m = __msa_ilvr_h((in0), (in2)); \ + c1_m = __msa_ilvev_h(k1_m, zero_m); \ + \ + tmp3_m = __msa_dotp_s_w(res1_m, c1_m); \ + int3_m += tmp2_m; \ + int3_m += tmp3_m; \ + \ + SRARI_W4_SW(int0_m, int1_m, int2_m, int3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(int0_m, int0_m, int1_m, int1_m, out0, out1); \ + PCKEV_H2_SH(int2_m, int2_m, int3_m, int3_m, out2, out3); \ +} + +#define VP9_SET_CONST_PAIR(mask_h, idx1_h, idx2_h) ({ \ + v8i16 c0_m, c1_m; \ + \ + 
SPLATI_H2_SH(mask_h, idx1_h, idx2_h, c0_m, c1_m); \ + c0_m = __msa_ilvev_h(c1_m, c0_m); \ + \ + c0_m; \ +}) + +/* multiply and add macro */ +#define VP9_MADD(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \ + out0, out1, out2, out3) { \ + v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ + v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m); \ + ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m); \ + DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, \ + cst0, cst0, cst1, cst1, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1); \ + DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, \ + cst2, cst2, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out2, out3); \ +} + +/* idct 8x8 macro */ +#define VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v8i16 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m; \ + v8i16 k0_m, k1_m, k2_m, k3_m, res0_m, res1_m, res2_m, res3_m; \ + v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v8i16 mask_m = { cospi_28_64, cospi_4_64, cospi_20_64, cospi_12_64, \ + cospi_16_64, -cospi_4_64, -cospi_20_64, -cospi_16_64 }; \ + \ + k0_m = VP9_SET_CONST_PAIR(mask_m, 0, 5); \ + k1_m = VP9_SET_CONST_PAIR(mask_m, 1, 0); \ + k2_m = VP9_SET_CONST_PAIR(mask_m, 6, 3); \ + k3_m = VP9_SET_CONST_PAIR(mask_m, 3, 2); \ + VP9_MADD(in1, in7, in3, in5, k0_m, k1_m, k2_m, k3_m, in1, in7, in3, in5); \ + SUB2(in1, in3, in7, in5, res0_m, res1_m); \ + k0_m = VP9_SET_CONST_PAIR(mask_m, 4, 7); \ + k1_m = __msa_splati_h(mask_m, 4); \ + \ + ILVRL_H2_SH(res0_m, res1_m, res2_m, res3_m); \ + DOTP_SH4_SW(res2_m, res3_m, res2_m, res3_m, k0_m, k0_m, k1_m, k1_m, \ + tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + tp4_m = in1 + in3; \ + PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tp5_m, tp6_m); \ + tp7_m = in7 + in5; \ + k2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ + k3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ + VP9_MADD(in0, in4, in2, in6, k1_m, k0_m, k2_m, k3_m, \ + in0, in4, in2, in6); \ + BUTTERFLY_4(in0, in4, in2, in6, tp0_m, tp1_m, tp2_m, tp3_m); \ + BUTTERFLY_8(tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m, \ + out0, out1, out2, out3, out4, out5, out6, out7); \ +} + +#define VP9_IADST8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v4i32 r0_m, r1_m, r2_m, r3_m, r4_m, r5_m, r6_m, r7_m; \ + v4i32 m0_m, m1_m, m2_m, m3_m, t0_m, t1_m; \ + v8i16 res0_m, res1_m, res2_m, res3_m, k0_m, k1_m, in_s0, in_s1; \ + v8i16 mask1_m = { cospi_2_64, cospi_30_64, -cospi_2_64, \ + cospi_10_64, cospi_22_64, -cospi_10_64, cospi_18_64, cospi_14_64 }; \ + v8i16 mask2_m = { cospi_14_64, -cospi_18_64, cospi_26_64, \ + cospi_6_64, -cospi_26_64, cospi_8_64, cospi_24_64, -cospi_8_64 }; \ + v8i16 mask3_m = { -cospi_24_64, cospi_8_64, cospi_16_64, \ + -cospi_16_64, 0, 0, 0, 0 }; \ + \ + k0_m = VP9_SET_CONST_PAIR(mask1_m, 0, 1); \ + k1_m = VP9_SET_CONST_PAIR(mask1_m, 1, 2); \ + ILVRL_H2_SH(in1, in0, in_s1, in_s0); \ + DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \ + r0_m, r1_m, r2_m, r3_m); \ + k0_m = VP9_SET_CONST_PAIR(mask1_m, 6, 7); \ + k1_m = VP9_SET_CONST_PAIR(mask2_m, 0, 1); \ + ILVRL_H2_SH(in5, in4, in_s1, in_s0); \ + DOTP_SH4_SW(in_s1, in_s0, 
in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \ + r4_m, r5_m, r6_m, r7_m); \ + ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res0_m, res1_m); \ + SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, t0_m, t1_m); \ + k0_m = VP9_SET_CONST_PAIR(mask1_m, 3, 4); \ + k1_m = VP9_SET_CONST_PAIR(mask1_m, 4, 5); \ + ILVRL_H2_SH(in3, in2, in_s1, in_s0); \ + DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \ + r0_m, r1_m, r2_m, r3_m); \ + k0_m = VP9_SET_CONST_PAIR(mask2_m, 2, 3); \ + k1_m = VP9_SET_CONST_PAIR(mask2_m, 3, 4); \ + ILVRL_H2_SH(in7, in6, in_s1, in_s0); \ + DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \ + r4_m, r5_m, r6_m, r7_m); \ + ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res2_m, res3_m); \ + SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, r2_m, r3_m); \ + ILVRL_H2_SW(r3_m, r2_m, m2_m, m3_m); \ + BUTTERFLY_4(res0_m, res1_m, res3_m, res2_m, out0, in7, in4, in3); \ + k0_m = VP9_SET_CONST_PAIR(mask2_m, 5, 6); \ + k1_m = VP9_SET_CONST_PAIR(mask2_m, 6, 7); \ + ILVRL_H2_SH(t1_m, t0_m, in_s1, in_s0); \ + DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \ + r0_m, r1_m, r2_m, r3_m); \ + k1_m = VP9_SET_CONST_PAIR(mask3_m, 0, 1); \ + DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, \ + r4_m, r5_m, r6_m, r7_m); \ + ADD4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in1, out6); \ + SUB4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in2, in5); \ + k0_m = VP9_SET_CONST_PAIR(mask3_m, 2, 2); \ + k1_m = VP9_SET_CONST_PAIR(mask3_m, 2, 3); \ + ILVRL_H2_SH(in4, in3, in_s1, in_s0); \ + DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in3, out4); \ + ILVRL_H2_SW(in5, in2, m2_m, m3_m); \ + DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, \ + m0_m, m1_m, m2_m, m3_m); \ + SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, out2, in5); \ + \ + out1 = -in1; \ + out3 = -in3; \ + out5 = -in5; \ + out7 = -in7; \ +} + +#define VP9_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8, \ + r9, r10, r11, r12, r13, r14, r15, \ + out0, out1, out2, out3, out4, out5, \ + out6, out7, out8, out9, out10, out11, \ + out12, out13, out14, out15) { \ + v8i16 g0_m, g1_m, g2_m, g3_m, g4_m, g5_m, g6_m, g7_m; \ + v8i16 g8_m, g9_m, g10_m, g11_m, g12_m, g13_m, g14_m, g15_m; \ + v8i16 h0_m, h1_m, h2_m, h3_m, h4_m, h5_m, h6_m, h7_m; \ + v8i16 h8_m, h9_m, h10_m, h11_m; \ + v8i16 k0_m, k1_m, k2_m, k3_m; \ + \ + /* stage 1 */ \ + k0_m = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64); \ + k1_m = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); \ + k2_m = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); \ + k3_m = VP9_SET_COSPI_PAIR(cospi_15_64, 
-cospi_17_64); \ + VP9_MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, \ + g0_m, g1_m, g2_m, g3_m); \ + k0_m = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); \ + k1_m = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); \ + k2_m = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); \ + k3_m = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); \ + VP9_MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, \ + g4_m, g5_m, g6_m, g7_m); \ + k0_m = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); \ + k1_m = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); \ + k2_m = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); \ + k3_m = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); \ + VP9_MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, \ + g8_m, g9_m, g10_m, g11_m); \ + k0_m = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); \ + k1_m = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); \ + k2_m = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); \ + k3_m = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); \ + VP9_MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, \ + g12_m, g13_m, g14_m, g15_m); \ + \ + /* stage 2 */ \ + k0_m = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); \ + k1_m = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); \ + k2_m = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); \ + VP9_MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, \ + h0_m, h1_m, h2_m, h3_m); \ + k0_m = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); \ + k1_m = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); \ + k2_m = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); \ + VP9_MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, \ + h4_m, h5_m, h6_m, h7_m); \ + BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10); \ + BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, \ + h8_m, h9_m, h10_m, h11_m, h6_m, h4_m, h2_m, h0_m); \ + \ + /* stage 3 */ \ + BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m); \ + k0_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ + k1_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ + k2_m = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); \ + VP9_MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, \ + out4, out6, out5, out7); \ + VP9_MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, \ + out12, out14, out13, out15); \ + \ + /* stage 4 */ \ + k0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \ + k1_m = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); \ + k2_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \ + k3_m = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); \ + VP9_MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \ + VP9_MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \ + VP9_MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \ + VP9_MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \ +} +#endif /* VP9_COMMON_MIPS_MSA_VP9_IDCT_MSA_H_ */ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_intra_predict_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_intra_predict_msa.c new file mode 100644 index 00000000000..2fc610505a8 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_intra_predict_msa.c @@ -0,0 +1,737 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) { \ + out0 = __msa_subs_u_h(out0, in0); \ + out1 = __msa_subs_u_h(out1, in1); \ +} + +static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t src_data; + + src_data = LW(src); + + SW4(src_data, src_data, src_data, src_data, dst, dst_stride); +} + +static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t row; + uint32_t src_data1, src_data2; + + src_data1 = LW(src); + src_data2 = LW(src + 4); + + for (row = 8; row--;) { + SW(src_data1, dst); + SW(src_data2, (dst + 4)); + dst += dst_stride; + } +} + +static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t row; + v16u8 src0; + + src0 = LD_UB(src); + + for (row = 16; row--;) { + ST_UB(src0, dst); + dst += dst_stride; + } +} + +static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t row; + v16u8 src1, src2; + + src1 = LD_UB(src); + src2 = LD_UB(src + 16); + + for (row = 32; row--;) { + ST_UB2(src1, src2, dst, 16); + dst += dst_stride; + } +} + +static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t out0, out1, out2, out3; + + out0 = src[0] * 0x01010101; + out1 = src[1] * 0x01010101; + out2 = src[2] * 0x01010101; + out3 = src[3] * 0x01010101; + + SW4(out0, out1, out2, out3, dst, dst_stride); +} + +static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint64_t out0, out1, out2, out3, out4, out5, out6, out7; + + out0 = src[0] * 0x0101010101010101ull; + out1 = src[1] * 0x0101010101010101ull; + out2 = src[2] * 0x0101010101010101ull; + out3 = src[3] * 0x0101010101010101ull; + out4 = src[4] * 0x0101010101010101ull; + out5 = src[5] * 0x0101010101010101ull; + out6 = src[6] * 0x0101010101010101ull; + out7 = src[7] * 0x0101010101010101ull; + + SD4(out0, out1, out2, out3, dst, dst_stride); + dst += (4 * dst_stride); + SD4(out4, out5, out6, out7, dst, dst_stride); +} + +static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t row; + uint8_t inp0, inp1, inp2, inp3; + v16u8 src0, src1, src2, src3; + + for (row = 4; row--;) { + inp0 = src[0]; + inp1 = src[1]; + inp2 = src[2]; + inp3 = src[3]; + src += 4; + + src0 = (v16u8)__msa_fill_b(inp0); + src1 = (v16u8)__msa_fill_b(inp1); + src2 = (v16u8)__msa_fill_b(inp2); + src3 = (v16u8)__msa_fill_b(inp3); + + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + dst += (4 * dst_stride); + } +} + +static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t row; + uint8_t inp0, inp1, inp2, inp3; + v16u8 src0, src1, src2, src3; + + for (row = 8; row--;) { + inp0 = src[0]; + inp1 = src[1]; + inp2 = src[2]; + inp3 = src[3]; + src += 4; + + src0 = (v16u8)__msa_fill_b(inp0); + src1 = (v16u8)__msa_fill_b(inp1); + src2 = (v16u8)__msa_fill_b(inp2); + src3 = (v16u8)__msa_fill_b(inp3); + + ST_UB2(src0, src0, dst, 16); + dst += dst_stride; + ST_UB2(src1, src1, dst, 16); + dst += dst_stride; + ST_UB2(src2, src2, dst, 16); + dst += dst_stride; + ST_UB2(src3, src3, dst, 16); + dst += dst_stride; + } +} + +static void intra_predict_dc_4x4_msa(const uint8_t *src_top, + const uint8_t *src_left, + 
uint8_t *dst, int32_t dst_stride) { + uint32_t val0, val1; + v16i8 store, src = { 0 }; + v8u16 sum_h; + v4u32 sum_w; + v2u64 sum_d; + + val0 = LW(src_top); + val1 = LW(src_left); + INSERT_W2_SB(val0, val1, src); + sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src); + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3); + store = __msa_splati_b((v16i8)sum_w, 0); + val0 = __msa_copy_u_w((v4i32)store, 0); + + SW4(val0, val0, val0, val0, dst, dst_stride); +} + +static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t val0; + v16i8 store, data = { 0 }; + v8u16 sum_h; + v4u32 sum_w; + + val0 = LW(src); + data = (v16i8)__msa_insert_w((v4i32)data, 0, val0); + sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data); + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2); + store = __msa_splati_b((v16i8)sum_w, 0); + val0 = __msa_copy_u_w((v4i32)store, 0); + + SW4(val0, val0, val0, val0, dst, dst_stride); +} + +static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) { + uint32_t out; + const v16i8 store = __msa_ldi_b(128); + + out = __msa_copy_u_w((v4i32)store, 0); + + SW4(out, out, out, out, dst, dst_stride); +} + +static void intra_predict_dc_8x8_msa(const uint8_t *src_top, + const uint8_t *src_left, + uint8_t *dst, int32_t dst_stride) { + uint64_t val0, val1; + v16i8 store; + v16u8 src = { 0 }; + v8u16 sum_h; + v4u32 sum_w; + v2u64 sum_d; + + val0 = LD(src_top); + val1 = LD(src_left); + INSERT_D2_UB(val0, val1, src); + sum_h = __msa_hadd_u_h(src, src); + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4); + store = __msa_splati_b((v16i8)sum_w, 0); + val0 = __msa_copy_u_d((v2i64)store, 0); + + SD4(val0, val0, val0, val0, dst, dst_stride); + dst += (4 * dst_stride); + SD4(val0, val0, val0, val0, dst, dst_stride); +} + +static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint64_t val0; + v16i8 store; + v16u8 data = { 0 }; + v8u16 sum_h; + v4u32 sum_w; + v2u64 sum_d; + + val0 = LD(src); + data = (v16u8)__msa_insert_d((v2i64)data, 0, val0); + sum_h = __msa_hadd_u_h(data, data); + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3); + store = __msa_splati_b((v16i8)sum_w, 0); + val0 = __msa_copy_u_d((v2i64)store, 0); + + SD4(val0, val0, val0, val0, dst, dst_stride); + dst += (4 * dst_stride); + SD4(val0, val0, val0, val0, dst, dst_stride); +} + +static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) { + uint64_t out; + const v16i8 store = __msa_ldi_b(128); + + out = __msa_copy_u_d((v2i64)store, 0); + + SD4(out, out, out, out, dst, dst_stride); + dst += (4 * dst_stride); + SD4(out, out, out, out, dst, dst_stride); +} + +static void intra_predict_dc_16x16_msa(const uint8_t *src_top, + const uint8_t *src_left, + uint8_t *dst, int32_t dst_stride) { + v16u8 top, left, out; + v8u16 sum_h, sum_top, sum_left; + v4u32 sum_w; + v2u64 sum_d; + + top = LD_UB(src_top); + left = LD_UB(src_left); + HADD_UB2_UH(top, left, sum_top, sum_left); + sum_h = sum_top + sum_left; + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); + sum_d = __msa_hadd_u_d(sum_w, sum_w); 
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5); + out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); + + ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); + dst += (8 * dst_stride); + ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); +} + +static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + v16u8 data, out; + v8u16 sum_h; + v4u32 sum_w; + v2u64 sum_d; + + data = LD_UB(src); + sum_h = __msa_hadd_u_h(data, data); + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4); + out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); + + ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); + dst += (8 * dst_stride); + ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); +} + +static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) { + const v16u8 out = (v16u8)__msa_ldi_b(128); + + ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); + dst += (8 * dst_stride); + ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); +} + +static void intra_predict_dc_32x32_msa(const uint8_t *src_top, + const uint8_t *src_left, + uint8_t *dst, int32_t dst_stride) { + uint32_t row; + v16u8 top0, top1, left0, left1, out; + v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1; + v4u32 sum_w; + v2u64 sum_d; + + LD_UB2(src_top, 16, top0, top1); + LD_UB2(src_left, 16, left0, left1); + HADD_UB2_UH(top0, top1, sum_top0, sum_top1); + HADD_UB2_UH(left0, left1, sum_left0, sum_left1); + sum_h = sum_top0 + sum_top1; + sum_h += sum_left0 + sum_left1; + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6); + out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); + + for (row = 16; row--;) { + ST_UB2(out, out, dst, 16); + dst += dst_stride; + ST_UB2(out, out, dst, 16); + dst += dst_stride; + } +} + +static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst, + int32_t dst_stride) { + uint32_t row; + v16u8 data0, data1, out; + v8u16 sum_h, sum_data0, sum_data1; + v4u32 sum_w; + v2u64 sum_d; + + LD_UB2(src, 16, data0, data1); + HADD_UB2_UH(data0, data1, sum_data0, sum_data1); + sum_h = sum_data0 + sum_data1; + sum_w = __msa_hadd_u_w(sum_h, sum_h); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); + sum_d = __msa_hadd_u_d(sum_w, sum_w); + sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5); + out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); + + for (row = 16; row--;) { + ST_UB2(out, out, dst, 16); + dst += dst_stride; + ST_UB2(out, out, dst, 16); + dst += dst_stride; + } +} + +static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) { + uint32_t row; + const v16u8 out = (v16u8)__msa_ldi_b(128); + + for (row = 16; row--;) { + ST_UB2(out, out, dst, 16); + dst += dst_stride; + ST_UB2(out, out, dst, 16); + dst += dst_stride; + } +} + +static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr, + const uint8_t *src_left, + uint8_t *dst, int32_t dst_stride) { + uint32_t val; + uint8_t top_left = src_top_ptr[-1]; + v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 }; + v16u8 src0, src1, src2, src3; + v8u16 src_top_left, vec0, vec1, vec2, vec3; + + src_top_left = 
(v8u16)__msa_fill_h(top_left); + val = LW(src_top_ptr); + src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val); + + src_left0 = __msa_fill_b(src_left[0]); + src_left1 = __msa_fill_b(src_left[1]); + src_left2 = __msa_fill_b(src_left[2]); + src_left3 = __msa_fill_b(src_left[3]); + + ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top, + src_left3, src_top, src0, src1, src2, src3); + HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3); + SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); + PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1); + ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride); +} + +static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr, + const uint8_t *src_left, + uint8_t *dst, int32_t dst_stride) { + uint64_t val; + uint8_t top_left = src_top_ptr[-1]; + uint32_t loop_cnt; + v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 }; + v8u16 src_top_left, vec0, vec1, vec2, vec3; + v16u8 src0, src1, src2, src3; + + val = LD(src_top_ptr); + src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val); + src_top_left = (v8u16)__msa_fill_h(top_left); + + for (loop_cnt = 2; loop_cnt--;) { + src_left0 = __msa_fill_b(src_left[0]); + src_left1 = __msa_fill_b(src_left[1]); + src_left2 = __msa_fill_b(src_left[2]); + src_left3 = __msa_fill_b(src_left[3]); + src_left += 4; + + ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top, + src_left3, src_top, src0, src1, src2, src3); + HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3); + SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); + PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1); + ST8x4_UB(tmp0, tmp1, dst, dst_stride); + dst += (4 * dst_stride); + } +} + +static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr, + const uint8_t *src_left, + uint8_t *dst, int32_t dst_stride) { + uint8_t top_left = src_top_ptr[-1]; + uint32_t loop_cnt; + v16i8 src_top, src_left0, src_left1, src_left2, src_left3; + v8u16 src_top_left, res_r, res_l; + + src_top = LD_SB(src_top_ptr); + src_top_left = (v8u16)__msa_fill_h(top_left); + + for (loop_cnt = 4; loop_cnt--;) { + src_left0 = __msa_fill_b(src_left[0]); + src_left1 = __msa_fill_b(src_left[1]); + src_left2 = __msa_fill_b(src_left[2]); + src_left3 = __msa_fill_b(src_left[3]); + src_left += 4; + + ILVRL_B2_UH(src_left0, src_top, res_r, res_l); + HADD_UB2_UH(res_r, res_l, res_r, res_l); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); + + SAT_UH2_UH(res_r, res_l, 7); + PCKEV_ST_SB(res_r, res_l, dst); + dst += dst_stride; + + ILVRL_B2_UH(src_left1, src_top, res_r, res_l); + HADD_UB2_UH(res_r, res_l, res_r, res_l); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); + SAT_UH2_UH(res_r, res_l, 7); + PCKEV_ST_SB(res_r, res_l, dst); + dst += dst_stride; + + ILVRL_B2_UH(src_left2, src_top, res_r, res_l); + HADD_UB2_UH(res_r, res_l, res_r, res_l); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); + SAT_UH2_UH(res_r, res_l, 7); + PCKEV_ST_SB(res_r, res_l, dst); + dst += dst_stride; + + ILVRL_B2_UH(src_left3, src_top, res_r, res_l); + HADD_UB2_UH(res_r, res_l, res_r, res_l); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); + SAT_UH2_UH(res_r, res_l, 7); + PCKEV_ST_SB(res_r, res_l, dst); + dst += dst_stride; + } +} + +static void intra_predict_tm_32x32_msa(const 
uint8_t *src_top, + const uint8_t *src_left, + uint8_t *dst, int32_t dst_stride) { + uint8_t top_left = src_top[-1]; + uint32_t loop_cnt; + v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3; + v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1; + + LD_SB2(src_top, 16, src_top0, src_top1); + src_top_left = (v8u16)__msa_fill_h(top_left); + + for (loop_cnt = 8; loop_cnt--;) { + src_left0 = __msa_fill_b(src_left[0]); + src_left1 = __msa_fill_b(src_left[1]); + src_left2 = __msa_fill_b(src_left[2]); + src_left3 = __msa_fill_b(src_left[3]); + src_left += 4; + + ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1); + ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1); + HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); + SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); + PCKEV_ST_SB(res_r0, res_l0, dst); + PCKEV_ST_SB(res_r1, res_l1, dst + 16); + dst += dst_stride; + + ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1); + ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1); + HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); + SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); + PCKEV_ST_SB(res_r0, res_l0, dst); + PCKEV_ST_SB(res_r1, res_l1, dst + 16); + dst += dst_stride; + + ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1); + ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1); + HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); + SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); + PCKEV_ST_SB(res_r0, res_l0, dst); + PCKEV_ST_SB(res_r1, res_l1, dst + 16); + dst += dst_stride; + + ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1); + ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1); + HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); + IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); + SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); + PCKEV_ST_SB(res_r0, res_l0, dst); + PCKEV_ST_SB(res_r1, res_l1, dst + 16); + dst += dst_stride; + } +} + +void vp9_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_vert_4x4_msa(above, dst, y_stride); +} + +void vp9_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_vert_8x8_msa(above, dst, y_stride); +} + +void vp9_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_vert_16x16_msa(above, dst, y_stride); +} + +void vp9_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_vert_32x32_msa(above, dst, y_stride); +} + +void vp9_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + + intra_predict_horiz_4x4_msa(left, dst, y_stride); +} + +void 
vp9_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + + intra_predict_horiz_8x8_msa(left, dst, y_stride); +} + +void vp9_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + + intra_predict_horiz_16x16_msa(left, dst, y_stride); +} + +void vp9_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + + intra_predict_horiz_32x32_msa(left, dst, y_stride); +} + +void vp9_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_dc_4x4_msa(above, left, dst, y_stride); +} + +void vp9_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_dc_8x8_msa(above, left, dst, y_stride); +} + +void vp9_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_dc_16x16_msa(above, left, dst, y_stride); +} + +void vp9_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_dc_32x32_msa(above, left, dst, y_stride); +} + +void vp9_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_dc_tl_4x4_msa(above, dst, y_stride); +} + +void vp9_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_dc_tl_8x8_msa(above, dst, y_stride); +} + +void vp9_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_dc_tl_16x16_msa(above, dst, y_stride); +} + +void vp9_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + + intra_predict_dc_tl_32x32_msa(above, dst, y_stride); +} + +void vp9_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + + intra_predict_dc_tl_4x4_msa(left, dst, y_stride); +} + +void vp9_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + + intra_predict_dc_tl_8x8_msa(left, dst, y_stride); +} + +void vp9_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + + intra_predict_dc_tl_16x16_msa(left, dst, y_stride); +} + +void vp9_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + + intra_predict_dc_tl_32x32_msa(left, dst, y_stride); +} + +void vp9_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + + intra_predict_128dc_4x4_msa(dst, y_stride); +} + +void vp9_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + + intra_predict_128dc_8x8_msa(dst, y_stride); +} + +void vp9_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + + intra_predict_128dc_16x16_msa(dst, y_stride); +} + +void vp9_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + + 
intra_predict_128dc_32x32_msa(dst, y_stride); +} + +void vp9_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_tm_4x4_msa(above, left, dst, y_stride); +} + +void vp9_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_tm_8x8_msa(above, left, dst, y_stride); +} + +void vp9_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_tm_16x16_msa(above, left, dst, y_stride); +} + +void vp9_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, + const uint8_t *above, const uint8_t *left) { + intra_predict_tm_32x32_msa(above, left, dst, y_stride); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_16_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_16_msa.c new file mode 100644 index 00000000000..aeaa48e4e60 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_16_msa.c @@ -0,0 +1,1480 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_ports/mem.h" +#include "vp9/common/mips/msa/vp9_loopfilter_msa.h" + +int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, + uint8_t *filter48, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; + v16u8 flat, mask, hev, thresh, b_limit, limit; + v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; + v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l; + v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; + v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l; + v16u8 zero = { 0 }; + + /* load vector elements */ + LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + /* mask and hev */ + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + if (__msa_test_bz_v(flat)) { + ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch); + + return 1; + } else { + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, + q2_r, q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, + p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); + + ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); + ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); + VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, + p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); + + /* convert 16 bit output data into 8 bit */ + PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, + p0_filt8_r, q0_filt8_l, 
q0_filt8_r, p2_filt8_r, p1_filt8_r, + p0_filt8_r, q0_filt8_r); + PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, + q2_filt8_r); + + /* store pixel values */ + p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); + p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); + p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); + q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); + q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); + q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); + + ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16); + filter48 += (4 * 16); + ST_UB2(q1_out, q2_out, filter48, 16); + filter48 += (2 * 16); + ST_UB(flat, filter48); + + return 0; + } +} + +void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) { + v16u8 flat, flat2, filter8; + v16i8 zero = { 0 }; + v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; + v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in; + v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in; + v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in; + v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in; + v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l; + v8i16 l_out, r_out; + + flat = LD_UB(filter48 + 96); + + LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0); + LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7); + VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); + + if (__msa_test_bz_v(flat2)) { + LD_UB4(filter48, 16, p2, p1, p0, q0); + LD_UB2(filter48 + 4 * 16, 16, q1, q2); + + src -= 3 * pitch; + ST_UB4(p2, p1, p0, q0, src, pitch); + src += (4 * pitch); + ST_UB2(q1, q2, src, pitch); + } else { + src -= 7 * pitch; + + ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, + zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, + p2_r_in, p1_r_in, p0_r_in); + + q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0); + + tmp0_r = p7_r_in << 3; + tmp0_r -= p7_r_in; + tmp0_r += p6_r_in; + tmp0_r += q0_r_in; + tmp1_r = p6_r_in + p5_r_in; + tmp1_r += p4_r_in; + tmp1_r += p3_r_in; + tmp1_r += p2_r_in; + tmp1_r += p1_r_in; + tmp1_r += p0_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in, + p5_l_in, p4_l_in); + ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in, + p1_l_in, p0_l_in); + q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0); + + tmp0_l = p7_l_in << 3; + tmp0_l -= p7_l_in; + tmp0_l += p6_l_in; + tmp0_l += q0_l_in; + tmp1_l = p6_l_in + p5_l_in; + tmp1_l += p4_l_in; + tmp1_l += p3_l_in; + tmp1_l += p2_l_in; + tmp1_l += p1_l_in; + tmp1_l += p0_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2); + ST_UB(p6, src); + src += pitch; + + /* p5 */ + q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1); + tmp0_r = p5_r_in - p6_r_in; + tmp0_r += q1_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1); + tmp0_l = p5_l_in - p6_l_in; + tmp0_l += q1_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2); + ST_UB(p5, src); + src += pitch; + + /* p4 */ + q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2); + tmp0_r 
= p4_r_in - p5_r_in; + tmp0_r += q2_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = (v8i16)__msa_srari_h((v8i16)tmp1_r, 4); + + q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2); + tmp0_l = p4_l_in - p5_l_in; + tmp0_l += q2_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2); + ST_UB(p4, src); + src += pitch; + + /* p3 */ + q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3); + tmp0_r = p3_r_in - p4_r_in; + tmp0_r += q3_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3); + tmp0_l = p3_l_in - p4_l_in; + tmp0_l += q3_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2); + ST_UB(p3, src); + src += pitch; + + /* p2 */ + q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4); + filter8 = LD_UB(filter48); + tmp0_r = p2_r_in - p3_r_in; + tmp0_r += q4_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4); + tmp0_l = p2_l_in - p3_l_in; + tmp0_l += q4_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += pitch; + + /* p1 */ + q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5); + filter8 = LD_UB(filter48 + 16); + tmp0_r = p1_r_in - p2_r_in; + tmp0_r += q5_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5); + tmp0_l = p1_l_in - p2_l_in; + tmp0_l += q5_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += pitch; + + /* p0 */ + q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6); + filter8 = LD_UB(filter48 + 32); + tmp0_r = p0_r_in - p1_r_in; + tmp0_r += q6_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6); + tmp0_l = p0_l_in - p1_l_in; + tmp0_l += q6_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += pitch; + + /* q0 */ + q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7); + filter8 = LD_UB(filter48 + 48); + tmp0_r = q7_r_in - p0_r_in; + tmp0_r += q0_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7); + tmp0_l = q7_l_in - p0_l_in; + tmp0_l += q0_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += pitch; + + /* q1 */ + filter8 = LD_UB(filter48 + 64); + tmp0_r = q7_r_in - q0_r_in; + tmp0_r += q1_r_in; + tmp0_r -= p6_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + tmp0_l = q7_l_in - q0_l_in; + tmp0_l += q1_l_in; + tmp0_l -= p6_l_in; + tmp1_l 
+= tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += pitch; + + /* q2 */ + filter8 = LD_UB(filter48 + 80); + tmp0_r = q7_r_in - q1_r_in; + tmp0_r += q2_r_in; + tmp0_r -= p5_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + tmp0_l = q7_l_in - q1_l_in; + tmp0_l += q2_l_in; + tmp0_l -= p5_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += pitch; + + /* q3 */ + tmp0_r = q7_r_in - q2_r_in; + tmp0_r += q3_r_in; + tmp0_r -= p4_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + tmp0_l = q7_l_in - q2_l_in; + tmp0_l += q3_l_in; + tmp0_l -= p4_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2); + ST_UB(q3, src); + src += pitch; + + /* q4 */ + tmp0_r = q7_r_in - q3_r_in; + tmp0_r += q4_r_in; + tmp0_r -= p3_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + tmp0_l = q7_l_in - q3_l_in; + tmp0_l += q4_l_in; + tmp0_l -= p3_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2); + ST_UB(q4, src); + src += pitch; + + /* q5 */ + tmp0_r = q7_r_in - q4_r_in; + tmp0_r += q5_r_in; + tmp0_r -= p2_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + tmp0_l = q7_l_in - q4_l_in; + tmp0_l += q5_l_in; + tmp0_l -= p2_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2); + ST_UB(q5, src); + src += pitch; + + /* q6 */ + tmp0_r = q7_r_in - q5_r_in; + tmp0_r += q6_r_in; + tmp0_r -= p1_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + tmp0_l = q7_l_in - q5_l_in; + tmp0_l += q6_l_in; + tmp0_l -= p1_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2); + ST_UB(q6, src); + } +} + +void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { + DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]); + uint8_t early_exit = 0; + + (void)count; + + early_exit = vp9_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr, + limit_ptr, thresh_ptr); + + if (0 == early_exit) { + vp9_hz_lpf_t16_16w(src, pitch, filter48); + } +} + +void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { + if (1 == count) { + uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; + uint64_t dword0, dword1; + v16u8 flat2, mask, hev, flat, thresh, b_limit, limit; + v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p7, p6, p5, p4, q4, q5, q6, q7; + v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; + v16u8 p0_filter16, p1_filter16; + v8i16 p2_filter8, p1_filter8, p0_filter8; + v8i16 q0_filter8, q1_filter8, q2_filter8; + v8u16 p7_r, p6_r, p5_r, p4_r, q7_r, q6_r, q5_r, q4_r; + v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r; + v16i8 zero = { 0 }; + v8u16 tmp0, 
tmp1, tmp2; + + /* load vector elements */ + LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, + q1_out); + + flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat); + + if (__msa_test_bz_v(flat)) { + p1_d = __msa_copy_u_d((v2i64)p1_out, 0); + p0_d = __msa_copy_u_d((v2i64)p0_out, 0); + q0_d = __msa_copy_u_d((v2i64)q0_out, 0); + q1_d = __msa_copy_u_d((v2i64)q1_out, 0); + SD4(p1_d, p0_d, q0_d, q1_d, src - 2 * pitch, pitch); + } else { + /* convert 8 bit input data into 16 bit */ + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, + q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8, + p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8); + + /* convert 16 bit output data into 8 bit */ + PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, + zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8, + q0_filter8); + PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8); + + /* store pixel values */ + p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat); + p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat); + p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat); + q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat); + q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat); + q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat); + + /* load 16 vector elements */ + LD_UB4((src - 8 * pitch), pitch, p7, p6, p5, p4); + LD_UB4(src + (4 * pitch), pitch, q4, q5, q6, q7); + + VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); + + if (__msa_test_bz_v(flat2)) { + p2_d = __msa_copy_u_d((v2i64)p2_out, 0); + p1_d = __msa_copy_u_d((v2i64)p1_out, 0); + p0_d = __msa_copy_u_d((v2i64)p0_out, 0); + q0_d = __msa_copy_u_d((v2i64)q0_out, 0); + q1_d = __msa_copy_u_d((v2i64)q1_out, 0); + q2_d = __msa_copy_u_d((v2i64)q2_out, 0); + + SD4(p2_d, p1_d, p0_d, q0_d, src - 3 * pitch, pitch); + SD(q1_d, src + pitch); + SD(q2_d, src + 2 * pitch); + } else { + /* LSB(right) 8 pixel operation */ + ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, q4, zero, q5, + zero, q6, zero, q7, p7_r, p6_r, p5_r, p4_r, q4_r, q5_r, q6_r, + q7_r); + + tmp0 = p7_r << 3; + tmp0 -= p7_r; + tmp0 += p6_r; + tmp0 += q0_r; + + src -= 7 * pitch; + + /* calculation of p6 and p5 */ + tmp1 = p6_r + p5_r + p4_r + p3_r; + tmp1 += (p2_r + p1_r + p0_r); + tmp1 += tmp0; + p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + tmp0 = p5_r - p6_r + q1_r - p7_r; + tmp1 += tmp0; + p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, + p1_filter16); + p0_filter16 = __msa_bmnz_v(p6, p0_filter16, flat2); + p1_filter16 = __msa_bmnz_v(p5, p1_filter16, flat2); + dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); + dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); + SD(dword0, src); + src += pitch; + SD(dword1, src); + src += pitch; + + /* calculation of p4 and p3 */ + tmp0 = p4_r - p5_r + q2_r - p7_r; + tmp2 = p3_r - p4_r + q3_r - p7_r; + tmp1 += tmp0; + p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + tmp1 += tmp2; + p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + PCKEV_B2_UB(zero, 
p0_filter16, zero, p1_filter16, p0_filter16, + p1_filter16); + p0_filter16 = __msa_bmnz_v(p4, p0_filter16, flat2); + p1_filter16 = __msa_bmnz_v(p3, p1_filter16, flat2); + dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); + dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); + SD(dword0, src); + src += pitch; + SD(dword1, src); + src += pitch; + + /* calculation of p2 and p1 */ + tmp0 = p2_r - p3_r + q4_r - p7_r; + tmp2 = p1_r - p2_r + q5_r - p7_r; + tmp1 += tmp0; + p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + tmp1 += tmp2; + p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, + p1_filter16); + p0_filter16 = __msa_bmnz_v(p2_out, p0_filter16, flat2); + p1_filter16 = __msa_bmnz_v(p1_out, p1_filter16, flat2); + dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); + dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); + SD(dword0, src); + src += pitch; + SD(dword1, src); + src += pitch; + + /* calculation of p0 and q0 */ + tmp0 = (p0_r - p1_r) + (q6_r - p7_r); + tmp2 = (q7_r - p0_r) + (q0_r - p7_r); + tmp1 += tmp0; + p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + tmp1 += tmp2; + p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, + p1_filter16); + p0_filter16 = __msa_bmnz_v(p0_out, p0_filter16, flat2); + p1_filter16 = __msa_bmnz_v(q0_out, p1_filter16, flat2); + dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); + dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); + SD(dword0, src); + src += pitch; + SD(dword1, src); + src += pitch; + + /* calculation of q1 and q2 */ + tmp0 = q7_r - q0_r + q1_r - p6_r; + tmp2 = q7_r - q1_r + q2_r - p5_r; + tmp1 += tmp0; + p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + tmp1 += tmp2; + p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, + p1_filter16); + p0_filter16 = __msa_bmnz_v(q1_out, p0_filter16, flat2); + p1_filter16 = __msa_bmnz_v(q2_out, p1_filter16, flat2); + dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); + dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); + SD(dword0, src); + src += pitch; + SD(dword1, src); + src += pitch; + + /* calculation of q3 and q4 */ + tmp0 = (q7_r - q2_r) + (q3_r - p4_r); + tmp2 = (q7_r - q3_r) + (q4_r - p3_r); + tmp1 += tmp0; + p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + tmp1 += tmp2; + p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, + p1_filter16); + p0_filter16 = __msa_bmnz_v(q3, p0_filter16, flat2); + p1_filter16 = __msa_bmnz_v(q4, p1_filter16, flat2); + dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); + dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); + SD(dword0, src); + src += pitch; + SD(dword1, src); + src += pitch; + + /* calculation of q5 and q6 */ + tmp0 = (q7_r - q4_r) + (q5_r - p2_r); + tmp2 = (q7_r - q5_r) + (q6_r - p1_r); + tmp1 += tmp0; + p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + tmp1 += tmp2; + p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); + PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, + p1_filter16); + p0_filter16 = __msa_bmnz_v(q5, p0_filter16, flat2); + p1_filter16 = __msa_bmnz_v(q6, p1_filter16, flat2); + dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); + dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); + SD(dword0, src); + src += pitch; + SD(dword1, src); + } + } + } else { + vp9_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr, + thresh_ptr, count); + } +} + +static void 
vp9_transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch, + uint8_t *output, int32_t out_pitch) { + v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org; + v16i8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; + + LD_UB8(input, in_pitch, + p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org); + /* 8x8 transpose */ + TRANSPOSE8x8_UB_UB(p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, + p0_org, p7, p6, p5, p4, p3, p2, p1, p0); + /* 8x8 transpose */ + ILVL_B4_SB(p5_org, p7_org, p4_org, p6_org, p1_org, p3_org, p0_org, p2_org, + tmp0, tmp1, tmp2, tmp3); + ILVR_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp4, tmp6); + ILVL_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp7); + ILVR_W2_UB(tmp6, tmp4, tmp7, tmp5, q0, q4); + ILVL_W2_UB(tmp6, tmp4, tmp7, tmp5, q2, q6); + SLDI_B4_0_UB(q0, q2, q4, q6, q1, q3, q5, q7, 8); + + ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch); + output += (8 * out_pitch); + ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch); +} + +static void vp9_transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch, + uint8_t *output, int32_t out_pitch) { + v16u8 p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o; + v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; + + LD_UB8(input, in_pitch, p7, p6, p5, p4, p3, p2, p1, p0); + LD_UB8(input + (8 * in_pitch), in_pitch, q0, q1, q2, q3, q4, q5, q6, q7); + TRANSPOSE16x8_UB_UB(p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, + q6, q7, p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o); + ST_UB8(p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o, output, out_pitch); +} + +static void vp9_transpose_16x16(uint8_t *input, int32_t in_pitch, + uint8_t *output, int32_t out_pitch) { + v16u8 row0, row1, row2, row3, row4, row5, row6, row7; + v16u8 row8, row9, row10, row11, row12, row13, row14, row15; + v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; + v8i16 tmp0, tmp1, tmp4, tmp5, tmp6, tmp7; + v4i32 tmp2, tmp3; + + LD_UB8(input, in_pitch, row0, row1, row2, row3, row4, row5, row6, row7); + input += (8 * in_pitch); + LD_UB8(input, in_pitch, + row8, row9, row10, row11, row12, row13, row14, row15); + + TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, + row8, row9, row10, row11, row12, row13, row14, row15, + p7, p6, p5, p4, p3, p2, p1, p0); + + /* transpose 16x8 matrix into 8x16 */ + /* total 8 intermediate register and 32 instructions */ + q7 = (v16u8)__msa_ilvod_d((v2i64)row8, (v2i64)row0); + q6 = (v16u8)__msa_ilvod_d((v2i64)row9, (v2i64)row1); + q5 = (v16u8)__msa_ilvod_d((v2i64)row10, (v2i64)row2); + q4 = (v16u8)__msa_ilvod_d((v2i64)row11, (v2i64)row3); + q3 = (v16u8)__msa_ilvod_d((v2i64)row12, (v2i64)row4); + q2 = (v16u8)__msa_ilvod_d((v2i64)row13, (v2i64)row5); + q1 = (v16u8)__msa_ilvod_d((v2i64)row14, (v2i64)row6); + q0 = (v16u8)__msa_ilvod_d((v2i64)row15, (v2i64)row7); + + ILVEV_B2_SH(q7, q6, q5, q4, tmp0, tmp1); + tmp4 = (v8i16)__msa_ilvod_b((v16i8)q6, (v16i8)q7); + tmp5 = (v8i16)__msa_ilvod_b((v16i8)q4, (v16i8)q5); + + ILVEV_B2_UB(q3, q2, q1, q0, q5, q7); + tmp6 = (v8i16)__msa_ilvod_b((v16i8)q2, (v16i8)q3); + tmp7 = (v8i16)__msa_ilvod_b((v16i8)q0, (v16i8)q1); + + ILVEV_H2_SW(tmp0, tmp1, q5, q7, tmp2, tmp3); + q0 = (v16u8)__msa_ilvev_w(tmp3, tmp2); + q4 = (v16u8)__msa_ilvod_w(tmp3, tmp2); + + tmp2 = (v4i32)__msa_ilvod_h(tmp1, tmp0); + tmp3 = (v4i32)__msa_ilvod_h((v8i16)q7, (v8i16)q5); + q2 = (v16u8)__msa_ilvev_w(tmp3, tmp2); + q6 = (v16u8)__msa_ilvod_w(tmp3, tmp2); + + ILVEV_H2_SW(tmp4, 
tmp5, tmp6, tmp7, tmp2, tmp3); + q1 = (v16u8)__msa_ilvev_w(tmp3, tmp2); + q5 = (v16u8)__msa_ilvod_w(tmp3, tmp2); + + tmp2 = (v4i32)__msa_ilvod_h(tmp5, tmp4); + tmp3 = (v4i32)__msa_ilvod_h(tmp7, tmp6); + q3 = (v16u8)__msa_ilvev_w(tmp3, tmp2); + q7 = (v16u8)__msa_ilvod_w(tmp3, tmp2); + + ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch); + output += (8 * out_pitch); + ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch); +} + +int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48, + uint8_t *src_org, int32_t pitch_org, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; + v16u8 flat, mask, hev, thresh, b_limit, limit; + v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; + v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; + v16i8 zero = { 0 }; + v8i16 vec0, vec1, vec2, vec3; + + /* load vector elements */ + LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + /* mask and hev */ + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + /* flat4 */ + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + /* filter4 */ + VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat); + + if (__msa_test_bz_v(flat)) { + ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec2, vec3); + ST4x8_UB(vec2, vec3, (src_org - 2), pitch_org); + return 1; + } else { + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, + q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, + p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); + + /* convert 16 bit output data into 8 bit */ + p2_r = (v8u16)__msa_pckev_b((v16i8)p2_filt8_r, (v16i8)p2_filt8_r); + p1_r = (v8u16)__msa_pckev_b((v16i8)p1_filt8_r, (v16i8)p1_filt8_r); + p0_r = (v8u16)__msa_pckev_b((v16i8)p0_filt8_r, (v16i8)p0_filt8_r); + q0_r = (v8u16)__msa_pckev_b((v16i8)q0_filt8_r, (v16i8)q0_filt8_r); + q1_r = (v8u16)__msa_pckev_b((v16i8)q1_filt8_r, (v16i8)q1_filt8_r); + q2_r = (v8u16)__msa_pckev_b((v16i8)q2_filt8_r, (v16i8)q2_filt8_r); + + /* store pixel values */ + p2_out = __msa_bmnz_v(p2, (v16u8)p2_r, flat); + p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_r, flat); + p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_r, flat); + q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_r, flat); + q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_r, flat); + q2_out = __msa_bmnz_v(q2, (v16u8)q2_r, flat); + + ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16); + filter48 += (4 * 16); + ST_UB2(q1_out, q2_out, filter48, 16); + filter48 += (2 * 16); + ST_UB(flat, filter48); + + return 0; + } +} + +int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch, + uint8_t *filter48) { + v16i8 zero = { 0 }; + v16u8 filter8, flat, flat2; + v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; + v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in; + v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in; + v8u16 tmp0_r, tmp1_r; + v8i16 r_out; + + flat = LD_UB(filter48 + 6 * 16); + + LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0); + LD_UB8(src, 16, q0, q1, q2, 
q3, q4, q5, q6, q7); + + VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); + + if (__msa_test_bz_v(flat2)) { + v8i16 vec0, vec1, vec2, vec3, vec4; + + LD_UB4(filter48, 16, p2, p1, p0, q0); + LD_UB2(filter48 + 4 * 16, 16, q1, q2); + + ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec3, vec4); + vec2 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1); + + src_org -= 3; + ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch); + ST2x4_UB(vec2, 0, (src_org + 4), pitch); + src_org += (4 * pitch); + ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch); + ST2x4_UB(vec2, 4, (src_org + 4), pitch); + + return 1; + } else { + src -= 7 * 16; + + ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, + zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, + p3_r_in, p2_r_in, p1_r_in, p0_r_in); + q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0); + + tmp0_r = p7_r_in << 3; + tmp0_r -= p7_r_in; + tmp0_r += p6_r_in; + tmp0_r += q0_r_in; + tmp1_r = p6_r_in + p5_r_in; + tmp1_r += p4_r_in; + tmp1_r += p3_r_in; + tmp1_r += p2_r_in; + tmp1_r += p1_r_in; + tmp1_r += p0_r_in; + tmp1_r += tmp0_r; + + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2); + ST8x1_UB(p6, src); + src += 16; + + /* p5 */ + q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1); + tmp0_r = p5_r_in - p6_r_in; + tmp0_r += q1_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2); + ST8x1_UB(p5, src); + src += 16; + + /* p4 */ + q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2); + tmp0_r = p4_r_in - p5_r_in; + tmp0_r += q2_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2); + ST8x1_UB(p4, src); + src += 16; + + /* p3 */ + q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3); + tmp0_r = p3_r_in - p4_r_in; + tmp0_r += q3_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2); + ST8x1_UB(p3, src); + src += 16; + + /* p2 */ + q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4); + filter8 = LD_UB(filter48); + tmp0_r = p2_r_in - p3_r_in; + tmp0_r += q4_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST8x1_UB(filter8, src); + src += 16; + + /* p1 */ + q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5); + filter8 = LD_UB(filter48 + 16); + tmp0_r = p1_r_in - p2_r_in; + tmp0_r += q5_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST8x1_UB(filter8, src); + src += 16; + + /* p0 */ + q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6); + filter8 = LD_UB(filter48 + 32); + tmp0_r = p0_r_in - p1_r_in; + tmp0_r += q6_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST8x1_UB(filter8, src); + src += 16; + + /* q0 */ + q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7); + filter8 = 
LD_UB(filter48 + 48); + tmp0_r = q7_r_in - p0_r_in; + tmp0_r += q0_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST8x1_UB(filter8, src); + src += 16; + + /* q1 */ + filter8 = LD_UB(filter48 + 64); + tmp0_r = q7_r_in - q0_r_in; + tmp0_r += q1_r_in; + tmp0_r -= p6_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST8x1_UB(filter8, src); + src += 16; + + /* q2 */ + filter8 = LD_UB(filter48 + 80); + tmp0_r = q7_r_in - q1_r_in; + tmp0_r += q2_r_in; + tmp0_r -= p5_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST8x1_UB(filter8, src); + src += 16; + + /* q3 */ + tmp0_r = q7_r_in - q2_r_in; + tmp0_r += q3_r_in; + tmp0_r -= p4_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2); + ST8x1_UB(q3, src); + src += 16; + + /* q4 */ + tmp0_r = q7_r_in - q3_r_in; + tmp0_r += q4_r_in; + tmp0_r -= p3_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2); + ST8x1_UB(q4, src); + src += 16; + + /* q5 */ + tmp0_r = q7_r_in - q4_r_in; + tmp0_r += q5_r_in; + tmp0_r -= p2_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2); + ST8x1_UB(q5, src); + src += 16; + + /* q6 */ + tmp0_r = q7_r_in - q5_r_in; + tmp0_r += q6_r_in; + tmp0_r -= p1_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); + q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2); + ST8x1_UB(q6, src); + + return 0; + } +} + +void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + uint8_t early_exit = 0; + DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]); + uint8_t *filter48 = &transposed_input[16 * 16]; + + vp9_transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16); + + early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), + &filter48[0], src, pitch, b_limit_ptr, + limit_ptr, thresh_ptr); + + if (0 == early_exit) { + early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch, + &filter48[0]); + + if (0 == early_exit) { + vp9_transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch); + } + } +} + +int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48, + uint8_t *src_org, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; + v16u8 flat, mask, hev, thresh, b_limit, limit; + v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; + v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l; + v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; + v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l; + v16i8 zero = { 0 }; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5; + + /* load 
vector elements */ + LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + /* mask and hev */ + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + /* flat4 */ + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + /* filter4 */ + VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + if (__msa_test_bz_v(flat)) { + ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec2, vec3); + ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec4, vec5); + + src_org -= 2; + ST4x8_UB(vec2, vec3, src_org, pitch); + src_org += 8 * pitch; + ST4x8_UB(vec4, vec5, src_org, pitch); + + return 1; + } else { + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, + q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, + p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); + ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); + ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); + VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, + p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); + + /* convert 16 bit output data into 8 bit */ + PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, + p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r, + p0_filt8_r, q0_filt8_r); + PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, + q2_filt8_r); + + /* store pixel values */ + p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); + p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); + p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); + q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); + q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); + q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); + + ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16); + filter48 += (4 * 16); + ST_UB2(q1_out, q2_out, filter48, 16); + filter48 += (2 * 16); + ST_UB(flat, filter48); + + return 0; + } +} + +int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch, + uint8_t *filter48) { + v16u8 flat, flat2, filter8; + v16i8 zero = { 0 }; + v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; + v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in; + v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in; + v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in; + v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in; + v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l; + v8i16 l_out, r_out; + + flat = LD_UB(filter48 + 6 * 16); + + LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0); + LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7); + + VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); + + if (__msa_test_bz_v(flat2)) { + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + + LD_UB4(filter48, 16, p2, p1, p0, q0); + LD_UB2(filter48 + 4 * 16, 16, q1, q2); + + ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec3, vec4); + ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec6, vec7); + ILVRL_B2_SH(q2, q1, vec2, vec5); + + src_org -= 3; + ST4x4_UB(vec3, vec3, 0, 1, 2, 3, 
src_org, pitch); + ST2x4_UB(vec2, 0, (src_org + 4), pitch); + src_org += (4 * pitch); + ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch); + ST2x4_UB(vec2, 4, (src_org + 4), pitch); + src_org += (4 * pitch); + ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src_org, pitch); + ST2x4_UB(vec5, 0, (src_org + 4), pitch); + src_org += (4 * pitch); + ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src_org, pitch); + ST2x4_UB(vec5, 4, (src_org + 4), pitch); + + return 1; + } else { + src -= 7 * 16; + + ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, + zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, + p3_r_in, p2_r_in, p1_r_in, p0_r_in); + q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0); + + tmp0_r = p7_r_in << 3; + tmp0_r -= p7_r_in; + tmp0_r += p6_r_in; + tmp0_r += q0_r_in; + tmp1_r = p6_r_in + p5_r_in; + tmp1_r += p4_r_in; + tmp1_r += p3_r_in; + tmp1_r += p2_r_in; + tmp1_r += p1_r_in; + tmp1_r += p0_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + + ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in, + p5_l_in, p4_l_in); + ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in, + p1_l_in, p0_l_in); + q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0); + + tmp0_l = p7_l_in << 3; + tmp0_l -= p7_l_in; + tmp0_l += p6_l_in; + tmp0_l += q0_l_in; + tmp1_l = p6_l_in + p5_l_in; + tmp1_l += p4_l_in; + tmp1_l += p3_l_in; + tmp1_l += p2_l_in; + tmp1_l += p1_l_in; + tmp1_l += p0_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2); + ST_UB(p6, src); + src += 16; + + /* p5 */ + q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1); + tmp0_r = p5_r_in - p6_r_in; + tmp0_r += q1_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1); + tmp0_l = p5_l_in - p6_l_in; + tmp0_l += q1_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2); + ST_UB(p5, src); + src += 16; + + /* p4 */ + q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2); + tmp0_r = p4_r_in - p5_r_in; + tmp0_r += q2_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2); + tmp0_l = p4_l_in - p5_l_in; + tmp0_l += q2_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2); + ST_UB(p4, src); + src += 16; + + /* p3 */ + q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3); + tmp0_r = p3_r_in - p4_r_in; + tmp0_r += q3_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3); + tmp0_l = p3_l_in - p4_l_in; + tmp0_l += q3_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2); + ST_UB(p3, src); + src += 16; + + /* p2 */ + q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4); + filter8 = LD_UB(filter48); + tmp0_r = p2_r_in - p3_r_in; + tmp0_r += q4_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4); + tmp0_l = p2_l_in - p3_l_in; + tmp0_l += q4_l_in; + tmp0_l -= p7_l_in; + 
tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += 16; + + /* p1 */ + q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5); + filter8 = LD_UB(filter48 + 16); + tmp0_r = p1_r_in - p2_r_in; + tmp0_r += q5_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5); + tmp0_l = p1_l_in - p2_l_in; + tmp0_l += q5_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)(tmp1_l), 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += 16; + + /* p0 */ + q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6); + filter8 = LD_UB(filter48 + 32); + tmp0_r = p0_r_in - p1_r_in; + tmp0_r += q6_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6); + tmp0_l = p0_l_in - p1_l_in; + tmp0_l += q6_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += 16; + + /* q0 */ + q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7); + filter8 = LD_UB(filter48 + 48); + tmp0_r = q7_r_in - p0_r_in; + tmp0_r += q0_r_in; + tmp0_r -= p7_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7); + tmp0_l = q7_l_in - p0_l_in; + tmp0_l += q0_l_in; + tmp0_l -= p7_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += 16; + + /* q1 */ + filter8 = LD_UB(filter48 + 64); + tmp0_r = q7_r_in - q0_r_in; + tmp0_r += q1_r_in; + tmp0_r -= p6_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + tmp0_l = q7_l_in - q0_l_in; + tmp0_l += q1_l_in; + tmp0_l -= p6_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += 16; + + /* q2 */ + filter8 = LD_UB(filter48 + 80); + tmp0_r = q7_r_in - q1_r_in; + tmp0_r += q2_r_in; + tmp0_r -= p5_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + tmp0_l = q7_l_in - q1_l_in; + tmp0_l += q2_l_in; + tmp0_l -= p5_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); + ST_UB(filter8, src); + src += 16; + + /* q3 */ + tmp0_r = q7_r_in - q2_r_in; + tmp0_r += q3_r_in; + tmp0_r -= p4_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + tmp0_l = q7_l_in - q2_l_in; + tmp0_l += q3_l_in; + tmp0_l -= p4_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2); + ST_UB(q3, src); + src += 16; + + /* q4 */ + tmp0_r = q7_r_in - q3_r_in; + tmp0_r += q4_r_in; + tmp0_r -= p3_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + tmp0_l = q7_l_in - q3_l_in; + tmp0_l += q4_l_in; + tmp0_l -= p3_l_in; + tmp1_l += tmp0_l; + l_out = 
__msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2); + ST_UB(q4, src); + src += 16; + + /* q5 */ + tmp0_r = q7_r_in - q4_r_in; + tmp0_r += q5_r_in; + tmp0_r -= p2_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + tmp0_l = q7_l_in - q4_l_in; + tmp0_l += q5_l_in; + tmp0_l -= p2_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2); + ST_UB(q5, src); + src += 16; + + /* q6 */ + tmp0_r = q7_r_in - q5_r_in; + tmp0_r += q6_r_in; + tmp0_r -= p1_r_in; + tmp1_r += tmp0_r; + r_out = __msa_srari_h((v8i16)tmp1_r, 4); + tmp0_l = q7_l_in - q5_l_in; + tmp0_l += q6_l_in; + tmp0_l -= p1_l_in; + tmp1_l += tmp0_l; + l_out = __msa_srari_h((v8i16)tmp1_l, 4); + r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); + q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2); + ST_UB(q6, src); + + return 0; + } +} + +void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + uint8_t early_exit = 0; + DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]); + uint8_t *filter48 = &transposed_input[16 * 16]; + + vp9_transpose_16x16((src - 8), pitch, &transposed_input[0], 16); + + early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), + &filter48[0], src, pitch, b_limit_ptr, + limit_ptr, thresh_ptr); + + if (0 == early_exit) { + early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch, + &filter48[0]); + + if (0 == early_exit) { + vp9_transpose_16x16(transposed_input, 16, (src - 8), pitch); + } + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_4_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_4_msa.c new file mode 100644 index 00000000000..7f691355a0f --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_4_msa.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/common/mips/msa/vp9_loopfilter_msa.h" + +void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { + uint64_t p1_d, p0_d, q0_d, q1_d; + v16u8 mask, hev, flat, thresh, b_limit, limit; + v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out; + + (void)count; + + /* load vector elements */ + LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + p1_d = __msa_copy_u_d((v2i64)p1_out, 0); + p0_d = __msa_copy_u_d((v2i64)p0_out, 0); + q0_d = __msa_copy_u_d((v2i64)q0_out, 0); + q1_d = __msa_copy_u_d((v2i64)q1_out, 0); + SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch); +} + +void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit0_ptr, + const uint8_t *limit0_ptr, + const uint8_t *thresh0_ptr, + const uint8_t *b_limit1_ptr, + const uint8_t *limit1_ptr, + const uint8_t *thresh1_ptr) { + v16u8 mask, hev, flat, thresh0, b_limit0, limit0, thresh1, b_limit1, limit1; + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + + /* load vector elements */ + LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr); + thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr); + thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0); + + b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr); + b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr); + b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0); + + limit0 = (v16u8)__msa_fill_b(*limit0_ptr); + limit1 = (v16u8)__msa_fill_b(*limit1_ptr); + limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0); + + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, + hev, mask, flat); + VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); + + ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch); +} + +void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { + v16u8 mask, hev, flat, limit, thresh, b_limit; + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v8i16 vec0, vec1, vec2, vec3; + + (void)count; + + LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, + p3, p2, p1, p0, q0, q1, q2, q3); + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); + ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec2, vec3); + + src -= 2; + ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch); + src += 4 * pitch; + ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); +} + +void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit0_ptr, + const uint8_t *limit0_ptr, + const uint8_t *thresh0_ptr, + const uint8_t *b_limit1_ptr, + const uint8_t *limit1_ptr, + const uint8_t *thresh1_ptr) { + v16u8 mask, hev, flat; + v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1; + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + 
v16u8 row0, row1, row2, row3, row4, row5, row6, row7; + v16u8 row8, row9, row10, row11, row12, row13, row14, row15; + v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + + LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); + LD_UB8(src - 4 + (8 * pitch), pitch, + row8, row9, row10, row11, row12, row13, row14, row15); + + TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, + row8, row9, row10, row11, row12, row13, row14, row15, + p3, p2, p1, p0, q0, q1, q2, q3); + + thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr); + thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr); + thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0); + + b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr); + b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr); + b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0); + + limit0 = (v16u8)__msa_fill_b(*limit0_ptr); + limit1 = (v16u8)__msa_fill_b(*limit1_ptr); + limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0); + + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, + hev, mask, flat); + VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); + ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1); + ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3); + ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1); + ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5); + + src -= 2; + + ST4x8_UB(tmp2, tmp3, src, pitch); + src += (8 * pitch); + ST4x8_UB(tmp4, tmp5, src, pitch); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_8_msa.c new file mode 100644 index 00000000000..26a858d6e62 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_8_msa.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/common/mips/msa/vp9_loopfilter_msa.h" + +void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { + uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; + v16u8 mask, hev, flat, thresh, b_limit, limit; + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; + v8i16 p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8; + v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r; + v16i8 zero = { 0 }; + + (void)count; + + /* load vector elements */ + LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat); + + if (__msa_test_bz_v(flat)) { + p1_d = __msa_copy_u_d((v2i64)p1_out, 0); + p0_d = __msa_copy_u_d((v2i64)p0_out, 0); + q0_d = __msa_copy_u_d((v2i64)q0_out, 0); + q1_d = __msa_copy_u_d((v2i64)q1_out, 0); + SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch); + } else { + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, + q2_r, q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8, + p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8); + + /* convert 16 bit output data into 8 bit */ + PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, + zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8, + q0_filter8); + PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8); + + /* store pixel values */ + p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat); + p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat); + p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat); + q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat); + q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat); + q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat); + + p2_d = __msa_copy_u_d((v2i64)p2_out, 0); + p1_d = __msa_copy_u_d((v2i64)p1_out, 0); + p0_d = __msa_copy_u_d((v2i64)p0_out, 0); + q0_d = __msa_copy_u_d((v2i64)q0_out, 0); + q1_d = __msa_copy_u_d((v2i64)q1_out, 0); + q2_d = __msa_copy_u_d((v2i64)q2_out, 0); + + src -= 3 * pitch; + + SD4(p2_d, p1_d, p0_d, q0_d, src, pitch); + src += (4 * pitch); + SD(q1_d, src); + src += pitch; + SD(q2_d, src); + } +} + +void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *b_limit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; + v16u8 flat, mask, hev, tmp, thresh, b_limit, limit; + v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; + v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l; + v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; + v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l; + v16u8 zero = { 0 }; + + /* load vector elements */ + LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh0); + tmp = (v16u8)__msa_fill_b(*thresh1); + 
thresh = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)thresh); + + b_limit = (v16u8)__msa_fill_b(*b_limit0); + tmp = (v16u8)__msa_fill_b(*b_limit1); + b_limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)b_limit); + + limit = (v16u8)__msa_fill_b(*limit0); + tmp = (v16u8)__msa_fill_b(*limit1); + limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)limit); + + /* mask and hev */ + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + if (__msa_test_bz_v(flat)) { + ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch); + } else { + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, + q2_r, q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, + p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); + + ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); + ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); + VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, + p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); + + /* convert 16 bit output data into 8 bit */ + PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, + p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r, + p0_filt8_r, q0_filt8_r); + PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, + q2_filt8_r); + + /* store pixel values */ + p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); + p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); + p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); + q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); + q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); + q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); + + src -= 3 * pitch; + + ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch); + src += (4 * pitch); + ST_UB2(q1_out, q2_out, src, pitch); + src += (2 * pitch); + } +} + +void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v16u8 p1_out, p0_out, q0_out, q1_out; + v16u8 flat, mask, hev, thresh, b_limit, limit; + v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; + v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; + v16u8 zero = { 0 }; + v8i16 vec0, vec1, vec2, vec3, vec4; + + (void)count; + + /* load vector elements */ + LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3); + + TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, + p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh_ptr); + b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); + limit = (v16u8)__msa_fill_b(*limit_ptr); + + /* mask and hev */ + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + /* flat4 */ + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + /* filter4 */ + VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat); + + if (__msa_test_bz_v(flat)) { + /* Store 4 pixels p1-_q1 */ + ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec2, vec3); + + src -= 2; + ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch); + src += 4 * pitch; + ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); + } else 
{ + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, + q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, + p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); + /* convert 16 bit output data into 8 bit */ + PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r, p0_filt8_r, + p0_filt8_r, q0_filt8_r, q0_filt8_r, p2_filt8_r, p1_filt8_r, + p0_filt8_r, q0_filt8_r); + PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r, q1_filt8_r, + q2_filt8_r); + + /* store pixel values */ + p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); + p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); + p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); + q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); + q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); + q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); + + /* Store 6 pixels p2-_q2 */ + ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec2, vec3); + vec4 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1); + + src -= 3; + ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch); + ST2x4_UB(vec4, 0, src + 4, pitch); + src += (4 * pitch); + ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); + ST2x4_UB(vec4, 4, src + 4, pitch); + } +} + +void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *b_limit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + uint8_t *temp_src; + v16u8 p3, p2, p1, p0, q3, q2, q1, q0; + v16u8 p1_out, p0_out, q0_out, q1_out; + v16u8 flat, mask, hev, thresh, b_limit, limit; + v16u8 row4, row5, row6, row7, row12, row13, row14, row15; + v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; + v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l; + v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; + v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l; + v16u8 zero = { 0 }; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + + temp_src = src - 4; + + LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7); + temp_src += (8 * pitch); + LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15); + + /* transpose 16x8 matrix into 8x16 */ + TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7, + q3, q2, q1, q0, row12, row13, row14, row15, + p3, p2, p1, p0, q0, q1, q2, q3); + + thresh = (v16u8)__msa_fill_b(*thresh0); + vec0 = (v8i16)__msa_fill_b(*thresh1); + thresh = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)thresh); + + b_limit = (v16u8)__msa_fill_b(*b_limit0); + vec0 = (v8i16)__msa_fill_b(*b_limit1); + b_limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)b_limit); + + limit = (v16u8)__msa_fill_b(*limit0); + vec0 = (v8i16)__msa_fill_b(*limit1); + limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)limit); + + /* mask and hev */ + LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, + hev, mask, flat); + /* flat4 */ + VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); + /* filter4 */ + VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); + + if (__msa_test_bz_v(flat)) { + ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec2, vec3); + ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec4, vec5); + + src -= 2; + ST4x8_UB(vec2, vec3, src, pitch); + src += 8 * pitch; + ST4x8_UB(vec4, vec5, src, pitch); + } else { + ILVR_B8_UH(zero, p3, zero, p2, zero, p1, 
zero, p0, zero, q0, zero, q1, + zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, + q3_r); + VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, + p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); + + ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); + ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); + + /* filter8 */ + VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, + p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); + + /* convert 16 bit output data into 8 bit */ + PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, + p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r, + p0_filt8_r, q0_filt8_r); + PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, + q2_filt8_r); + + /* store pixel values */ + p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); + p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); + p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); + q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); + q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); + q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); + + ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec3, vec4); + ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1); + ILVRL_H2_SH(vec1, vec0, vec6, vec7); + ILVRL_B2_SH(q2, q1, vec2, vec5); + + src -= 3; + ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); + ST2x4_UB(vec2, 0, src + 4, pitch); + src += (4 * pitch); + ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch); + ST2x4_UB(vec2, 4, src + 4, pitch); + src += (4 * pitch); + ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch); + ST2x4_UB(vec5, 0, src + 4, pitch); + src += (4 * pitch); + ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch); + ST2x4_UB(vec5, 4, src + 4, pitch); + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h new file mode 100644 index 00000000000..0643e41a520 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ +#define VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ + +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +#define VP9_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \ + p1_out, p0_out, q0_out, q1_out) { \ + v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \ + v16i8 filt, filt1, filt2, cnst4b, cnst3b; \ + v8i16 q0_sub_p0_r, filt_r, cnst3h; \ + \ + p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \ + p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \ + q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \ + q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \ + \ + filt = __msa_subs_s_b(p1_m, q1_m); \ + filt = filt & (v16i8)hev_in; \ + q0_sub_p0 = q0_m - p0_m; \ + filt_sign = __msa_clti_s_b(filt, 0); \ + \ + cnst3h = __msa_ldi_h(3); \ + q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \ + q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \ + filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \ + filt_r += q0_sub_p0_r; \ + filt_r = __msa_sat_s_h(filt_r, 7); \ + \ + /* combine left and right part */ \ + filt = __msa_pckev_b((v16i8)filt_r, (v16i8)filt_r); \ + \ + filt = filt & (v16i8)mask_in; \ + cnst4b = __msa_ldi_b(4); \ + filt1 = __msa_adds_s_b(filt, cnst4b); \ + filt1 >>= 3; \ + \ + cnst3b = __msa_ldi_b(3); \ + filt2 = __msa_adds_s_b(filt, cnst3b); \ + filt2 >>= 3; \ + \ + q0_m = __msa_subs_s_b(q0_m, filt1); \ + q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \ + p0_m = __msa_adds_s_b(p0_m, filt2); \ + p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \ + \ + filt = __msa_srari_b(filt1, 1); \ + hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \ + filt = filt & (v16i8)hev_in; \ + \ + q1_m = __msa_subs_s_b(q1_m, filt); \ + q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \ + p1_m = __msa_adds_s_b(p1_m, filt); \ + p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \ +} + +#define VP9_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \ + p1_out, p0_out, q0_out, q1_out) { \ + v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \ + v16i8 filt, filt1, filt2, cnst4b, cnst3b; \ + v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \ + \ + p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \ + p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \ + q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \ + q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \ + \ + filt = __msa_subs_s_b(p1_m, q1_m); \ + \ + filt = filt & (v16i8)hev_in; \ + \ + q0_sub_p0 = q0_m - p0_m; \ + filt_sign = __msa_clti_s_b(filt, 0); \ + \ + cnst3h = __msa_ldi_h(3); \ + q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \ + q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \ + filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \ + filt_r += q0_sub_p0_r; \ + filt_r = __msa_sat_s_h(filt_r, 7); \ + \ + q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \ + q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \ + filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \ + filt_l += q0_sub_p0_l; \ + filt_l = __msa_sat_s_h(filt_l, 7); \ + \ + filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \ + filt = filt & (v16i8)mask_in; \ + \ + cnst4b = __msa_ldi_b(4); \ + filt1 = __msa_adds_s_b(filt, cnst4b); \ + filt1 >>= 3; \ + \ + cnst3b = __msa_ldi_b(3); \ + filt2 = __msa_adds_s_b(filt, cnst3b); \ + filt2 >>= 3; \ + \ + q0_m = __msa_subs_s_b(q0_m, filt1); \ + q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \ + p0_m = __msa_adds_s_b(p0_m, filt2); \ + p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \ + \ + filt = __msa_srari_b(filt1, 1); \ + hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \ + filt = filt & (v16i8)hev_in; \ + \ + q1_m = 
__msa_subs_s_b(q1_m, filt); \ + q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \ + p1_m = __msa_adds_s_b(p1_m, filt); \ + p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \ +} + +#define VP9_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) { \ + v16u8 tmp, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \ + v16u8 zero_in = { 0 }; \ + \ + tmp = __msa_ori_b(zero_in, 1); \ + p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \ + q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \ + p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \ + q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \ + \ + p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \ + flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \ + p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \ + flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \ + \ + flat_out = (tmp < (v16u8)flat_out); \ + flat_out = __msa_xori_b(flat_out, 0xff); \ + flat_out = flat_out & (mask); \ +} + +#define VP9_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, \ + q5_in, q6_in, q7_in, flat_in, flat2_out) { \ + v16u8 tmp, zero_in = { 0 }; \ + v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \ + v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \ + \ + tmp = __msa_ori_b(zero_in, 1); \ + p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \ + q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \ + p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \ + q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in); \ + p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in); \ + q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in); \ + p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in); \ + q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in); \ + \ + p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0); \ + flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0); \ + flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out); \ + p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0); \ + flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out); \ + p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \ + flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \ + \ + flat2_out = (tmp < (v16u8)flat2_out); \ + flat2_out = __msa_xori_b(flat2_out, 0xff); \ + flat2_out = flat2_out & flat_in; \ +} + +#define VP9_FILTER8(p3_in, p2_in, p1_in, p0_in, \ + q0_in, q1_in, q2_in, q3_in, \ + p2_filt8_out, p1_filt8_out, p0_filt8_out, \ + q0_filt8_out, q1_filt8_out, q2_filt8_out) { \ + v8u16 tmp0, tmp1, tmp2; \ + \ + tmp2 = p2_in + p1_in + p0_in; \ + tmp0 = p3_in << 1; \ + \ + tmp0 = tmp0 + tmp2 + q0_in; \ + tmp1 = tmp0 + p3_in + p2_in; \ + p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \ + \ + tmp1 = tmp0 + p1_in + q1_in; \ + p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \ + \ + tmp1 = q2_in + q1_in + q0_in; \ + tmp2 = tmp2 + tmp1; \ + tmp0 = tmp2 + (p0_in); \ + tmp0 = tmp0 + (p3_in); \ + p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp0, 3); \ + \ + tmp0 = q2_in + q3_in; \ + tmp0 = p0_in + tmp1 + tmp0; \ + tmp1 = q3_in + q3_in; \ + tmp1 = tmp1 + tmp0; \ + q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \ + \ + tmp0 = tmp2 + q3_in; \ + tmp1 = tmp0 + q0_in; \ + q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \ + \ + tmp1 = tmp0 - p2_in; \ + tmp0 = q1_in + q3_in; \ + tmp1 = tmp0 + tmp1; \ + q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \ +} + +#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, \ + q0_in, q1_in, q2_in, q3_in, \ + limit_in, b_limit_in, thresh_in, \ + hev_out, mask_out, flat_out) { \ + v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \ + v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \ + \ + /* absolute subtraction 
of pixel values */ \ + p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \ + p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \ + p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \ + q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \ + q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \ + q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \ + p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \ + p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \ + \ + /* calculation of hev */ \ + flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \ + hev_out = thresh_in < (v16u8)flat_out; \ + \ + /* calculation of mask */ \ + p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \ + p1_asub_q1_m >>= 1; \ + p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \ + \ + mask_out = b_limit_in < p0_asub_q0_m; \ + mask_out = __msa_max_u_b(flat_out, mask_out); \ + p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \ + mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \ + q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \ + mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \ + \ + mask_out = limit_in < (v16u8)mask_out; \ + mask_out = __msa_xori_b(mask_out, 0xff); \ +} +#endif /* VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ */ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h index d7aabbb8898..43850758c24 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h @@ -16,852 +16,1971 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" -#if HAVE_MSA -/* load macros */ -#define LOAD_UB(psrc) *((const v16u8 *)(psrc)) -#define LOAD_SB(psrc) *((const v16i8 *)(psrc)) -#define LOAD_UH(psrc) *((const v8u16 *)(psrc)) -#define LOAD_SH(psrc) *((const v8i16 *)(psrc)) -#define LOAD_UW(psrc) *((const v4u32 *)(psrc)) -#define LOAD_SW(psrc) *((const v4i32 *)(psrc)) -#define LOAD_UD(psrc) *((const v2u64 *)(psrc)) -#define LOAD_SD(psrc) *((const v2i64 *)(psrc)) - -/* store macros */ -#define STORE_UB(vec, pdest) *((v16u8 *)(pdest)) = (vec) -#define STORE_SB(vec, pdest) *((v16i8 *)(pdest)) = (vec) -#define STORE_UH(vec, pdest) *((v8u16 *)(pdest)) = (vec) -#define STORE_SH(vec, pdest) *((v8i16 *)(pdest)) = (vec) -#define STORE_UW(vec, pdest) *((v4u32 *)(pdest)) = (vec) -#define STORE_SW(vec, pdest) *((v4i32 *)(pdest)) = (vec) -#define STORE_UD(vec, pdest) *((v2u64 *)(pdest)) = (vec) -#define STORE_SD(vec, pdest) *((v2i64 *)(pdest)) = (vec) +#define LD_B(RTYPE, psrc) *((const RTYPE *)(psrc)) +#define LD_UB(...) LD_B(v16u8, __VA_ARGS__) +#define LD_SB(...) LD_B(v16i8, __VA_ARGS__) + +#define LD_H(RTYPE, psrc) *((const RTYPE *)(psrc)) +#define LD_UH(...) LD_H(v8u16, __VA_ARGS__) +#define LD_SH(...) LD_H(v8i16, __VA_ARGS__) + +#define LD_W(RTYPE, psrc) *((const RTYPE *)(psrc)) +#define LD_SW(...) LD_W(v4i32, __VA_ARGS__) + +#define ST_B(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in) +#define ST_UB(...) ST_B(v16u8, __VA_ARGS__) +#define ST_SB(...) ST_B(v16i8, __VA_ARGS__) + +#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in) +#define ST_SH(...) ST_H(v8i16, __VA_ARGS__) + +#define ST_W(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in) +#define ST_SW(...) 
ST_W(v4i32, __VA_ARGS__) #if (__mips_isa_rev >= 6) -#define LOAD_WORD(psrc) ({ \ - const uint8_t *src_m = (const uint8_t *)(psrc); \ - uint32_t val_m; \ - \ - __asm__ __volatile__ ( \ - "lw %[val_m], %[src_m] \n\t" \ - \ - : [val_m] "=r" (val_m) \ - : [src_m] "m" (*src_m) \ - ); \ - \ - val_m; \ +#define LH(psrc) ({ \ + const uint8_t *psrc_m = (const uint8_t *)(psrc); \ + uint16_t val_m; \ + \ + __asm__ __volatile__ ( \ + "lh %[val_m], %[psrc_m] \n\t" \ + \ + : [val_m] "=r" (val_m) \ + : [psrc_m] "m" (*psrc_m) \ + ); \ + \ + val_m; \ +}) + +#define LW(psrc) ({ \ + const uint8_t *psrc_m = (const uint8_t *)(psrc); \ + uint32_t val_m; \ + \ + __asm__ __volatile__ ( \ + "lw %[val_m], %[psrc_m] \n\t" \ + \ + : [val_m] "=r" (val_m) \ + : [psrc_m] "m" (*psrc_m) \ + ); \ + \ + val_m; \ }) #if (__mips == 64) -#define LOAD_DWORD(psrc) ({ \ - const uint8_t *src_m = (const uint8_t *)(psrc); \ - uint64_t val_m = 0; \ - \ - __asm__ __volatile__ ( \ - "ld %[val_m], %[src_m] \n\t" \ - \ - : [val_m] "=r" (val_m) \ - : [src_m] "m" (*src_m) \ - ); \ - \ - val_m; \ +#define LD(psrc) ({ \ + const uint8_t *psrc_m = (const uint8_t *)(psrc); \ + uint64_t val_m = 0; \ + \ + __asm__ __volatile__ ( \ + "ld %[val_m], %[psrc_m] \n\t" \ + \ + : [val_m] "=r" (val_m) \ + : [psrc_m] "m" (*psrc_m) \ + ); \ + \ + val_m; \ }) #else // !(__mips == 64) -#define LOAD_DWORD(psrc) ({ \ - const uint8_t *src1_m = (const uint8_t *)(psrc); \ - const uint8_t *src2_m = ((const uint8_t *)(psrc)) + 4; \ - uint32_t val0_m, val1_m; \ - uint64_t genval_m = 0; \ - \ - __asm__ __volatile__ ( \ - "lw %[val0_m], %[src1_m] \n\t" \ - \ - : [val0_m] "=r" (val0_m) \ - : [src1_m] "m" (*src1_m) \ - ); \ - \ - __asm__ __volatile__ ( \ - "lw %[val1_m], %[src2_m] \n\t" \ - \ - : [val1_m] "=r" (val1_m) \ - : [src2_m] "m" (*src2_m) \ - ); \ - \ - genval_m = (uint64_t)(val1_m); \ - genval_m = (uint64_t)((genval_m << 32) & 0xFFFFFFFF00000000); \ - genval_m = (uint64_t)(genval_m | (uint64_t)val0_m); \ - \ - genval_m; \ +#define LD(psrc) ({ \ + const uint8_t *psrc_m = (const uint8_t *)(psrc); \ + uint32_t val0_m, val1_m; \ + uint64_t val_m = 0; \ + \ + val0_m = LW(psrc_m); \ + val1_m = LW(psrc_m + 4); \ + \ + val_m = (uint64_t)(val1_m); \ + val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \ + val_m = (uint64_t)(val_m | (uint64_t)val0_m); \ + \ + val_m; \ }) #endif // (__mips == 64) -#define STORE_WORD_WITH_OFFSET_1(pdst, val) { \ - uint8_t *dst_ptr_m = ((uint8_t *)(pdst)) + 1; \ - const uint32_t val_m = (val); \ - \ - __asm__ __volatile__ ( \ - "sw %[val_m], %[dst_ptr_m] \n\t" \ - \ - : [dst_ptr_m] "=m" (*dst_ptr_m) \ - : [val_m] "r" (val_m) \ - ); \ -} - -#define STORE_WORD(pdst, val) { \ - uint8_t *dst_ptr_m = (uint8_t *)(pdst); \ - const uint32_t val_m = (val); \ - \ - __asm__ __volatile__ ( \ - "sw %[val_m], %[dst_ptr_m] \n\t" \ - \ - : [dst_ptr_m] "=m" (*dst_ptr_m) \ - : [val_m] "r" (val_m) \ - ); \ -} - -#define STORE_DWORD(pdst, val) { \ - uint8_t *dst_ptr_m = (uint8_t *)(pdst); \ - const uint64_t val_m = (val); \ - \ - __asm__ __volatile__ ( \ - "sd %[val_m], %[dst_ptr_m] \n\t" \ - \ - : [dst_ptr_m] "=m" (*dst_ptr_m) \ - : [val_m] "r" (val_m) \ - ); \ + +#define SH(val, pdst) { \ + uint8_t *pdst_m = (uint8_t *)(pdst); \ + const uint16_t val_m = (val); \ + \ + __asm__ __volatile__ ( \ + "sh %[val_m], %[pdst_m] \n\t" \ + \ + : [pdst_m] "=m" (*pdst_m) \ + : [val_m] "r" (val_m) \ + ); \ +} + +#define SW(val, pdst) { \ + uint8_t *pdst_m = (uint8_t *)(pdst); \ + const uint32_t val_m = (val); \ + \ + __asm__ __volatile__ ( \ + "sw %[val_m], %[pdst_m] 
\n\t" \ + \ + : [pdst_m] "=m" (*pdst_m) \ + : [val_m] "r" (val_m) \ + ); \ +} + +#define SD(val, pdst) { \ + uint8_t *pdst_m = (uint8_t *)(pdst); \ + const uint64_t val_m = (val); \ + \ + __asm__ __volatile__ ( \ + "sd %[val_m], %[pdst_m] \n\t" \ + \ + : [pdst_m] "=m" (*pdst_m) \ + : [val_m] "r" (val_m) \ + ); \ } #else // !(__mips_isa_rev >= 6) -#define LOAD_WORD(psrc) ({ \ - const uint8_t *src_m = (const uint8_t *)(psrc); \ - uint32_t val_m; \ - \ - __asm__ __volatile__ ( \ - "ulw %[val_m], %[src_m] \n\t" \ - \ - : [val_m] "=r" (val_m) \ - : [src_m] "m" (*src_m) \ - ); \ - \ - val_m; \ +#define LH(psrc) ({ \ + const uint8_t *psrc_m = (const uint8_t *)(psrc); \ + uint16_t val_m; \ + \ + __asm__ __volatile__ ( \ + "ulh %[val_m], %[psrc_m] \n\t" \ + \ + : [val_m] "=r" (val_m) \ + : [psrc_m] "m" (*psrc_m) \ + ); \ + \ + val_m; \ +}) + +#define LW(psrc) ({ \ + const uint8_t *psrc_m = (const uint8_t *)(psrc); \ + uint32_t val_m; \ + \ + __asm__ __volatile__ ( \ + "ulw %[val_m], %[psrc_m] \n\t" \ + \ + : [val_m] "=r" (val_m) \ + : [psrc_m] "m" (*psrc_m) \ + ); \ + \ + val_m; \ }) #if (__mips == 64) -#define LOAD_DWORD(psrc) ({ \ - const uint8_t *src_m = (const uint8_t *)(psrc); \ - uint64_t val_m = 0; \ - \ - __asm__ __volatile__ ( \ - "uld %[val_m], %[src_m] \n\t" \ - \ - : [val_m] "=r" (val_m) \ - : [src_m] "m" (*src_m) \ - ); \ - \ - val_m; \ +#define LD(psrc) ({ \ + const uint8_t *psrc_m = (const uint8_t *)(psrc); \ + uint64_t val_m = 0; \ + \ + __asm__ __volatile__ ( \ + "uld %[val_m], %[psrc_m] \n\t" \ + \ + : [val_m] "=r" (val_m) \ + : [psrc_m] "m" (*psrc_m) \ + ); \ + \ + val_m; \ }) #else // !(__mips == 64) -#define LOAD_DWORD(psrc) ({ \ - const uint8_t *src1_m = (const uint8_t *)(psrc); \ - const uint8_t *src2_m = ((const uint8_t *)(psrc)) + 4; \ - uint32_t val0_m, val1_m; \ - uint64_t genval_m = 0; \ - \ - __asm__ __volatile__ ( \ - "ulw %[val0_m], %[src1_m] \n\t" \ - \ - : [val0_m] "=r" (val0_m) \ - : [src1_m] "m" (*src1_m) \ - ); \ - \ - __asm__ __volatile__ ( \ - "ulw %[val1_m], %[src2_m] \n\t" \ - \ - : [val1_m] "=r" (val1_m) \ - : [src2_m] "m" (*src2_m) \ - ); \ - \ - genval_m = (uint64_t)(val1_m); \ - genval_m = (uint64_t)((genval_m << 32) & 0xFFFFFFFF00000000); \ - genval_m = (uint64_t)(genval_m | (uint64_t)val0_m); \ - \ - genval_m; \ +#define LD(psrc) ({ \ + const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \ + uint32_t val0_m, val1_m; \ + uint64_t val_m = 0; \ + \ + val0_m = LW(psrc_m1); \ + val1_m = LW(psrc_m1 + 4); \ + \ + val_m = (uint64_t)(val1_m); \ + val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \ + val_m = (uint64_t)(val_m | (uint64_t)val0_m); \ + \ + val_m; \ }) #endif // (__mips == 64) -#define STORE_WORD_WITH_OFFSET_1(pdst, val) { \ - uint8_t *dst_ptr_m = ((uint8_t *)(pdst)) + 1; \ - const uint32_t val_m = (val); \ - \ - __asm__ __volatile__ ( \ - "usw %[val_m], %[dst_ptr_m] \n\t" \ - \ - : [dst_ptr_m] "=m" (*dst_ptr_m) \ - : [val_m] "r" (val_m) \ - ); \ +#define SH(val, pdst) { \ + uint8_t *pdst_m = (uint8_t *)(pdst); \ + const uint16_t val_m = (val); \ + \ + __asm__ __volatile__ ( \ + "ush %[val_m], %[pdst_m] \n\t" \ + \ + : [pdst_m] "=m" (*pdst_m) \ + : [val_m] "r" (val_m) \ + ); \ } -#define STORE_WORD(pdst, val) { \ - uint8_t *dst_ptr_m = (uint8_t *)(pdst); \ - const uint32_t val_m = (val); \ - \ - __asm__ __volatile__ ( \ - "usw %[val_m], %[dst_ptr_m] \n\t" \ - \ - : [dst_ptr_m] "=m" (*dst_ptr_m) \ - : [val_m] "r" (val_m) \ - ); \ +#define SW(val, pdst) { \ + uint8_t *pdst_m = (uint8_t *)(pdst); \ + const uint32_t val_m = (val); \ + \ + __asm__ 
__volatile__ ( \ + "usw %[val_m], %[pdst_m] \n\t" \ + \ + : [pdst_m] "=m" (*pdst_m) \ + : [val_m] "r" (val_m) \ + ); \ } -#define STORE_DWORD(pdst, val) { \ - uint8_t *dst1_m = (uint8_t *)(pdst); \ - uint8_t *dst2_m = ((uint8_t *)(pdst)) + 4; \ +#define SD(val, pdst) { \ + uint8_t *pdst_m1 = (uint8_t *)(pdst); \ uint32_t val0_m, val1_m; \ \ val0_m = (uint32_t)((val) & 0x00000000FFFFFFFF); \ val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ \ - __asm__ __volatile__ ( \ - "usw %[val0_m], %[dst1_m] \n\t" \ - "usw %[val1_m], %[dst2_m] \n\t" \ - \ - : [dst1_m] "=m" (*dst1_m), [dst2_m] "=m" (*dst2_m) \ - : [val0_m] "r" (val0_m), [val1_m] "r" (val1_m) \ - ); \ + SW(val0_m, pdst_m1); \ + SW(val1_m, pdst_m1 + 4); \ } #endif // (__mips_isa_rev >= 6) -#define LOAD_2VECS_UB(psrc, stride, \ - val0, val1) { \ - val0 = LOAD_UB(psrc + 0 * stride); \ - val1 = LOAD_UB(psrc + 1 * stride); \ -} - -#define LOAD_4VECS_UB(psrc, stride, \ - val0, val1, val2, val3) { \ - val0 = LOAD_UB(psrc + 0 * stride); \ - val1 = LOAD_UB(psrc + 1 * stride); \ - val2 = LOAD_UB(psrc + 2 * stride); \ - val3 = LOAD_UB(psrc + 3 * stride); \ -} - -#define LOAD_4VECS_SB(psrc, stride, \ - val0, val1, val2, val3) { \ - val0 = LOAD_SB(psrc + 0 * stride); \ - val1 = LOAD_SB(psrc + 1 * stride); \ - val2 = LOAD_SB(psrc + 2 * stride); \ - val3 = LOAD_SB(psrc + 3 * stride); \ -} - -#define LOAD_5VECS_UB(psrc, stride, \ - out0, out1, out2, out3, out4) { \ - LOAD_4VECS_UB((psrc), (stride), \ - (out0), (out1), (out2), (out3)); \ - out4 = LOAD_UB(psrc + 4 * stride); \ +/* Description : Load 4 words with stride + Arguments : Inputs - psrc (source pointer to load from) + - stride + Outputs - out0, out1, out2, out3 + Details : Loads word in 'out0' from (psrc) + Loads word in 'out1' from (psrc + stride) + Loads word in 'out2' from (psrc + 2 * stride) + Loads word in 'out3' from (psrc + 3 * stride) +*/ +#define LW4(psrc, stride, out0, out1, out2, out3) { \ + out0 = LW((psrc)); \ + out1 = LW((psrc) + stride); \ + out2 = LW((psrc) + 2 * stride); \ + out3 = LW((psrc) + 3 * stride); \ +} + +/* Description : Load double words with stride + Arguments : Inputs - psrc (source pointer to load from) + - stride + Outputs - out0, out1 + Details : Loads double word in 'out0' from (psrc) + Loads double word in 'out1' from (psrc + stride) +*/ +#define LD2(psrc, stride, out0, out1) { \ + out0 = LD((psrc)); \ + out1 = LD((psrc) + stride); \ +} +#define LD4(psrc, stride, out0, out1, out2, out3) { \ + LD2((psrc), stride, out0, out1); \ + LD2((psrc) + 2 * stride, stride, out2, out3); \ +} + +/* Description : Store 4 words with stride + Arguments : Inputs - in0, in1, in2, in3, pdst, stride + Details : Stores word from 'in0' to (pdst) + Stores word from 'in1' to (pdst + stride) + Stores word from 'in2' to (pdst + 2 * stride) + Stores word from 'in3' to (pdst + 3 * stride) +*/ +#define SW4(in0, in1, in2, in3, pdst, stride) { \ + SW(in0, (pdst)) \ + SW(in1, (pdst) + stride); \ + SW(in2, (pdst) + 2 * stride); \ + SW(in3, (pdst) + 3 * stride); \ +} + +/* Description : Store 4 double words with stride + Arguments : Inputs - in0, in1, in2, in3, pdst, stride + Details : Stores double word from 'in0' to (pdst) + Stores double word from 'in1' to (pdst + stride) + Stores double word from 'in2' to (pdst + 2 * stride) + Stores double word from 'in3' to (pdst + 3 * stride) +*/ +#define SD4(in0, in1, in2, in3, pdst, stride) { \ + SD(in0, (pdst)) \ + SD(in1, (pdst) + stride); \ + SD(in2, (pdst) + 2 * stride); \ + SD(in3, (pdst) + 3 * stride); \ +} + +/* Description : Load 
vectors with 16 byte elements with stride + Arguments : Inputs - psrc (source pointer to load from) + - stride + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Loads 16 byte elements in 'out0' from (psrc) + Loads 16 byte elements in 'out1' from (psrc + stride) +*/ +#define LD_B2(RTYPE, psrc, stride, out0, out1) { \ + out0 = LD_B(RTYPE, (psrc)); \ + out1 = LD_B(RTYPE, (psrc) + stride); \ +} +#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) +#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__) + +#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \ + LD_B2(RTYPE, (psrc), stride, out0, out1); \ + LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \ +} +#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) +#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__) + +#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) { \ + LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ + out4 = LD_B(RTYPE, (psrc) + 4 * stride); \ +} +#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__) +#define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__) + +#define LD_B7(RTYPE, psrc, stride, \ + out0, out1, out2, out3, out4, out5, out6) { \ + LD_B5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \ + LD_B2(RTYPE, (psrc) + 5 * stride, stride, out5, out6); \ +} +#define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__) + +#define LD_B8(RTYPE, psrc, stride, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ + LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \ +} +#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__) +#define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__) + +/* Description : Load vectors with 8 halfword elements with stride + Arguments : Inputs - psrc (source pointer to load from) + - stride + Outputs - out0, out1 + Details : Loads 8 halfword elements in 'out0' from (psrc) + Loads 8 halfword elements in 'out1' from (psrc + stride) +*/ +#define LD_H2(RTYPE, psrc, stride, out0, out1) { \ + out0 = LD_H(RTYPE, (psrc)); \ + out1 = LD_H(RTYPE, (psrc) + (stride)); \ +} +#define LD_SH2(...) LD_H2(v8i16, __VA_ARGS__) + +#define LD_H4(RTYPE, psrc, stride, out0, out1, out2, out3) { \ + LD_H2(RTYPE, (psrc), stride, out0, out1); \ + LD_H2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ +} +#define LD_SH4(...) LD_H4(v8i16, __VA_ARGS__) + +#define LD_H8(RTYPE, psrc, stride, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + LD_H4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ + LD_H4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \ +} +#define LD_SH8(...) LD_H8(v8i16, __VA_ARGS__) + +#define LD_H16(RTYPE, psrc, stride, \ + out0, out1, out2, out3, out4, out5, out6, out7, \ + out8, out9, out10, out11, out12, out13, out14, out15) { \ + LD_H8(RTYPE, (psrc), stride, \ + out0, out1, out2, out3, out4, out5, out6, out7); \ + LD_H8(RTYPE, (psrc) + 8 * stride, stride, \ + out8, out9, out10, out11, out12, out13, out14, out15); \ +} +#define LD_SH16(...) 
LD_H16(v8i16, __VA_ARGS__) + +/* Description : Load as 4x4 block of signed halfword elements from 1D source + data into 4 vectors (Each vector with 4 signed halfwords) + Arguments : Inputs - psrc + Outputs - out0, out1, out2, out3 +*/ +#define LD4x4_SH(psrc, out0, out1, out2, out3) { \ + out0 = LD_SH(psrc); \ + out2 = LD_SH(psrc + 8); \ + out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ + out3 = (v8i16)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \ +} + +/* Description : Load 2 vectors of signed word elements with stride + Arguments : Inputs - psrc (source pointer to load from) + - stride + Outputs - out0, out1 + Return Type - signed word +*/ +#define LD_SW2(psrc, stride, out0, out1) { \ + out0 = LD_SW((psrc)); \ + out1 = LD_SW((psrc) + stride); \ +} + +/* Description : Store vectors of 16 byte elements with stride + Arguments : Inputs - in0, in1, stride + Outputs - pdst (destination pointer to store to) + Details : Stores 16 byte elements from 'in0' to (pdst) + Stores 16 byte elements from 'in1' to (pdst + stride) +*/ +#define ST_B2(RTYPE, in0, in1, pdst, stride) { \ + ST_B(RTYPE, in0, (pdst)); \ + ST_B(RTYPE, in1, (pdst) + stride); \ +} +#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) + +#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ + ST_B2(RTYPE, in0, in1, (pdst), stride); \ + ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ +} +#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) + +#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + pdst, stride) { \ + ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride); \ + ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \ +} +#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__) + +/* Description : Store vectors of 8 halfword elements with stride + Arguments : Inputs - in0, in1, stride + Outputs - pdst (destination pointer to store to) + Details : Stores 8 halfword elements from 'in0' to (pdst) + Stores 8 halfword elements from 'in1' to (pdst + stride) +*/ +#define ST_H2(RTYPE, in0, in1, pdst, stride) { \ + ST_H(RTYPE, in0, (pdst)); \ + ST_H(RTYPE, in1, (pdst) + stride); \ +} +#define ST_SH2(...) ST_H2(v8i16, __VA_ARGS__) + +#define ST_H4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ + ST_H2(RTYPE, in0, in1, (pdst), stride); \ + ST_H2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ +} +#define ST_SH4(...) ST_H4(v8i16, __VA_ARGS__) + +#define ST_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) { \ + ST_H4(RTYPE, in0, in1, in2, in3, (pdst), stride); \ + ST_H4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \ +} +#define ST_SH8(...) 
ST_H8(v8i16, __VA_ARGS__) + +/* Description : Store vectors of word elements with stride + Arguments : Inputs - in0, in1, stride + - pdst (destination pointer to store to) + Details : Store 4 word elements from 'in0' to (pdst) + Store 4 word elements from 'in1' to (pdst + stride) +*/ +#define ST_SW2(in0, in1, pdst, stride) { \ + ST_SW(in0, (pdst)); \ + ST_SW(in1, (pdst) + stride); \ +} + +/* Description : Store as 2x4 byte block to destination memory from input vector + Arguments : Inputs - in, stidx, pdst, stride + Return Type - unsigned byte + Details : Index stidx halfword element from 'in' vector is copied and + stored on first line + Index stidx+1 halfword element from 'in' vector is copied and + stored on second line + Index stidx+2 halfword element from 'in' vector is copied and + stored on third line + Index stidx+3 halfword element from 'in' vector is copied and + stored on fourth line +*/ +#define ST2x4_UB(in, stidx, pdst, stride) { \ + uint16_t out0_m, out1_m, out2_m, out3_m; \ + uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \ + \ + out0_m = __msa_copy_u_h((v8i16)in, (stidx)); \ + out1_m = __msa_copy_u_h((v8i16)in, (stidx + 1)); \ + out2_m = __msa_copy_u_h((v8i16)in, (stidx + 2)); \ + out3_m = __msa_copy_u_h((v8i16)in, (stidx + 3)); \ + \ + SH(out0_m, pblk_2x4_m); \ + SH(out1_m, pblk_2x4_m + stride); \ + SH(out2_m, pblk_2x4_m + 2 * stride); \ + SH(out3_m, pblk_2x4_m + 3 * stride); \ +} + +/* Description : Store 4x2 byte block to destination memory from input vector + Arguments : Inputs - in, pdst, stride + Details : Index 0 word element from 'in' vector is copied to a GP + register and stored to (pdst) + Index 1 word element from 'in' vector is copied to a GP + register and stored to (pdst + stride) +*/ +#define ST4x2_UB(in, pdst, stride) { \ + uint32_t out0_m, out1_m; \ + uint8_t *pblk_4x2_m = (uint8_t *)(pdst); \ + \ + out0_m = __msa_copy_u_w((v4i32)in, 0); \ + out1_m = __msa_copy_u_w((v4i32)in, 1); \ + \ + SW(out0_m, pblk_4x2_m); \ + SW(out1_m, pblk_4x2_m + stride); \ +} + +/* Description : Store as 4x4 byte block to destination memory from input vector + Arguments : Inputs - in0, in1, pdst, stride + Return Type - unsigned byte + Details : Idx0 word element from input vector 'in0' is copied and stored + on first line + Idx1 word element from input vector 'in0' is copied and stored + on second line + Idx2 word element from input vector 'in1' is copied and stored + on third line + Idx3 word element from input vector 'in1' is copied and stored + on fourth line +*/ +#define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) { \ + uint32_t out0_m, out1_m, out2_m, out3_m; \ + uint8_t *pblk_4x4_m = (uint8_t *)(pdst); \ + \ + out0_m = __msa_copy_u_w((v4i32)in0, idx0); \ + out1_m = __msa_copy_u_w((v4i32)in0, idx1); \ + out2_m = __msa_copy_u_w((v4i32)in1, idx2); \ + out3_m = __msa_copy_u_w((v4i32)in1, idx3); \ + \ + SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, stride); \ +} +#define ST4x8_UB(in0, in1, pdst, stride) { \ + uint8_t *pblk_4x8 = (uint8_t *)(pdst); \ + \ + ST4x4_UB(in0, in0, 0, 1, 2, 3, pblk_4x8, stride); \ + ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * stride, stride); \ } -#define LOAD_5VECS_SB(psrc, stride, \ - out0, out1, out2, out3, out4) { \ - LOAD_4VECS_SB((psrc), (stride), \ - (out0), (out1), (out2), (out3)); \ - out4 = LOAD_SB(psrc + 4 * stride); \ -} +/* Description : Store as 8x1 byte block to destination memory from input vector + Arguments : Inputs - in, pdst + Details : Index 0 double word element from input vector 'in' is copied + and stored to 
destination memory at (pdst) +*/ +#define ST8x1_UB(in, pdst) { \ + uint64_t out0_m; \ + \ + out0_m = __msa_copy_u_d((v2i64)in, 0); \ + SD(out0_m, pdst); \ +} -#define LOAD_7VECS_SB(psrc, stride, \ - val0, val1, val2, val3, \ - val4, val5, val6) { \ - val0 = LOAD_SB((psrc) + 0 * (stride)); \ - val1 = LOAD_SB((psrc) + 1 * (stride)); \ - val2 = LOAD_SB((psrc) + 2 * (stride)); \ - val3 = LOAD_SB((psrc) + 3 * (stride)); \ - val4 = LOAD_SB((psrc) + 4 * (stride)); \ - val5 = LOAD_SB((psrc) + 5 * (stride)); \ - val6 = LOAD_SB((psrc) + 6 * (stride)); \ -} - -#define LOAD_8VECS_UB(psrc, stride, \ - out0, out1, out2, out3, \ - out4, out5, out6, out7) { \ - LOAD_4VECS_UB((psrc), (stride), \ - (out0), (out1), (out2), (out3)); \ - LOAD_4VECS_UB((psrc + 4 * stride), (stride), \ - (out4), (out5), (out6), (out7)); \ -} - -#define LOAD_8VECS_SB(psrc, stride, \ - out0, out1, out2, out3, \ - out4, out5, out6, out7) { \ - LOAD_4VECS_SB((psrc), (stride), \ - (out0), (out1), (out2), (out3)); \ - LOAD_4VECS_SB((psrc + 4 * stride), (stride), \ - (out4), (out5), (out6), (out7)); \ -} - -#define LOAD_2VECS_SH(psrc, stride, \ - val0, val1) { \ - val0 = LOAD_SH((psrc) + 0 * (stride)); \ - val1 = LOAD_SH((psrc) + 1 * (stride)); \ -} - -#define LOAD_4VECS_SH(psrc, stride, \ - val0, val1, val2, val3) { \ - LOAD_2VECS_SH((psrc), (stride), val0, val1); \ - LOAD_2VECS_SH((psrc + 2 * stride), (stride), val2, val3); \ -} - -#define LOAD_8VECS_SH(psrc, stride, \ - val0, val1, val2, val3, \ - val4, val5, val6, val7) { \ - LOAD_4VECS_SH((psrc), (stride), \ - val0, val1, val2, val3); \ - LOAD_4VECS_SH((psrc + 4 * stride), (stride), \ - val4, val5, val6, val7); \ -} - -#define LOAD_16VECS_SH(psrc, stride, \ - val0, val1, val2, val3, \ - val4, val5, val6, val7, \ - val8, val9, val10, val11, \ - val12, val13, val14, val15) { \ - LOAD_8VECS_SH((psrc), (stride), \ - val0, val1, val2, val3, \ - val4, val5, val6, val7); \ - LOAD_8VECS_SH((psrc + 8 * (stride)), (stride), \ - val8, val9, val10, val11, \ - val12, val13, val14, val15); \ -} - -#define STORE_4VECS_UB(dst_out, pitch, \ - in0, in1, in2, in3) { \ - STORE_UB((in0), (dst_out)); \ - STORE_UB((in1), ((dst_out) + (pitch))); \ - STORE_UB((in2), ((dst_out) + 2 * (pitch))); \ - STORE_UB((in3), ((dst_out) + 3 * (pitch))); \ -} - -#define STORE_8VECS_UB(dst_out, pitch_in, \ - in0, in1, in2, in3, \ - in4, in5, in6, in7) { \ - STORE_4VECS_UB(dst_out, pitch_in, \ - in0, in1, in2, in3); \ - STORE_4VECS_UB((dst_out + 4 * (pitch_in)), pitch_in, \ - in4, in5, in6, in7); \ -} - -#define VEC_INSERT_4W_UB(src, src0, src1, src2, src3) { \ - src = (v16u8)__msa_insert_w((v4i32)(src), 0, (src0)); \ - src = (v16u8)__msa_insert_w((v4i32)(src), 1, (src1)); \ - src = (v16u8)__msa_insert_w((v4i32)(src), 2, (src2)); \ - src = (v16u8)__msa_insert_w((v4i32)(src), 3, (src3)); \ -} - -#define VEC_INSERT_2DW_UB(src, src0, src1) { \ - src = (v16u8)__msa_insert_d((v2i64)(src), 0, (src0)); \ - src = (v16u8)__msa_insert_d((v2i64)(src), 1, (src1)); \ -} - -#define STORE_4VECS_SH(ptr, stride, \ - in0, in1, in2, in3) { \ - STORE_SH(in0, ((ptr) + 0 * stride)); \ - STORE_SH(in1, ((ptr) + 1 * stride)); \ - STORE_SH(in2, ((ptr) + 2 * stride)); \ - STORE_SH(in3, ((ptr) + 3 * stride)); \ -} - -#define STORE_8VECS_SH(ptr, stride, \ - in0, in1, in2, in3, \ - in4, in5, in6, in7) { \ - STORE_SH(in0, ((ptr) + 0 * stride)); \ - STORE_SH(in1, ((ptr) + 1 * stride)); \ - STORE_SH(in2, ((ptr) + 2 * stride)); \ - STORE_SH(in3, ((ptr) + 3 * stride)); \ - STORE_SH(in4, ((ptr) + 4 * stride)); \ - STORE_SH(in5, ((ptr) + 5 * stride)); \ - 
STORE_SH(in6, ((ptr) + 6 * stride)); \ - STORE_SH(in7, ((ptr) + 7 * stride)); \ -} - -#define CLIP_UNSIGNED_CHAR_H(in) ({ \ +/* Description : Store as 8x2 byte block to destination memory from input vector + Arguments : Inputs - in, pdst, stride + Details : Index 0 double word element from input vector 'in' is copied + and stored to destination memory at (pdst) + Index 1 double word element from input vector 'in' is copied + and stored to destination memory at (pdst + stride) +*/ +#define ST8x2_UB(in, pdst, stride) { \ + uint64_t out0_m, out1_m; \ + uint8_t *pblk_8x2_m = (uint8_t *)(pdst); \ + \ + out0_m = __msa_copy_u_d((v2i64)in, 0); \ + out1_m = __msa_copy_u_d((v2i64)in, 1); \ + \ + SD(out0_m, pblk_8x2_m); \ + SD(out1_m, pblk_8x2_m + stride); \ +} + +/* Description : Store as 8x4 byte block to destination memory from input + vectors + Arguments : Inputs - in0, in1, pdst, stride + Details : Index 0 double word element from input vector 'in0' is copied + and stored to destination memory at (pblk_8x4_m) + Index 1 double word element from input vector 'in0' is copied + and stored to destination memory at (pblk_8x4_m + stride) + Index 0 double word element from input vector 'in1' is copied + and stored to destination memory at (pblk_8x4_m + 2 * stride) + Index 1 double word element from input vector 'in1' is copied + and stored to destination memory at (pblk_8x4_m + 3 * stride) +*/ +#define ST8x4_UB(in0, in1, pdst, stride) { \ + uint64_t out0_m, out1_m, out2_m, out3_m; \ + uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \ + \ + out0_m = __msa_copy_u_d((v2i64)in0, 0); \ + out1_m = __msa_copy_u_d((v2i64)in0, 1); \ + out2_m = __msa_copy_u_d((v2i64)in1, 0); \ + out3_m = __msa_copy_u_d((v2i64)in1, 1); \ + \ + SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \ +} + +/* Description : average with rounding (in0 + in1 + 1) / 2. + Arguments : Inputs - in0, in1, in2, in3, + Outputs - out0, out1 + Return Type - signed byte + Details : Each byte element from 'in0' vector is added with each byte + element from 'in1' vector. The addition of the elements plus 1 + (for rounding) is done unsigned with full precision, + i.e. the result has one extra bit. Unsigned division by 2 + (or logical shift right by one bit) is performed before writing + the result to vector 'out0' + Similar for the pair of 'in2' and 'in3' +*/ +#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \ + out1 = (RTYPE)__msa_aver_u_b((v16u8)in2, (v16u8)in3); \ +} +#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__) + +#define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \ + AVER_UB2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} +#define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__) + +/* Description : Immediate number of columns to slide with zero + Arguments : Inputs - in0, in1, slide_val + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Byte elements from 'zero_m' vector are slide into 'in0' by + number of elements specified by 'slide_val' +*/ +#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) { \ + v16i8 zero_m = { 0 }; \ + out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \ + out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \ +} +#define SLDI_B2_0_SW(...) 
SLDI_B2_0(v4i32, __VA_ARGS__) + +#define SLDI_B4_0(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3, slide_val) { \ + SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \ + SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \ +} +#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__) + +/* Description : Immediate number of columns to slide + Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Byte elements from 'in0_0' vector are slide into 'in1_0' by + number of elements specified by 'slide_val' +*/ +#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) { \ + out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \ + out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \ +} +#define SLDI_B2_SH(...) SLDI_B2(v8i16, __VA_ARGS__) + +#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2, \ + out0, out1, out2, slide_val) { \ + SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \ + out2 = (RTYPE)__msa_sldi_b((v16i8)in0_2, (v16i8)in1_2, slide_val); \ +} +#define SLDI_B3_SB(...) SLDI_B3(v16i8, __VA_ARGS__) +#define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__) + +/* Description : Shuffle byte vector elements as per mask vector + Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Selective byte elements from in0 & in1 are copied to out0 as + per control vector mask0 + Selective byte elements from in2 & in3 are copied to out1 as + per control vector mask1 +*/ +#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ + out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \ + out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \ +} +#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) +#define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__) +#define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__) + +#define VSHF_B4(RTYPE, in0, in1, mask0, mask1, mask2, mask3, \ + out0, out1, out2, out3) { \ + VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \ + VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3); \ +} +#define VSHF_B4_SB(...) VSHF_B4(v16i8, __VA_ARGS__) +#define VSHF_B4_SH(...) VSHF_B4(v8i16, __VA_ARGS__) + +/* Description : Dot product of byte vector elements + Arguments : Inputs - mult0, mult1 + cnst0, cnst1 + Outputs - out0, out1 + Return Type - unsigned halfword + Details : Unsigned byte elements from mult0 are multiplied with + unsigned byte elements from cnst0 producing a result + twice the size of input i.e. unsigned halfword. + Then this multiplication results of adjacent odd-even elements + are added together and stored to the out vector + (2 unsigned halfword results) +*/ +#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \ + out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \ + out1 = (RTYPE)__msa_dotp_u_h((v16u8)mult1, (v16u8)cnst1); \ +} +#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__) + +#define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) { \ + DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} +#define DOTP_UB4_UH(...) 
DOTP_UB4(v8u16, __VA_ARGS__) + +/* Description : Dot product of byte vector elements + Arguments : Inputs - mult0, mult1 + cnst0, cnst1 + Outputs - out0, out1 + Return Type - signed halfword + Details : Signed byte elements from mult0 are multiplied with + signed byte elements from cnst0 producing a result + twice the size of input i.e. signed halfword. + Then this multiplication results of adjacent odd-even elements + are added together and stored to the out vector + (2 signed halfword results) +*/ +#define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \ + out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \ + out1 = (RTYPE)__msa_dotp_s_h((v16i8)mult1, (v16i8)cnst1); \ +} +#define DOTP_SB2_SH(...) DOTP_SB2(v8i16, __VA_ARGS__) + +#define DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) { \ + DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} +#define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__) + +/* Description : Dot product of halfword vector elements + Arguments : Inputs - mult0, mult1 + cnst0, cnst1 + Outputs - out0, out1 + Return Type - signed word + Details : Signed halfword elements from mult0 are multiplied with + signed halfword elements from cnst0 producing a result + twice the size of input i.e. signed word. + Then this multiplication results of adjacent odd-even elements + are added together and stored to the out vector + (2 signed word results) +*/ +#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \ + out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \ + out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \ +} +#define DOTP_SH2_SW(...) DOTP_SH2(v4i32, __VA_ARGS__) + +#define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) { \ + DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} +#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__) + +/* Description : Dot product of word vector elements + Arguments : Inputs - mult0, mult1 + cnst0, cnst1 + Outputs - out0, out1 + Return Type - signed word + Details : Signed word elements from mult0 are multiplied with + signed word elements from cnst0 producing a result + twice the size of input i.e. signed double word. + Then this multiplication results of adjacent odd-even elements + are added together and stored to the out vector + (2 signed double word results) +*/ +#define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \ + out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \ + out1 = (RTYPE)__msa_dotp_s_d((v4i32)mult1, (v4i32)cnst1); \ +} +#define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__) + +/* Description : Dot product & addition of byte vector elements + Arguments : Inputs - mult0, mult1 + cnst0, cnst1 + Outputs - out0, out1 + Return Type - signed halfword + Details : Signed byte elements from mult0 are multiplied with + signed byte elements from cnst0 producing a result + twice the size of input i.e. signed halfword. + Then this multiplication results of adjacent odd-even elements + are added to the out vector + (2 signed halfword results) +*/ +#define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \ + out0 = (RTYPE)__msa_dpadd_s_h((v8i16)out0, (v16i8)mult0, (v16i8)cnst0); \ + out1 = (RTYPE)__msa_dpadd_s_h((v8i16)out1, (v16i8)mult1, (v16i8)cnst1); \ +} +#define DPADD_SB2_SH(...) 
DPADD_SB2(v8i16, __VA_ARGS__)
+
+#define DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3,                    \
+                  cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) { \
+  DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1);             \
+  DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3);             \
+}
+#define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__)
+
+/* Description : Dot product & addition of halfword vector elements
+   Arguments   : Inputs  - mult0, mult1
+                           cnst0, cnst1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Signed halfword elements from 'mult0' are multiplied with
+                 signed halfword elements from 'cnst0' producing a result
+                 twice the size of input i.e. signed word.
+                 The multiplication results of adjacent odd-even elements
+                 are added to the 'out0' vector
+*/
+#define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) {        \
+  out0 = (RTYPE)__msa_dpadd_s_w((v4i32)out0, (v8i16)mult0, (v8i16)cnst0); \
+  out1 = (RTYPE)__msa_dpadd_s_w((v4i32)out1, (v8i16)mult1, (v8i16)cnst1); \
+}
+#define DPADD_SH2_SW(...) DPADD_SH2(v4i32, __VA_ARGS__)
+
+/* Description : Dot product & addition of double word vector elements
+   Arguments   : Inputs  - mult0, mult1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Each signed word element from 'mult0' is multiplied with itself
+                 producing an intermediate result twice the size of input
+                 i.e. signed double word
+                 The multiplication results of adjacent odd-even elements
+                 are added to the 'out0' vector
+*/
+#define DPADD_SD2(RTYPE, mult0, mult1, out0, out1) {                      \
+  out0 = (RTYPE)__msa_dpadd_s_d((v2i64)out0, (v4i32)mult0, (v4i32)mult0); \
+  out1 = (RTYPE)__msa_dpadd_s_d((v2i64)out1, (v4i32)mult1, (v4i32)mult1); \
+}
+#define DPADD_SD2_SD(...) DPADD_SD2(v2i64, __VA_ARGS__)
+
+/* Description : Minimum values between unsigned elements of
+                 either vector are copied to the output vector
+   Arguments   : Inputs  - in0, in1, min_vec
+                 Outputs - in0, in1 (in place)
+                 Return Type - unsigned halfword
+   Details     : Minimum of unsigned halfword element values from 'in0' and
+                 'min_vec' are written to output vector 'in0'
+*/
+#define MIN_UH2(RTYPE, in0, in1, min_vec) {        \
+  in0 = (RTYPE)__msa_min_u_h((v8u16)in0, min_vec); \
+  in1 = (RTYPE)__msa_min_u_h((v8u16)in1, min_vec); \
+}
+#define MIN_UH2_UH(...) MIN_UH2(v8u16, __VA_ARGS__)
+
+#define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec) { \
+  MIN_UH2(RTYPE, in0, in1, min_vec);                  \
+  MIN_UH2(RTYPE, in2, in3, min_vec);                  \
+}
+#define MIN_UH4_UH(...)
MIN_UH4(v8u16, __VA_ARGS__) + +/* Description : Clips all signed halfword elements of input vector + between 0 & 255 + Arguments : Inputs - in (input vector) + Outputs - out_m (output vector with clipped elements) + Return Type - signed halfword +*/ +#define CLIP_SH_0_255(in) ({ \ v8i16 max_m = __msa_ldi_h(255); \ v8i16 out_m; \ \ - out_m = __msa_maxi_s_h((v8i16)(in), 0); \ + out_m = __msa_maxi_s_h((v8i16)in, 0); \ out_m = __msa_min_s_h((v8i16)max_m, (v8i16)out_m); \ out_m; \ }) +#define CLIP_SH2_0_255(in0, in1) { \ + in0 = CLIP_SH_0_255(in0); \ + in1 = CLIP_SH_0_255(in1); \ +} +#define CLIP_SH4_0_255(in0, in1, in2, in3) { \ + CLIP_SH2_0_255(in0, in1); \ + CLIP_SH2_0_255(in2, in3); \ +} -/* halfword 8x8 transpose macro */ -#define TRANSPOSE8x8_H_SH(in0, in1, in2, in3, \ - in4, in5, in6, in7, \ - out0, out1, out2, out3, \ - out4, out5, out6, out7) { \ - v8i16 s0_m, s1_m; \ - v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ - v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ - \ - s0_m = __msa_ilvr_h((v8i16)(in6), (v8i16)(in4)); \ - s1_m = __msa_ilvr_h((v8i16)(in7), (v8i16)(in5)); \ - tmp0_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \ - tmp1_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \ - \ - s0_m = __msa_ilvl_h((v8i16)(in6), (v8i16)(in4)); \ - s1_m = __msa_ilvl_h((v8i16)(in7), (v8i16)(in5)); \ - tmp2_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \ - tmp3_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \ - \ - s0_m = __msa_ilvr_h((v8i16)(in2), (v8i16)(in0)); \ - s1_m = __msa_ilvr_h((v8i16)(in3), (v8i16)(in1)); \ - tmp4_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \ - tmp5_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \ - \ - s0_m = __msa_ilvl_h((v8i16)(in2), (v8i16)(in0)); \ - s1_m = __msa_ilvl_h((v8i16)(in3), (v8i16)(in1)); \ - tmp6_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \ - tmp7_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \ - \ - out0 = (v8i16)__msa_pckev_d((v2i64)tmp0_m, (v2i64)tmp4_m); \ - out1 = (v8i16)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \ - out2 = (v8i16)__msa_pckev_d((v2i64)tmp1_m, (v2i64)tmp5_m); \ - out3 = (v8i16)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \ - out4 = (v8i16)__msa_pckev_d((v2i64)tmp2_m, (v2i64)tmp6_m); \ - out5 = (v8i16)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \ - out6 = (v8i16)__msa_pckev_d((v2i64)tmp3_m, (v2i64)tmp7_m); \ - out7 = (v8i16)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \ -} - -/* interleave macros */ -/* no in-place support */ -#define ILV_B_LRLR_UB(in0, in1, in2, in3, \ - out0, out1, out2, out3) { \ - out0 = (v16u8)__msa_ilvl_b((v16i8)(in1), (v16i8)(in0)); \ - out1 = (v16u8)__msa_ilvr_b((v16i8)(in1), (v16i8)(in0)); \ - out2 = (v16u8)__msa_ilvl_b((v16i8)(in3), (v16i8)(in2)); \ - out3 = (v16u8)__msa_ilvr_b((v16i8)(in3), (v16i8)(in2)); \ -} - -#define ILV_H_LRLR_SH(in0, in1, in2, in3, \ - out0, out1, out2, out3) { \ - out0 = __msa_ilvl_h((v8i16)(in1), (v8i16)(in0)); \ - out1 = __msa_ilvr_h((v8i16)(in1), (v8i16)(in0)); \ - out2 = __msa_ilvl_h((v8i16)(in3), (v8i16)(in2)); \ - out3 = __msa_ilvr_h((v8i16)(in3), (v8i16)(in2)); \ -} - -#define ILV_H_LR_SH(in0, in1, out0, out1) { \ - out0 = __msa_ilvl_h((v8i16)(in1), (v8i16)(in0)); \ - out1 = __msa_ilvr_h((v8i16)(in1), (v8i16)(in0)); \ -} - -#define ILVR_B_2VECS_UB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1) { \ - out0 = (v16u8)__msa_ilvr_b((v16i8)(in0_l), (v16i8)(in0_r)); \ - out1 = (v16u8)__msa_ilvr_b((v16i8)(in1_l), (v16i8)(in1_r)); \ -} - -#define ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1) { \ - out0 = __msa_ilvr_b((v16i8)(in0_l), (v16i8)(in0_r)); \ - out1 = __msa_ilvr_b((v16i8)(in1_l), 
(v16i8)(in1_r)); \ -} - -#define ILVR_B_4VECS_UB(in0_r, in1_r, in2_r, in3_r, \ - in0_l, in1_l, in2_l, in3_l, \ - out0, out1, out2, out3) { \ - ILVR_B_2VECS_UB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1); \ - ILVR_B_2VECS_UB(in2_r, in3_r, in2_l, in3_l, \ - out2, out3); \ -} - -#define ILVR_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r, \ - in0_l, in1_l, in2_l, in3_l, \ - out0, out1, out2, out3) { \ - ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1); \ - ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \ - out2, out3); \ -} - -#define ILVR_B_6VECS_SB(in0_r, in1_r, in2_r, \ - in3_r, in4_r, in5_r, \ - in0_l, in1_l, in2_l, \ - in3_l, in4_l, in5_l, \ - out0, out1, out2, \ - out3, out4, out5) { \ - ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1); \ - ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \ - out2, out3); \ - ILVR_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, \ - out4, out5); \ -} - -#define ILVR_B_8VECS_SB(in0_r, in1_r, in2_r, in3_r, \ - in4_r, in5_r, in6_r, in7_r, \ - in0_l, in1_l, in2_l, in3_l, \ - in4_l, in5_l, in6_l, in7_l, \ - out0, out1, out2, out3, \ - out4, out5, out6, out7) { \ - ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1); \ - ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \ - out2, out3); \ - ILVR_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, \ - out4, out5); \ - ILVR_B_2VECS_SB(in6_r, in7_r, in6_l, in7_l, \ - out6, out7); \ -} - -#define ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1) { \ - out0 = __msa_ilvl_b((v16i8)(in0_l), (v16i8)(in0_r)); \ - out1 = __msa_ilvl_b((v16i8)(in1_l), (v16i8)(in1_r)); \ -} - -#define ILVL_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r, \ - in0_l, in1_l, in2_l, in3_l, \ - out0, out1, out2, out3) { \ - ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1); \ - ILVL_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \ - out2, out3); \ -} - -#define ILVL_B_6VECS_SB(in0_r, in1_r, in2_r, \ - in3_r, in4_r, in5_r, \ - in0_l, in1_l, in2_l, \ - in3_l, in4_l, in5_l, \ - out0, out1, out2, \ - out3, out4, out5) { \ - ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \ - out0, out1); \ - ILVL_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \ - out2, out3); \ - ILVL_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, \ - out4, out5); \ -} - -#define ILVR_D_2VECS_SB(out0, in0_l, in0_r, \ - out1, in1_l, in1_r) { \ - out0 = (v16i8)__msa_ilvr_d((v2i64)(in0_l), (v2i64)(in0_r)); \ - out1 = (v16i8)__msa_ilvr_d((v2i64)(in1_l), (v2i64)(in1_r)); \ -} - -#define ILVR_D_3VECS_SB(out0, in0_l, in0_r, \ - out1, in1_l, in1_r, \ - out2, in2_l, in2_r) { \ - ILVR_D_2VECS_SB(out0, in0_l, in0_r, \ - out1, in1_l, in1_r); \ - out2 = (v16i8)__msa_ilvr_d((v2i64)(in2_l), (v2i64)(in2_r)); \ -} - -#define ILVR_D_4VECS_SB(out0, in0_l, in0_r, \ - out1, in1_l, in1_r, \ - out2, in2_l, in2_r, \ - out3, in3_l, in3_r) { \ - ILVR_D_2VECS_SB(out0, in0_l, in0_r, \ - out1, in1_l, in1_r); \ - ILVR_D_2VECS_SB(out2, in2_l, in2_r, \ - out3, in3_l, in3_r); \ -} - -#define DOTP_S_W_4VECS_SW(m0, c0, m1, c1, \ - m2, c2, m3, c3, \ - out0, out1, out2, out3) { \ - out0 = __msa_dotp_s_w((v8i16)(m0), (v8i16)(c0)); \ - out1 = __msa_dotp_s_w((v8i16)(m1), (v8i16)(c1)); \ - out2 = __msa_dotp_s_w((v8i16)(m2), (v8i16)(c2)); \ - out3 = __msa_dotp_s_w((v8i16)(m3), (v8i16)(c3)); \ -} - -#define PCKEV_H_2VECS_SH(in0_l, in0_r, in1_l, in1_r, \ - out0, out1) { \ - out0 = __msa_pckev_h((v8i16)(in0_l), (v8i16)(in0_r)); \ - out1 = __msa_pckev_h((v8i16)(in1_l), (v8i16)(in1_r)); \ -} - -#define XORI_B_2VECS_UB(val0, val1, \ - out0, out1, xor_val) { \ - out0 = __msa_xori_b((v16u8)(val0), (xor_val)); \ - out1 = __msa_xori_b((v16u8)(val1), (xor_val)); \ -} 
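The legacy per-count helpers removed here are superseded by the generic two-operand macros added above (CLIP_SH2_0_255, ST8x2_UB, and friends). A rough usage sketch only, not taken from this patch; the function name, the variable names, and the rounding shift of 6 are assumptions:

static void example_round_clip_store_8x2(v8i16 res0, v8i16 res1,
                                         uint8_t *dst, int32_t dst_stride) {
  v16u8 out;

  /* round each halfword result with an assumed shift of 6 */
  res0 = __msa_srari_h(res0, 6);
  res1 = __msa_srari_h(res1, 6);
  /* clamp both vectors to the unsigned byte range in place */
  CLIP_SH2_0_255(res0, res1);
  /* pack the even bytes of both vectors into one 16-byte vector */
  out = (v16u8)__msa_pckev_b((v16i8)res1, (v16i8)res0);
  /* write 8 bytes to each of two destination rows */
  ST8x2_UB(out, dst, dst_stride);
}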
- -#define XORI_B_2VECS_SB(val0, val1, \ - out0, out1, xor_val) { \ - out0 = (v16i8)__msa_xori_b((v16u8)(val0), (xor_val)); \ - out1 = (v16i8)__msa_xori_b((v16u8)(val1), (xor_val)); \ -} - -#define XORI_B_3VECS_SB(val0, val1, val2, \ - out0, out1, out2, xor_val) { \ - XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val); \ - out2 = (v16i8)__msa_xori_b((v16u8)(val2), (xor_val)); \ -} - -#define XORI_B_4VECS_UB(val0, val1, val2, val3, \ - out0, out1, out2, out3, \ - xor_val) { \ - XORI_B_2VECS_UB(val0, val1, out0, out1, xor_val); \ - XORI_B_2VECS_UB(val2, val3, out2, out3, xor_val); \ -} - -#define XORI_B_4VECS_SB(val0, val1, val2, val3, \ - out0, out1, out2, out3, \ - xor_val) { \ - XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val); \ - XORI_B_2VECS_SB(val2, val3, out2, out3, xor_val); \ +/* Description : Addition of 4 signed word elements + 4 signed word elements of input vector are added together and + the resulting integer sum is returned + Arguments : Inputs - in (signed word vector) + Outputs - sum_m (i32 sum) + Return Type - signed word +*/ +#define HADD_SW_S32(in) ({ \ + v2i64 res0_m, res1_m; \ + int32_t sum_m; \ + \ + res0_m = __msa_hadd_s_d((v4i32)in, (v4i32)in); \ + res1_m = __msa_splati_d(res0_m, 1); \ + res0_m = res0_m + res1_m; \ + sum_m = __msa_copy_s_w((v4i32)res0_m, 0); \ + sum_m; \ +}) + +/* Description : Horizontal addition of unsigned byte vector elements + Arguments : Inputs - in0, in1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Each unsigned odd byte element from 'in0' is added to + even unsigned byte element from 'in0' (pairwise) and the + halfword result is stored in 'out0' +*/ +#define HADD_UB2(RTYPE, in0, in1, out0, out1) { \ + out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \ + out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \ } +#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__) -#define XORI_B_7VECS_SB(val0, val1, val2, val3, \ - val4, val5, val6, \ - out0, out1, out2, out3, \ - out4, out5, out6, \ - xor_val) { \ - XORI_B_4VECS_SB(val0, val1, val2, val3, \ - out0, out1, out2, out3, xor_val); \ - XORI_B_3VECS_SB(val4, val5, val6, \ - out4, out5, out6, xor_val); \ +#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) { \ + HADD_UB2(RTYPE, in0, in1, out0, out1); \ + HADD_UB2(RTYPE, in2, in3, out2, out3); \ +} +#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__) + +/* Description : Horizontal subtraction of unsigned byte vector elements + Arguments : Inputs - in0, in1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Each unsigned odd byte element from 'in0' is subtracted from + even unsigned byte element from 'in0' (pairwise) and the + halfword result is written to 'out0' +*/ +#define HSUB_UB2(RTYPE, in0, in1, out0, out1) { \ + out0 = (RTYPE)__msa_hsub_u_h((v16u8)in0, (v16u8)in0); \ + out1 = (RTYPE)__msa_hsub_u_h((v16u8)in1, (v16u8)in1); \ +} +#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__) + +/* Description : Horizontal subtraction of signed halfword vector elements + Arguments : Inputs - in0, in1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Each signed odd halfword element from 'in0' is subtracted from + even signed halfword element from 'in0' (pairwise) and the + word result is written to 'out0' +*/ +#define HSUB_UH2(RTYPE, in0, in1, out0, out1) { \ + out0 = (RTYPE)__msa_hsub_s_w((v8i16)in0, (v8i16)in0); \ + out1 = (RTYPE)__msa_hsub_s_w((v8i16)in1, (v8i16)in1); \ +} +#define HSUB_UH2_SW(...) 
HSUB_UH2(v4i32, __VA_ARGS__) + +/* Description : Insert specified word elements from input vectors to 1 + destination vector + Arguments : Inputs - in0, in1, in2, in3 (4 input vectors) + Outputs - out (output vector) + Return Type - as per RTYPE +*/ +#define INSERT_W2(RTYPE, in0, in1, out) { \ + out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \ + out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \ } +#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__) -#define SRARI_H_4VECS_UH(val0, val1, val2, val3, \ - out0, out1, out2, out3, \ - shift_right_val) { \ - out0 = (v8u16)__msa_srari_h((v8i16)(val0), (shift_right_val)); \ - out1 = (v8u16)__msa_srari_h((v8i16)(val1), (shift_right_val)); \ - out2 = (v8u16)__msa_srari_h((v8i16)(val2), (shift_right_val)); \ - out3 = (v8u16)__msa_srari_h((v8i16)(val3), (shift_right_val)); \ +#define INSERT_W4(RTYPE, in0, in1, in2, in3, out) { \ + out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \ + out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \ + out = (RTYPE)__msa_insert_w((v4i32)out, 2, in2); \ + out = (RTYPE)__msa_insert_w((v4i32)out, 3, in3); \ +} +#define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__) +#define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__) + +/* Description : Insert specified double word elements from input vectors to 1 + destination vector + Arguments : Inputs - in0, in1 (2 input vectors) + Outputs - out (output vector) + Return Type - as per RTYPE +*/ +#define INSERT_D2(RTYPE, in0, in1, out) { \ + out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \ + out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \ +} +#define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__) +#define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__) + +/* Description : Interleave even byte elements from vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Even byte elements of 'in0' and even byte + elements of 'in1' are interleaved and copied to 'out0' + Even byte elements of 'in2' and even byte + elements of 'in3' are interleaved and copied to 'out1' +*/ +#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ + out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \ +} +#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__) +#define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__) + +/* Description : Interleave even halfword elements from vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Even halfword elements of 'in0' and even halfword + elements of 'in1' are interleaved and copied to 'out0' + Even halfword elements of 'in2' and even halfword + elements of 'in3' are interleaved and copied to 'out1' +*/ +#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \ + out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \ } +#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__) +#define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__) +#define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__) + +/* Description : Interleave even word elements from vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Even word elements of 'in0' and 'in1' are interleaved + and written to 'out0' +*/ +#define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \ + out1 = (RTYPE)__msa_ilvev_w((v4i32)in3, (v4i32)in2); \ +} +#define ILVEV_W2_SB(...) 
ILVEV_W2(v16i8, __VA_ARGS__) + +/* Description : Interleave even double word elements from vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Even double word elements of 'in0' and even double word + elements of 'in1' are interleaved and copied to 'out0' + Even double word elements of 'in2' and even double word + elements of 'in3' are interleaved and copied to 'out1' +*/ +#define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \ + out1 = (RTYPE)__msa_ilvev_d((v2i64)in3, (v2i64)in2); \ +} +#define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__) + +/* Description : Interleave left half of byte elements from vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Left half of byte elements of in0 and left half of byte + elements of in1 are interleaved and copied to out0. + Left half of byte elements of in2 and left half of byte + elements of in3 are interleaved and copied to out1. +*/ +#define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ + out1 = (RTYPE)__msa_ilvl_b((v16i8)in2, (v16i8)in3); \ +} +#define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__) +#define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__) +#define ILVL_B2_UH(...) ILVL_B2(v8u16, __VA_ARGS__) +#define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__) + +#define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define ILVL_B4_SB(...) ILVL_B4(v16i8, __VA_ARGS__) +#define ILVL_B4_UH(...) ILVL_B4(v8u16, __VA_ARGS__) + +/* Description : Interleave left half of halfword elements from vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Left half of halfword elements of in0 and left half of halfword + elements of in1 are interleaved and copied to out0. + Left half of halfword elements of in2 and left half of halfword + elements of in3 are interleaved and copied to out1. +*/ +#define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \ + out1 = (RTYPE)__msa_ilvl_h((v8i16)in2, (v8i16)in3); \ +} +#define ILVL_H2_SH(...) ILVL_H2(v8i16, __VA_ARGS__) + +/* Description : Interleave left half of word elements from vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Left half of word elements of in0 and left half of word + elements of in1 are interleaved and copied to out0. + Left half of word elements of in2 and left half of word + elements of in3 are interleaved and copied to out1. +*/ +#define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \ + out1 = (RTYPE)__msa_ilvl_w((v4i32)in2, (v4i32)in3); \ +} +#define ILVL_W2_UB(...) ILVL_W2(v16u8, __VA_ARGS__) +#define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__) + +/* Description : Interleave right half of byte elements from vectors + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + Outputs - out0, out1, out2, out3 + Return Type - as per RTYPE + Details : Right half of byte elements of in0 and right half of byte + elements of in1 are interleaved and copied to out0. + Right half of byte elements of in2 and right half of byte + elements of in3 are interleaved and copied to out1. 
+ Similar for other pairs +*/ +#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ + out1 = (RTYPE)__msa_ilvr_b((v16i8)in2, (v16i8)in3); \ +} +#define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__) +#define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__) +#define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__) +#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__) + +#define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__) +#define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__) +#define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__) +#define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__) + +#define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + in8, in9, in10, in11, in12, in13, in14, in15, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3); \ + ILVR_B4(RTYPE, in8, in9, in10, in11, in12, in13, in14, in15, \ + out4, out5, out6, out7); \ +} +#define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__) + +/* Description : Interleave right half of halfword elements from vectors + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + Outputs - out0, out1, out2, out3 + Return Type - signed halfword + Details : Right half of halfword elements of in0 and right half of + halfword elements of in1 are interleaved and copied to out0. + Right half of halfword elements of in2 and right half of + halfword elements of in3 are interleaved and copied to out1. + Similar for other pairs +*/ +#define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \ + out1 = (RTYPE)__msa_ilvr_h((v8i16)in2, (v8i16)in3); \ +} +#define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__) + +#define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__) -#define SRARI_H_4VECS_SH(val0, val1, val2, val3, \ - out0, out1, out2, out3, \ - shift_right_val) { \ - out0 = __msa_srari_h((v8i16)(val0), (shift_right_val)); \ - out1 = __msa_srari_h((v8i16)(val1), (shift_right_val)); \ - out2 = __msa_srari_h((v8i16)(val2), (shift_right_val)); \ - out3 = __msa_srari_h((v8i16)(val3), (shift_right_val)); \ +#define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \ + out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \ } +#define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__) +#define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__) -#define SRARI_W_4VECS_SW(val0, val1, val2, val3, \ - out0, out1, out2, out3, \ - shift_right_val) { \ - out0 = __msa_srari_w((v4i32)(val0), (shift_right_val)); \ - out1 = __msa_srari_w((v4i32)(val1), (shift_right_val)); \ - out2 = __msa_srari_w((v4i32)(val2), (shift_right_val)); \ - out3 = __msa_srari_w((v4i32)(val3), (shift_right_val)); \ +#define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ } +#define ILVR_W4_UB(...) 
ILVR_W4(v16u8, __VA_ARGS__) + +/* Description : Interleave right half of double word elements from vectors + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + Outputs - out0, out1, out2, out3 + Return Type - unsigned double word + Details : Right half of double word elements of in0 and right half of + double word elements of in1 are interleaved and copied to out0. + Right half of double word elements of in2 and right half of + double word elements of in3 are interleaved and copied to out1. +*/ +#define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_ilvr_d((v2i64)(in0), (v2i64)(in1)); \ + out1 = (RTYPE)__msa_ilvr_d((v2i64)(in2), (v2i64)(in3)); \ +} +#define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__) +#define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__) +#define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__) + +#define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) { \ + ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + out2 = (RTYPE)__msa_ilvr_d((v2i64)(in4), (v2i64)(in5)); \ +} +#define ILVR_D3_SB(...) ILVR_D3(v16i8, __VA_ARGS__) -#define SRARI_SATURATE_UNSIGNED_H(input, right_shift_val, sat_val) ({ \ - v8u16 out_m; \ +#define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__) +#define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__) + +/* Description : Interleave both left and right half of input vectors + Arguments : Inputs - in0, in1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Right half of byte elements from 'in0' and 'in1' are + interleaved and stored to 'out0' + Left half of byte elements from 'in0' and 'in1' are + interleaved and stored to 'out1' +*/ +#define ILVRL_B2(RTYPE, in0, in1, out0, out1) { \ + out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ + out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ +} +#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) +#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) +#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) +#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) + +#define ILVRL_H2(RTYPE, in0, in1, out0, out1) { \ + out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \ + out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \ +} +#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__) +#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__) + +#define ILVRL_W2(RTYPE, in0, in1, out0, out1) { \ + out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \ + out1 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \ +} +#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__) +#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__) + +/* Description : Saturate the halfword element values to the max + unsigned value of (sat_val+1 bits) + The element data width remains unchanged + Arguments : Inputs - in0, in1, in2, in3, sat_val + Outputs - in0, in1, in2, in3 (in place) + Return Type - unsigned halfword + Details : Each unsigned halfword element from 'in0' is saturated to the + value generated with (sat_val+1) bit range. + The results are stored in place +*/ +#define SAT_UH2(RTYPE, in0, in1, sat_val) { \ + in0 = (RTYPE)__msa_sat_u_h((v8u16)in0, sat_val); \ + in1 = (RTYPE)__msa_sat_u_h((v8u16)in1, sat_val); \ +} +#define SAT_UH2_UH(...) 
SAT_UH2(v8u16, __VA_ARGS__) + +#define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val) { \ + SAT_UH2(RTYPE, in0, in1, sat_val); \ + SAT_UH2(RTYPE, in2, in3, sat_val) \ +} +#define SAT_UH4_UH(...) SAT_UH4(v8u16, __VA_ARGS__) + +/* Description : Saturate the halfword element values to the max + unsigned value of (sat_val+1 bits) + The element data width remains unchanged + Arguments : Inputs - in0, in1, in2, in3, sat_val + Outputs - in0, in1, in2, in3 (in place) + Return Type - unsigned halfword + Details : Each unsigned halfword element from 'in0' is saturated to the + value generated with (sat_val+1) bit range + The results are stored in place +*/ +#define SAT_SH2(RTYPE, in0, in1, sat_val) { \ + in0 = (RTYPE)__msa_sat_s_h((v8i16)in0, sat_val); \ + in1 = (RTYPE)__msa_sat_s_h((v8i16)in1, sat_val); \ +} +#define SAT_SH2_SH(...) SAT_SH2(v8i16, __VA_ARGS__) + +#define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) { \ + SAT_SH2(RTYPE, in0, in1, sat_val); \ + SAT_SH2(RTYPE, in2, in3, sat_val); \ +} +#define SAT_SH4_SH(...) SAT_SH4(v8i16, __VA_ARGS__) + +/* Description : Indexed halfword element values are replicated to all + elements in output vector + Arguments : Inputs - in, idx0, idx1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : 'idx0' element value from 'in' vector is replicated to all + elements in 'out0' vector + Valid index range for halfword operation is 0-7 +*/ +#define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) { \ + out0 = (RTYPE)__msa_splati_h((v8i16)in, idx0); \ + out1 = (RTYPE)__msa_splati_h((v8i16)in, idx1); \ +} +#define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__) + +#define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, \ + out0, out1, out2, out3) { \ + SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \ + SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3); \ +} +#define SPLATI_H4_SB(...) SPLATI_H4(v16i8, __VA_ARGS__) +#define SPLATI_H4_SH(...) SPLATI_H4(v8i16, __VA_ARGS__) + +/* Description : Pack even byte elements of vector pairs + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Even byte elements of in0 are copied to the left half of + out0 & even byte elements of in1 are copied to the right + half of out0. + Even byte elements of in2 are copied to the left half of + out1 & even byte elements of in3 are copied to the right + half of out1. +*/ +#define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \ + out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \ +} +#define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__) +#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__) +#define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__) + +#define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define PCKEV_B4_SB(...) PCKEV_B4(v16i8, __VA_ARGS__) +#define PCKEV_B4_UB(...) PCKEV_B4(v16u8, __VA_ARGS__) +#define PCKEV_B4_SH(...) PCKEV_B4(v8i16, __VA_ARGS__) + +/* Description : Pack even halfword elements of vector pairs + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Even halfword elements of in0 are copied to the left half of + out0 & even halfword elements of in1 are copied to the right + half of out0. + Even halfword elements of in2 are copied to the left half of + out1 & even halfword elements of in3 are copied to the right + half of out1. 
+*/ +#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \ + out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \ +} +#define PCKEV_H2_SH(...) PCKEV_H2(v8i16, __VA_ARGS__) +#define PCKEV_H2_SW(...) PCKEV_H2(v4i32, __VA_ARGS__) + +#define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define PCKEV_H4_SH(...) PCKEV_H4(v8i16, __VA_ARGS__) + +/* Description : Pack even double word elements of vector pairs + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - unsigned byte + Details : Even double elements of in0 are copied to the left half of + out0 & even double elements of in1 are copied to the right + half of out0. + Even double elements of in2 are copied to the left half of + out1 & even double elements of in3 are copied to the right + half of out1. +*/ +#define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_pckev_d((v2i64)in0, (v2i64)in1); \ + out1 = (RTYPE)__msa_pckev_d((v2i64)in2, (v2i64)in3); \ +} +#define PCKEV_D2_UB(...) PCKEV_D2(v16u8, __VA_ARGS__) +#define PCKEV_D2_SH(...) PCKEV_D2(v8i16, __VA_ARGS__) + +#define PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define PCKEV_D4_UB(...) PCKEV_D4(v16u8, __VA_ARGS__) + +/* Description : Each byte element is logically xor'ed with immediate 128 + Arguments : Inputs - in0, in1 + Outputs - in0, in1 (in-place) + Return Type - as per RTYPE + Details : Each unsigned byte element from input vector 'in0' is + logically xor'ed with 128 and the result is in-place stored in + 'in0' vector + Each unsigned byte element from input vector 'in1' is + logically xor'ed with 128 and the result is in-place stored in + 'in1' vector + Similar for other pairs +*/ +#define XORI_B2_128(RTYPE, in0, in1) { \ + in0 = (RTYPE)__msa_xori_b((v16u8)in0, 128); \ + in1 = (RTYPE)__msa_xori_b((v16u8)in1, 128); \ +} +#define XORI_B2_128_UB(...) XORI_B2_128(v16u8, __VA_ARGS__) +#define XORI_B2_128_SB(...) XORI_B2_128(v16i8, __VA_ARGS__) + +#define XORI_B3_128(RTYPE, in0, in1, in2) { \ + XORI_B2_128(RTYPE, in0, in1); \ + in2 = (RTYPE)__msa_xori_b((v16u8)in2, 128); \ +} +#define XORI_B3_128_SB(...) XORI_B3_128(v16i8, __VA_ARGS__) + +#define XORI_B4_128(RTYPE, in0, in1, in2, in3) { \ + XORI_B2_128(RTYPE, in0, in1); \ + XORI_B2_128(RTYPE, in2, in3); \ +} +#define XORI_B4_128_UB(...) XORI_B4_128(v16u8, __VA_ARGS__) +#define XORI_B4_128_SB(...) XORI_B4_128(v16i8, __VA_ARGS__) + +#define XORI_B7_128(RTYPE, in0, in1, in2, in3, in4, in5, in6) { \ + XORI_B4_128(RTYPE, in0, in1, in2, in3); \ + XORI_B3_128(RTYPE, in4, in5, in6); \ +} +#define XORI_B7_128_SB(...) XORI_B7_128(v16i8, __VA_ARGS__) + +/* Description : Average of signed halfword elements -> (a + b) / 2 + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + Outputs - out0, out1, out2, out3 + Return Type - as per RTYPE + Details : Each signed halfword element from 'in0' is added to each + signed halfword element of 'in1' with full precision resulting + in one extra bit in the result. 
The result is then divided by + 2 and written to 'out0' +*/ +#define AVE_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + out0 = (RTYPE)__msa_ave_s_h((v8i16)in0, (v8i16)in1); \ + out1 = (RTYPE)__msa_ave_s_h((v8i16)in2, (v8i16)in3); \ + out2 = (RTYPE)__msa_ave_s_h((v8i16)in4, (v8i16)in5); \ + out3 = (RTYPE)__msa_ave_s_h((v8i16)in6, (v8i16)in7); \ +} +#define AVE_SH4_SH(...) AVE_SH4(v8i16, __VA_ARGS__) + +/* Description : Addition of signed halfword elements and signed saturation + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Signed halfword elements from 'in0' are added to signed + halfword elements of 'in1'. The result is then signed saturated + between -32768 to +32767 (as per halfword data type) + Similar for other pairs +*/ +#define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) { \ + out0 = (RTYPE)__msa_adds_s_h((v8i16)in0, (v8i16)in1); \ + out1 = (RTYPE)__msa_adds_s_h((v8i16)in2, (v8i16)in3); \ +} +#define ADDS_SH2_SH(...) ADDS_SH2(v8i16, __VA_ARGS__) + +#define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \ + ADDS_SH2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} +#define ADDS_SH4_SH(...) ADDS_SH4(v8i16, __VA_ARGS__) + +/* Description : Shift left all elements of vector (generic for all data types) + Arguments : Inputs - in0, in1, in2, in3, shift + Outputs - in0, in1, in2, in3 (in place) + Return Type - as per input vector RTYPE + Details : Each element of vector 'in0' is left shifted by 'shift' and + the result is in place written to 'in0' + Similar for other pairs +*/ +#define SLLI_4V(in0, in1, in2, in3, shift) { \ + in0 = in0 << shift; \ + in1 = in1 << shift; \ + in2 = in2 << shift; \ + in3 = in3 << shift; \ +} + +/* Description : Arithmetic shift right all elements of vector + (generic for all data types) + Arguments : Inputs - in0, in1, in2, in3, shift + Outputs - in0, in1, in2, in3 (in place) + Return Type - as per input vector RTYPE + Details : Each element of vector 'in0' is right shifted by 'shift' and + the result is in place written to 'in0' + Here, 'shift' is GP variable passed in + Similar for other pairs +*/ +#define SRA_4V(in0, in1, in2, in3, shift) { \ + in0 = in0 >> shift; \ + in1 = in1 >> shift; \ + in2 = in2 >> shift; \ + in3 = in3 >> shift; \ +} + +/* Description : Shift right arithmetic rounded words + Arguments : Inputs - in0, in1, shift + Outputs - in place operation + Return Type - as per RTYPE + Details : Each element of vector 'in0' is shifted right arithmetically by + the number of bits in the corresponding element in the vector + 'shift'. The last discarded bit is added to shifted value for + rounding and the result is written in-place. + 'shift' is a vector. +*/ +#define SRAR_W2(RTYPE, in0, in1, shift) { \ + in0 = (RTYPE)__msa_srar_w((v4i32)in0, (v4i32)shift); \ + in1 = (RTYPE)__msa_srar_w((v4i32)in1, (v4i32)shift); \ +} + +#define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) { \ + SRAR_W2(RTYPE, in0, in1, shift) \ + SRAR_W2(RTYPE, in2, in3, shift) \ +} +#define SRAR_W4_SW(...) SRAR_W4(v4i32, __VA_ARGS__) + +/* Description : Shift right arithmetic rounded (immediate) + Arguments : Inputs - in0, in1, in2, in3, shift + Outputs - in0, in1, in2, in3 (in place) + Return Type - as per RTYPE + Details : Each element of vector 'in0' is shifted right arithmetic by + value in 'shift'. 
+                 The last discarded bit is added to the shifted value for rounding
+                 and the result is written in place to 'in0'
+                 Similar for other pairs
+*/
+#define SRARI_H2(RTYPE, in0, in1, shift) {       \
+  in0 = (RTYPE)__msa_srari_h((v8i16)in0, shift); \
+  in1 = (RTYPE)__msa_srari_h((v8i16)in1, shift); \
+}
+#define SRARI_H2_UH(...) SRARI_H2(v8u16, __VA_ARGS__)
+#define SRARI_H2_SH(...) SRARI_H2(v8i16, __VA_ARGS__)
+
+#define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) { \
+  SRARI_H2(RTYPE, in0, in1, shift);                  \
+  SRARI_H2(RTYPE, in2, in3, shift);                  \
+}
+#define SRARI_H4_UH(...) SRARI_H4(v8u16, __VA_ARGS__)
+#define SRARI_H4_SH(...) SRARI_H4(v8i16, __VA_ARGS__)
+
+/* Description : Shift right arithmetic rounded (immediate)
+   Arguments   : Inputs  - in0, in1, shift
+                 Outputs - in0, in1 (in place)
+                 Return Type - as per RTYPE
+   Details     : Each element of vector 'in0' is shifted right arithmetically by
+                 the value in 'shift'.
+                 The last discarded bit is added to the shifted value for rounding
+                 and the result is written in place to 'in0'
+                 Similar for other pairs
+*/
+#define SRARI_W2(RTYPE, in0, in1, shift) {       \
+  in0 = (RTYPE)__msa_srari_w((v4i32)in0, shift); \
+  in1 = (RTYPE)__msa_srari_w((v4i32)in1, shift); \
+}
+#define SRARI_W2_SW(...) SRARI_W2(v4i32, __VA_ARGS__)
+
+#define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) { \
+  SRARI_W2(RTYPE, in0, in1, shift);                  \
+  SRARI_W2(RTYPE, in2, in3, shift);                  \
+}
+#define SRARI_W4_SW(...) SRARI_W4(v4i32, __VA_ARGS__)
+
+/* Description : Logical shift right all elements of vector (immediate)
+   Arguments   : Inputs  - in0, in1, in2, in3, shift
+                 Outputs - out0, out1, out2, out3
+                 Return Type - as per RTYPE
+   Details     : Each element of vector 'in0' is right shifted by 'shift' and
+                 the result is written to 'out0'. 'shift' is an immediate value.
+*/
+#define SRLI_H4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, shift) { \
+  out0 = (RTYPE)__msa_srli_h((v8i16)in0, shift);                            \
+  out1 = (RTYPE)__msa_srli_h((v8i16)in1, shift);                            \
+  out2 = (RTYPE)__msa_srli_h((v8i16)in2, shift);                            \
+  out3 = (RTYPE)__msa_srli_h((v8i16)in3, shift);                            \
+}
+#define SRLI_H4_SH(...)
SRLI_H4(v8i16, __VA_ARGS__) + +/* Description : Multiplication of pairs of vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Details : Each element from 'in0' is multiplied with elements from 'in1' + and the result is written to 'out0' +*/ +#define MUL2(in0, in1, in2, in3, out0, out1) { \ + out0 = in0 * in1; \ + out1 = in2 * in3; \ +} +#define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + MUL2(in0, in1, in2, in3, out0, out1); \ + MUL2(in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Addition of 2 pairs of vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Details : Each element from 2 pairs vectors is added and 2 results are + produced +*/ +#define ADD2(in0, in1, in2, in3, out0, out1) { \ + out0 = in0 + in1; \ + out1 = in2 + in3; \ +} +#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + ADD2(in0, in1, in2, in3, out0, out1); \ + ADD2(in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Subtraction of 2 pairs of vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Details : Each element from 2 pairs vectors is subtracted and 2 results + are produced +*/ +#define SUB2(in0, in1, in2, in3, out0, out1) { \ + out0 = in0 - in1; \ + out1 = in2 - in3; \ +} +#define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) { \ + out0 = in0 - in1; \ + out1 = in2 - in3; \ + out2 = in4 - in5; \ + out3 = in6 - in7; \ +} + +/* Description : Sign extend halfword elements from right half of the vector + Arguments : Inputs - in (input halfword vector) + Outputs - out (sign extended word vectors) + Return Type - signed word + Details : Sign bit of halfword elements from input vector 'in' is + extracted and interleaved with same vector 'in0' to generate + 4 word elements keeping sign intact +*/ +#define UNPCK_R_SH_SW(in, out) { \ + v8i16 sign_m; \ + \ + sign_m = __msa_clti_s_h((v8i16)in, 0); \ + out = (v4i32)__msa_ilvr_h(sign_m, (v8i16)in); \ +} + +/* Description : Zero extend unsigned byte elements to halfword elements + Arguments : Inputs - in (1 input unsigned byte vector) + Outputs - out0, out1 (unsigned 2 halfword vectors) + Return Type - signed halfword + Details : Zero extended right half of vector is returned in 'out0' + Zero extended left half of vector is returned in 'out1' +*/ +#define UNPCK_UB_SH(in, out0, out1) { \ + v16i8 zero_m = { 0 }; \ + \ + ILVRL_B2_SH(zero_m, in, out0, out1); \ +} + +/* Description : Sign extend halfword elements from input vector and return + result in pair of vectors + Arguments : Inputs - in (1 input halfword vector) + Outputs - out0, out1 (sign extended 2 word vectors) + Return Type - signed word + Details : Sign bit of halfword elements from input vector 'in' is + extracted and interleaved right with same vector 'in0' to + generate 4 signed word elements in 'out0' + Then interleaved left with same vector 'in0' to + generate 4 signed word elements in 'out1' +*/ +#define UNPCK_SH_SW(in, out0, out1) { \ + v8i16 tmp_m; \ + \ + tmp_m = __msa_clti_s_h((v8i16)in, 0); \ + ILVRL_H2_SW(tmp_m, in, out0, out1); \ +} + +/* Description : Butterfly of 4 input vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1, out2, out3 + Details : Butterfly operation +*/ +#define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) { \ + out0 = in0 + in3; \ + out1 = in1 + in2; \ + \ + out2 = in1 - in2; \ + out3 = in0 - in3; \ +} + +/* Description : Butterfly of 8 input vectors + Arguments : Inputs - in0 ... 
in7 + Outputs - out0 .. out7 + Details : Butterfly operation +*/ +#define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + out0 = in0 + in7; \ + out1 = in1 + in6; \ + out2 = in2 + in5; \ + out3 = in3 + in4; \ \ - out_m = (v8u16)__msa_srari_h((v8i16)(input), (right_shift_val)); \ - out_m = __msa_sat_u_h(out_m, (sat_val)); \ - out_m; \ -}) + out4 = in3 - in4; \ + out5 = in2 - in5; \ + out6 = in1 - in6; \ + out7 = in0 - in7; \ +} -#define SRARI_SATURATE_SIGNED_H(input, right_shift_val, sat_val) ({ \ - v8i16 out_m; \ - \ - out_m = __msa_srari_h((v8i16)(input), (right_shift_val)); \ - out_m = __msa_sat_s_h(out_m, (sat_val)); \ - out_m; \ -}) +/* Description : Butterfly of 16 input vectors + Arguments : Inputs - in0 ... in15 + Outputs - out0 .. out15 + Details : Butterfly operation +*/ +#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, \ + in8, in9, in10, in11, in12, in13, in14, in15, \ + out0, out1, out2, out3, out4, out5, out6, out7, \ + out8, out9, out10, out11, out12, out13, out14, out15) { \ + out0 = in0 + in15; \ + out1 = in1 + in14; \ + out2 = in2 + in13; \ + out3 = in3 + in12; \ + out4 = in4 + in11; \ + out5 = in5 + in10; \ + out6 = in6 + in9; \ + out7 = in7 + in8; \ + \ + out8 = in7 - in8; \ + out9 = in6 - in9; \ + out10 = in5 - in10; \ + out11 = in4 - in11; \ + out12 = in3 - in12; \ + out13 = in2 - in13; \ + out14 = in1 - in14; \ + out15 = in0 - in15; \ +} -#define PCKEV_2B_XORI128_STORE_4_BYTES_4(in1, in2, \ - pdst, stride) { \ - uint32_t out0_m, out1_m, out2_m, out3_m; \ - v16i8 tmp0_m; \ - uint8_t *dst_m = (uint8_t *)(pdst); \ - \ - tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \ - tmp0_m = (v16i8)__msa_xori_b((v16u8)tmp0_m, 128); \ - \ - out0_m = __msa_copy_u_w((v4i32)tmp0_m, 0); \ - out1_m = __msa_copy_u_w((v4i32)tmp0_m, 1); \ - out2_m = __msa_copy_u_w((v4i32)tmp0_m, 2); \ - out3_m = __msa_copy_u_w((v4i32)tmp0_m, 3); \ - \ - STORE_WORD(dst_m, out0_m); \ - dst_m += stride; \ - STORE_WORD(dst_m, out1_m); \ - dst_m += stride; \ - STORE_WORD(dst_m, out2_m); \ - dst_m += stride; \ - STORE_WORD(dst_m, out3_m); \ -} - -#define PCKEV_B_4_XORI128_STORE_8_BYTES_4(in1, in2, \ - in3, in4, \ - pdst, stride) { \ - uint64_t out0_m, out1_m, out2_m, out3_m; \ - v16i8 tmp0_m, tmp1_m; \ - uint8_t *dst_m = (uint8_t *)(pdst); \ - \ - tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \ - tmp1_m = __msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \ - \ - tmp0_m = (v16i8)__msa_xori_b((v16u8)tmp0_m, 128); \ - tmp1_m = (v16i8)__msa_xori_b((v16u8)tmp1_m, 128); \ - \ - out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \ - out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \ - out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \ - out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \ - \ - STORE_DWORD(dst_m, out0_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out1_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out2_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out3_m); \ -} - -/* Only for signed vecs */ -#define PCKEV_B_XORI128_STORE_VEC(in1, in2, pdest) { \ - v16i8 tmp_m; \ - \ - tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \ - tmp_m = (v16i8)__msa_xori_b((v16u8)tmp_m, 128); \ - STORE_SB(tmp_m, (pdest)); \ -} - -/* Only for signed vecs */ -#define PCKEV_B_4_XORI128_AVG_STORE_8_BYTES_4(in1, dst0, \ - in2, dst1, \ - in3, dst2, \ - in4, dst3, \ - pdst, stride) { \ - uint64_t out0_m, out1_m, out2_m, out3_m; \ - v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ - uint8_t *dst_m = (uint8_t *)(pdst); \ - \ - tmp0_m = (v16u8)__msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \ - 
tmp1_m = (v16u8)__msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \ - \ - tmp2_m = (v16u8)__msa_ilvr_d((v2i64)(dst1), (v2i64)(dst0)); \ - tmp3_m = (v16u8)__msa_ilvr_d((v2i64)(dst3), (v2i64)(dst2)); \ - \ - tmp0_m = __msa_xori_b(tmp0_m, 128); \ - tmp1_m = __msa_xori_b(tmp1_m, 128); \ - \ - tmp0_m = __msa_aver_u_b(tmp0_m, tmp2_m); \ - tmp1_m = __msa_aver_u_b(tmp1_m, tmp3_m); \ - \ - out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \ - out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \ - out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \ - out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \ - \ - STORE_DWORD(dst_m, out0_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out1_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out2_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out3_m); \ -} - -/* Only for signed vecs */ -#define PCKEV_B_XORI128_AVG_STORE_VEC(in1, in2, dst, pdest) { \ - v16u8 tmp_m; \ - \ - tmp_m = (v16u8)__msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \ - tmp_m = __msa_xori_b(tmp_m, 128); \ - tmp_m = __msa_aver_u_b(tmp_m, (v16u8)(dst)); \ - STORE_UB(tmp_m, (pdest)); \ -} - -#define PCKEV_B_STORE_8_BYTES_4(in1, in2, in3, in4, \ - pdst, stride) { \ - uint64_t out0_m, out1_m, out2_m, out3_m; \ - v16i8 tmp0_m, tmp1_m; \ - uint8_t *dst_m = (uint8_t *)(pdst); \ - \ - tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \ - tmp1_m = __msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \ - \ - out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \ - out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \ - out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \ - out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \ - \ - STORE_DWORD(dst_m, out0_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out1_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out2_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out3_m); \ -} - -/* Only for unsigned vecs */ -#define PCKEV_B_STORE_VEC(in1, in2, pdest) { \ - v16i8 tmp_m; \ - \ - tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \ - STORE_SB(tmp_m, (pdest)); \ +/* Description : Transposes input 8x8 byte block + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + (input 8x8 byte block) + Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + (output 8x8 byte block) + Return Type - unsigned byte + Details : +*/ +#define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ + \ + ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, \ + tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m); \ + ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m); \ + ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2); \ + ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6); \ + SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8); \ + SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8); \ +} +#define TRANSPOSE8x8_UB_UB(...) 
TRANSPOSE8x8_UB(v16u8, __VA_ARGS__) + +/* Description : Transposes 16x8 block into 8x16 with byte elements in vectors + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15 + Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + Return Type - unsigned byte + Details : +*/ +#define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \ + in8, in9, in10, in11, in12, in13, in14, in15, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v16u8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ + \ + ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \ + ILVEV_D2_UB(in2, in10, in3, in11, out5, out4); \ + ILVEV_D2_UB(in4, in12, in5, in13, out3, out2); \ + ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \ + \ + tmp0_m = (v16u8)__msa_ilvev_b((v16i8)out6, (v16i8)out7); \ + tmp4_m = (v16u8)__msa_ilvod_b((v16i8)out6, (v16i8)out7); \ + tmp1_m = (v16u8)__msa_ilvev_b((v16i8)out4, (v16i8)out5); \ + tmp5_m = (v16u8)__msa_ilvod_b((v16i8)out4, (v16i8)out5); \ + out5 = (v16u8)__msa_ilvev_b((v16i8)out2, (v16i8)out3); \ + tmp6_m = (v16u8)__msa_ilvod_b((v16i8)out2, (v16i8)out3); \ + out7 = (v16u8)__msa_ilvev_b((v16i8)out0, (v16i8)out1); \ + tmp7_m = (v16u8)__msa_ilvod_b((v16i8)out0, (v16i8)out1); \ + \ + ILVEV_H2_UB(tmp0_m, tmp1_m, out5, out7, tmp2_m, tmp3_m); \ + out0 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ + out4 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ + \ + tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \ + tmp3_m = (v16u8)__msa_ilvod_h((v8i16)out7, (v8i16)out5); \ + out2 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ + out6 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ + \ + ILVEV_H2_UB(tmp4_m, tmp5_m, tmp6_m, tmp7_m, tmp2_m, tmp3_m); \ + out1 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ + out5 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ + \ + tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \ + tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \ + tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \ + tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \ + out3 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ + out7 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ } -#define PCKEV_B_AVG_STORE_8_BYTES_4(in1, dst0, in2, dst1, \ - in3, dst2, in4, dst3, \ - pdst, stride) { \ - uint64_t out0_m, out1_m, out2_m, out3_m; \ - v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ - uint8_t *dst_m = (uint8_t *)(pdst); \ - \ - tmp0_m = (v16u8)__msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \ - tmp1_m = (v16u8)__msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \ - \ - tmp2_m = (v16u8)__msa_pckev_d((v2i64)(dst1), (v2i64)(dst0)); \ - tmp3_m = (v16u8)__msa_pckev_d((v2i64)(dst3), (v2i64)(dst2)); \ - \ - tmp0_m = __msa_aver_u_b(tmp0_m, tmp2_m); \ - tmp1_m = __msa_aver_u_b(tmp1_m, tmp3_m); \ +/* Description : Transposes 4x4 block with half word elements in vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1, out2, out3 + Return Type - signed halfword + Details : +*/ +#define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) { \ + v8i16 s0_m, s1_m; \ + \ + ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \ + ILVRL_W2_SH(s1_m, s0_m, out0, out2); \ + out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ + out3 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out2); \ +} + +/* Description : Transposes 4x8 block with half word elements in vectors + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, 
in7 + Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + Return Type - signed halfword + Details : +*/ +#define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n; \ + v8i16 zero_m = { 0 }; \ + \ + ILVR_H4_SH(in1, in0, in3, in2, in5, in4, in7, in6, \ + tmp0_n, tmp1_n, tmp2_n, tmp3_n); \ + ILVRL_W2_SH(tmp1_n, tmp0_n, tmp0_m, tmp2_m); \ + ILVRL_W2_SH(tmp3_n, tmp2_n, tmp1_m, tmp3_m); \ + \ + out0 = (v8i16)__msa_ilvr_d((v2i64)tmp1_m, (v2i64)tmp0_m); \ + out1 = (v8i16)__msa_ilvl_d((v2i64)tmp1_m, (v2i64)tmp0_m); \ + out2 = (v8i16)__msa_ilvr_d((v2i64)tmp3_m, (v2i64)tmp2_m); \ + out3 = (v8i16)__msa_ilvl_d((v2i64)tmp3_m, (v2i64)tmp2_m); \ + \ + out4 = zero_m; \ + out5 = zero_m; \ + out6 = zero_m; \ + out7 = zero_m; \ +} + +/* Description : Transposes 8x4 block with half word elements in vectors + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + Return Type - signed halfword + Details : +*/ +#define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) { \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + ILVR_H2_SH(in1, in0, in3, in2, tmp0_m, tmp1_m); \ + ILVL_H2_SH(in1, in0, in3, in2, tmp2_m, tmp3_m); \ + ILVR_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \ + ILVL_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \ +} + +/* Description : Transposes 8x8 block with half word elements in vectors + Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + Return Type - signed halfword + Details : +*/ +#define TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v8i16 s0_m, s1_m; \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ + \ + ILVR_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \ + ILVRL_H2_SH(s1_m, s0_m, tmp0_m, tmp1_m); \ + ILVL_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \ + ILVRL_H2_SH(s1_m, s0_m, tmp2_m, tmp3_m); \ + ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \ + ILVRL_H2_SH(s1_m, s0_m, tmp4_m, tmp5_m); \ + ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \ + ILVRL_H2_SH(s1_m, s0_m, tmp6_m, tmp7_m); \ + PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \ + tmp3_m, tmp7_m, out0, out2, out4, out6); \ + out1 = (RTYPE)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \ + out3 = (RTYPE)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \ + out5 = (RTYPE)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \ + out7 = (RTYPE)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \ +} +#define TRANSPOSE8x8_SH_SH(...) 
TRANSPOSE8x8_H(v8i16, __VA_ARGS__) + +/* Description : Transposes 4x4 block with word elements in vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1, out2, out3 + Return Type - signed word + Details : +*/ +#define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) { \ + v4i32 s0_m, s1_m, s2_m, s3_m; \ + \ + ILVRL_W2_SW(in1, in0, s0_m, s1_m); \ + ILVRL_W2_SW(in3, in2, s2_m, s3_m); \ + \ + out0 = (v4i32)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \ + out1 = (v4i32)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \ + out2 = (v4i32)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \ + out3 = (v4i32)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \ +} + +/* Description : Add block 4x4 + Arguments : Inputs - in0, in1, in2, in3, pdst, stride + Outputs - + Return Type - unsigned bytes + Details : Least significant 4 bytes from each input vector are added to + the destination bytes, clipped between 0-255 and then stored. +*/ +#define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) { \ + uint32_t src0_m, src1_m, src2_m, src3_m; \ + uint32_t out0_m, out1_m, out2_m, out3_m; \ + v8i16 inp0_m, inp1_m, res0_m, res1_m; \ + v16i8 dst0_m = { 0 }; \ + v16i8 dst1_m = { 0 }; \ + v16i8 zero_m = { 0 }; \ \ - out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \ - out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \ - out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \ - out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \ + ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m) \ + LW4(pdst, stride, src0_m, src1_m, src2_m, src3_m); \ + INSERT_W2_SB(src0_m, src1_m, dst0_m); \ + INSERT_W2_SB(src2_m, src3_m, dst1_m); \ + ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m); \ + ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m); \ + CLIP_SH2_0_255(res0_m, res1_m); \ + PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m); \ \ - STORE_DWORD(dst_m, out0_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out1_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out2_m); \ - dst_m += stride; \ - STORE_DWORD(dst_m, out3_m); \ -} - -#define PCKEV_B_AVG_STORE_VEC(in1, in2, dst, pdest) { \ - v16u8 tmp_m; \ - \ - tmp_m = (v16u8)__msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \ - tmp_m = __msa_aver_u_b(tmp_m, (v16u8)(dst)); \ - STORE_UB(tmp_m, (pdest)); \ -} - -/* Generic for Vector types and GP operations */ -#define BUTTERFLY_4(in0, in1, in2, in3, \ - out0, out1, out2, out3) { \ - out0 = (in0) + (in3); \ - out1 = (in1) + (in2); \ - \ - out2 = (in1) - (in2); \ - out3 = (in0) - (in3); \ -} - -/* Generic for Vector types and GP operations */ -#define BUTTERFLY_8(in0, in1, in2, in3, \ - in4, in5, in6, in7, \ - out0, out1, out2, out3, \ - out4, out5, out6, out7) { \ - out0 = (in0) + (in7); \ - out1 = (in1) + (in6); \ - out2 = (in2) + (in5); \ - out3 = (in3) + (in4); \ - \ - out4 = (in3) - (in4); \ - out5 = (in2) - (in5); \ - out6 = (in1) - (in6); \ - out7 = (in0) - (in7); \ -} -#endif /* HAVE_MSA */ + out0_m = __msa_copy_u_w((v4i32)dst0_m, 0); \ + out1_m = __msa_copy_u_w((v4i32)dst0_m, 1); \ + out2_m = __msa_copy_u_w((v4i32)dst1_m, 0); \ + out3_m = __msa_copy_u_w((v4i32)dst1_m, 1); \ + SW4(out0_m, out1_m, out2_m, out3_m, pdst, stride); \ +} + +/* Description : Pack even elements of input vectors & xor with 128 + Arguments : Inputs - in0, in1 + Outputs - out_m + Return Type - unsigned byte + Details : Signed byte even elements from 'in0' and 'in1' are packed + together in one vector and the resulting vector is xor'ed with + 128 to shift the range from signed to unsigned byte +*/ +#define PCKEV_XORI128_UB(in0, in1) ({ \ + v16u8 out_m; \ + \ + out_m = 
(v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0); \ + out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \ + out_m; \ +}) + +/* Description : Converts inputs to unsigned bytes, interleave, average & store + as 8x4 unsigned byte block + Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, dst2, dst3, + pdst, stride +*/ +#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \ + dst0, dst1, dst2, dst3, pdst, stride) { \ + v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + uint8_t *pdst_m = (uint8_t *)(pdst); \ + \ + tmp0_m = PCKEV_XORI128_UB(in0, in1); \ + tmp1_m = PCKEV_XORI128_UB(in2, in3); \ + ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \ + AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \ + ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \ +} + +/* Description : Pack even byte elements and store byte vector in destination + memory + Arguments : Inputs - in0, in1, pdst +*/ +#define PCKEV_ST_SB(in0, in1, pdst) { \ + v16i8 tmp_m; \ + \ + tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \ + ST_SB(tmp_m, (pdst)); \ +} + +/* Description : Horizontal 2 tap filter kernel code + Arguments : Inputs - in0, in1, mask, coeff, shift +*/ +#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) ({ \ + v16i8 tmp0_m; \ + v8u16 tmp1_m; \ + \ + tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \ + tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \ + tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \ + tmp1_m = __msa_sat_u_h(tmp1_m, shift); \ + \ + tmp1_m; \ +}) #endif /* VP9_COMMON_MIPS_MSA_VP9_MACROS_MSA_H_ */ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c new file mode 100644 index 00000000000..64cb9a81835 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride, + uint8_t *dst_ptr, int32_t dst_stride, + int32_t src_weight) { + int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; + int32_t row; + uint64_t src0_d, src1_d, dst0_d, dst1_d; + v16i8 src0 = { 0 }; + v16i8 src1 = { 0 }; + v16i8 dst0 = { 0 }; + v16i8 dst1 = { 0 }; + v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; + + src_wt = __msa_fill_h(src_weight); + dst_wt = __msa_fill_h(dst_weight); + + for (row = 2; row--;) { + LD2(src_ptr, src_stride, src0_d, src1_d); + src_ptr += (2 * src_stride); + LD2(dst_ptr, dst_stride, dst0_d, dst1_d); + INSERT_D2_SB(src0_d, src1_d, src0); + INSERT_D2_SB(dst0_d, dst1_d, dst0); + + LD2(src_ptr, src_stride, src0_d, src1_d); + src_ptr += (2 * src_stride); + LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d); + INSERT_D2_SB(src0_d, src1_d, src1); + INSERT_D2_SB(dst0_d, dst1_d, dst1); + + UNPCK_UB_SH(src0, src_r, src_l); + UNPCK_UB_SH(dst0, dst_r, dst_l); + res_h_r = (src_r * src_wt); + res_h_r += (dst_r * dst_wt); + res_h_l = (src_l * src_wt); + res_h_l += (dst_l * dst_wt); + SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); + dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); + ST8x2_UB(dst0, dst_ptr, dst_stride); + dst_ptr += (2 * dst_stride); + + UNPCK_UB_SH(src1, src_r, src_l); + UNPCK_UB_SH(dst1, dst_r, dst_l); + res_h_r = (src_r * src_wt); + res_h_r += (dst_r * dst_wt); + res_h_l = (src_l * src_wt); + res_h_l += (dst_l * dst_wt); + SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); + dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); + ST8x2_UB(dst1, dst_ptr, dst_stride); + dst_ptr += (2 * dst_stride); + } +} + +static void filter_by_weight16x16_msa(const uint8_t *src_ptr, + int32_t src_stride, + uint8_t *dst_ptr, + int32_t dst_stride, + int32_t src_weight) { + int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; + int32_t row; + v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; + v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; + + src_wt = __msa_fill_h(src_weight); + dst_wt = __msa_fill_h(dst_weight); + + for (row = 4; row--;) { + LD_SB4(src_ptr, src_stride, src0, src1, src2, src3); + src_ptr += (4 * src_stride); + LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3); + + UNPCK_UB_SH(src0, src_r, src_l); + UNPCK_UB_SH(dst0, dst_r, dst_l); + res_h_r = (src_r * src_wt); + res_h_r += (dst_r * dst_wt); + res_h_l = (src_l * src_wt); + res_h_l += (dst_l * dst_wt); + SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); + PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); + dst_ptr += dst_stride; + + UNPCK_UB_SH(src1, src_r, src_l); + UNPCK_UB_SH(dst1, dst_r, dst_l); + res_h_r = (src_r * src_wt); + res_h_r += (dst_r * dst_wt); + res_h_l = (src_l * src_wt); + res_h_l += (dst_l * dst_wt); + SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); + PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); + dst_ptr += dst_stride; + + UNPCK_UB_SH(src2, src_r, src_l); + UNPCK_UB_SH(dst2, dst_r, dst_l); + res_h_r = (src_r * src_wt); + res_h_r += (dst_r * dst_wt); + res_h_l = (src_l * src_wt); + res_h_l += (dst_l * dst_wt); + SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); + PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); + dst_ptr += dst_stride; + + UNPCK_UB_SH(src3, src_r, src_l); + UNPCK_UB_SH(dst3, dst_r, dst_l); + res_h_r = (src_r * src_wt); + res_h_r += (dst_r * dst_wt); + res_h_l = (src_l * src_wt); + res_h_l += (dst_l * dst_wt); + 
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); + PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); + dst_ptr += dst_stride; + } +} + +void vp9_filter_by_weight8x8_msa(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int src_weight) { + filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight); +} + +void vp9_filter_by_weight16x16_msa(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int src_weight) { + filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c index 222b88eff5d..8eda491de93 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c @@ -11,6 +11,7 @@ #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h index 018a9c2b975..64d379cab34 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h @@ -18,74 +18,28 @@ #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_common_data.h" -#include "vp9/common/vp9_filter.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_mv.h" #include "vp9/common/vp9_scale.h" +#include "vp9/common/vp9_seg_common.h" #ifdef __cplusplus extern "C" { #endif -#define BLOCK_SIZE_GROUPS 4 -#define SKIP_CONTEXTS 3 -#define INTER_MODE_CONTEXTS 7 - -/* Segment Feature Masks */ -#define MAX_MV_REF_CANDIDATES 2 - -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 5 - -typedef enum { - PLANE_TYPE_Y = 0, - PLANE_TYPE_UV = 1, - PLANE_TYPES -} PLANE_TYPE; - #define MAX_MB_PLANE 3 -typedef char ENTROPY_CONTEXT; - -static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, - ENTROPY_CONTEXT b) { - return (a != 0) + (b != 0); -} - typedef enum { KEY_FRAME = 0, INTER_FRAME = 1, FRAME_TYPES, } FRAME_TYPE; -typedef enum { - DC_PRED, // Average of above and left pixels - V_PRED, // Vertical - H_PRED, // Horizontal - D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi) - D135_PRED, // Directional 135 deg = 180 - 45 - D117_PRED, // Directional 117 deg = 180 - 63 - D153_PRED, // Directional 153 deg = 180 - 27 - D207_PRED, // Directional 207 deg = 180 + 27 - D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi) - TM_PRED, // True-motion - NEARESTMV, - NEARMV, - ZEROMV, - NEWMV, - MB_MODE_COUNT -} PREDICTION_MODE; - static INLINE int is_inter_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } -#define INTRA_MODES (TM_PRED + 1) - -#define INTER_MODES (1 + NEWMV - NEARESTMV) - -#define INTER_OFFSET(mode) ((mode) - NEARESTMV) - /* For keyframes, intra block modes are predicted by the (already decoded) modes for the Y blocks to the left and above us; for interframes, there is a single probability table. 
*/ @@ -96,16 +50,16 @@ typedef struct { } b_mode_info; // Note that the rate-distortion optimization loop, bit-stream writer, and -// decoder implementation modules critically rely on the enum entry values +// decoder implementation modules critically rely on the defined entry values // specified herein. They should be refactored concurrently. -typedef enum { - NONE = -1, - INTRA_FRAME = 0, - LAST_FRAME = 1, - GOLDEN_FRAME = 2, - ALTREF_FRAME = 3, - MAX_REF_FRAMES = 4 -} MV_REFERENCE_FRAME; + +#define NONE -1 +#define INTRA_FRAME 0 +#define LAST_FRAME 1 +#define GOLDEN_FRAME 2 +#define ALTREF_FRAME 3 +#define MAX_REF_FRAMES 4 +typedef int8_t MV_REFERENCE_FRAME; // This structure now relates to 8x8 block regions. typedef struct { @@ -121,12 +75,17 @@ typedef struct { PREDICTION_MODE uv_mode; // Only for INTER blocks + INTERP_FILTER interp_filter; MV_REFERENCE_FRAME ref_frame[2]; + + // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. int_mv mv[2]; + +#if CONFIG_VP9_ENCODER + // TODO(slavarnway): Move to encoder int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; uint8_t mode_context[MAX_REF_FRAMES]; - INTERP_FILTER interp_filter; - +#endif } MB_MODE_INFO; typedef struct MODE_INFO { @@ -170,9 +129,12 @@ struct macroblockd_plane { int subsampling_y; struct buf_2d dst; struct buf_2d pre[2]; - const int16_t *dequant; ENTROPY_CONTEXT *above_context; ENTROPY_CONTEXT *left_context; + int16_t seg_dequant[MAX_SEGMENTS][2]; + + // encoder + const int16_t *dequant; }; #define BLOCK_OFFSET(x, i) ((x) + (i) * 16) @@ -187,7 +149,7 @@ typedef struct RefBuffer { typedef struct macroblockd { struct macroblockd_plane plane[MAX_MB_PLANE]; - + FRAME_COUNTS *counts; int mi_stride; MODE_INFO **mi; @@ -199,12 +161,17 @@ typedef struct macroblockd { int up_available; int left_available; + const vp9_prob (*partition_probs)[PARTITION_TYPES - 1]; + /* Distance of MB away from frame edges */ int mb_to_left_edge; int mb_to_right_edge; int mb_to_top_edge; int mb_to_bottom_edge; + FRAME_CONTEXT *fc; + int frame_parallel_decoding_mode; + /* pointers to reference frames */ RefBuffer *block_refs[2]; @@ -217,13 +184,9 @@ typedef struct macroblockd { PARTITION_CONTEXT *above_seg_context; PARTITION_CONTEXT left_seg_context[8]; - /* mc buffer */ - DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); - #if CONFIG_VP9_HIGHBITDEPTH /* Bit depth: 8, 10, 12 */ int bd; - DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); #endif /* dqcoeff are shared by all the planes. 
So planes must be decoded serially */ @@ -285,6 +248,27 @@ static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; } +static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + struct macroblockd_plane *const pd = &xd->plane[i]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + memset(pd->above_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); + memset(pd->left_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); + } +} + +static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi, + const MODE_INFO *above_mi, + const MODE_INFO *left_mi, + int block) { + const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); + const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); + return vp9_kf_y_mode_prob[above][left]; +} + typedef void (*foreach_transformed_block_visitor)(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h index d06b8e0405e..9c2d7791e75 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h @@ -27,12 +27,6 @@ extern "C" { #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define ROUND_POWER_OF_TWO(value, n) \ - (((value) + (1 << ((n) - 1))) >> (n)) - -#define ALIGN_POWER_OF_TWO(value, n) \ - (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) - // Only need this for fixed-size arrays, for structs just assign. #define vp9_copy(dest, src) { \ assert(sizeof(dest) == sizeof(src)); \ @@ -83,9 +77,6 @@ static INLINE uint16_t clip_pixel_highbd(int val, int bd) { typedef int64_t tran_high_t; typedef int32_t tran_low_t; -#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) -#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1 )) - #else // Note: diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c index a2584e8da5b..ad6c04bcc46 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c @@ -133,12 +133,6 @@ const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = { 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 }; -const vp9_tree_index vp9_coefmodel_tree[TREE_SIZE(UNCONSTRAINED_NODES + 1)] = { - -EOB_MODEL_TOKEN, 2, - -ZERO_TOKEN, 4, - -ONE_TOKEN, -TWO_TOKEN, -}; - // Model obtained from a 2-sided zero-centerd distribuition derived // from a Pareto distribution. 
The cdf of the distribution is: // cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta] diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h index 5a9007b5417..2fc97c3f9ed 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h @@ -14,8 +14,8 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" #include "vp9/common/vp9_prob.h" -#include "vp9/common/vp9_scan.h" #ifdef __cplusplus extern "C" { @@ -74,7 +74,6 @@ DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); #endif // CONFIG_VP9_HIGHBITDEPTH #define EOB_MODEL_TOKEN 3 -extern const vp9_tree_index vp9_coefmodel_tree[]; typedef struct { const vp9_tree_index *tree; @@ -137,18 +136,6 @@ struct VP9Common; void vp9_default_coef_probs(struct VP9Common *cm); void vp9_adapt_coef_probs(struct VP9Common *cm); -static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) { - struct macroblockd_plane *const pd = &xd->plane[i]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - memset(pd->above_context, 0, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); - memset(pd->left_context, 0, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); - } -} - // This is the index in the scan order beyond which all coefficients for // 8x8 transform and above are in the top band. // This macro is currently unused but may be used by certain implementations @@ -185,6 +172,13 @@ typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full); +typedef char ENTROPY_CONTEXT; + +static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, + ENTROPY_CONTEXT b) { + return (a != 0) + (b != 0); +} + static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) { ENTROPY_CONTEXT above_ec = 0, left_ec = 0; @@ -214,18 +208,6 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, return combine_entropy_contexts(above_ec, left_ec); } -static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, - PLANE_TYPE type, int block_idx) { - const MODE_INFO *const mi = xd->mi[0]; - - if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { - return &vp9_default_scan_orders[tx_size]; - } else { - const PREDICTION_MODE mode = get_y_mode(mi, block_idx); - return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; - } -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c index 424451fee39..22d431bb22d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c @@ -314,7 +314,7 @@ static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] { 149, 144, }, }; -void vp9_init_mode_probs(FRAME_CONTEXT *fc) { +static void init_mode_probs(FRAME_CONTEXT *fc) { vp9_copy(fc->uv_mode_prob, default_if_uv_probs); vp9_copy(fc->y_mode_prob, default_if_y_probs); vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); @@ -444,7 +444,7 @@ void 
vp9_setup_past_independence(VP9_COMMON *cm) { lf->last_sharpness_level = -1; vp9_default_coef_probs(cm); - vp9_init_mode_probs(cm->fc); + init_mode_probs(cm->fc); vp9_init_mv_probs(cm); cm->fc->initialized = 1; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h index f4e20e1af8b..8c9e6a7319d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h @@ -11,7 +11,7 @@ #ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ #define VP9_COMMON_VP9_ENTROPYMODE_H_ -#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymv.h" @@ -19,8 +19,12 @@ extern "C" { #endif +#define BLOCK_SIZE_GROUPS 4 + #define TX_SIZE_CONTEXTS 2 +#define INTER_OFFSET(mode) ((mode) - NEARESTMV) + struct VP9Common; struct tx_probs { @@ -86,8 +90,6 @@ extern const vp9_tree_index vp9_switchable_interp_tree void vp9_setup_past_independence(struct VP9Common *cm); -void vp9_init_mode_probs(FRAME_CONTEXT *fc); - void vp9_adapt_mode_probs(struct VP9Common *cm); void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, @@ -97,15 +99,6 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); -static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi, - const MODE_INFO *above_mi, - const MODE_INFO *left_mi, - int block) { - const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); - const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); - return vp9_kf_y_mode_prob[above][left]; -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h index 7938fc10a11..d089f23f970 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h @@ -12,6 +12,7 @@ #define VP9_COMMON_VP9_ENUMS_H_ #include "./vpx_config.h" +#include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { @@ -40,23 +41,22 @@ typedef enum BITSTREAM_PROFILE { MAX_PROFILES } BITSTREAM_PROFILE; -typedef enum BLOCK_SIZE { - BLOCK_4X4, - BLOCK_4X8, - BLOCK_8X4, - BLOCK_8X8, - BLOCK_8X16, - BLOCK_16X8, - BLOCK_16X16, - BLOCK_16X32, - BLOCK_32X16, - BLOCK_32X32, - BLOCK_32X64, - BLOCK_64X32, - BLOCK_64X64, - BLOCK_SIZES, - BLOCK_INVALID = BLOCK_SIZES -} BLOCK_SIZE; +#define BLOCK_4X4 0 +#define BLOCK_4X8 1 +#define BLOCK_8X4 2 +#define BLOCK_8X8 3 +#define BLOCK_8X16 4 +#define BLOCK_16X8 5 +#define BLOCK_16X16 6 +#define BLOCK_16X32 7 +#define BLOCK_32X16 8 +#define BLOCK_32X32 9 +#define BLOCK_32X64 10 +#define BLOCK_64X32 11 +#define BLOCK_64X64 12 +#define BLOCK_SIZES 13 +#define BLOCK_INVALID BLOCK_SIZES +typedef uint8_t BLOCK_SIZE; typedef enum PARTITION_TYPE { PARTITION_NONE, @@ -72,13 +72,12 @@ typedef char PARTITION_CONTEXT; #define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) // block transform size -typedef enum { - TX_4X4 = 0, // 4x4 transform - TX_8X8 = 1, // 8x8 transform - TX_16X16 = 2, // 16x16 transform - TX_32X32 = 3, // 32x32 transform - TX_SIZES -} TX_SIZE; +typedef uint8_t TX_SIZE; +#define TX_4X4 ((TX_SIZE)0) // 4x4 transform +#define TX_8X8 ((TX_SIZE)1) // 8x8 transform +#define TX_16X16 ((TX_SIZE)2) // 
16x16 transform +#define TX_32X32 ((TX_SIZE)3) // 32x32 transform +#define TX_SIZES ((TX_SIZE)4) // frame transform mode typedef enum { @@ -104,6 +103,43 @@ typedef enum { VP9_ALT_FLAG = 1 << 2, } VP9_REFFRAME; +typedef enum { + PLANE_TYPE_Y = 0, + PLANE_TYPE_UV = 1, + PLANE_TYPES +} PLANE_TYPE; + +#define DC_PRED 0 // Average of above and left pixels +#define V_PRED 1 // Vertical +#define H_PRED 2 // Horizontal +#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) +#define D135_PRED 4 // Directional 135 deg = 180 - 45 +#define D117_PRED 5 // Directional 117 deg = 180 - 63 +#define D153_PRED 6 // Directional 153 deg = 180 - 27 +#define D207_PRED 7 // Directional 207 deg = 180 + 27 +#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) +#define TM_PRED 9 // True-motion +#define NEARESTMV 10 +#define NEARMV 11 +#define ZEROMV 12 +#define NEWMV 13 +#define MB_MODE_COUNT 14 +typedef uint8_t PREDICTION_MODE; + +#define INTRA_MODES (TM_PRED + 1) + +#define INTER_MODES (1 + NEWMV - NEARESTMV) + +#define SKIP_CONTEXTS 3 +#define INTER_MODE_CONTEXTS 7 + +/* Segment Feature Masks */ +#define MAX_MV_REF_CANDIDATES 2 + +#define INTRA_INTER_CONTEXTS 4 +#define COMP_INTER_CONTEXTS 5 +#define REF_CONTEXTS 5 + #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c index afcdf22ec63..14654a5ab2a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c @@ -12,7 +12,8 @@ #include "vp9/common/vp9_filter.h" -const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = { +DECLARE_ALIGNED(256, static const InterpKernel, + bilinear_filters[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, { 0, 0, 0, 112, 16, 0, 0, 0 }, @@ -32,8 +33,8 @@ const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = { }; // Lagrangian interpolation filter -DECLARE_ALIGNED(256, const InterpKernel, - vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = { +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0}, { 0, 1, -5, 126, 8, -3, 1, 0}, { -1, 3, -10, 122, 18, -6, 2, 0}, @@ -53,8 +54,8 @@ DECLARE_ALIGNED(256, const InterpKernel, }; // DCT based filter -DECLARE_ALIGNED(256, const InterpKernel, - vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = { +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8s[SUBPEL_SHIFTS]) = { {0, 0, 0, 128, 0, 0, 0, 0}, {-1, 3, -7, 127, 8, -3, 1, 0}, {-2, 5, -13, 125, 17, -6, 3, -1}, @@ -74,8 +75,8 @@ DECLARE_ALIGNED(256, const InterpKernel, }; // freqmultiplier = 0.5 -DECLARE_ALIGNED(256, const InterpKernel, - vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0}, {-3, -1, 32, 64, 38, 1, -3, 0}, {-2, -2, 29, 63, 41, 2, -3, 0}, @@ -95,15 +96,15 @@ DECLARE_ALIGNED(256, const InterpKernel, }; -static const InterpKernel* vp9_filter_kernels[4] = { - vp9_sub_pel_filters_8, - vp9_sub_pel_filters_8lp, - vp9_sub_pel_filters_8s, - vp9_bilinear_filters +static const InterpKernel* filter_kernels[4] = { + sub_pel_filters_8, + sub_pel_filters_8lp, + sub_pel_filters_8s, + bilinear_filters }; const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter) { assert(filter != SWITCHABLE); - return vp9_filter_kernels[filter]; + return filter_kernels[filter]; } diff --git 
a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h index d963ee23569..13d38affbaa 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h @@ -27,30 +27,21 @@ extern "C" { #define SUBPEL_SHIFTS (1 << SUBPEL_BITS) #define SUBPEL_TAPS 8 -typedef enum { - EIGHTTAP = 0, - EIGHTTAP_SMOOTH = 1, - EIGHTTAP_SHARP = 2, - SWITCHABLE_FILTERS = 3, /* Number of switchable filters */ - BILINEAR = 3, - // The codec can operate in four possible inter prediction filter mode: - // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. - SWITCHABLE_FILTER_CONTEXTS = SWITCHABLE_FILTERS + 1, - SWITCHABLE = 4 /* should be the last one */ -} INTERP_FILTER; +#define EIGHTTAP 0 +#define EIGHTTAP_SMOOTH 1 +#define EIGHTTAP_SHARP 2 +#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ +#define BILINEAR 3 +// The codec can operate in four possible inter prediction filter mode: +// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. +#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) +#define SWITCHABLE 4 /* should be the last one */ +typedef uint8_t INTERP_FILTER; typedef int16_t InterpKernel[SUBPEL_TAPS]; const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter); -DECLARE_ALIGNED(256, extern const InterpKernel, - vp9_bilinear_filters[SUBPEL_SHIFTS]); - -// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear -// filter kernel as a 2 tap filter. -#define BILINEAR_FILTERS_2TAP(x) \ - (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1) - #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c index 3b214371c47..174b96e21ad 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c @@ -11,6 +11,7 @@ #include <math.h> #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h index 6e2551dd4bc..cee1682a67f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h @@ -14,6 +14,7 @@ #include <assert.h> #include "./vpx_config.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_enums.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c index 69d393ef469..9816728364a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c @@ -13,6 +13,7 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_reconinter.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_seg_common.h" @@ -266,8 +267,8 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { int lvl_seg = default_filt_lvl; - if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { - const int data = vp9_get_segdata(seg, seg_id, 
SEG_LVL_ALT_LF); + if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { + const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0, MAX_LOOP_FILTER); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c index 2e32c40b85e..3cf4c32253e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c @@ -9,6 +9,7 @@ */ #include "./vpx_config.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c index 57189df16ec..bebb37eda07 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c @@ -171,13 +171,13 @@ static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, get_thr(bs, qdiff, &sad_thr, &vdiff_thr); if (bs == BLOCK_16X16) { - vdiff = (vp9_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; + vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; } else if (bs == BLOCK_32X32) { - vdiff = (vp9_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; + vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; } else /* if (bs == BLOCK_64X64) */ { - vdiff = (vp9_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; + vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c index 51e147e0056..5f8ee0fcc50 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c @@ -18,7 +18,8 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int block, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data) { + find_mv_refs_sync sync, void *const data, + uint8_t *mode_context) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; @@ -138,7 +139,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, Done: - mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter]; + mode_context[ref_frame] = counter_to_context[context_counter]; // Clamp vectors for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) @@ -150,9 +151,10 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data) { + find_mv_refs_sync sync, void *const data, + uint8_t *mode_context) { find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, sync, data); + mi_row, mi_col, sync, data, mode_context); } 
static void lower_mv_precision(MV *mv, int allow_hp) { @@ -181,7 +183,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, const TileInfo *const tile, int block, int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv) { + int_mv *nearest_mv, int_mv *near_mv, + uint8_t *mode_context) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; @@ -190,7 +193,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, assert(MAX_MV_REF_CANDIDATES == 2); find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block, - mi_row, mi_col, NULL, NULL); + mi_row, mi_col, NULL, NULL, mode_context); near_mv->as_int = 0; switch (block) { @@ -223,6 +226,6 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, break; } default: - assert("Invalid block index."); + assert(0 && "Invalid block index."); } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h index f1df521468f..621dc14be65 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h @@ -212,7 +212,8 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data); + find_mv_refs_sync sync, void *const data, + uint8_t *mode_context); // check a list of motion vectors by sad score using a number rows of pixels // above and a number cols of pixels in the left to select the one with best @@ -223,7 +224,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, const TileInfo *const tile, int block, int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv); + int_mv *nearest_mv, int_mv *near_mv, + uint8_t *mode_context); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h index f710f810618..1811d76dfd3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vp9_rtcd.h" +#include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropy.h" @@ -161,7 +162,8 @@ typedef struct VP9Common { int show_existing_frame; // Flag signaling that the frame is encoded using only INTRA modes. - int intra_only; + uint8_t intra_only; + uint8_t last_intra_only; int allow_high_precision_mv; @@ -263,6 +265,7 @@ typedef struct VP9Common { int log2_tile_cols, log2_tile_rows; int byte_alignment; + int skip_loop_filter; // Private data associated with the frame buffer callbacks. void *cb_priv; @@ -307,8 +310,13 @@ static INLINE int get_free_fb(VP9_COMMON *cm) { if (frame_bufs[i].ref_count == 0) break; - assert(i < FRAME_BUFFERS); - frame_bufs[i].ref_count = 1; + if (i != FRAME_BUFFERS) { + frame_bufs[i].ref_count = 1; + } else { + // Reset i to be INVALID_IDX to indicate no free buffer found. 
+ i = INVALID_IDX; + } + unlock_buffer_pool(cm->buffer_pool); return i; } @@ -328,6 +336,18 @@ static INLINE int mi_cols_aligned_to_sb(int n_mis) { return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); } +static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { + return cm->frame_type == KEY_FRAME || cm->intra_only; +} + +static INLINE void set_partition_probs(const VP9_COMMON *const cm, + MACROBLOCKD *const xd) { + xd->partition_probs = + frame_is_intra_only(cm) ? + &vp9_kf_partition_probs[0] : + (const vp9_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob; +} + static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) { int i; @@ -335,21 +355,26 @@ static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) { xd->plane[i].dqcoeff = xd->dqcoeff; xd->above_context[i] = cm->above_context + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + + if (xd->plane[i].plane_type == PLANE_TYPE_Y) { + memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); + } else { + memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); + } + xd->fc = cm->fc; + xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode; } xd->above_seg_context = cm->above_seg_context; xd->mi_stride = cm->mi_stride; xd->error_info = &cm->error; -} -static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { - return cm->frame_type == KEY_FRAME || cm->intra_only; + set_partition_probs(cm, xd); } -static INLINE const vp9_prob* get_partition_probs(const VP9_COMMON *cm, +static INLINE const vp9_prob* get_partition_probs(const MACROBLOCKD *xd, int ctx) { - return frame_is_intra_only(cm) ? vp9_kf_partition_probs[ctx] - : cm->fc->partition_prob[ctx]; + return xd->partition_probs[ctx]; } static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c index 983a4744dd6..d26a6eb5c88 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c @@ -16,12 +16,10 @@ #include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" -#if CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_postproc.h" #include "vp9/common/vp9_systemdependent.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c index 564a3eb0ce3..d83f3c1a2f5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c @@ -266,8 +266,8 @@ int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { int vp9_get_qindex(const struct segmentation *seg, int segment_id, int base_qindex) { - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { - const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q); + if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { + const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ? 
data : base_qindex + data; return clamp(seg_qindex, 0, MAXQ); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c index 825d03d69b2..1e9acb8d5d3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c @@ -12,6 +12,7 @@ #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_once.h" #include "vp9/common/vp9_reconintra.h" @@ -78,6 +79,15 @@ static const uint8_t extend_modes[INTRA_MODES] = { intra_pred_highbd_sized(type, 16) \ intra_pred_highbd_sized(type, 32) +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) \ + intra_pred_highbd_sized(type, 4) \ + intra_pred_highbd_sized(type, 8) \ + intra_pred_highbd_sized(type, 16) \ + intra_pred_highbd_sized(type, 32) + #else #define intra_pred_allsizes(type) \ @@ -85,8 +95,17 @@ static const uint8_t extend_modes[INTRA_MODES] = { intra_pred_sized(type, 8) \ intra_pred_sized(type, 16) \ intra_pred_sized(type, 32) + +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) #endif // CONFIG_VP9_HIGHBITDEPTH +#define DST(x, y) dst[(x) + (y) * stride] +#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) +#define AVG2(a, b) (((a) + (b) + 1) >> 1) + #if CONFIG_VP9_HIGHBITDEPTH static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, @@ -97,18 +116,16 @@ static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, // First column. for (r = 0; r < bs - 1; ++r) { - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1); + dst[r * stride] = AVG2(left[r], left[r + 1]); } dst[(bs - 1) * stride] = left[bs - 1]; dst++; // Second column. for (r = 0; r < bs - 2; ++r) { - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 + - left[r + 2], 2); + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); } - dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] + - left[bs - 1] * 3, 2); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; @@ -130,11 +147,9 @@ static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, (void) bd; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1] * 2 + - above[r/2 + c + 2], 2) - : ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1], 1); + dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); } dst += stride; } @@ -148,9 +163,8 @@ static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, (void) bd; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) { - dst[c] = r + c + 2 < bs * 2 ? ROUND_POWER_OF_TWO(above[r + c] + - above[r + c + 1] * 2 + - above[r + c + 2], 2) + dst[c] = r + c + 2 < bs * 2 ? 
AVG3(above[r + c], above[r + c + 1], + above[r + c + 2]) : above[bs * 2 - 1]; } dst += stride; @@ -165,20 +179,19 @@ static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, // first row for (c = 0; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1); + dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 + - left[r - 1], 2); + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { @@ -193,14 +206,13 @@ static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, const uint16_t *left, int bd) { int r, c; (void) bd; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst += stride; for (r = 1; r < bs; ++r) { @@ -215,20 +227,19 @@ static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, const uint16_t *left, int bd) { int r, c; (void) bd; - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1); + dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1); + dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst++; for (c = 0; c < bs - 2; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2); + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { @@ -344,22 +355,37 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, } #endif // CONFIG_VP9_HIGHBITDEPTH +void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + (void)above; + DST(0, 0) = AVG2(I, J); + DST(2, 0) = DST(0, 1) = AVG2(J, K); + DST(2, 1) = DST(0, 2) = AVG2(K, L); + DST(1, 0) = AVG3(I, J, K); + DST(3, 0) = DST(1, 1) = AVG3(J, K, L); + DST(3, 1) = DST(1, 2) = AVG3(K, L, L); + DST(3, 2) = DST(2, 2) = + DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; +} + static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, 
const uint8_t *left) { int r, c; (void) above; // first column for (r = 0; r < bs - 1; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1); + dst[r * stride] = AVG2(left[r], left[r + 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; // second column for (r = 0; r < bs - 2; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 + - left[r + 2], 2); - dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] + - left[bs - 1] * 3, 2); + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; @@ -371,38 +397,112 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, for (c = 0; c < bs - 2; ++c) dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; } -intra_pred_allsizes(d207) +intra_pred_no_4x4(d207) + +void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + (void)left; + DST(0, 0) = AVG2(A, B); + DST(1, 0) = DST(0, 2) = AVG2(B, C); + DST(2, 0) = DST(1, 2) = AVG2(C, D); + DST(3, 0) = DST(2, 2) = AVG2(D, E); + DST(3, 2) = AVG2(E, F); // differs from vp8 + + DST(0, 1) = AVG3(A, B, C); + DST(1, 1) = DST(0, 3) = AVG3(B, C, D); + DST(2, 1) = DST(1, 3) = AVG3(C, D, E); + DST(3, 1) = DST(2, 3) = AVG3(D, E, F); + DST(3, 3) = AVG3(E, F, G); // differs from vp8 +} static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) - dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1] * 2 + - above[r/2 + c + 2], 2) - : ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1], 1); - dst += stride; + int size; + (void)left; + for (c = 0; c < bs; ++c) { + dst[c] = AVG2(above[c], above[c + 1]); + dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); + } + for (r = 2, size = bs - 2; r < bs; r += 2, --size) { + memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); + memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); + memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); + memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); } } -intra_pred_allsizes(d63) +intra_pred_no_4x4(d63) + +void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)stride; + (void)left; + DST(0, 0) = AVG3(A, B, C); + DST(1, 0) = DST(0, 1) = AVG3(B, C, D); + DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); + DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); + DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); + DST(3, 2) = DST(2, 3) = AVG3(F, G, H); + DST(3, 3) = H; // differs from vp8 +} static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { - int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) - dst[c] = r + c + 2 < bs * 2 ? 
ROUND_POWER_OF_TWO(above[r + c] + - above[r + c + 1] * 2 + - above[r + c + 2], 2) - : above[bs * 2 - 1]; + const uint8_t above_right = above[bs - 1]; + const uint8_t *const dst_row0 = dst; + int x, size; + (void)left; + + for (x = 0; x < bs - 1; ++x) { + dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); + } + dst[bs - 1] = above_right; + dst += stride; + for (x = 1, size = bs - 2; x < bs; ++x, --size) { + memcpy(dst, dst_row0 + x, size); + memset(dst + size, above_right, x + 1); dst += stride; } } -intra_pred_allsizes(d45) +intra_pred_no_4x4(d45) + +void vp9_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + DST(0, 0) = DST(1, 2) = AVG2(X, A); + DST(1, 0) = DST(2, 2) = AVG2(A, B); + DST(2, 0) = DST(3, 2) = AVG2(B, C); + DST(3, 0) = AVG2(C, D); + + DST(0, 3) = AVG3(K, J, I); + DST(0, 2) = AVG3(J, I, X); + DST(0, 1) = DST(1, 3) = AVG3(I, X, A); + DST(1, 1) = DST(2, 3) = AVG3(X, A, B); + DST(2, 1) = DST(3, 3) = AVG3(A, B, C); + DST(3, 1) = AVG3(B, C, D); +} static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { @@ -410,20 +510,19 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, // first row for (c = 0; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1); + dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 + - left[r - 1], 2); + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { @@ -432,19 +531,39 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } -intra_pred_allsizes(d117) +intra_pred_no_4x4(d117) + +void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + (void)stride; + DST(0, 3) = AVG3(J, K, L); + DST(1, 3) = DST(0, 2) = AVG3(I, J, K); + DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); + DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); + DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); + DST(3, 1) = DST(2, 0) = AVG3(C, B, A); + DST(3, 0) = AVG3(D, C, B); +} static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst[stride] = 
ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst += stride; for (r = 1; r < bs; ++r) { @@ -453,25 +572,48 @@ static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } -intra_pred_allsizes(d135) +intra_pred_no_4x4(d135) + +void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + + DST(0, 0) = DST(2, 1) = AVG2(I, X); + DST(0, 1) = DST(2, 2) = AVG2(J, I); + DST(0, 2) = DST(2, 3) = AVG2(K, J); + DST(0, 3) = AVG2(L, K); + + DST(3, 0) = AVG3(A, B, C); + DST(2, 0) = AVG3(X, A, B); + DST(1, 0) = DST(3, 1) = AVG3(I, X, A); + DST(1, 1) = DST(3, 2) = AVG3(J, I, X); + DST(1, 2) = DST(3, 3) = AVG3(K, J, I); + DST(1, 3) = AVG3(L, K, J); +} static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1); + dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1); + dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst++; for (c = 0; c < bs - 2; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2); + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { @@ -480,7 +622,7 @@ static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, dst += stride; } } -intra_pred_allsizes(d153) +intra_pred_no_4x4(d153) static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { @@ -658,7 +800,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); DECLARE_ALIGNED(16, uint16_t, left_col[32]); - DECLARE_ALIGNED(16, uint16_t, above_data[128 + 16]); + DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); uint16_t *above_row = above_data + 16; const uint16_t *const_above_row = above_row; const int bs = 4 << tx_size; @@ -781,7 +923,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, int plane) { int i; DECLARE_ALIGNED(16, uint8_t, left_col[32]); - DECLARE_ALIGNED(16, uint8_t, above_data[128 + 16]); + DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); uint8_t *above_row = above_data + 16; const uint8_t *const_above_row = above_row; const int bs = 4 << tx_size; @@ -943,6 +1085,6 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, have_top, have_left, have_right, x, y, plane); } -void vp9_init_intra_predictors() { +void vp9_init_intra_predictors(void) { once(vp9_init_intra_predictors_internal); } diff --git 
a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h index 845f3bcaac7..da5e435b132 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h @@ -18,7 +18,7 @@ extern "C" { #endif -void vp9_init_intra_predictors(); +void vp9_init_intra_predictors(void); void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, TX_SIZE tx_size, PREDICTION_MODE mode, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl index d05afa525c3..22a5efdd5b2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -60,52 +60,52 @@ add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc"; add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc"; +specialize qw/vp9_d45_predictor_4x4 neon/, "$ssse3_x86inc"; add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_4x4/; add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d135_predictor_4x4/; +specialize qw/vp9_d135_predictor_4x4 neon/; add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc"; +specialize qw/vp9_v_predictor_4x4 neon msa/, "$sse_x86inc"; add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc"; +specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc"; +specialize qw/vp9_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_4x4/, "$sse_x86inc"; +specialize qw/vp9_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_4x4/, "$sse_x86inc"; +specialize qw/vp9_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; add_proto qw/void 
vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_128_predictor_4x4/, "$sse_x86inc"; +specialize qw/vp9_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc"; +specialize qw/vp9_d45_predictor_8x8 neon/, "$ssse3_x86inc"; add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_8x8/; @@ -117,34 +117,34 @@ add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc"; +specialize qw/vp9_v_predictor_8x8 neon msa/, "$sse_x86inc"; add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc"; +specialize qw/vp9_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc"; +specialize qw/vp9_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_8x8/, "$sse_x86inc"; +specialize qw/vp9_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_8x8/, "$sse_x86inc"; +specialize qw/vp9_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_128_predictor_8x8/, "$sse_x86inc"; +specialize qw/vp9_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc"; +specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc"; add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_16x16/; @@ -156,22 +156,22 @@ add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc"; +specialize qw/vp9_v_predictor_16x16 neon msa/, "$sse2_x86inc"; add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc"; +specialize qw/vp9_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc"; +specialize qw/vp9_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_16x16/, "$sse2_x86inc"; +specialize qw/vp9_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_16x16/, "$sse2_x86inc"; +specialize qw/vp9_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_128_predictor_16x16/, "$sse2_x86inc"; +specialize qw/vp9_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc"; @@ -183,7 +183,7 @@ add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, c specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc"; +specialize qw/vp9_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d117_predictor_32x32/; @@ -192,68 +192,68 @@ add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vp9_d135_predictor_32x32/; add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d153_predictor_32x32/; +specialize qw/vp9_d153_predictor_32x32/, "$ssse3_x86inc"; add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc"; +specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc"; add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize 
qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64"; +specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64"; add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc"; +specialize qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_top_predictor_32x32/, "$sse2_x86inc"; +specialize qw/vp9_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_left_predictor_32x32/, "$sse2_x86inc"; +specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_dc_128_predictor_32x32/, "$sse2_x86inc"; +specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; # # Loopfilter # add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/; +specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2 msa/; $vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon; add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/; +specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/; $vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon; add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/; +specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2 msa/; $vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon; add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/; +specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/; $vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon; add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/; +specialize qw/vp9_lpf_vertical_4 mmx neon dspr2 msa/; add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/; +specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2 msa/; add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/; +specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/; $vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon; add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int 
count"; -specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/; +specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2 msa/; $vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon; add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/; +specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/; $vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon; add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/; +specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2 msa/; add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/; +specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2 msa/; # # post proc @@ -276,10 +276,10 @@ specialize qw/vp9_plane_add_noise sse2/; $vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; -specialize qw/vp9_filter_by_weight16x16 sse2/; +specialize qw/vp9_filter_by_weight16x16 sse2 msa/; add_proto qw/void vp9_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; -specialize qw/vp9_filter_by_weight8x8 sse2/; +specialize qw/vp9_filter_by_weight8x8 sse2 msa/; } # @@ -301,13 +301,13 @@ add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride specialize qw/vp9_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3"; add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/; +specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2 msa/; add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/; +specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/; add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/; +specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/; # # dct @@ -419,19 +419,19 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_iwht4x4_16_add/; } else { add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/; + specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/; add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/; + specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/; add_proto qw/void vp9_idct8x8_1_add/, 
"const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/; + specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/; add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/, "$ssse3_x86_64"; + specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_idct8x8_12_add sse2 neon dspr2/, "$ssse3_x86_64"; + specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/; @@ -454,10 +454,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/; add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; - specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/; + specialize qw/vp9_iht4x4_16_add sse2 neon dspr2 msa/; add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type"; - specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/; + specialize qw/vp9_iht8x8_64_add sse2 neon dspr2 msa/; add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; specialize qw/vp9_iht16x16_256_add sse2 dspr2 msa/; @@ -465,10 +465,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # dct and add add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_iwht4x4_1_add/; + specialize qw/vp9_iwht4x4_1_add msa/; add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vp9_iwht4x4_16_add/; + specialize qw/vp9_iwht4x4_16_add msa/; } } @@ -797,51 +797,6 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { # variance -add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance32x16 avx2/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance16x32/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance64x32 avx2 neon/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance32x64 neon/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance32x32 avx2 neon/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance64x64 avx2 neon/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance16x16 avx2 neon/, "$sse2_x86inc"; - -add_proto 
qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance16x8/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance8x16/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance8x8 neon/, "$sse2_x86inc"; - -add_proto qw/void vp9_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; -specialize qw/vp9_get8x8var neon/, "$sse2_x86inc"; - -add_proto qw/void vp9_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; -specialize qw/vp9_get16x16var avx2 neon/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance8x4/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance4x8/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance4x4/, "$sse2_x86inc"; - add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance64x64 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -922,26 +877,11 @@ specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; -add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse16x16 avx2/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse8x16/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse16x8/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; -specialize qw/vp9_mse8x8/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; -specialize qw/vp9_get_mb_ss/, "$sse2_x86inc"; - add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p"; -specialize qw/vp9_avg_8x8 sse2 neon/; +specialize qw/vp9_avg_8x8 sse2 neon msa/; add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p"; -specialize qw/vp9_avg_4x4 sse2/; +specialize qw/vp9_avg_4x4 sse2 msa/; add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int 
*max"; specialize qw/vp9_minmax_8x8 sse2/; @@ -969,14 +909,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_highbd_avg_8x8/; add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p"; specialize qw/vp9_highbd_avg_4x4/; - add_proto qw/unsigned int vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; + add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; specialize qw/vp9_highbd_minmax_8x8/; } # ENCODEMB INVOKE add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; -specialize qw/vp9_subtract_block neon/, "$sse2_x86inc"; +specialize qw/vp9_subtract_block neon msa/, "$sse2_x86inc"; # # Denoiser @@ -1008,7 +948,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fdct8x8_quant/; } else { add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/vp9_block_error avx2/, "$sse2_x86inc"; + specialize qw/vp9_block_error avx2 msa/, "$sse2_x86inc"; add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size"; specialize qw/vp9_block_error_fp sse2/; @@ -1083,43 +1023,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fdct32x32_rd sse2/; } else { add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; - specialize qw/vp9_fht4x4 sse2/; + specialize qw/vp9_fht4x4 sse2 msa/; add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; - specialize qw/vp9_fht8x8 sse2/; + specialize qw/vp9_fht8x8 sse2 msa/; add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; - specialize qw/vp9_fht16x16 sse2/; + specialize qw/vp9_fht16x16 sse2 msa/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; + specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc"; add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride"; specialize qw/vp9_fdct4x4_1 sse2/; add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct4x4 sse2/; + specialize qw/vp9_fdct4x4 sse2 msa/; add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct8x8_1 sse2 neon/; + specialize qw/vp9_fdct8x8_1 sse2 neon msa/; add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct8x8 sse2 neon/, "$ssse3_x86_64"; + specialize qw/vp9_fdct8x8 sse2 neon msa/, "$ssse3_x86_64"; add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct16x16_1 sse2/; + specialize qw/vp9_fdct16x16_1 sse2 msa/; add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct16x16 sse2/; + specialize qw/vp9_fdct16x16 sse2 msa/; add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct32x32_1 sse2/; + specialize qw/vp9_fdct32x32_1 sse2 msa/; add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct32x32 sse2 avx2/; + 
specialize qw/vp9_fdct32x32 sse2 avx2 msa/; add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fdct32x32_rd sse2 avx2/; + specialize qw/vp9_fdct32x32_rd sse2 avx2 msa/; } # @@ -1137,146 +1077,10 @@ add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const stru specialize qw/vp9_full_range_search/; add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; -specialize qw/vp9_temporal_filter_apply sse2/; +specialize qw/vp9_temporal_filter_apply sse2 msa/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - # variance - add_proto qw/unsigned int vp9_highbd_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance32x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance16x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance64x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance32x64/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance32x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance64x64/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance16x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance16x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance8x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance8x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance8x4/; - - add_proto qw/unsigned int vp9_highbd_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_variance4x8/; - - add_proto qw/unsigned int vp9_highbd_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize 
qw/vp9_highbd_variance4x4/; - - add_proto qw/void vp9_highbd_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vp9_highbd_get8x8var/, "$sse2_x86inc"; - - add_proto qw/void vp9_highbd_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vp9_highbd_get16x16var/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance32x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance16x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance64x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance32x64/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance32x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance64x64/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance16x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance16x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance8x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance8x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance8x4/; - - add_proto qw/unsigned int vp9_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance4x8/; - - add_proto qw/unsigned int vp9_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_variance4x4/; - - add_proto qw/void vp9_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vp9_highbd_10_get8x8var/, "$sse2_x86inc"; - - 
add_proto qw/void vp9_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vp9_highbd_10_get16x16var/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance32x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance16x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance64x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance32x64/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance32x32/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance64x64/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance16x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance16x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance8x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance8x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance8x4/; - - add_proto qw/unsigned int vp9_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance4x8/; - - add_proto qw/unsigned int vp9_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_variance4x4/; - - add_proto qw/void vp9_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vp9_highbd_12_get8x8var/, "$sse2_x86inc"; - - add_proto qw/void vp9_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vp9_highbd_12_get16x16var/, "$sse2_x86inc"; - add_proto 
qw/unsigned int vp9_highbd_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_highbd_sub_pixel_variance64x64/, "$sse2_x86inc"; @@ -1511,41 +1315,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_highbd_12_sub_pixel_avg_variance4x4/; - add_proto qw/unsigned int vp9_highbd_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_mse16x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_mse8x16/; - - add_proto qw/unsigned int vp9_highbd_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_mse16x8/; - - add_proto qw/unsigned int vp9_highbd_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_mse8x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_mse16x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_mse8x16/; - - add_proto qw/unsigned int vp9_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_mse16x8/; - - add_proto qw/unsigned int vp9_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_10_mse8x8/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_mse16x16/, "$sse2_x86inc"; - - add_proto qw/unsigned int vp9_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_mse8x16/; - - add_proto qw/unsigned int vp9_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_mse16x8/; - - add_proto qw/unsigned int vp9_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vp9_highbd_12_mse8x8/, "$sse2_x86inc"; # ENCODEMB INVOKE diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h index 65e2aa69a54..1d86b5cfe2c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h @@ -38,6 +38,18 @@ static INLINE int get_coef_context(const int16_t *neighbors, token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } 
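The get_scan() helper added in the hunk below picks the coefficient scan order for a block; as a rough usage sketch (not part of the patch), it is typically paired with the get_coef_context() shown above when walking a block's coefficients. The example_ name and its parameter list are illustrative assumptions; scan_order and its scan/neighbors fields are the ones declared in vp9_scan.h.

/* Illustrative only: look up the scan order for a block and derive the
 * entropy context for coefficient position c from its scanned neighbors. */
static INLINE int example_coef_context_at(const MACROBLOCKD *xd,
                                          TX_SIZE tx_size, PLANE_TYPE type,
                                          int block_idx,
                                          const uint8_t *token_cache, int c) {
  const scan_order *const so = get_scan(xd, tx_size, type, block_idx);
  const int raster_pos = so->scan[c];  /* position of the c-th coefficient */
  const int ctx = get_coef_context(so->neighbors, token_cache, c);
  (void)raster_pos;                    /* a real caller would index dqcoeff */
  return ctx;
}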
+static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
+                                         PLANE_TYPE type, int block_idx) {
+  const MODE_INFO *const mi = xd->mi[0];
+
+  if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
+    return &vp9_default_scan_orders[tx_size];
+  } else {
+    const PREDICTION_MODE mode = get_y_mode(mi, block_idx);
+    return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]];
+  }
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c
index 910200ecc9c..471e238ccff 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c
@@ -25,12 +25,6 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = {
 // the coding mechanism is still subject to change so these provide a
 // convenient single point of change.
 
-int vp9_segfeature_active(const struct segmentation *seg, int segment_id,
-                          SEG_LVL_FEATURES feature_id) {
-  return seg->enabled &&
-         (seg->feature_mask[segment_id] & (1 << feature_id));
-}
-
 void vp9_clearall_segfeatures(struct segmentation *seg) {
   vp9_zero(seg->feature_data);
   vp9_zero(seg->feature_mask);
@@ -60,12 +54,6 @@ void vp9_set_segdata(struct segmentation *seg, int segment_id,
   seg->feature_data[segment_id][feature_id] = seg_data;
 }
 
-int vp9_get_segdata(const struct segmentation *seg, int segment_id,
-                    SEG_LVL_FEATURES feature_id) {
-  return seg->feature_data[segment_id][feature_id];
-}
-
-
 const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
   2,  4,  6,  8, 10, 12,
   0, -1, -2, -3, -4, -5, -6, -7
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h
index ff2d66a3658..95c9918303d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h
@@ -49,9 +49,12 @@ struct segmentation {
   unsigned int feature_mask[MAX_SEGMENTS];
 };
 
-int vp9_segfeature_active(const struct segmentation *seg,
-                          int segment_id,
-                          SEG_LVL_FEATURES feature_id);
+static INLINE int segfeature_active(const struct segmentation *seg,
+                                    int segment_id,
+                                    SEG_LVL_FEATURES feature_id) {
+  return seg->enabled &&
+         (seg->feature_mask[segment_id] & (1 << feature_id));
+}
 
 void vp9_clearall_segfeatures(struct segmentation *seg);
 
@@ -68,9 +71,10 @@ void vp9_set_segdata(struct segmentation *seg,
                      SEG_LVL_FEATURES feature_id,
                      int seg_data);
 
-int vp9_get_segdata(const struct segmentation *seg,
-                    int segment_id,
-                    SEG_LVL_FEATURES feature_id);
+static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
+                              SEG_LVL_FEATURES feature_id) {
+  return seg->feature_data[segment_id][feature_id];
+}
 
 extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
 
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h
index 161c381ad08..fc77762def5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h
@@ -11,15 +11,14 @@
 #ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
 #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
 
+#include "vpx_ports/msvc.h"
+
 #ifdef _MSC_VER
 # include <math.h>  // the ceil() definition must precede
intrin.h # if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) # include <intrin.h> -# define USE_MSC_INTRIN +# define USE_MSC_INTRINSICS # endif -#if _MSC_VER < 1900 -# define snprintf _snprintf -#endif #endif #ifdef __cplusplus @@ -50,7 +49,7 @@ static INLINE int round(double x) { static INLINE int get_msb(unsigned int n) { return 31 ^ __builtin_clz(n); } -#elif defined(USE_MSC_INTRIN) +#elif defined(USE_MSC_INTRINSICS) #pragma intrinsic(_BitScanReverse) static INLINE int get_msb(unsigned int n) { @@ -58,7 +57,7 @@ static INLINE int get_msb(unsigned int n) { _BitScanReverse(&first_set_bit, n); return first_set_bit; } -#undef USE_MSC_INTRIN +#undef USE_MSC_INTRINSICS #else // Returns (int)floor(log2(n)). n must be > 0. static INLINE int get_msb(unsigned int n) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h new file mode 100644 index 00000000000..de2df47e5e5 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VP9_COMMON_X86_CONVOLVE_H_ +#define VP9_COMMON_X86_CONVOLVE_H_ + +#include <assert.h> + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +typedef void filter8_1dfunction ( + const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter +); + +#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + if (filter[0] || filter[1] || filter[2]) { \ + while (w >= 16) { \ + vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } else { \ + while (w >= 16) { \ + vp9_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + } \ + if (w) { \ + vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h); \ + } \ +} + +#define FUN_CONV_2D(avg, 
opt) \ +void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ + filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 7); \ + vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } else { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ + vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 1); \ + vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } \ + } else { \ + vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ + } \ +} + +#if CONFIG_VP9_HIGHBITDEPTH + +typedef void highbd_filter8_1dfunction ( + const uint16_t *src_ptr, + const ptrdiff_t src_pitch, + uint16_t *output_ptr, + ptrdiff_t out_pitch, + unsigned int output_height, + const int16_t *filter, + int bd +); + +#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ + ptrdiff_t src_stride, \ + uint8_t *dst8, \ + ptrdiff_t dst_stride, \ + const int16_t *filter_x, \ + int x_step_q4, \ + const int16_t *filter_y, \ + int y_step_q4, \ + int w, int h, int bd) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + if (filter[0] || filter[1] || filter[2]) { \ + while (w >= 16) { \ + vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } else { \ + while (w >= 16) { \ + vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + } \ + if (w) { \ + vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h, bd); \ + } \ +} + +#define HIGH_FUN_CONV_2D(avg, opt) \ +void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, 
int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h, int bd) { \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ + filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 7, bd); \ + vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ + 64, dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } else { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ + vp9_highbd_convolve8_horiz_##opt(src, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 1, bd); \ + vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ + dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } \ + } else { \ + vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, \ + h, bd); \ + } \ +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#endif // VP9_COMMON_X86_CONVOLVE_H_ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c index 963023c53b1..fd55fb8c664 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c @@ -8,421 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include <assert.h> - -#include "./vpx_config.h" #include "./vp9_rtcd.h" -#include "vpx_ports/mem.h" - -typedef void filter8_1dfunction ( - const unsigned char *src_ptr, - const ptrdiff_t src_pitch, - unsigned char *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const short *filter -); - -#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - if (filter[0] || filter[1] || filter[2]) { \ - while (w >= 16) { \ - vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vp9_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 4; \ - dst 
+= 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h); \ - } \ -} - -#define FUN_CONV_2D(avg, opt) \ -void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ - filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ - DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 71]); \ - vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 7); \ - vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } else { \ - DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 65]); \ - vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 1); \ - vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } \ - } else { \ - vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ - } \ -} - -#if CONFIG_VP9_HIGHBITDEPTH - -typedef void highbd_filter8_1dfunction ( - const uint16_t *src_ptr, - const ptrdiff_t src_pitch, - uint16_t *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const int16_t *filter, - int bd -); - -#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ - ptrdiff_t src_stride, \ - uint8_t *dst8, \ - ptrdiff_t dst_stride, \ - const int16_t *filter_x, \ - int x_step_q4, \ - const int16_t *filter_y, \ - int y_step_q4, \ - int w, int h, int bd) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ - if (filter[0] || filter[1] || filter[2]) { \ - while (w >= 16) { \ - vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ - filter_x, x_step_q4, filter_y, 
y_step_q4, \ - w, h, bd); \ - } \ -} - -#define HIGH_FUN_CONV_2D(avg, opt) \ -void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h, int bd) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ - filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ - vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 7, bd); \ - vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ - 64, dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ - vp9_highbd_convolve8_horiz_##opt(src, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 1, bd); \ - vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ - dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } \ - } else { \ - vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, \ - h, bd); \ - } \ -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#if HAVE_AVX2 && HAVE_SSSE3 -filter8_1dfunction vp9_filter_block1d16_v8_avx2; -filter8_1dfunction vp9_filter_block1d16_h8_avx2; -filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -#if ARCH_X86_64 -filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; -#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 -#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 -#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 -filter8_1dfunction vp9_filter_block1d8_v8_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_ssse3; -#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 -#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 -#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 -#endif // ARCH_X86_64 / ARCH_X86 -filter8_1dfunction vp9_filter_block1d16_v2_ssse3; -filter8_1dfunction vp9_filter_block1d16_h2_ssse3; -filter8_1dfunction vp9_filter_block1d8_v2_ssse3; -filter8_1dfunction vp9_filter_block1d8_h2_ssse3; -filter8_1dfunction vp9_filter_block1d4_v2_ssse3; -filter8_1dfunction vp9_filter_block1d4_h2_ssse3; -#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 -#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 -#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 -#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 -#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 -#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 -#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 -// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_vert_avx2(const uint8_t *src, 
ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); - -// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, avx2); -#endif // HAVE_AX2 && HAVE_SSSE3 -#if HAVE_SSSE3 -#if ARCH_X86_64 -filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; -#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 -#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 -#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 -#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 -#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 -filter8_1dfunction vp9_filter_block1d16_v8_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_ssse3; -#endif // ARCH_X86_64 / ARCH_X86 -filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; - -filter8_1dfunction vp9_filter_block1d16_v2_ssse3; -filter8_1dfunction vp9_filter_block1d16_h2_ssse3; -filter8_1dfunction vp9_filter_block1d8_v2_ssse3; -filter8_1dfunction vp9_filter_block1d8_h2_ssse3; -filter8_1dfunction vp9_filter_block1d4_v2_ssse3; -filter8_1dfunction vp9_filter_block1d4_h2_ssse3; -filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3; -filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; - -// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, 
x_step_q4, filter_x, h, src, , ssse3); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); -FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); -FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, - ssse3); - -// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, ssse3); -FUN_CONV_2D(avg_ , ssse3); -#endif // HAVE_SSSE3 +#include "./vpx_config.h" +#include "vp9/common/x86/convolve.h" #if HAVE_SSE2 filter8_1dfunction vp9_filter_block1d16_v8_sse2; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c index 1637f0e545a..b40669c6375 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c @@ -11,6 +11,7 @@ #include <emmintrin.h> // SSE2 #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_ports/emmintrin_compat.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index 0385c7955c9..ce010df3b8a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/x86/vp9_idct_intrin_sse2.h" #include "vp9/common/vp9_idct.h" @@ -3203,34 +3205,20 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, // idct constants for each stage const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64); - const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64); - const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64); - const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64); const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64); - const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64); - const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64); - const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64); const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); @@ -3240,8 +3228,6 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); @@ -3261,47 +3247,29 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, stp2_30, stp2_31; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; - // Load input data. 
- LOAD_DQCOEFF(in[0], input); - LOAD_DQCOEFF(in[8], input); - LOAD_DQCOEFF(in[16], input); - LOAD_DQCOEFF(in[24], input); - LOAD_DQCOEFF(in[1], input); - LOAD_DQCOEFF(in[9], input); - LOAD_DQCOEFF(in[17], input); - LOAD_DQCOEFF(in[25], input); - LOAD_DQCOEFF(in[2], input); - LOAD_DQCOEFF(in[10], input); - LOAD_DQCOEFF(in[18], input); - LOAD_DQCOEFF(in[26], input); - LOAD_DQCOEFF(in[3], input); - LOAD_DQCOEFF(in[11], input); - LOAD_DQCOEFF(in[19], input); - LOAD_DQCOEFF(in[27], input); - - LOAD_DQCOEFF(in[4], input); - LOAD_DQCOEFF(in[12], input); - LOAD_DQCOEFF(in[20], input); - LOAD_DQCOEFF(in[28], input); - LOAD_DQCOEFF(in[5], input); - LOAD_DQCOEFF(in[13], input); - LOAD_DQCOEFF(in[21], input); - LOAD_DQCOEFF(in[29], input); - LOAD_DQCOEFF(in[6], input); - LOAD_DQCOEFF(in[14], input); - LOAD_DQCOEFF(in[22], input); - LOAD_DQCOEFF(in[30], input); - LOAD_DQCOEFF(in[7], input); - LOAD_DQCOEFF(in[15], input); - LOAD_DQCOEFF(in[23], input); - LOAD_DQCOEFF(in[31], input); + + // Load input data. Only need to load the top left 8x8 block. + in[0] = _mm_load_si128((const __m128i *)input); + in[1] = _mm_load_si128((const __m128i *)(input + 32)); + in[2] = _mm_load_si128((const __m128i *)(input + 64)); + in[3] = _mm_load_si128((const __m128i *)(input + 96)); + in[4] = _mm_load_si128((const __m128i *)(input + 128)); + in[5] = _mm_load_si128((const __m128i *)(input + 160)); + in[6] = _mm_load_si128((const __m128i *)(input + 192)); + in[7] = _mm_load_si128((const __m128i *)(input + 224)); + + for (i = 8; i < 32; ++i) { + in[i] = _mm_setzero_si128(); + } array_transpose_8x8(in, in); + // TODO(hkuang): Following transposes are unnecessary. But remove them will + // lead to performance drop on some devices. array_transpose_8x8(in + 8, in + 8); array_transpose_8x8(in + 16, in + 16); array_transpose_8x8(in + 24, in + 24); - IDCT32 + IDCT32_34 // 1_D: Store 32 intermediate results for each 8x32 block. 
col[0] = _mm_add_epi16(stp1_0, stp1_31); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c index 0cb0912ad62..770a65f4ca1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c @@ -9,6 +9,8 @@ */ #include <immintrin.h> /* AVX2 */ + +#include "./vp9_rtcd.h" #include "vpx_ports/mem.h" static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c index 8723d32836d..e321dbebe39 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c @@ -9,6 +9,8 @@ */ #include <emmintrin.h> // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_ports/emmintrin_compat.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index 3bc7d3918b7..cee8d1e76ac 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -8,7 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ +// Due to a header conflict between math.h and intrinsics includes with ceil() +// in certain configurations under vs9 this include needs to precede +// immintrin.h. +#include "./vp9_rtcd.h" + #include <immintrin.h> + +#include "vp9/common/x86/convolve.h" #include "vpx_ports/mem.h" // filters for 16_h8 and 16_v8 @@ -53,23 +60,23 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { # define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) #endif // __clang__ -void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +static void vp9_filter_block1d16_h8_avx2(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i filtersReg; __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; __m256i firstFilters, secondFilters, thirdFilters, forthFilters; __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3; __m256i srcReg32b1, srcReg32b2, filtersReg32; unsigned int i; - unsigned int src_stride, dst_stride; + ptrdiff_t src_stride, dst_stride; // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -104,9 +111,9 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, for (i = output_height; i > 1; i-=2) { // load the 2 strides of source srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr-3))); + _mm_loadu_si128((const __m128i *)(src_ptr - 3))); srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm_loadu_si128((__m128i *) + _mm_loadu_si128((const __m128i *) (src_ptr+src_pixels_per_line-3)), 1); // filter the source buffer @@ -135,9 +142,9 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // reading 2 strides of the next 16 bytes // (part of it was being read by earlier read) srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+5))); + _mm_loadu_si128((const __m128i *)(src_ptr + 5))); srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm_loadu_si128((__m128i *) + _mm_loadu_si128((const __m128i *) (src_ptr+src_pixels_per_line+5)), 1); // add and saturate the results together @@ -202,7 +209,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1; __m128i srcRegFilt2, srcRegFilt3; - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, @@ -237,7 +244,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // reading the next 16 bytes // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); + srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); // add and saturate the results together srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, @@ -297,12 +304,12 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, } } -void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { +static void vp9_filter_block1d16_v8_avx2(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i filtersReg; __m256i addFilterReg64; __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5; @@ -310,11 +317,11 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, __m256i srcReg32b11, srcReg32b12, filtersReg32; __m256i firstFilters, secondFilters, thirdFilters, forthFilters; unsigned int i; - unsigned int src_stride, dst_stride; + ptrdiff_t src_stride, dst_stride; // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the // same data in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -344,19 +351,19 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, // load 16 bytes 7 times in stride of src_pitch srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr))); + _mm_loadu_si128((const __m128i *)(src_ptr))); srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch))); srcReg32b3 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2))); srcReg32b4 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3))); srcReg32b5 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4))); srcReg32b6 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5))); srcReg32b7 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6))); // have each consecutive loads on the same 256 register srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, @@ -393,11 +400,11 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, // load the last 2 loads of 16 bytes and have every two // consecutive loads in the same 256 bit register srcReg32b8 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7))); srcReg32b7 = _mm256_inserti128_si256(srcReg32b7, _mm256_castsi256_si128(srcReg32b8), 1); srcReg32b9 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*8))); + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 8))); srcReg32b8 = _mm256_inserti128_si256(srcReg32b8, _mm256_castsi256_si128(srcReg32b9), 1); @@ -476,7 +483,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5; __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8; // load the last 16 bytes - srcRegFilt8 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)); + srcRegFilt8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); // merge the last 2 results together srcRegFilt4 = _mm_unpacklo_epi8( @@ -542,3 +549,54 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); } } + +#if HAVE_AVX2 && HAVE_SSSE3 +filter8_1dfunction vp9_filter_block1d4_v8_ssse3; +#if ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; +#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 +#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 +#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 +#else // ARCH_X86 +filter8_1dfunction vp9_filter_block1d8_v8_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_ssse3; +#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 +#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 +#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 +#endif // ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d16_v2_ssse3; +filter8_1dfunction vp9_filter_block1d16_h2_ssse3; 
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3; +filter8_1dfunction vp9_filter_block1d8_h2_ssse3; +filter8_1dfunction vp9_filter_block1d4_v2_ssse3; +filter8_1dfunction vp9_filter_block1d4_h2_ssse3; +#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 +#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 +#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 +#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 +#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 +#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 +#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 +// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); + +// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, avx2); +#endif // HAVE_AX2 && HAVE_SSSE3 diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c index 71dbb402dd4..5fd2857e140 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c @@ -8,7 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ +// Due to a header conflict between math.h and intrinsics includes with ceil() +// in certain configurations under vs9 this include needs to precede +// tmmintrin.h. +#include "./vp9_rtcd.h" + #include <tmmintrin.h> + +#include "vp9/common/x86/convolve.h" #include "vpx_ports/mem.h" #include "vpx_ports/emmintrin_compat.h" @@ -38,12 +45,17 @@ DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = { 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 }; -void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +// These are reused by the avx2 intrinsics. 
+filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; + +void vp9_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i firstFilters, secondFilters, shuffle1, shuffle2; __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; __m128i addFilterReg64, filtersReg, srcReg, minReg; @@ -51,7 +63,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 =_mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -72,7 +84,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8); for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1); @@ -109,12 +121,12 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +void vp9_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg; __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; @@ -123,7 +135,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -147,7 +159,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, filt4Reg = _mm_load_si128((__m128i const *)filt4_global); for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg); @@ -189,12 +201,12 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - int16_t *filter) { +static void vp9_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i addFilterReg64, filtersReg, srcReg1, srcReg2; __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; __m128i firstFilters, secondFilters, thirdFilters, forthFilters; @@ -203,7 +215,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -227,7 +239,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, filt4Reg = _mm_load_si128((__m128i const *)filt4_global); for (i = 0; i < output_height; i++) { - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3)); + srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg); @@ -254,7 +266,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, // reading the next 16 bytes. // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5)); + srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); // add and saturate the results together srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, @@ -306,12 +318,12 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { +void vp9_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i addFilterReg64, filtersReg, minReg; __m128i firstFilters, secondFilters, thirdFilters, forthFilters; __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5; @@ -321,7 +333,7 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -336,17 +348,17 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); // load the first 7 rows of 8 bytes - srcReg1 = _mm_loadl_epi64((__m128i *)&src_ptr[0]); - srcReg2 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch)[0]); - srcReg3 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 2)[0]); - srcReg4 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 3)[0]); - srcReg5 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 4)[0]); - srcReg6 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 5)[0]); - srcReg7 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 6)[0]); + srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr); + srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); + srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); + srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); + srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); + srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); + srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); for (i = 0; i < output_height; i++) { // load the last 8 bytes - srcReg8 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 7)[0]); + srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); // merge the result together srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2); @@ -394,12 +406,12 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr, } } -void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - int16_t *filter) { +static void vp9_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt3; __m128i firstFilters, secondFilters, thirdFilters, forthFilters; __m128i srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8; @@ -409,7 +421,7 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((__m128i *)filter); + filtersReg = _mm_loadu_si128((const __m128i *)filter); // converting the 16 bit (short) to 8 bit (byte) and have the same data // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); @@ -424,17 +436,17 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); // load the first 7 rows of 16 bytes - srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr)); - srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch)); - srcReg3 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 2)); - srcReg4 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 3)); - srcReg5 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 4)); - srcReg6 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 5)); - srcReg7 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 6)); + srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr)); + srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)); + srcReg3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)); + srcReg4 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)); + srcReg5 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)); + srcReg6 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)); + srcReg7 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)); for (i = 0; i < output_height; i++) { // load the last 16 bytes - srcReg8 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 7)); + srcReg8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); // merge the result together srcRegFilt5 = _mm_unpacklo_epi8(srcReg1, srcReg2); @@ -508,3 +520,82 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr, output_ptr+=out_pitch; } } + +#if ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vp9_filter_block1d4_v8_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; +#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 +#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 +#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 +#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 +#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 +#else // ARCH_X86 +filter8_1dfunction vp9_filter_block1d16_v8_ssse3; +filter8_1dfunction vp9_filter_block1d16_h8_ssse3; +filter8_1dfunction vp9_filter_block1d8_v8_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_ssse3; +filter8_1dfunction vp9_filter_block1d4_v8_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_ssse3; +#endif // ARCH_X86_64 +filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; +filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; + +filter8_1dfunction vp9_filter_block1d16_v2_ssse3; +filter8_1dfunction vp9_filter_block1d16_h2_ssse3; +filter8_1dfunction vp9_filter_block1d8_v2_ssse3; +filter8_1dfunction vp9_filter_block1d8_h2_ssse3; +filter8_1dfunction vp9_filter_block1d4_v2_ssse3; +filter8_1dfunction vp9_filter_block1d4_h2_ssse3; +filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; +filter8_1dfunction 
vp9_filter_block1d4_v2_avg_ssse3; +filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; + +// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); +FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); +FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, + ssse3); + +// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, ssse3); +FUN_CONV_2D(avg_ , ssse3); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c index eb9b7971074..9311d8dad7d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c @@ -15,6 +15,7 @@ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_ports/mem_ops.h" #include "vpx_scale/vpx_scale.h" @@ -289,9 +290,7 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, } struct intra_args { - VP9_COMMON *cm; MACROBLOCKD *xd; - FRAME_COUNTS *counts; vp9_reader *r; int seg_id; }; @@ -300,7 +299,6 @@ static void predict_and_reconstruct_intra_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct intra_args *const args = (struct intra_args *)arg; - VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; MODE_INFO *const mi = xd->mi[0]; @@ -317,7 +315,7 @@ static void predict_and_reconstruct_intra_block(int plane, int block, x, y, plane); if (!mi->mbmi.skip) { - const int eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block, + const int eob = vp9_decode_block_tokens(xd, plane, block, plane_bsize, x, y, tx_size, args->r, args->seg_id); inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride, @@ -326,10 +324,8 @@ static void predict_and_reconstruct_intra_block(int plane, int block, } struct inter_args { - VP9_COMMON *cm; MACROBLOCKD *xd; vp9_reader *r; - FRAME_COUNTS *counts; int *eobtotal; int seg_id; }; @@ -338,12 +334,11 @@ static void reconstruct_inter_block(int plane, int block, BLOCK_SIZE 
plane_bsize, TX_SIZE tx_size, void *arg) { struct inter_args *args = (struct inter_args *)arg; - VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; int x, y, eob; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); - eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block, plane_bsize, + eob = vp9_decode_block_tokens(xd, plane, block, plane_bsize, x, y, tx_size, args->r, args->seg_id); inverse_transform_block(xd, plane, block, tx_size, &pd->dst.buf[4 * y * pd->dst.stride + 4 * x], @@ -351,6 +346,357 @@ static void reconstruct_inter_block(int plane, int block, *args->eobtotal += eob; } +static void build_mc_border(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, int w, int h) { + // Get a pointer to the start of the real data for this row. + const uint8_t *ref_row = src - x - y * src_stride; + + if (y >= h) + ref_row += (h - 1) * src_stride; + else if (y > 0) + ref_row += y * src_stride; + + do { + int right = 0, copy; + int left = x < 0 ? -x : 0; + + if (left > b_w) + left = b_w; + + if (x + b_w > w) + right = x + b_w - w; + + if (right > b_w) + right = b_w; + + copy = b_w - left - right; + + if (left) + memset(dst, ref_row[0], left); + + if (copy) + memcpy(dst + left, ref_row + x + left, copy); + + if (right) + memset(dst + left + copy, ref_row[w - 1], right); + + dst += dst_stride; + ++y; + + if (y > 0 && y < h) + ref_row += src_stride; + } while (--b_h); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void high_build_mc_border(const uint8_t *src8, int src_stride, + uint16_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, + int w, int h) { + // Get a pointer to the start of the real data for this row. + const uint16_t *src = CONVERT_TO_SHORTPTR(src8); + const uint16_t *ref_row = src - x - y * src_stride; + + if (y >= h) + ref_row += (h - 1) * src_stride; + else if (y > 0) + ref_row += y * src_stride; + + do { + int right = 0, copy; + int left = x < 0 ? 
-x : 0; + + if (left > b_w) + left = b_w; + + if (x + b_w > w) + right = x + b_w - w; + + if (right > b_w) + right = b_w; + + copy = b_w - left - right; + + if (left) + vpx_memset16(dst, ref_row[0], left); + + if (copy) + memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t)); + + if (right) + vpx_memset16(dst + left + copy, ref_row[w - 1], right); + + dst += dst_stride; + ++y; + + if (y > 0 && y < h) + ref_row += src_stride; + } while (--b_h); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_VP9_HIGHBITDEPTH +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + MACROBLOCKD *xd, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset; + } else { + build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; + } + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +} +#else +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = mc_buf + border_offset; + + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, + int plane, int bw, int bh, int x, + int y, int w, int h, int mi_x, int mi_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + struct buf_2d *pre_buf, + struct buf_2d *dst_buf, const MV* mv, + RefCntBuffer *ref_frame_buf, + int is_scaled, int ref) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + MV32 scaled_mv; + int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, + buf_stride, subpel_x, subpel_y; + uint8_t *ref_frame, *buf_ptr; + + // Get reference frame pointer, width and height. + if (plane == 0) { + frame_width = ref_frame_buf->buf.y_crop_width; + frame_height = ref_frame_buf->buf.y_crop_height; + ref_frame = ref_frame_buf->buf.y_buffer; + } else { + frame_width = ref_frame_buf->buf.uv_crop_width; + frame_height = ref_frame_buf->buf.uv_crop_height; + ref_frame = plane == 1 ? 
ref_frame_buf->buf.u_buffer + : ref_frame_buf->buf.v_buffer; + } + + if (is_scaled) { + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + // Co-ordinate of containing block to pixel precision. + int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = (x_start + x) << SUBPEL_BITS; + y0_16 = (y_start + y) << SUBPEL_BITS; + + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf->scale_value_x(x0_16, sf); + y0_16 = sf->scale_value_y(y0_16, sf); + + // Map the top left corner of the block into the reference frame. + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. + scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = x0 << SUBPEL_BITS; + y0_16 = y0 << SUBPEL_BITS; + + scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); + scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); + xs = ys = 16; + } + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + + // Calculate the top left corner of the best matching block in the + // reference frame. + x0 += scaled_mv.col >> SUBPEL_BITS; + y0 += scaled_mv.row >> SUBPEL_BITS; + x0_16 += scaled_mv.col; + y0_16 += scaled_mv.row; + + // Get reference block pointer. + buf_ptr = ref_frame + y0 * pre_buf->stride + x0; + buf_stride = pre_buf->stride; + + // Do border extension if there is motion or the + // width/height is not a multiple of 8 pixels. + if (is_scaled || scaled_mv.col || scaled_mv.row || + (frame_width & 0x7) || (frame_height & 0x7)) { + int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; + + // Get reference block bottom right horizontal coordinate. + int x1 = (x0_16 + (w - 1) * xs) >> SUBPEL_BITS; + int x_pad = 0, y_pad = 0; + + if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { + x0 -= VP9_INTERP_EXTEND - 1; + x1 += VP9_INTERP_EXTEND; + x_pad = 1; + } + + if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { + y0 -= VP9_INTERP_EXTEND - 1; + y1 += VP9_INTERP_EXTEND; + y_pad = 1; + } + + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (pbi->frame_parallel_decode) + vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, + MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + + // Skip border extension if block is inside the frame. + if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || + y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { + // Extend the border. 
+ const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; + const int b_w = x1 - x0 + 1; + const int b_h = y1 - y0 + 1; + const int border_offset = y_pad * 3 * b_w + x_pad * 3; + + extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, + frame_width, frame_height, border_offset, + dst, dst_buf->stride, + subpel_x, subpel_y, + kernel, sf, +#if CONFIG_VP9_HIGHBITDEPTH + xd, +#endif + w, h, ref, xs, ys); + return; + } + } else { + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (pbi->frame_parallel_decode) { + const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; + vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, + MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + } + } +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, + MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + const MODE_INFO *mi = xd->mi[0]; + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); + const BLOCK_SIZE sb_type = mi->mbmi.sb_type; + const int is_compound = has_second_ref(&mi->mbmi); + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + int ref; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + const int idx = xd->block_refs[ref]->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + const int is_scaled = vp9_is_scaled(sf); + + if (sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) { + for (x = 0; x < num_4x4_w; ++x) { + const MV mv = average_split_mvs(pd, mi, ref, i++); + dec_build_inter_predictors(pbi, xd, plane, bw, bh, + 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, + ref_frame_buf, is_scaled, ref); + } + } + } else { + const MV mv = mi->mbmi.mv[ref].as_mv; + dec_build_inter_predictors(pbi, xd, plane, bw, bh, + 0, 0, bw, bh, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); + } + } + } +} + static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col) { @@ -380,14 +726,22 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, } static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r, 
BLOCK_SIZE bsize) { VP9_COMMON *const cm = &pbi->common; const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); - vp9_read_mode_info(pbi, xd, counts, tile, mi_row, mi_col, r); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + vpx_internal_error(xd->error_info, + VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); + } + + vp9_read_mode_info(pbi, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; @@ -397,17 +751,17 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, } if (!is_inter_block(mbmi)) { - struct intra_args arg = {cm, xd, counts, r, mbmi->segment_id}; + struct intra_args arg = {xd, r, mbmi->segment_id}; vp9_foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block, &arg); } else { // Prediction - vp9_dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize); + dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize); // Reconstruction if (!mbmi->skip) { int eobtotal = 0; - struct inter_args arg = {cm, xd, r, counts, &eobtotal, mbmi->segment_id}; + struct inter_args arg = {xd, r, &eobtotal, mbmi->segment_id}; vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); if (!less8x8 && eobtotal == 0) mbmi->skip = 1; // skip loopfilter @@ -417,14 +771,12 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, xd->corrupted |= vp9_reader_has_error(r); } -static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int hbs, - int mi_row, int mi_col, BLOCK_SIZE bsize, - vp9_reader *r) { +static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, vp9_reader *r, + int has_rows, int has_cols) { const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - const vp9_prob *const probs = get_partition_probs(cm, ctx); - const int has_rows = (mi_row + hbs) < cm->mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_cols; + const vp9_prob *const probs = get_partition_probs(xd, ctx); + FRAME_COUNTS *counts = xd->counts; PARTITION_TYPE p; if (has_rows && has_cols) @@ -436,56 +788,50 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, else p = PARTITION_SPLIT; - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->partition[ctx][p]; return p; } static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader* r, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &pbi->common; const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; - BLOCK_SIZE subsize, uv_subsize; + BLOCK_SIZE subsize; + const int has_rows = (mi_row + hbs) < cm->mi_rows; + const int has_cols = (mi_col + hbs) < cm->mi_cols; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - partition = read_partition(cm, xd, counts, hbs, mi_row, mi_col, bsize, r); + partition = read_partition(xd, mi_row, mi_col, bsize, r, has_rows, has_cols); subsize = get_subsize(bsize, partition); - uv_subsize = ss_size_lookup[subsize][cm->subsampling_x][cm->subsampling_y]; - if (subsize >= BLOCK_8X8 && uv_subsize == BLOCK_INVALID) - vpx_internal_error(xd->error_info, - VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); - if (subsize < BLOCK_8X8) { - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); + if (bsize == BLOCK_8X8) { + 
decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); } else { switch (partition) { case PARTITION_NONE: - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); break; case PARTITION_HORZ: - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); - if (mi_row + hbs < cm->mi_rows) - decode_block(pbi, xd, counts, tile, mi_row + hbs, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); + if (has_rows) + decode_block(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize); break; case PARTITION_VERT: - decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); - if (mi_col + hbs < cm->mi_cols) - decode_block(pbi, xd, counts, tile, mi_row, mi_col + hbs, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); + if (has_cols) + decode_block(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize); break; case PARTITION_SPLIT: - decode_partition(pbi, xd, counts, tile, mi_row, mi_col, r, subsize); - decode_partition(pbi, xd, counts, tile, mi_row, mi_col + hbs, r, - subsize); - decode_partition(pbi, xd, counts, tile, mi_row + hbs, mi_col, r, - subsize); - decode_partition(pbi, xd, counts, tile, mi_row + hbs, mi_col + hbs, r, - subsize); + decode_partition(pbi, xd, tile, mi_row, mi_col, r, subsize); + decode_partition(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_partition(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_partition(pbi, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); break; default: assert(0 && "Invalid partition type"); @@ -673,12 +1019,6 @@ static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) { : literal_to_filter[vp9_rb_read_literal(rb, 2)]; } -void vp9_read_frame_size(struct vp9_read_bit_buffer *rb, - int *width, int *height) { - *width = vp9_rb_read_literal(rb, 16) + 1; - *height = vp9_rb_read_literal(rb, 16) + 1; -} - static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { cm->display_width = cm->width; cm->display_height = cm->height; @@ -698,7 +1038,8 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { #if CONFIG_SIZE_LIMIT if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Width and height beyond allowed size."); + "Dimensions of %dx%d beyond allowed size of %dx%d.", + width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); #endif if (cm->width != width || cm->height != height) { const int new_mi_rows = @@ -929,7 +1270,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, int mi_row, mi_col; TileData *tile_data = NULL; - if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) { + if (cm->lf.filter_level && !cm->skip_loop_filter && + pbi->lf_worker.data1 == NULL) { CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_memalign(32, sizeof(LFWorkerData))); pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; @@ -939,7 +1281,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, } } - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; // Be sure to sync as we might be resuming after a failed frame decode. winterface->sync(&pbi->lf_worker); @@ -979,6 +1321,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, tile_data->cm = cm; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; + tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
+ NULL : &cm->counts; vp9_tile_init(&tile, tile_data->cm, tile_row, tile_col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, @@ -1001,7 +1345,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, &cm->counts, &tile, mi_row, + decode_partition(pbi, &tile_data->xd, &tile, mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } pbi->mb.corrupted |= tile_data->xd.corrupted; @@ -1010,7 +1354,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, "Failed to decode tile data"); } // Loopfilter one row. - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { const int lf_start = mi_row - MI_BLOCK_SIZE; LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; @@ -1039,7 +1383,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, } // Loopfilter remaining rows in the frame. - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; winterface->sync(&pbi->lf_worker); lf_data->start = lf_data->stop; @@ -1074,7 +1418,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->pbi, &tile_data->xd, &tile_data->counts, + decode_partition(tile_data->pbi, &tile_data->xd, tile, mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } @@ -1112,8 +1456,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, if (pbi->num_tile_workers == 0) { const int num_threads = pbi->max_threads & ~1; int i; - // TODO(jzern): Allocate one less worker, as in the current code we only - // use num_threads - 1 workers. CHECK_MEM_ERROR(cm, pbi->tile_workers, vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); // Ensure tile data offsets will be properly aligned. This may fail on @@ -1198,6 +1540,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, tile_data->pbi = pbi; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; + tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
+ 0 : &tile_data->counts; vp9_tile_init(tile, cm, 0, buf->col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, @@ -1251,20 +1595,6 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) { - return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && - vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && - vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; -} - -BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) { - int profile = vp9_rb_read_bit(rb); - profile |= vp9_rb_read_bit(rb) << 1; - if (profile > 2) - profile += vp9_rb_read_bit(rb); - return (BITSTREAM_PROFILE) profile; -} - static void read_bitdepth_colorspace_sampling( VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { if (cm->profile >= PROFILE_2) { @@ -1311,12 +1641,13 @@ static void read_bitdepth_colorspace_sampling( static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - BufferPool *const pool = pbi->common.buffer_pool; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = pool->frame_bufs; int i, mask, ref_index = 0; size_t sz; cm->last_frame_type = cm->frame_type; + cm->last_intra_only = cm->intra_only; if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, @@ -1593,12 +1924,12 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) { } #endif // NDEBUG -static struct vp9_read_bit_buffer* init_read_bit_buffer( +static struct vp9_read_bit_buffer *init_read_bit_buffer( VP9Decoder *pbi, struct vp9_read_bit_buffer *rb, const uint8_t *data, const uint8_t *data_end, - uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) { + uint8_t clear_data[MAX_VP9_HEADER_SIZE]) { rb->bit_offset = 0; rb->error_handler = error_handler; rb->error_handler_data = &pbi->common; @@ -1614,12 +1945,34 @@ static struct vp9_read_bit_buffer* init_read_bit_buffer( return rb; } +//------------------------------------------------------------------------------ + +int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) { + return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && + vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && + vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; +} + +void vp9_read_frame_size(struct vp9_read_bit_buffer *rb, + int *width, int *height) { + *width = vp9_rb_read_literal(rb, 16) + 1; + *height = vp9_rb_read_literal(rb, 16) + 1; +} + +BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) { + int profile = vp9_rb_read_bit(rb); + profile |= vp9_rb_read_bit(rb) << 1; + if (profile > 2) + profile += vp9_rb_read_bit(rb); + return (BITSTREAM_PROFILE) profile; +} + void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0}; + struct vp9_read_bit_buffer rb; int context_updated = 0; uint8_t clear_data[MAX_VP9_HEADER_SIZE]; const size_t first_partition_size = read_uncompressed_header(pbi, @@ -1643,8 +1996,9 @@ void vp9_decode_frame(VP9Decoder *pbi, cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->width == cm->last_width && cm->height == cm->last_height && - !cm->intra_only && - cm->last_show_frame; + 
!cm->last_intra_only && + cm->last_show_frame && + (cm->last_frame_type != KEY_FRAME); vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); @@ -1661,7 +2015,7 @@ void vp9_decode_frame(VP9Decoder *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. Frame data header is corrupted."); - if (cm->lf.filter_level) { + if (cm->lf.filter_level && !cm->skip_loop_filter) { vp9_loop_filter_frame_init(cm, cm->lf.filter_level); } @@ -1687,11 +2041,13 @@ void vp9_decode_frame(VP9Decoder *pbi, // Multi-threaded tile decoder *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); if (!xd->corrupted) { - // If multiple threads are used to decode tiles, then we use those threads - // to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level, - 0, 0, pbi->tile_workers, pbi->num_tile_workers, - &pbi->lf_row_sync); + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. + vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, + cm->lf.filter_level, 0, 0, pbi->tile_workers, + pbi->num_tile_workers, &pbi->lf_row_sync); + } } else { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. Frame data is corrupted."); @@ -1721,327 +2077,3 @@ void vp9_decode_frame(VP9Decoder *pbi, if (cm->refresh_frame_context && !context_updated) cm->frame_contexts[cm->frame_context_idx] = *cm->fc; } - -static void build_mc_border(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, int w, int h) { - // Get a pointer to the start of the real data for this row. - const uint8_t *ref_row = src - x - y * src_stride; - - if (y >= h) - ref_row += (h - 1) * src_stride; - else if (y > 0) - ref_row += y * src_stride; - - do { - int right = 0, copy; - int left = x < 0 ? -x : 0; - - if (left > b_w) - left = b_w; - - if (x + b_w > w) - right = x + b_w - w; - - if (right > b_w) - right = b_w; - - copy = b_w - left - right; - - if (left) - memset(dst, ref_row[0], left); - - if (copy) - memcpy(dst + left, ref_row + x + left, copy); - - if (right) - memset(dst + left + copy, ref_row[w - 1], right); - - dst += dst_stride; - ++y; - - if (y > 0 && y < h) - ref_row += src_stride; - } while (--b_h); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void high_build_mc_border(const uint8_t *src8, int src_stride, - uint16_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, - int w, int h) { - // Get a pointer to the start of the real data for this row. - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - const uint16_t *ref_row = src - x - y * src_stride; - - if (y >= h) - ref_row += (h - 1) * src_stride; - else if (y > 0) - ref_row += y * src_stride; - - do { - int right = 0, copy; - int left = x < 0 ? 
-x : 0; - - if (left > b_w) - left = b_w; - - if (x + b_w > w) - right = x + b_w - w; - - if (right > b_w) - right = b_w; - - copy = b_w - left - right; - - if (left) - vpx_memset16(dst, ref_row[0], left); - - if (copy) - memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t)); - - if (right) - vpx_memset16(dst + left + copy, ref_row[w - 1], right); - - dst += dst_stride; - ++y; - - if (y > 0 && y < h) - ref_row += src_stride; - } while (--b_h); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, - int plane, int bw, int bh, int x, - int y, int w, int h, int mi_x, int mi_y, - const InterpKernel *kernel, - const struct scale_factors *sf, - struct buf_2d *pre_buf, struct buf_2d *dst_buf, - const MV* mv, RefCntBuffer *ref_frame_buf, - int is_scaled, int ref) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - MV32 scaled_mv; - int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, - buf_stride, subpel_x, subpel_y; - uint8_t *ref_frame, *buf_ptr; - - // Get reference frame pointer, width and height. - if (plane == 0) { - frame_width = ref_frame_buf->buf.y_crop_width; - frame_height = ref_frame_buf->buf.y_crop_height; - ref_frame = ref_frame_buf->buf.y_buffer; - } else { - frame_width = ref_frame_buf->buf.uv_crop_width; - frame_height = ref_frame_buf->buf.uv_crop_height; - ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer - : ref_frame_buf->buf.v_buffer; - } - - if (is_scaled) { - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, - pd->subsampling_x, - pd->subsampling_y); - // Co-ordinate of containing block to pixel precision. - int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); - int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = (x_start + x) << SUBPEL_BITS; - y0_16 = (y_start + y) << SUBPEL_BITS; - - // Co-ordinate of current block in reference frame - // to 1/16th pixel precision. - x0_16 = sf->scale_value_x(x0_16, sf); - y0_16 = sf->scale_value_y(y0_16, sf); - - // Map the top left corner of the block into the reference frame. - x0 = sf->scale_value_x(x_start + x, sf); - y0 = sf->scale_value_y(y_start + y, sf); - - // Scale the MV and incorporate the sub-pixel offset of the block - // in the reference frame. - scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); - xs = sf->x_step_q4; - ys = sf->y_step_q4; - } else { - // Co-ordinate of containing block to pixel precision. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = x0 << SUBPEL_BITS; - y0_16 = y0 << SUBPEL_BITS; - - scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); - scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); - xs = ys = 16; - } - subpel_x = scaled_mv.col & SUBPEL_MASK; - subpel_y = scaled_mv.row & SUBPEL_MASK; - - // Calculate the top left corner of the best matching block in the - // reference frame. - x0 += scaled_mv.col >> SUBPEL_BITS; - y0 += scaled_mv.row >> SUBPEL_BITS; - x0_16 += scaled_mv.col; - y0_16 += scaled_mv.row; - - // Get reference block pointer. - buf_ptr = ref_frame + y0 * pre_buf->stride + x0; - buf_stride = pre_buf->stride; - - // Do border extension if there is motion or the - // width/height is not a multiple of 8 pixels. 
- if (is_scaled || scaled_mv.col || scaled_mv.row || - (frame_width & 0x7) || (frame_height & 0x7)) { - int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; - - // Get reference block bottom right horizontal coordinate. - int x1 = (x0_16 + (w - 1) * xs) >> SUBPEL_BITS; - int x_pad = 0, y_pad = 0; - - if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { - x0 -= VP9_INTERP_EXTEND - 1; - x1 += VP9_INTERP_EXTEND; - x_pad = 1; - } - - if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { - y0 -= VP9_INTERP_EXTEND - 1; - y1 += VP9_INTERP_EXTEND; - y_pad = 1; - } - - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (pbi->frame_parallel_decode) - vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); - - // Skip border extension if block is inside the frame. - if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || - y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { - uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0; - // Extend the border. -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_build_mc_border(buf_ptr1, - pre_buf->stride, - xd->mc_buf_high, - x1 - x0 + 1, - x0, - y0, - x1 - x0 + 1, - y1 - y0 + 1, - frame_width, - frame_height); - buf_stride = x1 - x0 + 1; - buf_ptr = CONVERT_TO_BYTEPTR(xd->mc_buf_high) + - y_pad * 3 * buf_stride + x_pad * 3; - } else { - build_mc_border(buf_ptr1, - pre_buf->stride, - xd->mc_buf, - x1 - x0 + 1, - x0, - y0, - x1 - x0 + 1, - y1 - y0 + 1, - frame_width, - frame_height); - buf_stride = x1 - x0 + 1; - buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; - } -#else - build_mc_border(buf_ptr1, - pre_buf->stride, - xd->mc_buf, - x1 - x0 + 1, - x0, - y0, - x1 - x0 + 1, - y1 - y0 + 1, - frame_width, - frame_height); - buf_stride = x1 - x0 + 1; - buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; -#endif // CONFIG_VP9_HIGHBITDEPTH - } - } else { - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (pbi->frame_parallel_decode) { - const int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; - vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - MAX(0, (y1 + 7)) << (plane == 0 ? 
0 : 1)); - } - } -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); - } else { - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); - } -#else - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH -} - -void vp9_dec_build_inter_predictors_sb(VP9Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE bsize) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - const MODE_INFO *mi = xd->mi[0]; - const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); - const BLOCK_SIZE sb_type = mi->mbmi.sb_type; - const int is_compound = has_second_ref(&mi->mbmi); - - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, - &xd->plane[plane]); - struct macroblockd_plane *const pd = &xd->plane[plane]; - struct buf_2d *const dst_buf = &pd->dst; - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - - const int bw = 4 * num_4x4_w; - const int bh = 4 * num_4x4_h; - int ref; - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const struct scale_factors *const sf = &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = &pd->pre[ref]; - const int idx = xd->block_refs[ref]->idx; - BufferPool *const pool = pbi->common.buffer_pool; - RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; - const int is_scaled = vp9_is_scaled(sf); - - if (sb_type < BLOCK_8X8) { - int i = 0, x, y; - assert(bsize == BLOCK_8X8); - for (y = 0; y < num_4x4_h; ++y) { - for (x = 0; x < num_4x4_w; ++x) { - const MV mv = average_split_mvs(pd, mi, ref, i++); - dec_build_inter_predictors(pbi, xd, plane, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, - sf, pre_buf, dst_buf, &mv, - ref_frame_buf, is_scaled, ref); - } - } - } else { - const MV mv = mi->mbmi.mv[ref].as_mv; - dec_build_inter_predictors(pbi, xd, plane, bw, bh, - 0, 0, bw, bh, mi_x, mi_y, kernel, - sf, pre_buf, dst_buf, &mv, ref_frame_buf, - is_scaled, ref); - } - } - } -} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h index 8410c541e45..a876e7c60f1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h @@ -16,24 +16,18 @@ extern "C" { #endif -struct VP9Common; struct VP9Decoder; struct vp9_read_bit_buffer; -void vp9_init_dequantizer(struct VP9Common *cm); - -void vp9_decode_frame(struct VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - const uint8_t **p_data_end); - int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb); void vp9_read_frame_size(struct vp9_read_bit_buffer *rb, int *width, int *height); BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb); -void vp9_dec_build_inter_predictors_sb(struct VP9Decoder *const pbi, - MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); +void vp9_decode_frame(struct VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end); + #ifdef __cplusplus } // extern "C" #endif diff --git 
a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c index ce6ff997778..8a8d8ddd8e6 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c @@ -27,30 +27,33 @@ static PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { return (PREDICTION_MODE)vp9_read_tree(r, vp9_intra_mode_tree, p); } -static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, FRAME_COUNTS *counts, +static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_reader *r, int size_group) { const PREDICTION_MODE y_mode = read_intra_mode(r, cm->fc->y_mode_prob[size_group]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->y_mode[size_group][y_mode]; return y_mode; } -static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, FRAME_COUNTS *counts, +static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_reader *r, PREDICTION_MODE y_mode) { const PREDICTION_MODE uv_mode = read_intra_mode(r, cm->fc->uv_mode_prob[y_mode]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->uv_mode[y_mode][uv_mode]; return uv_mode; } -static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, FRAME_COUNTS *counts, +static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd, vp9_reader *r, int ctx) { const int mode = vp9_read_tree(r, vp9_inter_mode_tree, cm->fc->inter_mode_probs[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->inter_mode[ctx][mode]; return NEARESTMV + mode; @@ -61,8 +64,8 @@ static int read_segment_id(vp9_reader *r, const struct segmentation *seg) { } static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, TX_SIZE max_tx_size, vp9_reader *r) { + FRAME_COUNTS *counts = xd->counts; const int ctx = vp9_get_tx_size_context(xd); const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); int tx_size = vp9_read(r, tx_probs[0]); @@ -72,19 +75,18 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, tx_size += vp9_read(r, tx_probs[2]); } - if (!cm->frame_parallel_decoding_mode) + if (counts) ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; return (TX_SIZE)tx_size; } static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int allow_select, vp9_reader *r) { TX_MODE tx_mode = cm->tx_mode; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) - return read_selected_tx_size(cm, xd, counts, max_tx_size, r); + return read_selected_tx_size(cm, xd, max_tx_size, r); else return MIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); } @@ -174,14 +176,14 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, } static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, - FRAME_COUNTS *counts, int segment_id, vp9_reader *r) { - if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { const int ctx = vp9_get_skip_context(xd); const int skip = vp9_read(r, cm->fc->skip_probs[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->skip[ctx][skip]; return skip; } 
@@ -189,7 +191,6 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, int mi_row, int mi_col, vp9_reader *r) { MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -199,8 +200,8 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm, int i; mbmi->segment_id = read_intra_segment_id(cm, bsize, mi_row, mi_col, r); - mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(cm, xd, counts, 1, r); + mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, 1, r); mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; @@ -285,13 +286,13 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, const MACROBLOCKD *xd, - FRAME_COUNTS *counts, vp9_reader *r) { if (cm->reference_mode == REFERENCE_MODE_SELECT) { const int ctx = vp9_get_reference_mode_context(cm, xd); const REFERENCE_MODE mode = (REFERENCE_MODE)vp9_read(r, cm->fc->comp_inter_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->comp_inter[ctx][mode]; return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE } else { @@ -301,34 +302,35 @@ static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, // Read the referncence frame static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, vp9_reader *r, + vp9_reader *r, int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { FRAME_CONTEXT *const fc = cm->fc; + FRAME_COUNTS *counts = xd->counts; - if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id, - SEG_LVL_REF_FRAME); + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { + ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id, + SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { - const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, counts, r); + const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding if (mode == COMPOUND_REFERENCE) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); const int bit = vp9_read(r, fc->comp_ref_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->comp_ref[ctx][bit]; ref_frame[idx] = cm->comp_fixed_ref; ref_frame[!idx] = cm->comp_var_ref[bit]; } else if (mode == SINGLE_REFERENCE) { const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->single_ref[ctx0][0][bit0]; if (bit0) { const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); - if (!cm->frame_parallel_decoding_mode) + if (counts) ++counts->single_ref[ctx1][1][bit1]; ref_frame[0] = bit1 ? 
ALTREF_FRAME : GOLDEN_FRAME; } else { @@ -345,18 +347,19 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, static INLINE INTERP_FILTER read_switchable_interp_filter( VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, vp9_reader *r) { + vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); const INTERP_FILTER type = (INTERP_FILTER)vp9_read_tree(r, vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->switchable_interp[ctx][type]; return type; } static void read_intra_block_mode_info(VP9_COMMON *const cm, - FRAME_COUNTS *counts, MODE_INFO *mi, + MACROBLOCKD *const xd, MODE_INFO *mi, vp9_reader *r) { MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mi->mbmi.sb_type; @@ -368,26 +371,26 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, switch (bsize) { case BLOCK_4X4: for (i = 0; i < 4; ++i) - mi->bmi[i].as_mode = read_intra_mode_y(cm, counts, r, 0); + mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); mbmi->mode = mi->bmi[3].as_mode; break; case BLOCK_4X8: - mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, counts, + mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, r, 0); mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = - read_intra_mode_y(cm, counts, r, 0); + read_intra_mode_y(cm, xd, r, 0); break; case BLOCK_8X4: - mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, counts, + mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, r, 0); mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = - read_intra_mode_y(cm, counts, r, 0); + read_intra_mode_y(cm, xd, r, 0); break; default: - mbmi->mode = read_intra_mode_y(cm, counts, r, size_group_lookup[bsize]); + mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); } - mbmi->uv_mode = read_intra_mode_uv(cm, counts, r, mbmi->mode); + mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode); } static INLINE int is_mv_valid(const MV *mv) { @@ -395,7 +398,7 @@ static INLINE int is_mv_valid(const MV *mv) { mv->col > MV_LOW && mv->col < MV_UPP; } -static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts, +static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, PREDICTION_MODE mode, int_mv mv[2], int_mv ref_mv[2], int_mv nearest_mv[2], int_mv near_mv[2], @@ -405,8 +408,8 @@ static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts, switch (mode) { case NEWMV: { - nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ? - NULL : &counts->mv; + FRAME_COUNTS *counts = xd->counts; + nmv_context_counts *const mv_counts = counts ? 
&counts->mv : NULL; for (i = 0; i < 1 + is_compound; ++i) { read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts, allow_hp); @@ -440,15 +443,14 @@ static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts, } static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, int segment_id, vp9_reader *r) { - if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != - INTRA_FRAME; + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { + return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; } else { const int ctx = vp9_get_intra_inter_context(xd); const int is_inter = vp9_read(r, cm->fc->intra_inter_prob[ctx]); - if (!cm->frame_parallel_decoding_mode) + FRAME_COUNTS *counts = xd->counts; + if (counts) ++counts->intra_inter[ctx][is_inter]; return is_inter; } @@ -462,7 +464,6 @@ static void fpm_sync(void *const data, int mi_row) { static void read_inter_block_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, MODE_INFO *const mi, int mi_row, int mi_col, vp9_reader *r) { @@ -471,9 +472,11 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, const BLOCK_SIZE bsize = mbmi->sb_type; const int allow_hp = cm->allow_high_precision_mv; int_mv nearestmv[2], nearmv[2]; - int inter_mode_ctx, ref, is_compound; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + int ref, is_compound; + uint8_t inter_mode_ctx[MAX_REF_FRAMES]; - read_ref_frames(cm, xd, counts, r, mbmi->segment_id, mbmi->ref_frame); + read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame); is_compound = has_second_ref(mbmi); for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -485,13 +488,11 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, "Reference frame has invalid dimensions"); vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf); - vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame], - mi_row, mi_col, fpm_sync, (void *)pbi); + vp9_find_mv_refs(cm, xd, tile, mi, frame, ref_mvs[frame], + mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx); } - inter_mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]]; - - if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { mbmi->mode = ZEROMV; if (bsize < BLOCK_8X8) { vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, @@ -500,18 +501,19 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, } } else { if (bsize >= BLOCK_8X8) - mbmi->mode = read_inter_mode(cm, counts, r, inter_mode_ctx); + mbmi->mode = read_inter_mode(cm, xd, r, + inter_mode_ctx[mbmi->ref_frame[0]]); } if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { for (ref = 0; ref < 1 + is_compound; ++ref) { - vp9_find_best_ref_mvs(xd, allow_hp, mbmi->ref_mvs[mbmi->ref_frame[ref]], + vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]], &nearestmv[ref], &nearmv[ref]); } } mbmi->interp_filter = (cm->interp_filter == SWITCHABLE) - ? read_switchable_interp_filter(cm, xd, counts, r) + ? 
read_switchable_interp_filter(cm, xd, r) : cm->interp_filter; if (bsize < BLOCK_8X8) { @@ -524,15 +526,18 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, for (idx = 0; idx < 2; idx += num_4x4_w) { int_mv block[2]; const int j = idy * 2 + idx; - b_mode = read_inter_mode(cm, counts, r, inter_mode_ctx); + b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]); - if (b_mode == NEARESTMV || b_mode == NEARMV) + if (b_mode == NEARESTMV || b_mode == NEARMV) { + uint8_t dummy_mode_ctx[MAX_REF_FRAMES]; for (ref = 0; ref < 1 + is_compound; ++ref) vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, j, ref, mi_row, mi_col, &nearest_sub8x8[ref], - &near_sub8x8[ref]); + &near_sub8x8[ref], + dummy_mode_ctx); + } - if (!assign_mv(cm, counts, b_mode, block, nearestmv, + if (!assign_mv(cm, xd, b_mode, block, nearestmv, nearest_sub8x8, near_sub8x8, is_compound, allow_hp, r)) { xd->corrupted |= 1; @@ -555,14 +560,13 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; } else { - xd->corrupted |= !assign_mv(cm, counts, mbmi->mode, mbmi->mv, nearestmv, + xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv, nearestmv, nearmv, is_compound, allow_hp, r); } } static void read_inter_frame_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; @@ -573,18 +577,17 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi, mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); - mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r); - inter_block = read_is_inter_block(cm, xd, counts, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(cm, xd, counts, !mbmi->skip || !inter_block, r); + mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); if (inter_block) - read_inter_block_mode_info(pbi, xd, counts, tile, mi, mi_row, mi_col, r); + read_inter_block_mode_info(pbi, xd, tile, mi, mi_row, mi_col, r); else - read_intra_block_mode_info(cm, counts, mi, r); + read_intra_block_mode_info(cm, xd, mi, r); } void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, - FRAME_COUNTS *counts, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; @@ -596,19 +599,20 @@ void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; int w, h; - if (frame_is_intra_only(cm)) - read_intra_frame_mode_info(cm, xd, counts, mi_row, mi_col, r); - else - read_inter_frame_mode_info(pbi, xd, counts, tile, mi_row, mi_col, r); - - for (h = 0; h < y_mis; ++h) { - MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; - for (w = 0; w < x_mis; ++w) { - MV_REF *const mv = frame_mv + w; - mv->ref_frame[0] = mi->mbmi.ref_frame[0]; - mv->ref_frame[1] = mi->mbmi.ref_frame[1]; - mv->mv[0].as_int = mi->mbmi.mv[0].as_int; - mv->mv[1].as_int = mi->mbmi.mv[1].as_int; + if (frame_is_intra_only(cm)) { + read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); + } else { + read_inter_frame_mode_info(pbi, xd, tile, mi_row, mi_col, r); + + for (h = 0; h < y_mis; ++h) { + MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; + for (w = 0; w < x_mis; ++w) { + 
MV_REF *const mv = frame_mv + w; + mv->ref_frame[0] = mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->mbmi.mv[1].as_int; + } } } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h index c79dff71888..dd97d8da030 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h @@ -21,7 +21,6 @@ extern "C" { struct TileInfo; void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, - FRAME_COUNTS *counts, const struct TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c index cf1f23fb98f..7991a39e610 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c @@ -211,6 +211,9 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, // Find an empty frame buffer. const int free_fb = get_free_fb(cm); + if (cm->new_fb_idx == INVALID_IDX) + return VPX_CODEC_MEM_ERROR; + // Decrease ref_count since it will be increased again in // ref_cnt_fb() below. --frame_bufs[free_fb].ref_count; @@ -298,7 +301,10 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, && frame_bufs[cm->new_fb_idx].ref_count == 0) pool->release_fb_cb(pool->cb_priv, &frame_bufs[cm->new_fb_idx].raw_frame_buffer); + // Find a free frame buffer. Return error if can not find any. cm->new_fb_idx = get_free_fb(cm); + if (cm->new_fb_idx == INVALID_IDX) + return VPX_CODEC_MEM_ERROR; // Assign a MV array to the frame buffer. 
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c index bb8c66fc09f..3304e64b2d5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c @@ -17,6 +17,7 @@ #if CONFIG_COEFFICIENT_RANGE_CHECKING #include "vp9/common/vp9_idct.h" #endif +#include "vp9/common/vp9_scan.h" #include "vp9/decoder/vp9_detokenize.h" @@ -34,7 +35,7 @@ #define INCREMENT_COUNT(token) \ do { \ - if (!cm->frame_parallel_decoding_mode) \ + if (counts) \ ++coef_counts[band][ctx][token]; \ } while (0) @@ -45,22 +46,21 @@ static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) { return val; } -static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, - FRAME_COUNTS *counts, PLANE_TYPE type, +static int decode_coefs(const MACROBLOCKD *xd, + PLANE_TYPE type, tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, int ctx, const int16_t *scan, const int16_t *nb, vp9_reader *r) { + FRAME_COUNTS *counts = xd->counts; const int max_eob = 16 << (tx_size << 1); - const FRAME_CONTEXT *const fc = cm->fc; + const FRAME_CONTEXT *const fc = xd->fc; const int ref = is_inter_block(&xd->mi[0]->mbmi); int band, c = 0; const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; const vp9_prob *prob; - unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1] = - counts->coef[tx_size][type][ref]; - unsigned int (*eob_branch_count)[COEFF_CONTEXTS] = - counts->eob_branch[tx_size][type][ref]; + unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; + unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; uint8_t token_cache[32 * 32]; const uint8_t *band_translate = get_band_translate(tx_size); const int dq_shift = (tx_size == TX_32X32); @@ -73,9 +73,14 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, const uint8_t *cat5_prob; const uint8_t *cat6_prob; + if (counts) { + coef_counts = counts->coef[tx_size][type][ref]; + eob_branch_count = counts->eob_branch[tx_size][type][ref]; + } + #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - if (cm->bit_depth == VPX_BITS_10) { + if (xd->bd > VPX_BITS_8) { + if (xd->bd == VPX_BITS_10) { cat1_prob = vp9_cat1_prob_high10; cat2_prob = vp9_cat2_prob_high10; cat3_prob = vp9_cat3_prob_high10; @@ -111,7 +116,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, int val = -1; band = *band_translate++; prob = coef_probs[band][ctx]; - if (!cm->frame_parallel_decoding_mode) + if (counts) ++eob_branch_count[band][ctx]; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) { INCREMENT_COUNT(EOB_MODEL_TOKEN); @@ -161,7 +166,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, break; case CATEGORY6_TOKEN: #if CONFIG_VP9_HIGHBITDEPTH - switch (cm->bit_depth) { + switch (xd->bd) { case VPX_BITS_8: val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r); break; @@ -185,7 +190,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH dqcoeff[scan[c]] = highbd_check_range((vp9_read_bit(r) ? -v : v), - cm->bit_depth); + xd->bd); #else dqcoeff[scan[c]] = check_range(vp9_read_bit(r) ? 
-v : v); #endif // CONFIG_VP9_HIGHBITDEPTH @@ -201,18 +206,17 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, return c; } -int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int plane, int block, +int vp9_decode_block_tokens(MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, int x, int y, TX_SIZE tx_size, vp9_reader *r, int seg_id) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const int16_t *const dequant = (plane == 0) ? cm->y_dequant[seg_id] - : cm->uv_dequant[seg_id]; + const int16_t *const dequant = pd->seg_dequant[seg_id]; const int ctx = get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y); const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block); - const int eob = decode_coefs(cm, xd, counts, pd->plane_type, + const int eob = decode_coefs(xd, pd->plane_type, BLOCK_OFFSET(pd->dqcoeff, block), tx_size, dequant, ctx, so->scan, so->neighbors, r); vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h index 86126b6a19e..df176066898 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h @@ -19,8 +19,8 @@ extern "C" { #endif -int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, - FRAME_COUNTS *counts, int plane, int block, +int vp9_decode_block_tokens(MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, int x, int y, TX_SIZE tx_size, vp9_reader *r, int seg_id); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c index cf82dd75d92..0ac194e92b0 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c @@ -10,91 +10,24 @@ #include <arm_neon.h> #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" -#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_filter.h" -#include "vp9/encoder/vp9_variance.h" - -static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { - const int32x4_t a = vpaddlq_s16(v_16x8); - const int64x2_t b = vpaddlq_s32(a); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) { - const int64x2_t b = vpaddlq_s32(v_32x4); - const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), - vreinterpret_s32_s64(vget_high_s64(b))); - return vget_lane_s32(c, 0); -} - -// w * h must be less than 2048 or local variable v_sum may overflow. 
-static void variance_neon_w8(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, uint32_t *sse, int *sum) { - int i, j; - int16x8_t v_sum = vdupq_n_s16(0); - int32x4_t v_sse_lo = vdupq_n_s32(0); - int32x4_t v_sse_hi = vdupq_n_s32(0); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - const uint8x8_t v_a = vld1_u8(&a[j]); - const uint8x8_t v_b = vld1_u8(&b[j]); - const uint16x8_t v_diff = vsubl_u8(v_a, v_b); - const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); - v_sum = vaddq_s16(v_sum, sv_diff); - v_sse_lo = vmlal_s16(v_sse_lo, - vget_low_s16(sv_diff), - vget_low_s16(sv_diff)); - v_sse_hi = vmlal_s16(v_sse_hi, - vget_high_s16(sv_diff), - vget_high_s16(sv_diff)); - } - a += a_stride; - b += b_stride; - } - - *sum = horizontal_add_s16x8(v_sum); - *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi)); -} - -void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, 8, - 8, sse, sum); -} - -unsigned int vp9_variance8x8_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum); - return *sse - (((int64_t)sum * sum) >> 6); // >> 6 = / 8 * 8 -} - -void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, 16, - 16, sse, sum); -} - -unsigned int vp9_variance16x16_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &sum); - return *sse - (((int64_t)sum * sum) >> 8); // >> 8 = / 16 * 16 -} +static uint8_t bilinear_filters[8][2] = { + { 128, 0, }, + { 112, 16, }, + { 96, 32, }, + { 80, 48, }, + { 64, 64, }, + { 48, 80, }, + { 32, 96, }, + { 16, 112, }, +}; static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, uint8_t *output_ptr, @@ -102,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); + const uint8_t *vp9_filter) { + const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]); + const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]); unsigned int i; for (i = 0; i < output_height; ++i) { const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); @@ -125,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { - const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); - const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); + const uint8_t *vp9_filter) { + const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]); + const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]); unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; j += 16) { @@ -159,10 +92,10 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src, var_filter_block2d_bil_w8(src, fdata3, src_stride, 1, 9, 8, - BILINEAR_FILTERS_2TAP(xoffset)); + bilinear_filters[xoffset]); var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8, - 8, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance8x8_neon(temp2, 8, dst, dst_stride, sse); + 
8, bilinear_filters[yoffset]); + return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse); } unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src, @@ -177,80 +110,10 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src, var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 17, 16, - BILINEAR_FILTERS_2TAP(xoffset)); + bilinear_filters[xoffset]); var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16, - 16, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance16x16_neon(temp2, 16, dst, dst_stride, sse); -} - -void vp9_get32x32var_neon(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, 32, - 32, sse, sum); -} - -unsigned int vp9_variance32x32_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum; - variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &sum); - return *sse - (((int64_t)sum * sum) >> 10); // >> 10 = / 32 * 32 -} - -unsigned int vp9_variance32x64_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum1, sum2; - uint32_t sse1, sse2; - variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1); - variance_neon_w8(a + (32 * a_stride), a_stride, - b + (32 * b_stride), b_stride, 32, 32, - &sse2, &sum2); - *sse = sse1 + sse2; - sum1 += sum2; - return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 -} - -unsigned int vp9_variance64x32_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum1, sum2; - uint32_t sse1, sse2; - variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); - variance_neon_w8(a + (16 * a_stride), a_stride, - b + (16 * b_stride), b_stride, 64, 16, - &sse2, &sum2); - *sse = sse1 + sse2; - sum1 += sum2; - return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 -} - -unsigned int vp9_variance64x64_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse) { - int sum1, sum2; - uint32_t sse1, sse2; - - variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); - variance_neon_w8(a + (16 * a_stride), a_stride, - b + (16 * b_stride), b_stride, 64, 16, - &sse2, &sum2); - sse1 += sse2; - sum1 += sum2; - - variance_neon_w8(a + (16 * 2 * a_stride), a_stride, - b + (16 * 2 * b_stride), b_stride, - 64, 16, &sse2, &sum2); - sse1 += sse2; - sum1 += sum2; - - variance_neon_w8(a + (16 * 3 * a_stride), a_stride, - b + (16 * 3 * b_stride), b_stride, - 64, 16, &sse2, &sum2); - *sse = sse1 + sse2; - sum1 += sum2; - return *sse - (((int64_t)sum1 * sum1) >> 12); // >> 12 = / 64 * 64 + 16, bilinear_filters[yoffset]); + return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse); } unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src, @@ -265,10 +128,10 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src, var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 33, 32, - BILINEAR_FILTERS_2TAP(xoffset)); + bilinear_filters[xoffset]); var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32, - 32, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance32x32_neon(temp2, 32, dst, dst_stride, sse); + 32, bilinear_filters[yoffset]); + return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse); } unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src, @@ -283,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src, var_filter_block2d_bil_w16(src, fdata3, 
src_stride, 1, 65, 64, - BILINEAR_FILTERS_2TAP(xoffset)); + bilinear_filters[xoffset]); var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64, - 64, BILINEAR_FILTERS_2TAP(yoffset)); - return vp9_variance64x64_neon(temp2, 64, dst, dst_stride, sse); + 64, bilinear_filters[yoffset]); + return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c new file mode 100644 index 00000000000..f2e8b275a6a --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +uint32_t vp9_avg_8x8_msa(const uint8_t *src, int32_t src_stride) { + uint32_t sum_out; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7; + v4u32 sum = { 0 }; + + LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); + HADD_UB4_UH(src0, src1, src2, src3, sum0, sum1, sum2, sum3); + HADD_UB4_UH(src4, src5, src6, src7, sum4, sum5, sum6, sum7); + ADD4(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum0, sum2, sum4, sum6); + ADD2(sum0, sum2, sum4, sum6, sum0, sum4); + sum0 += sum4; + + sum = __msa_hadd_u_w(sum0, sum0); + sum0 = (v8u16)__msa_pckev_h((v8i16)sum, (v8i16)sum); + sum = __msa_hadd_u_w(sum0, sum0); + sum = (v4u32)__msa_srari_w((v4i32)sum, 6); + sum_out = __msa_copy_u_w((v4i32)sum, 0); + + return sum_out; +} + +uint32_t vp9_avg_4x4_msa(const uint8_t *src, int32_t src_stride) { + uint32_t sum_out; + uint32_t src0, src1, src2, src3; + v16u8 vec = { 0 }; + v8u16 sum0; + v4u32 sum1; + v2u64 sum2; + + LW4(src, src_stride, src0, src1, src2, src3); + INSERT_W4_UB(src0, src1, src2, src3, vec); + + sum0 = __msa_hadd_u_h(vec, vec); + sum1 = __msa_hadd_u_w(sum0, sum0); + sum0 = (v8u16)__msa_pckev_h((v8i16)sum1, (v8i16)sum1); + sum1 = __msa_hadd_u_w(sum0, sum0); + sum2 = __msa_hadd_u_d(sum1, sum1); + sum1 = (v4u32)__msa_srari_w((v4i32)sum2, 4); + sum_out = __msa_copy_u_w((v4i32)sum1, 0); + + return sum_out; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c new file mode 100644 index 00000000000..9709092fcce --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \ +static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \ + const int16_t *dq_coeff_ptr, \ + int64_t *ssz) { \ + int64_t err = 0; \ + uint32_t loop_cnt; \ + v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \ + v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \ + v2i64 sq_coeff_r, sq_coeff_l; \ + v2i64 err0, err_dup0, err1, err_dup1; \ + \ + coeff = LD_SH(coeff_ptr); \ + dq_coeff = LD_SH(dq_coeff_ptr); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \ + sq_coeff_r, sq_coeff_l); \ + DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \ + \ + coeff = LD_SH(coeff_ptr + 8); \ + dq_coeff = LD_SH(dq_coeff_ptr + 8); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ + DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ + \ + coeff_ptr += 16; \ + dq_coeff_ptr += 16; \ + \ + for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \ + coeff = LD_SH(coeff_ptr); \ + dq_coeff = LD_SH(dq_coeff_ptr); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ + DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ + \ + coeff = LD_SH(coeff_ptr + 8); \ + dq_coeff = LD_SH(dq_coeff_ptr + 8); \ + UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ + ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ + HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ + DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ + DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ + \ + coeff_ptr += 16; \ + dq_coeff_ptr += 16; \ + } \ + \ + err_dup0 = __msa_splati_d(sq_coeff_r, 1); \ + err_dup1 = __msa_splati_d(sq_coeff_l, 1); \ + sq_coeff_r += err_dup0; \ + sq_coeff_l += err_dup1; \ + *ssz = __msa_copy_s_d(sq_coeff_r, 0); \ + *ssz += __msa_copy_s_d(sq_coeff_l, 0); \ + \ + err_dup0 = __msa_splati_d(err0, 1); \ + err_dup1 = __msa_splati_d(err1, 1); \ + err0 += err_dup0; \ + err1 += err_dup1; \ + err = __msa_copy_s_d(err0, 0); \ + err += __msa_copy_s_d(err1, 0); \ + \ + return err; \ +} + +BLOCK_ERROR_BLOCKSIZE_MSA(16); +BLOCK_ERROR_BLOCKSIZE_MSA(64); +BLOCK_ERROR_BLOCKSIZE_MSA(256); +BLOCK_ERROR_BLOCKSIZE_MSA(1024); + +int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr, + const tran_low_t *dq_coeff_ptr, + intptr_t blk_size, int64_t *ssz) { + int64_t err; + const int16_t *coeff = (const int16_t *)coeff_ptr; + const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr; + + switch (blk_size) { + case 16: + err = block_error_16size_msa(coeff, dq_coeff, ssz); + break; + case 64: + err = block_error_64size_msa(coeff, dq_coeff, ssz); + break; + case 256: + err = block_error_256size_msa(coeff, dq_coeff, ssz); + break; + case 1024: + err = block_error_1024size_msa(coeff, dq_coeff, ssz); + break; + default: + err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz); + break; + } + + return err; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c new file mode 100644 index 00000000000..a3ebfab1f7d 
--- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "vp9/encoder/mips/msa/vp9_fdct_msa.h" + +static void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr, + int32_t src_stride) { + v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in8, in9, in10, in11, in12, in13, in14, in15; + v8i16 stp21, stp22, stp23, stp24, stp25, stp26, stp30; + v8i16 stp31, stp32, stp33, stp34, stp35, stp36, stp37; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, cnst0, cnst1, cnst4, cnst5; + v8i16 coeff = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, + -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 }; + v8i16 coeff1 = { cospi_2_64, cospi_30_64, cospi_14_64, cospi_18_64, + cospi_10_64, cospi_22_64, cospi_6_64, cospi_26_64 }; + v8i16 coeff2 = { -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64, + 0, 0, 0, 0 }; + + LD_SH16(input, src_stride, + in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15); + SLLI_4V(in0, in1, in2, in3, 2); + SLLI_4V(in4, in5, in6, in7, 2); + SLLI_4V(in8, in9, in10, in11, 2); + SLLI_4V(in12, in13, in14, in15, 2); + ADD4(in0, in15, in1, in14, in2, in13, in3, in12, tmp0, tmp1, tmp2, tmp3); + ADD4(in4, in11, in5, in10, in6, in9, in7, in8, tmp4, tmp5, tmp6, tmp7); + VP9_FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + ST_SH8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp_ptr, 32); + SUB4(in0, in15, in1, in14, in2, in13, in3, in12, in15, in14, in13, in12); + SUB4(in4, in11, in5, in10, in6, in9, in7, in8, in11, in10, in9, in8); + + tmp_ptr += 16; + + /* stp 1 */ + ILVL_H2_SH(in10, in13, in11, in12, vec2, vec4); + ILVR_H2_SH(in10, in13, in11, in12, vec3, vec5); + + cnst4 = __msa_splati_h(coeff, 0); + stp25 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst4); + + cnst5 = __msa_splati_h(coeff, 1); + cnst5 = __msa_ilvev_h(cnst5, cnst4); + stp22 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst5); + stp24 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst4); + stp23 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst5); + + /* stp2 */ + BUTTERFLY_4(in8, in9, stp22, stp23, stp30, stp31, stp32, stp33); + BUTTERFLY_4(in15, in14, stp25, stp24, stp37, stp36, stp35, stp34); + ILVL_H2_SH(stp36, stp31, stp35, stp32, vec2, vec4); + ILVR_H2_SH(stp36, stp31, stp35, stp32, vec3, vec5); + SPLATI_H2_SH(coeff, 2, 3, cnst0, cnst1); + cnst0 = __msa_ilvev_h(cnst0, cnst1); + stp26 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst0); + + cnst0 = __msa_splati_h(coeff, 4); + cnst1 = __msa_ilvev_h(cnst1, cnst0); + stp21 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst1); + + BUTTERFLY_4(stp30, stp37, stp26, stp21, in8, in15, in14, in9); + ILVRL_H2_SH(in15, in8, vec1, vec0); + SPLATI_H2_SH(coeff1, 0, 1, cnst0, cnst1); + cnst0 = __msa_ilvev_h(cnst0, cnst1); + + in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); + ST_SH(in8, tmp_ptr); + + cnst0 = __msa_splati_h(coeff2, 0); + cnst0 = __msa_ilvev_h(cnst1, cnst0); + in8 = 
VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); + ST_SH(in8, tmp_ptr + 224); + + ILVRL_H2_SH(in14, in9, vec1, vec0); + SPLATI_H2_SH(coeff1, 2, 3, cnst0, cnst1); + cnst1 = __msa_ilvev_h(cnst1, cnst0); + + in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1); + ST_SH(in8, tmp_ptr + 128); + + cnst1 = __msa_splati_h(coeff2, 2); + cnst0 = __msa_ilvev_h(cnst0, cnst1); + in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); + ST_SH(in8, tmp_ptr + 96); + + SPLATI_H2_SH(coeff, 2, 5, cnst0, cnst1); + cnst1 = __msa_ilvev_h(cnst1, cnst0); + + stp25 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1); + + cnst1 = __msa_splati_h(coeff, 3); + cnst1 = __msa_ilvev_h(cnst0, cnst1); + stp22 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1); + + /* stp4 */ + ADD2(stp34, stp25, stp33, stp22, in13, in10); + + ILVRL_H2_SH(in13, in10, vec1, vec0); + SPLATI_H2_SH(coeff1, 4, 5, cnst0, cnst1); + cnst0 = __msa_ilvev_h(cnst0, cnst1); + in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); + ST_SH(in8, tmp_ptr + 64); + + cnst0 = __msa_splati_h(coeff2, 1); + cnst0 = __msa_ilvev_h(cnst1, cnst0); + in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); + ST_SH(in8, tmp_ptr + 160); + + SUB2(stp34, stp25, stp33, stp22, in12, in11); + ILVRL_H2_SH(in12, in11, vec1, vec0); + SPLATI_H2_SH(coeff1, 6, 7, cnst0, cnst1); + cnst1 = __msa_ilvev_h(cnst1, cnst0); + + in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1); + ST_SH(in8, tmp_ptr + 192); + + cnst1 = __msa_splati_h(coeff2, 3); + cnst0 = __msa_ilvev_h(cnst0, cnst1); + in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); + ST_SH(in8, tmp_ptr + 32); +} + +static void fdct16x8_1d_row(int16_t *input, int16_t *output) { + v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in8, in9, in10, in11, in12, in13, in14, in15; + + LD_SH8(input, 16, in0, in1, in2, in3, in4, in5, in6, in7); + LD_SH8((input + 8), 16, in8, in9, in10, in11, in12, in13, in14, in15); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, + in8, in9, in10, in11, in12, in13, in14, in15); + ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3); + ADD4(in4, 1, in5, 1, in6, 1, in7, 1, in4, in5, in6, in7); + ADD4(in8, 1, in9, 1, in10, 1, in11, 1, in8, in9, in10, in11); + ADD4(in12, 1, in13, 1, in14, 1, in15, 1, in12, in13, in14, in15); + SRA_4V(in0, in1, in2, in3, 2); + SRA_4V(in4, in5, in6, in7, 2); + SRA_4V(in8, in9, in10, in11, 2); + SRA_4V(in12, in13, in14, in15, 2); + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, + in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, + tmp6, tmp7, in8, in9, in10, in11, in12, in13, in14, in15); + ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, input, 16); + VP9_FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + LD_SH8(input, 16, in8, in9, in10, in11, in12, in13, in14, in15); + VP9_FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, + tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3); + ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, output, 16); + TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, + tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7); + ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, output + 8, 16); +} + +void vp9_fdct16x16_msa(const int16_t *input, int16_t *output, + int32_t src_stride) { + 
int32_t i; + DECLARE_ALIGNED(32, int16_t, tmp_buf[16 * 16]); + + /* column transform */ + for (i = 0; i < 2; ++i) { + fdct8x16_1d_column((input + 8 * i), (&tmp_buf[0] + 8 * i), src_stride); + } + + /* row transform */ + for (i = 0; i < 2; ++i) { + fdct16x8_1d_row((&tmp_buf[0] + (128 * i)), (output + (128 * i))); + } +} + +void vp9_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) { + out[1] = 0; + + out[0] = VP9_LD_HADD(input, stride); + out[0] += VP9_LD_HADD(input + 8, stride); + out[0] += VP9_LD_HADD(input + 16 * 8, stride); + out[0] += VP9_LD_HADD(input + 16 * 8 + 8, stride); + out[0] >>= 1; +} + +static void fadst16_cols_step1_msa(const int16_t *input, int32_t stride, + const int32_t *const0, int16_t *int_buf) { + v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; + v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3; + v4i32 k0, k1, k2, k3; + + /* load input data */ + r0 = LD_SH(input); + r15 = LD_SH(input + 15 * stride); + r7 = LD_SH(input + 7 * stride); + r8 = LD_SH(input + 8 * stride); + SLLI_4V(r0, r15, r7, r8, 2); + + /* stage 1 */ + LD_SW2(const0, 4, k0, k1); + LD_SW2(const0 + 8, 4, k2, k3); + VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); + + r3 = LD_SH(input + 3 * stride); + r4 = LD_SH(input + 4 * stride); + r11 = LD_SH(input + 11 * stride); + r12 = LD_SH(input + 12 * stride); + SLLI_4V(r3, r4, r11, r12, 2); + + LD_SW2(const0 + 4 * 4, 4, k0, k1); + LD_SW2(const0 + 4 * 6, 4, k2, k3); + VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); + + /* stage 2 */ + BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1); + ST_SH2(tp0, tp2, int_buf, 8); + ST_SH2(tp1, tp3, int_buf + 4 * 8, 8); + + LD_SW2(const0 + 4 * 8, 4, k0, k1); + k2 = LD_SW(const0 + 4 * 10); + VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); + + ST_SH2(h0, h1, int_buf + 8 * 8, 8); + ST_SH2(h3, h2, int_buf + 12 * 8, 8); + + r9 = LD_SH(input + 9 * stride); + r6 = LD_SH(input + 6 * stride); + r1 = LD_SH(input + stride); + r14 = LD_SH(input + 14 * stride); + SLLI_4V(r9, r6, r1, r14, 2); + + LD_SW2(const0 + 4 * 11, 4, k0, k1); + LD_SW2(const0 + 4 * 13, 4, k2, k3); + VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3); + + ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8); + + r13 = LD_SH(input + 13 * stride); + r2 = LD_SH(input + 2 * stride); + r5 = LD_SH(input + 5 * stride); + r10 = LD_SH(input + 10 * stride); + SLLI_4V(r13, r2, r5, r10, 2); + + LD_SW2(const0 + 4 * 15, 4, k0, k1); + LD_SW2(const0 + 4 * 17, 4, k2, k3); + VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3); + + ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8); + + BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3); + ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8); +} + +static void fadst16_cols_step2_msa(int16_t *int_buf, const int32_t *const0, + int16_t *out) { + int16_t *out_ptr = out + 128; + v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15; + v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11; + v8i16 out0, out1, out2, out3, out4, out5, out6, out7; + v8i16 out8, out9, out10, out11, out12, out13, out14, out15; + v4i32 k0, k1, k2, k3; + + LD_SH2(int_buf + 3 * 8, 4 * 8, g13, g15); + LD_SH2(int_buf + 11 * 8, 4 * 8, g5, g7); + LD_SW2(const0 + 4 * 19, 4, k0, k1); + k2 = LD_SW(const0 + 4 * 21); + VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); + + tp0 = LD_SH(int_buf + 4 * 8); + tp1 = LD_SH(int_buf + 5 * 8); + tp3 = LD_SH(int_buf + 10 * 8); + tp2 = LD_SH(int_buf + 14 * 8); + LD_SW2(const0 + 4 * 22, 4, k0, k1); + k2 = LD_SW(const0 + 4 * 24); + 
VP9_MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7); + out4 = -out4; + ST_SH(out4, (out + 3 * 16)); + ST_SH(out5, (out_ptr + 4 * 16)); + + h1 = LD_SH(int_buf + 9 * 8); + h3 = LD_SH(int_buf + 12 * 8); + VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); + out13 = -out13; + ST_SH(out12, (out + 2 * 16)); + ST_SH(out13, (out_ptr + 5 * 16)); + + tp0 = LD_SH(int_buf); + tp1 = LD_SH(int_buf + 8); + tp2 = LD_SH(int_buf + 2 * 8); + tp3 = LD_SH(int_buf + 6 * 8); + + BUTTERFLY_4(tp0, tp1, tp3, tp2, out0, out1, h11, h10); + out1 = -out1; + ST_SH(out0, (out)); + ST_SH(out1, (out_ptr + 7 * 16)); + + h0 = LD_SH(int_buf + 8 * 8); + h2 = LD_SH(int_buf + 13 * 8); + + BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); + out8 = -out8; + ST_SH(out8, (out + 16)); + ST_SH(out9, (out_ptr + 6 * 16)); + + /* stage 4 */ + LD_SW2(const0 + 4 * 25, 4, k0, k1); + LD_SW2(const0 + 4 * 27, 4, k2, k3); + VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3); + ST_SH(out2, (out + 7 * 16)); + ST_SH(out3, (out_ptr)); + + VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7); + ST_SH(out6, (out + 4 * 16)); + ST_SH(out7, (out_ptr + 3 * 16)); + + VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11); + ST_SH(out10, (out + 6 * 16)); + ST_SH(out11, (out_ptr + 16)); + + VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15); + ST_SH(out14, (out + 5 * 16)); + ST_SH(out15, (out_ptr + 2 * 16)); +} + +static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) { + v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; + v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; + + /* load input data */ + LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, + r0, r1, r2, r3, r4, r5, r6, r7); + VP9_FDCT_POSTPROC_2V_NEG_H(r0, r1); + VP9_FDCT_POSTPROC_2V_NEG_H(r2, r3); + VP9_FDCT_POSTPROC_2V_NEG_H(r4, r5); + VP9_FDCT_POSTPROC_2V_NEG_H(r6, r7); + ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8); + out += 64; + + LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, + r8, r9, r10, r11, r12, r13, r14, r15); + VP9_FDCT_POSTPROC_2V_NEG_H(r8, r9); + VP9_FDCT_POSTPROC_2V_NEG_H(r10, r11); + VP9_FDCT_POSTPROC_2V_NEG_H(r12, r13); + VP9_FDCT_POSTPROC_2V_NEG_H(r14, r15); + ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8); + out += 64; + + /* load input data */ + input += 128; + LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, + r0, r1, r2, r3, r4, r5, r6, r7); + VP9_FDCT_POSTPROC_2V_NEG_H(r0, r1); + VP9_FDCT_POSTPROC_2V_NEG_H(r2, r3); + VP9_FDCT_POSTPROC_2V_NEG_H(r4, r5); + VP9_FDCT_POSTPROC_2V_NEG_H(r6, r7); + ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8); + out += 64; + + LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, + r8, r9, r10, r11, r12, r13, r14, r15); + VP9_FDCT_POSTPROC_2V_NEG_H(r8, r9); + VP9_FDCT_POSTPROC_2V_NEG_H(r10, r11); + VP9_FDCT_POSTPROC_2V_NEG_H(r12, r13); + VP9_FDCT_POSTPROC_2V_NEG_H(r14, r15); + ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8); +} + +static void fadst16_rows_step1_msa(int16_t *input, const int32_t *const0, + int16_t *int_buf) { + v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; + v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3; + v4i32 k0, k1, k2, k3; + + /* load input data */ + r0 = LD_SH(input); + r7 = LD_SH(input + 7 * 8); + r8 = 
LD_SH(input + 8 * 8); + r15 = LD_SH(input + 15 * 8); + + /* stage 1 */ + LD_SW2(const0, 4, k0, k1); + LD_SW2(const0 + 4 * 2, 4, k2, k3); + VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); + + r3 = LD_SH(input + 3 * 8); + r4 = LD_SH(input + 4 * 8); + r11 = LD_SH(input + 11 * 8); + r12 = LD_SH(input + 12 * 8); + + LD_SW2(const0 + 4 * 4, 4, k0, k1); + LD_SW2(const0 + 4 * 6, 4, k2, k3); + VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); + + /* stage 2 */ + BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1); + ST_SH2(tp0, tp1, int_buf, 4 * 8); + ST_SH2(tp2, tp3, int_buf + 8, 4 * 8); + + LD_SW2(const0 + 4 * 8, 4, k0, k1); + k2 = LD_SW(const0 + 4 * 10); + VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); + ST_SH2(h0, h3, int_buf + 8 * 8, 4 * 8); + ST_SH2(h1, h2, int_buf + 9 * 8, 4 * 8); + + r1 = LD_SH(input + 8); + r6 = LD_SH(input + 6 * 8); + r9 = LD_SH(input + 9 * 8); + r14 = LD_SH(input + 14 * 8); + + LD_SW2(const0 + 4 * 11, 4, k0, k1); + LD_SW2(const0 + 4 * 13, 4, k2, k3); + VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3); + ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8); + + r2 = LD_SH(input + 2 * 8); + r5 = LD_SH(input + 5 * 8); + r10 = LD_SH(input + 10 * 8); + r13 = LD_SH(input + 13 * 8); + + LD_SW2(const0 + 4 * 15, 4, k0, k1); + LD_SW2(const0 + 4 * 17, 4, k2, k3); + VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3); + ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8); + BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3); + ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8); +} + +static void fadst16_rows_step2_msa(int16_t *int_buf, const int32_t *const0, + int16_t *out) { + int16_t *out_ptr = out + 8; + v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15; + v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11; + v8i16 out0, out1, out2, out3, out4, out5, out6, out7; + v8i16 out8, out9, out10, out11, out12, out13, out14, out15; + v4i32 k0, k1, k2, k3; + + g13 = LD_SH(int_buf + 3 * 8); + g15 = LD_SH(int_buf + 7 * 8); + g5 = LD_SH(int_buf + 11 * 8); + g7 = LD_SH(int_buf + 15 * 8); + + LD_SW2(const0 + 4 * 19, 4, k0, k1); + k2 = LD_SW(const0 + 4 * 21); + VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); + + tp0 = LD_SH(int_buf + 4 * 8); + tp1 = LD_SH(int_buf + 5 * 8); + tp3 = LD_SH(int_buf + 10 * 8); + tp2 = LD_SH(int_buf + 14 * 8); + + LD_SW2(const0 + 4 * 22, 4, k0, k1); + k2 = LD_SW(const0 + 4 * 24); + VP9_MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7); + out4 = -out4; + ST_SH(out4, (out + 3 * 16)); + ST_SH(out5, (out_ptr + 4 * 16)); + + h1 = LD_SH(int_buf + 9 * 8); + h3 = LD_SH(int_buf + 12 * 8); + VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); + out13 = -out13; + ST_SH(out12, (out + 2 * 16)); + ST_SH(out13, (out_ptr + 5 * 16)); + + tp0 = LD_SH(int_buf); + tp1 = LD_SH(int_buf + 8); + tp2 = LD_SH(int_buf + 2 * 8); + tp3 = LD_SH(int_buf + 6 * 8); + + BUTTERFLY_4(tp0, tp1, tp3, tp2, out0, out1, h11, h10); + out1 = -out1; + ST_SH(out0, (out)); + ST_SH(out1, (out_ptr + 7 * 16)); + + h0 = LD_SH(int_buf + 8 * 8); + h2 = LD_SH(int_buf + 13 * 8); + BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); + out8 = -out8; + ST_SH(out8, (out + 16)); + ST_SH(out9, (out_ptr + 6 * 16)); + + /* stage 4 */ + LD_SW2(const0 + 4 * 25, 4, k0, k1); + LD_SW2(const0 + 4 * 27, 4, k2, k3); + VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3); + ST_SH(out2, (out + 7 * 16)); + ST_SH(out3, (out_ptr)); + + VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7); + ST_SH(out6, (out + 4 * 16)); + ST_SH(out7, (out_ptr + 3 * 16)); + + 
VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11); + ST_SH(out10, (out + 6 * 16)); + ST_SH(out11, (out_ptr + 16)); + + VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15); + ST_SH(out14, (out + 5 * 16)); + ST_SH(out15, (out_ptr + 2 * 16)); +} + +static void fadst16_transpose_msa(int16_t *input, int16_t *out) { + v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; + v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; + + /* load input data */ + LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, + l4, l12, l5, l13, l6, l14, l7, l15); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, + r0, r1, r2, r3, r4, r5, r6, r7); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, + r8, r9, r10, r11, r12, r13, r14, r15); + ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8); + ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8); + out += 16 * 8; + + /* load input data */ + input += 128; + LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, + l4, l12, l5, l13, l6, l14, l7, l15); + TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, + r0, r1, r2, r3, r4, r5, r6, r7); + TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, + r8, r9, r10, r11, r12, r13, r14, r15); + ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8); + ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8); +} + +static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) { + int16_t *temp = intermediate; + int16_t *out = output; + v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11; + v8i16 in12, in13, in14, in15; + + LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7); + temp = intermediate + 8; + LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, + in8, in9, in10, in11, in12, in13, in14, in15); + VP9_FDCT_POSTPROC_2V_NEG_H(in0, in1); + VP9_FDCT_POSTPROC_2V_NEG_H(in2, in3); + VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5); + VP9_FDCT_POSTPROC_2V_NEG_H(in6, in7); + VP9_FDCT_POSTPROC_2V_NEG_H(in8, in9); + VP9_FDCT_POSTPROC_2V_NEG_H(in10, in11); + VP9_FDCT_POSTPROC_2V_NEG_H(in12, in13); + VP9_FDCT_POSTPROC_2V_NEG_H(in14, in15); + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, + in8, in9, in10, in11, in12, in13, in14, in15); + temp = intermediate; + ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16); + VP9_FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + temp = intermediate; + LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15); + VP9_FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, + tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3); + ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16); + TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, + tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7); + out = output + 8; + ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16); +} + +void vp9_fht16x16_msa(const int16_t *input, int16_t *output, + int32_t stride, int32_t tx_type) { + DECLARE_ALIGNED(32, int16_t, tmp[256]); + DECLARE_ALIGNED(32, int16_t, trans_buf[256]); + DECLARE_ALIGNED(32, int16_t, 
tmp_buf[128]); + int32_t i; + int16_t *ptmpbuf = &tmp_buf[0]; + int16_t *trans = &trans_buf[0]; + const int32_t const_arr[29 * 4] = { + 52707308, 52707308, 52707308, 52707308, + -1072430300, -1072430300, -1072430300, -1072430300, + 795618043, 795618043, 795618043, 795618043, + -721080468, -721080468, -721080468, -721080468, + 459094491, 459094491, 459094491, 459094491, + -970646691, -970646691, -970646691, -970646691, + 1010963856, 1010963856, 1010963856, 1010963856, + -361743294, -361743294, -361743294, -361743294, + 209469125, 209469125, 209469125, 209469125, + -1053094788, -1053094788, -1053094788, -1053094788, + 1053160324, 1053160324, 1053160324, 1053160324, + 639644520, 639644520, 639644520, 639644520, + -862444000, -862444000, -862444000, -862444000, + 1062144356, 1062144356, 1062144356, 1062144356, + -157532337, -157532337, -157532337, -157532337, + 260914709, 260914709, 260914709, 260914709, + -1041559667, -1041559667, -1041559667, -1041559667, + 920985831, 920985831, 920985831, 920985831, + -551995675, -551995675, -551995675, -551995675, + 596522295, 596522295, 596522295, 596522295, + 892853362, 892853362, 892853362, 892853362, + -892787826, -892787826, -892787826, -892787826, + 410925857, 410925857, 410925857, 410925857, + -992012162, -992012162, -992012162, -992012162, + 992077698, 992077698, 992077698, 992077698, + 759246145, 759246145, 759246145, 759246145, + -759180609, -759180609, -759180609, -759180609, + -759222975, -759222975, -759222975, -759222975, + 759288511, 759288511, 759288511, 759288511 }; + + switch (tx_type) { + case DCT_DCT: + /* column transform */ + for (i = 0; i < 2; ++i) { + fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride); + } + + /* row transform */ + for (i = 0; i < 2; ++i) { + fdct16x8_1d_row(tmp + (128 * i), output + (128 * i)); + } + break; + case ADST_DCT: + /* column transform */ + for (i = 0; i < 2; ++i) { + fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf); + fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3)); + } + + /* row transform */ + for (i = 0; i < 2; ++i) { + postproc_fdct16x8_1d_row(tmp + (128 * i), output + (128 * i)); + } + break; + case DCT_ADST: + /* column transform */ + for (i = 0; i < 2; ++i) { + fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride); + } + + fadst16_transpose_postproc_msa(tmp, trans); + + /* row transform */ + for (i = 0; i < 2; ++i) { + fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf); + fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7)); + } + + fadst16_transpose_msa(tmp, output); + break; + case ADST_ADST: + /* column transform */ + for (i = 0; i < 2; ++i) { + fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf); + fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3)); + } + + fadst16_transpose_postproc_msa(tmp, trans); + + /* row transform */ + for (i = 0; i < 2; ++i) { + fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf); + fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7)); + } + + fadst16_transpose_msa(tmp, output); + break; + default: + assert(0); + break; + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct32x32_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct32x32_msa.c new file mode 100644 index 00000000000..3a740232228 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct32x32_msa.c @@ -0,0 +1,956 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vp9/encoder/mips/msa/vp9_fdct_msa.h" + +static void fdct8x32_1d_column_load_butterfly(const int16_t *input, + int32_t src_stride, + int16_t *temp_buff) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 step0, step1, step2, step3; + v8i16 in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1; + v8i16 step0_1, step1_1, step2_1, step3_1; + + /* 1st and 2nd set */ + LD_SH4(input, src_stride, in0, in1, in2, in3); + LD_SH4(input + (28 * src_stride), src_stride, in4, in5, in6, in7); + LD_SH4(input + (4 * src_stride), src_stride, in0_1, in1_1, in2_1, in3_1); + LD_SH4(input + (24 * src_stride), src_stride, in4_1, in5_1, in6_1, in7_1); + SLLI_4V(in0, in1, in2, in3, 2); + SLLI_4V(in4, in5, in6, in7, 2); + SLLI_4V(in0_1, in1_1, in2_1, in3_1, 2); + SLLI_4V(in4_1, in5_1, in6_1, in7_1, 2); + BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, + step0, step1, step2, step3, in4, in5, in6, in7); + BUTTERFLY_8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, + step0_1, step1_1, step2_1, step3_1, in4_1, in5_1, in6_1, in7_1); + ST_SH4(step0, step1, step2, step3, temp_buff, 8); + ST_SH4(in4, in5, in6, in7, temp_buff + (28 * 8), 8); + ST_SH4(step0_1, step1_1, step2_1, step3_1, temp_buff + (4 * 8), 8); + ST_SH4(in4_1, in5_1, in6_1, in7_1, temp_buff + (24 * 8), 8); + + /* 3rd and 4th set */ + LD_SH4(input + (8 * src_stride), src_stride, in0, in1, in2, in3); + LD_SH4(input + (20 * src_stride), src_stride, in4, in5, in6, in7); + LD_SH4(input + (12 * src_stride), src_stride, in0_1, in1_1, in2_1, in3_1); + LD_SH4(input + (16 * src_stride), src_stride, in4_1, in5_1, in6_1, in7_1); + SLLI_4V(in0, in1, in2, in3, 2); + SLLI_4V(in4, in5, in6, in7, 2); + SLLI_4V(in0_1, in1_1, in2_1, in3_1, 2); + SLLI_4V(in4_1, in5_1, in6_1, in7_1, 2); + BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, + step0, step1, step2, step3, in4, in5, in6, in7); + BUTTERFLY_8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, + step0_1, step1_1, step2_1, step3_1, in4_1, in5_1, in6_1, in7_1); + ST_SH4(step0, step1, step2, step3, temp_buff + (8 * 8), 8); + ST_SH4(in4, in5, in6, in7, temp_buff + (20 * 8), 8); + ST_SH4(step0_1, step1_1, step2_1, step3_1, temp_buff + (12 * 8), 8); + ST_SH4(in4_1, in5_1, in6_1, in7_1, temp_buff + (15 * 8) + 8, 8); +} + +static void fdct8x32_1d_column_even_store(int16_t *input, int16_t *temp) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in8, in9, in10, in11, in12, in13, in14, in15; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8i16 temp0, temp1; + + /* fdct even */ + LD_SH4(input, 8, in0, in1, in2, in3); + LD_SH4(input + 96, 8, in12, in13, in14, in15); + BUTTERFLY_8(in0, in1, in2, in3, in12, in13, in14, in15, + vec0, vec1, vec2, vec3, in12, in13, in14, in15); + LD_SH4(input + 32, 8, in4, in5, in6, in7); + LD_SH4(input + 64, 8, in8, in9, in10, in11); + BUTTERFLY_8(in4, in5, in6, in7, in8, in9, in10, in11, + vec4, vec5, vec6, vec7, in8, in9, in10, in11); + + /* Stage 3 */ + ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3); + BUTTERFLY_4(in0, in1, in2, in3, temp0, in4, in1, in0); + VP9_DOTP_CONST_PAIR(temp0, in4, cospi_16_64, cospi_16_64, temp1, temp0); + 
VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp); + ST_SH(temp1, temp + 512); + + VP9_DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0); + VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp + 256); + ST_SH(temp1, temp + 768); + + SUB4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, vec7, vec6, vec5, vec4); + VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); + ADD2(vec4, vec5, vec7, vec6, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0); + VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp + 128); + ST_SH(temp1, temp + 896); + + SUB2(vec4, vec5, vec7, vec6, vec4, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0); + VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp + 640); + ST_SH(temp1, temp + 384); + + VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); + VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); + ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); + VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); + ADD2(in0, in1, in2, in3, vec0, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0); + VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp + 64); + ST_SH(temp1, temp + 960); + + SUB2(in0, in1, in2, in3, in0, in2); + VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0); + VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp + 576); + ST_SH(temp1, temp + 448); + + SUB2(in9, vec2, in14, vec5, vec2, vec5); + VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); + SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5); + VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0); + VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp + 320); + ST_SH(temp1, temp + 704); + + ADD2(in3, in2, in0, in1, vec3, vec4); + VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1); + VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1); + ST_SH(temp0, temp + 192); + ST_SH(temp1, temp + 832); +} + +static void fdct8x32_1d_column_odd_store(int16_t *input, int16_t *temp_ptr) { + v8i16 in16, in17, in18, in19, in20, in21, in22, in23; + v8i16 in24, in25, in26, in27, in28, in29, in30, in31, vec4, vec5; + + in20 = LD_SH(input + 32); + in21 = LD_SH(input + 40); + in26 = LD_SH(input + 80); + in27 = LD_SH(input + 88); + + VP9_DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27); + VP9_DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26); + + in18 = LD_SH(input + 16); + in19 = LD_SH(input + 24); + in28 = LD_SH(input + 96); + in29 = LD_SH(input + 104); + + vec4 = in19 - in20; + ST_SH(vec4, input + 32); + vec4 = in18 - in21; + ST_SH(vec4, input + 40); + vec4 = in29 - in26; + ST_SH(vec4, input + 80); + vec4 = in28 - in27; + ST_SH(vec4, input + 88); + + in21 = in18 + in21; + in20 = in19 + in20; + in27 = in28 + in27; + in26 = in29 + in26; + + LD_SH4(input + 48, 8, in22, in23, in24, in25); + VP9_DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25); + VP9_DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24); + + in16 = LD_SH(input); + in17 = LD_SH(input + 8); + in30 = LD_SH(input + 112); + in31 = LD_SH(input + 120); + + vec4 = in17 - in22; + ST_SH(vec4, input + 16); + vec4 = in16 - in23; + ST_SH(vec4, input + 24); + vec4 = in31 - in24; + ST_SH(vec4, input + 96); + vec4 = in30 - in25; + ST_SH(vec4, input + 
104); + + ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31); + VP9_DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29); + VP9_DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28); + ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24); + ADD2(in27, in26, in25, in24, in23, in20); + VP9_DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec5, temp_ptr); + ST_SH(vec4, temp_ptr + 960); + + SUB2(in27, in26, in25, in24, in22, in21); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec5, temp_ptr + 448); + ST_SH(vec4, temp_ptr + 512); + + SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20); + VP9_DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25); + SUB2(in26, in27, in24, in25, in23, in20); + VP9_DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec4, temp_ptr + 704); + ST_SH(vec5, temp_ptr + 256); + + ADD2(in26, in27, in24, in25, in22, in21); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec4, temp_ptr + 192); + ST_SH(vec5, temp_ptr + 768); + + LD_SH4(input + 16, 8, in22, in23, in20, in21); + LD_SH4(input + 80, 8, in26, in27, in24, in25); + in16 = in20; + in17 = in21; + VP9_DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27); + VP9_DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26); + SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30); + ADD2(in28, in29, in31, in30, in16, in19); + VP9_DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec5, temp_ptr + 832); + ST_SH(vec4, temp_ptr + 128); + + SUB2(in28, in29, in31, in30, in17, in18); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec5, temp_ptr + 320); + ST_SH(vec4, temp_ptr + 640); + ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19); + VP9_DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31); + SUB2(in29, in28, in30, in31, in16, in19); + VP9_DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec5, temp_ptr + 576); + ST_SH(vec4, temp_ptr + 384); + + ADD2(in29, in28, in30, in31, in17, in18); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4); + VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4); + ST_SH(vec5, temp_ptr + 64); + ST_SH(vec4, temp_ptr + 896); +} + +static void fdct8x32_1d_column(const int16_t *input, int32_t src_stride, + int16_t *tmp_buf, int16_t *tmp_buf_big) { + fdct8x32_1d_column_load_butterfly(input, src_stride, tmp_buf); + fdct8x32_1d_column_even_store(tmp_buf, tmp_buf_big); + fdct8x32_1d_column_odd_store(tmp_buf + 128, (tmp_buf_big + 32)); +} + +static void fdct8x32_1d_row_load_butterfly(int16_t *temp_buff, + int16_t *output) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in8, in9, in10, in11, in12, in13, in14, in15; + v8i16 step0, step1, step2, step3, step4, step5, step6, step7; + + LD_SH8(temp_buff, 32, in0, in1, in2, in3, in4, in5, in6, in7); + 
LD_SH8(temp_buff + 24, 32, in8, in9, in10, in11, in12, in13, in14, in15); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, + in8, in9, in10, in11, in12, in13, in14, in15); + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15, + step0, step1, step2, step3, step4, step5, step6, step7, + in8, in9, in10, in11, in12, in13, in14, in15); + ST_SH8(step0, step1, step2, step3, step4, step5, step6, step7, output, 8); + ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, (output + 24 * 8), 8); + + /* 2nd set */ + LD_SH8(temp_buff + 8, 32, in0, in1, in2, in3, in4, in5, in6, in7); + LD_SH8(temp_buff + 16, 32, in8, in9, in10, in11, in12, in13, in14, in15); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, + in8, in9, in10, in11, in12, in13, in14, in15); + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15, + step0, step1, step2, step3, step4, step5, step6, step7, + in8, in9, in10, in11, in12, in13, in14, in15); + ST_SH8(step0, step1, step2, step3, step4, step5, step6, step7, + (output + 8 * 8), 8); + ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, (output + 16 * 8), 8); +} + +static void fdct8x32_1d_row_even_4x(int16_t *input, int16_t *interm_ptr, + int16_t *out) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in8, in9, in10, in11, in12, in13, in14, in15; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v4i32 vec0_l, vec1_l, vec2_l, vec3_l, vec4_l, vec5_l, vec6_l, vec7_l; + v4i32 vec0_r, vec1_r, vec2_r, vec3_r, vec4_r, vec5_r, vec6_r, vec7_r; + v4i32 tmp0_w, tmp1_w, tmp2_w, tmp3_w; + + /* fdct32 even */ + /* stage 2 */ + LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); + LD_SH8(input + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); + + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15, + vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, + in8, in9, in10, in11, in12, in13, in14, in15); + ST_SH8(vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, interm_ptr, 8); + ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, interm_ptr + 64, 8); + + /* Stage 3 */ + UNPCK_SH_SW(vec0, vec0_l, vec0_r); + UNPCK_SH_SW(vec1, vec1_l, vec1_r); + UNPCK_SH_SW(vec2, vec2_l, vec2_r); + UNPCK_SH_SW(vec3, vec3_l, vec3_r); + UNPCK_SH_SW(vec4, vec4_l, vec4_r); + UNPCK_SH_SW(vec5, vec5_l, vec5_r); + UNPCK_SH_SW(vec6, vec6_l, vec6_r); + UNPCK_SH_SW(vec7, vec7_l, vec7_r); + ADD4(vec0_r, vec7_r, vec1_r, vec6_r, vec2_r, vec5_r, vec3_r, vec4_r, + tmp0_w, tmp1_w, tmp2_w, tmp3_w); + BUTTERFLY_4(tmp0_w, tmp1_w, tmp2_w, tmp3_w, vec4_r, vec6_r, vec7_r, vec5_r); + ADD4(vec0_l, vec7_l, vec1_l, vec6_l, vec2_l, vec5_l, vec3_l, vec4_l, + vec0_r, vec1_r, vec2_r, vec3_r); + + tmp3_w = vec0_r + vec3_r; + vec0_r = vec0_r - vec3_r; + vec3_r = vec1_r + vec2_r; + vec1_r = vec1_r - vec2_r; + + VP9_DOTP_CONST_PAIR_W(vec4_r, vec6_r, tmp3_w, vec3_r, cospi_16_64, + cospi_16_64, vec4_r, tmp3_w, vec6_r, vec3_r); + VP9_FDCT32_POSTPROC_NEG_W(vec4_r); + VP9_FDCT32_POSTPROC_NEG_W(tmp3_w); + VP9_FDCT32_POSTPROC_NEG_W(vec6_r); + VP9_FDCT32_POSTPROC_NEG_W(vec3_r); + PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5); + ST_SH2(vec5, vec4, out, 8); + + VP9_DOTP_CONST_PAIR_W(vec5_r, vec7_r, vec0_r, vec1_r, cospi_24_64, + cospi_8_64, vec4_r, 
tmp3_w, vec6_r, vec3_r); + VP9_FDCT32_POSTPROC_NEG_W(vec4_r); + VP9_FDCT32_POSTPROC_NEG_W(tmp3_w); + VP9_FDCT32_POSTPROC_NEG_W(vec6_r); + VP9_FDCT32_POSTPROC_NEG_W(vec3_r); + PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5); + ST_SH2(vec5, vec4, out + 16, 8); + + LD_SH8(interm_ptr, 8, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7); + SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7); + VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); + ADD2(vec4, vec5, vec7, vec6, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, in5, in4); + VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5); + ST_SH(in4, out + 32); + ST_SH(in5, out + 56); + + SUB2(vec4, vec5, vec7, vec6, vec4, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, in5, in4); + VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5); + ST_SH(in4, out + 40); + ST_SH(in5, out + 48); + + LD_SH8(interm_ptr + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); + VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); + VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); + ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); + VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); + ADD2(in0, in1, in2, in3, vec0, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, in5, in4); + VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5); + ST_SH(in4, out + 64); + ST_SH(in5, out + 120); + + SUB2(in0, in1, in2, in3, in0, in2); + VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, in5, in4); + VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5); + ST_SH(in4, out + 72); + ST_SH(in5, out + 112); + + SUB2(in9, vec2, in14, vec5, vec2, vec5); + VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); + SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5); + VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, in5, in4); + VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5); + ST_SH(in4, out + 80); + ST_SH(in5, out + 104); + + ADD2(in3, in2, in0, in1, vec3, vec4); + VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, in4, in5); + VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5); + ST_SH(in4, out + 96); + ST_SH(in5, out + 88); +} + +static void fdct8x32_1d_row_even(int16_t *temp, int16_t *out) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in8, in9, in10, in11, in12, in13, in14, in15; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, temp0, temp1; + + /* fdct32 even */ + /* stage 2 */ + LD_SH8(temp, 8, in0, in1, in2, in3, in4, in5, in6, in7); + LD_SH8(temp + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); + + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15, + vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, + in8, in9, in10, in11, in12, in13, in14, in15); + + /* Stage 3 */ + ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3); + BUTTERFLY_4(in0, in1, in2, in3, temp0, in4, in1, in0); + VP9_DOTP_CONST_PAIR(temp0, in4, cospi_16_64, cospi_16_64, temp1, temp0); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out); + ST_SH(temp1, out + 8); + + VP9_DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out + 16); + ST_SH(temp1, out + 24); + + SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7); + VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); + ADD2(vec4, vec5, vec7, vec6, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, 
cospi_28_64, cospi_4_64, temp1, temp0); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out + 32); + ST_SH(temp1, out + 56); + + SUB2(vec4, vec5, vec7, vec6, vec4, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out + 40); + ST_SH(temp1, out + 48); + + VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); + VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); + ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); + VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); + ADD2(in0, in1, in2, in3, vec0, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out + 64); + ST_SH(temp1, out + 120); + + SUB2(in0, in1, in2, in3, in0, in2); + VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out + 72); + ST_SH(temp1, out + 112); + + SUB2(in9, vec2, in14, vec5, vec2, vec5); + VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); + SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5) + VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out + 80); + ST_SH(temp1, out + 104); + + ADD2(in3, in2, in0, in1, vec3, vec4); + VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1); + VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1); + ST_SH(temp0, out + 96); + ST_SH(temp1, out + 88); +} + +static void fdct8x32_1d_row_odd(int16_t *temp, int16_t *interm_ptr, + int16_t *out) { + v8i16 in16, in17, in18, in19, in20, in21, in22, in23; + v8i16 in24, in25, in26, in27, in28, in29, in30, in31, vec4, vec5; + + in20 = LD_SH(temp + 32); + in21 = LD_SH(temp + 40); + in26 = LD_SH(temp + 80); + in27 = LD_SH(temp + 88); + + VP9_DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27); + VP9_DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26); + + in18 = LD_SH(temp + 16); + in19 = LD_SH(temp + 24); + in28 = LD_SH(temp + 96); + in29 = LD_SH(temp + 104); + + vec4 = in19 - in20; + ST_SH(vec4, interm_ptr + 32); + vec4 = in18 - in21; + ST_SH(vec4, interm_ptr + 88); + vec4 = in28 - in27; + ST_SH(vec4, interm_ptr + 56); + vec4 = in29 - in26; + ST_SH(vec4, interm_ptr + 64); + + ADD4(in18, in21, in19, in20, in28, in27, in29, in26, in21, in20, in27, in26); + + in22 = LD_SH(temp + 48); + in23 = LD_SH(temp + 56); + in24 = LD_SH(temp + 64); + in25 = LD_SH(temp + 72); + + VP9_DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25); + VP9_DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24); + + in16 = LD_SH(temp); + in17 = LD_SH(temp + 8); + in30 = LD_SH(temp + 112); + in31 = LD_SH(temp + 120); + + vec4 = in17 - in22; + ST_SH(vec4, interm_ptr + 40); + vec4 = in30 - in25; + ST_SH(vec4, interm_ptr + 48); + vec4 = in31 - in24; + ST_SH(vec4, interm_ptr + 72); + vec4 = in16 - in23; + ST_SH(vec4, interm_ptr + 80); + + ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31); + VP9_DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29); + VP9_DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28); + + ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24); + ADD2(in27, in26, in25, in24, in23, in20); + + 
VP9_DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec5, out); + ST_SH(vec4, out + 120); + + SUB2(in27, in26, in25, in24, in22, in21); + + VP9_DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec5, out + 112); + ST_SH(vec4, out + 8); + + SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20); + VP9_DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25); + SUB2(in26, in27, in24, in25, in23, in20); + + VP9_DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec4, out + 16); + ST_SH(vec5, out + 104); + + ADD2(in26, in27, in24, in25, in22, in21); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec4, out + 24); + ST_SH(vec5, out + 96); + + in20 = LD_SH(interm_ptr + 32); + in21 = LD_SH(interm_ptr + 88); + in27 = LD_SH(interm_ptr + 56); + in26 = LD_SH(interm_ptr + 64); + + in16 = in20; + in17 = in21; + VP9_DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27); + VP9_DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26); + + in22 = LD_SH(interm_ptr + 40); + in25 = LD_SH(interm_ptr + 48); + in24 = LD_SH(interm_ptr + 72); + in23 = LD_SH(interm_ptr + 80); + + SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30); + ADD2(in28, in29, in31, in30, in16, in19); + VP9_DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec5, out + 32); + ST_SH(vec4, out + 88); + + SUB2(in28, in29, in31, in30, in17, in18); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec5, out + 40); + ST_SH(vec4, out + 80); + + ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19); + VP9_DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31); + SUB2(in29, in28, in30, in31, in16, in19); + + VP9_DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec5, out + 72); + ST_SH(vec4, out + 48); + + ADD2(in29, in28, in30, in31, in17, in18); + + VP9_DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4); + VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4); + ST_SH(vec4, out + 56); + ST_SH(vec5, out + 64); +} + +static void fdct8x32_1d_row_transpose_store(int16_t *temp, int16_t *output) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1; + + /* 1st set */ + in0 = LD_SH(temp); + in4 = LD_SH(temp + 32); + in2 = LD_SH(temp + 64); + in6 = LD_SH(temp + 96); + in1 = LD_SH(temp + 128); + in7 = LD_SH(temp + 152); + in3 = LD_SH(temp + 192); + in5 = LD_SH(temp + 216); + + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + + /* 2nd set */ + in0_1 = LD_SH(temp + 16); + in1_1 = LD_SH(temp + 232); + in2_1 = LD_SH(temp + 80); + in3_1 = LD_SH(temp + 168); + in4_1 = LD_SH(temp + 48); + in5_1 = LD_SH(temp + 176); + in6_1 = LD_SH(temp + 112); + in7_1 = LD_SH(temp + 240); + + ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 32); + TRANSPOSE8x8_SH_SH(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, + in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1); + + /* 3rd set */ + in0 = 
LD_SH(temp + 8); + in1 = LD_SH(temp + 136); + in2 = LD_SH(temp + 72); + in3 = LD_SH(temp + 200); + in4 = LD_SH(temp + 40); + in5 = LD_SH(temp + 208); + in6 = LD_SH(temp + 104); + in7 = LD_SH(temp + 144); + + ST_SH8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, + output + 8, 32); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output + 16, 32); + + /* 4th set */ + in0_1 = LD_SH(temp + 24); + in1_1 = LD_SH(temp + 224); + in2_1 = LD_SH(temp + 88); + in3_1 = LD_SH(temp + 160); + in4_1 = LD_SH(temp + 56); + in5_1 = LD_SH(temp + 184); + in6_1 = LD_SH(temp + 120); + in7_1 = LD_SH(temp + 248); + + TRANSPOSE8x8_SH_SH(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, + in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1); + ST_SH8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, + output + 24, 32); +} + +static void fdct32x8_1d_row(int16_t *temp, int16_t *temp_buf, + int16_t *output) { + fdct8x32_1d_row_load_butterfly(temp, temp_buf); + fdct8x32_1d_row_even(temp_buf, temp_buf); + fdct8x32_1d_row_odd(temp_buf + 128, temp, temp_buf + 128); + fdct8x32_1d_row_transpose_store(temp_buf, output); +} + +static void fdct32x8_1d_row_4x(int16_t *tmp_buf_big, int16_t *tmp_buf, + int16_t *output) { + fdct8x32_1d_row_load_butterfly(tmp_buf_big, tmp_buf); + fdct8x32_1d_row_even_4x(tmp_buf, tmp_buf_big, tmp_buf); + fdct8x32_1d_row_odd(tmp_buf + 128, tmp_buf_big, tmp_buf + 128); + fdct8x32_1d_row_transpose_store(tmp_buf, output); +} + +void vp9_fdct32x32_msa(const int16_t *input, int16_t *output, + int32_t src_stride) { + int32_t i; + DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]); + DECLARE_ALIGNED(32, int16_t, tmp_buf[256]); + + /* column transform */ + for (i = 0; i < 4; ++i) { + fdct8x32_1d_column(input + (8 * i), src_stride, tmp_buf, + tmp_buf_big + (8 * i)); + } + + /* row transform */ + fdct32x8_1d_row_4x(tmp_buf_big, tmp_buf, output); + + /* row transform */ + for (i = 1; i < 4; ++i) { + fdct32x8_1d_row(tmp_buf_big + (i * 256), tmp_buf, output + (i * 256)); + } +} + +void vp9_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) { + out[1] = 0; + + out[0] = VP9_LD_HADD(input, stride); + out[0] += VP9_LD_HADD(input + 8, stride); + out[0] += VP9_LD_HADD(input + 16, stride); + out[0] += VP9_LD_HADD(input + 24, stride); + out[0] += VP9_LD_HADD(input + 32 * 8, stride); + out[0] += VP9_LD_HADD(input + 32 * 8 + 8, stride); + out[0] += VP9_LD_HADD(input + 32 * 8 + 16, stride); + out[0] += VP9_LD_HADD(input + 32 * 8 + 24, stride); + out[0] += VP9_LD_HADD(input + 32 * 16, stride); + out[0] += VP9_LD_HADD(input + 32 * 16 + 8, stride); + out[0] += VP9_LD_HADD(input + 32 * 16 + 16, stride); + out[0] += VP9_LD_HADD(input + 32 * 16 + 24, stride); + out[0] += VP9_LD_HADD(input + 32 * 24, stride); + out[0] += VP9_LD_HADD(input + 32 * 24 + 8, stride); + out[0] += VP9_LD_HADD(input + 32 * 24 + 16, stride); + out[0] += VP9_LD_HADD(input + 32 * 24 + 24, stride); + out[0] >>= 3; +} + +static void fdct8x32_1d_row_even_rd(int16_t *temp, int16_t *out) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + v8i16 in8, in9, in10, in11, in12, in13, in14, in15; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, temp0, temp1; + + /* fdct32 even */ + /* stage 2 */ + LD_SH8(temp, 8, in0, in1, in2, in3, in4, in5, in6, in7); + LD_SH8(temp + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); + + BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, + in8, in9, in10, in11, in12, in13, in14, in15, 
+ vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, + in8, in9, in10, in11, in12, in13, in14, in15); + VP9_FDCT_POSTPROC_2V_NEG_H(vec0, vec1); + VP9_FDCT_POSTPROC_2V_NEG_H(vec2, vec3); + VP9_FDCT_POSTPROC_2V_NEG_H(vec4, vec5); + VP9_FDCT_POSTPROC_2V_NEG_H(vec6, vec7); + VP9_FDCT_POSTPROC_2V_NEG_H(in8, in9); + VP9_FDCT_POSTPROC_2V_NEG_H(in10, in11); + VP9_FDCT_POSTPROC_2V_NEG_H(in12, in13); + VP9_FDCT_POSTPROC_2V_NEG_H(in14, in15); + + /* Stage 3 */ + ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3); + + temp0 = in0 + in3; + in0 = in0 - in3; + in3 = in1 + in2; + in1 = in1 - in2; + + VP9_DOTP_CONST_PAIR(temp0, in3, cospi_16_64, cospi_16_64, temp1, temp0); + ST_SH(temp0, out); + ST_SH(temp1, out + 8); + + VP9_DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0); + ST_SH(temp0, out + 16); + ST_SH(temp1, out + 24); + + SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7); + VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); + ADD2(vec4, vec5, vec7, vec6, vec0, vec1); + VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0); + ST_SH(temp0, out + 32); + ST_SH(temp1, out + 56); + + SUB2(vec4, vec5, vec7, vec6, vec4, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0); + ST_SH(temp0, out + 40); + ST_SH(temp1, out + 48); + + VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); + VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); + ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); + VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); + ADD2(in0, in1, in2, in3, vec0, vec7); + VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0); + ST_SH(temp0, out + 64); + ST_SH(temp1, out + 120); + + SUB2(in0, in1, in2, in3, in0, in2); + VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0); + ST_SH(temp0, out + 72); + ST_SH(temp1, out + 112); + + SUB2(in9, vec2, in14, vec5, vec2, vec5); + VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); + SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5); + VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0); + ST_SH(temp0, out + 80); + ST_SH(temp1, out + 104); + + ADD2(in3, in2, in0, in1, vec3, vec4); + VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1); + ST_SH(temp0, out + 96); + ST_SH(temp1, out + 88); +} + +static void fdct8x32_1d_row_odd_rd(int16_t *temp, int16_t *interm_ptr, + int16_t *out) { + v8i16 in16, in17, in18, in19, in20, in21, in22, in23; + v8i16 in24, in25, in26, in27, in28, in29, in30, in31; + v8i16 vec4, vec5; + + in20 = LD_SH(temp + 32); + in21 = LD_SH(temp + 40); + in26 = LD_SH(temp + 80); + in27 = LD_SH(temp + 88); + + VP9_DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27); + VP9_DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26); + + VP9_FDCT_POSTPROC_2V_NEG_H(in20, in21); + VP9_FDCT_POSTPROC_2V_NEG_H(in26, in27); + + in18 = LD_SH(temp + 16); + in19 = LD_SH(temp + 24); + in28 = LD_SH(temp + 96); + in29 = LD_SH(temp + 104); + + VP9_FDCT_POSTPROC_2V_NEG_H(in18, in19); + VP9_FDCT_POSTPROC_2V_NEG_H(in28, in29); + + vec4 = in19 - in20; + ST_SH(vec4, interm_ptr + 32); + vec4 = in18 - in21; + ST_SH(vec4, interm_ptr + 88); + vec4 = in29 - in26; + ST_SH(vec4, interm_ptr + 64); + vec4 = in28 - in27; + ST_SH(vec4, interm_ptr + 56); + + ADD4(in18, in21, in19, in20, in28, in27, in29, in26, in21, in20, in27, in26); + + in22 = 
LD_SH(temp + 48); + in23 = LD_SH(temp + 56); + in24 = LD_SH(temp + 64); + in25 = LD_SH(temp + 72); + + VP9_DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25); + VP9_DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24); + VP9_FDCT_POSTPROC_2V_NEG_H(in22, in23); + VP9_FDCT_POSTPROC_2V_NEG_H(in24, in25); + + in16 = LD_SH(temp); + in17 = LD_SH(temp + 8); + in30 = LD_SH(temp + 112); + in31 = LD_SH(temp + 120); + + VP9_FDCT_POSTPROC_2V_NEG_H(in16, in17); + VP9_FDCT_POSTPROC_2V_NEG_H(in30, in31); + + vec4 = in17 - in22; + ST_SH(vec4, interm_ptr + 40); + vec4 = in30 - in25; + ST_SH(vec4, interm_ptr + 48); + vec4 = in31 - in24; + ST_SH(vec4, interm_ptr + 72); + vec4 = in16 - in23; + ST_SH(vec4, interm_ptr + 80); + + ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31); + VP9_DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29); + VP9_DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28); + ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24); + ADD2(in27, in26, in25, in24, in23, in20); + VP9_DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5); + ST_SH(vec5, out); + ST_SH(vec4, out + 120); + + SUB2(in27, in26, in25, in24, in22, in21); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4); + ST_SH(vec5, out + 112); + ST_SH(vec4, out + 8); + + SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20); + VP9_DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25); + SUB2(in26, in27, in24, in25, in23, in20); + VP9_DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5); + ST_SH(vec4, out + 16); + ST_SH(vec5, out + 104); + + ADD2(in26, in27, in24, in25, in22, in21); + VP9_DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5); + ST_SH(vec4, out + 24); + ST_SH(vec5, out + 96); + + in20 = LD_SH(interm_ptr + 32); + in21 = LD_SH(interm_ptr + 88); + in27 = LD_SH(interm_ptr + 56); + in26 = LD_SH(interm_ptr + 64); + + in16 = in20; + in17 = in21; + VP9_DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27); + VP9_DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26); + + in22 = LD_SH(interm_ptr + 40); + in25 = LD_SH(interm_ptr + 48); + in24 = LD_SH(interm_ptr + 72); + in23 = LD_SH(interm_ptr + 80); + + SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30); + in16 = in28 + in29; + in19 = in31 + in30; + VP9_DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4); + ST_SH(vec5, out + 32); + ST_SH(vec4, out + 88); + + SUB2(in28, in29, in31, in30, in17, in18); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4); + ST_SH(vec5, out + 40); + ST_SH(vec4, out + 80); + + ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19); + VP9_DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31); + SUB2(in29, in28, in30, in31, in16, in19); + VP9_DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4); + ST_SH(vec5, out + 72); + ST_SH(vec4, out + 48); + + ADD2(in29, in28, in30, in31, in17, in18); + VP9_DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4); + ST_SH(vec4, out + 56); + ST_SH(vec5, out + 64); +} + +static void fdct32x8_1d_row_rd(int16_t *tmp_buf_big, int16_t *tmp_buf, + int16_t *output) { + fdct8x32_1d_row_load_butterfly(tmp_buf_big, tmp_buf); + 
fdct8x32_1d_row_even_rd(tmp_buf, tmp_buf); + fdct8x32_1d_row_odd_rd((tmp_buf + 128), tmp_buf_big, (tmp_buf + 128)); + fdct8x32_1d_row_transpose_store(tmp_buf, output); +} + +void vp9_fdct32x32_rd_msa(const int16_t *input, int16_t *out, + int32_t src_stride) { + int32_t i; + DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]); + DECLARE_ALIGNED(32, int16_t, tmp_buf[256]); + + /* column transform */ + for (i = 0; i < 4; ++i) { + fdct8x32_1d_column(input + (8 * i), src_stride, &tmp_buf[0], + &tmp_buf_big[0] + (8 * i)); + } + + /* row transform */ + for (i = 0; i < 4; ++i) { + fdct32x8_1d_row_rd(&tmp_buf_big[0] + (8 * i * 32), &tmp_buf[0], + out + (8 * i * 32)); + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c new file mode 100644 index 00000000000..790b4fb8d79 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "vp9/encoder/mips/msa/vp9_fdct_msa.h" + +void vp9_fwht4x4_msa(const int16_t *input, int16_t *output, + int32_t src_stride) { + v8i16 in0, in1, in2, in3, in4; + + LD_SH4(input, src_stride, in0, in1, in2, in3); + + in0 += in1; + in3 -= in2; + in4 = (in0 - in3) >> 1; + SUB2(in4, in1, in4, in2, in1, in2); + in0 -= in2; + in3 += in1; + + TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); + + in0 += in2; + in1 -= in3; + in4 = (in0 - in1) >> 1; + SUB2(in4, in2, in4, in3, in2, in3); + in0 -= in3; + in1 += in2; + + SLLI_4V(in0, in1, in2, in3, 2); + + TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2); + + ST4x2_UB(in0, output, 4); + ST4x2_UB(in3, output + 4, 4); + ST4x2_UB(in1, output + 8, 4); + ST4x2_UB(in2, output + 12, 4); +} + +void vp9_fdct4x4_msa(const int16_t *input, int16_t *output, + int32_t src_stride) { + v8i16 in0, in1, in2, in3; + + LD_SH4(input, src_stride, in0, in1, in2, in3); + + /* fdct4 pre-process */ + { + v8i16 vec, mask; + v16i8 zero = { 0 }; + v16i8 one = __msa_ldi_b(1); + + mask = (v8i16)__msa_sldi_b(zero, one, 15); + SLLI_4V(in0, in1, in2, in3, 4); + vec = __msa_ceqi_h(in0, 0); + vec = vec ^ 255; + vec = mask & vec; + in0 += vec; + } + + VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3); + SRA_4V(in0, in1, in2, in3, 2); + PCKEV_D2_SH(in1, in0, in3, in2, in0, in2); + ST_SH2(in0, in2, output, 8); +} + +void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride, + int32_t tx_type) { + v8i16 in0, in1, in2, in3; + + LD_SH4(input, stride, in0, in1, in2, in3); + + /* fdct4 pre-process */ + { + v8i16 temp, mask; + v16i8 zero = { 0 }; + v16i8 one = __msa_ldi_b(1); + + mask = (v8i16)__msa_sldi_b(zero, one, 15); + SLLI_4V(in0, in1, in2, in3, 4); + temp = __msa_ceqi_h(in0, 0); + temp = (v8i16)__msa_xori_b((v16u8)temp, 255); + temp = mask & temp; + in0 += temp; 
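    /* Annotation, not part of this change: the masked add above appears to
     * bump the first (DC) element of in0 by 1 when that element is non-zero,
     * after the whole block has been scaled up by 16 via SLLI_4V(). This
     * mirrors the step in the scalar reference vp9_fdct4x4_c() that adds 1 to
     * a non-zero first input sample, so the vectorized first pass rounds the
     * DC term the same way as the C code. */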
+ } + + switch (tx_type) { + case DCT_DCT: + VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); + break; + case ADST_DCT: + VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); + break; + case DCT_ADST: + VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); + break; + case ADST_ADST: + VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); + break; + default: + assert(0); + break; + } + + TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); + ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3); + SRA_4V(in0, in1, in2, in3, 2); + PCKEV_D2_SH(in1, in0, in3, in2, in0, in2); + ST_SH2(in0, in2, output, 8); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c new file mode 100644 index 00000000000..68e65723abb --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "vp9/encoder/mips/msa/vp9_fdct_msa.h" + +void vp9_fdct8x8_msa(const int16_t *input, int16_t *output, + int32_t src_stride) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + + LD_SH8(input, src_stride, in0, in1, in2, in3, in4, in5, in6, in7); + SLLI_4V(in0, in1, in2, in3, 2); + SLLI_4V(in4, in5, in6, in7, 2); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7); + ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8); +} + +void vp9_fdct8x8_1_msa(const int16_t *input, int16_t *out, int32_t stride) { + out[0] = VP9_LD_HADD(input, stride); + out[1] = 0; +} + +void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride, + int32_t tx_type) { + v8i16 in0, in1, in2, in3, in4, in5, in6, in7; + + LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7); + SLLI_4V(in0, in1, in2, in3, 2); + SLLI_4V(in4, in5, in6, in7, 2); + + switch (tx_type) { + case DCT_DCT: + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + break; + case ADST_DCT: + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + break; + case DCT_ADST: + VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + break; + case ADST_ADST: + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + break; + default: + assert(0); + break; + } + + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + VP9_SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7); + ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h new file mode 100644 index 00000000000..ad66576b6e5 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ +#define VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ + +#include "vpx_ports/mem.h" +#include "vp9/common/vp9_idct.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +#define VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) { \ + v8i16 k0_m = __msa_fill_h(cnst0); \ + v4i32 s0_m, s1_m, s2_m, s3_m; \ + \ + s0_m = (v4i32)__msa_fill_h(cnst1); \ + k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \ + \ + ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \ + ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \ + DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \ + SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ + out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ + \ + DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \ + SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ + out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ +} + +#define VP9_DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, \ + dst0, dst1, dst2, dst3) { \ + v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \ + v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \ + \ + DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \ + tp0_m, tp2_m, tp3_m, tp4_m); \ + DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \ + tp5_m, tp6_m, tp7_m, tp8_m); \ + BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \ + BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \ + SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \ + SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \ + PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \ + dst0, dst1, dst2, dst3); \ +} + +#define VP9_DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) ({ \ + v8i16 dst_m; \ + v4i32 tp0_m, tp1_m; \ + \ + DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \ + SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \ + dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \ + \ + dst_m; \ +}) + +#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \ + v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \ + v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \ + cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \ + v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \ + cospi_24_64, -cospi_24_64, 0, 0 }; \ + \ + SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \ + cnst2_m = -cnst0_m; \ + ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ + SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \ + cnst4_m = -cnst2_m; \ + ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ + \ + ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \ + ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \ + VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst1_m, cnst2_m, cnst3_m, in7, in0, \ + in4, in3); \ + \ + SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \ + cnst2_m = -cnst0_m; \ + ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ + SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \ + cnst4_m = -cnst2_m; \ + ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ + \ + ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ + ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ + \ + VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst1_m, cnst2_m, cnst3_m, in5, in2, \ + in6, in1); \ + BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \ + out7 = -s0_m; \ + 
out0 = s1_m; \ + \ + SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \ + \ + ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \ + cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + cnst1_m = cnst0_m; \ + \ + ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \ + ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ + VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst2_m, cnst3_m, cnst1_m, out1, out6, \ + s0_m, s1_m); \ + \ + SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + \ + ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ + ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \ + out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ + out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ + out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ + \ + out1 = -out1; \ + out3 = -out3; \ + out5 = -out5; \ +} + +#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1) { \ + v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \ + v8i16 madd_s0_m, madd_s1_m; \ + \ + ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \ + c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m); \ + SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \ +} + +#define VP9_MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \ + out0, out1, out2, out3) { \ + v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ + v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \ + \ + ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \ + ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ + cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ + m4_m, m5_m, tmp3_m, tmp2_m); \ + SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ + cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ + m4_m, m5_m, tmp3_m, tmp2_m); \ + SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ +} + +#define VP9_LD_HADD(psrc, stride) ({ \ + v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m; \ + v4i32 vec_w_m; \ + \ + LD_SH4((psrc), stride, in0_m, in1_m, in2_m, in3_m); \ + ADD2(in0_m, in1_m, in2_m, in3_m, in0_m, in2_m); \ + LD_SH4(((psrc) + 4 * stride), stride, in4_m, in5_m, in6_m, in7_m); \ + ADD4(in4_m, in5_m, in6_m, in7_m, in0_m, in2_m, in4_m, in6_m, \ + in4_m, in6_m, in0_m, in4_m); \ + in0_m += in4_m; \ + \ + vec_w_m = __msa_hadd_s_w(in0_m, in0_m); \ + HADD_SW_S32(vec_w_m); \ +}) + +#define VP9_FDCT_POSTPROC_2V_NEG_H(vec0, vec1) { \ + v8i16 tp0_m, tp1_m; \ + v8i16 one_m = __msa_ldi_h(1); \ + \ + tp0_m = __msa_clti_s_h(vec0, 0); \ + tp1_m = __msa_clti_s_h(vec1, 0); \ + vec0 += 1; \ + vec1 += 1; \ + tp0_m = one_m & tp0_m; \ + tp1_m = one_m & tp1_m; \ + vec0 += tp0_m; \ + vec1 += tp1_m; \ + vec0 >>= 2; \ + vec1 >>= 2; \ +} + +#define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) { \ + v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \ + v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \ + v4i32 vec4_m, vec5_m, vec6_m, vec7_m; \ + v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \ + cospi_24_64, -cospi_8_64, 0, 0, 0 }; \ + \ + BUTTERFLY_4(in0, in1, 
in2, in3, vec0_m, vec1_m, vec2_m, vec3_m); \ + ILVR_H2_SH(vec1_m, vec0_m, vec3_m, vec2_m, vec0_m, vec2_m); \ + SPLATI_H2_SH(coeff_m, 0, 1, cnst0_m, cnst1_m); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + vec5_m = __msa_dotp_s_w(vec0_m, cnst1_m); \ + \ + SPLATI_H2_SH(coeff_m, 4, 3, cnst2_m, cnst3_m); \ + cnst2_m = __msa_ilvev_h(cnst3_m, cnst2_m); \ + vec7_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ + \ + vec4_m = __msa_dotp_s_w(vec0_m, cnst0_m); \ + cnst2_m = __msa_splati_h(coeff_m, 2); \ + cnst2_m = __msa_ilvev_h(cnst2_m, cnst3_m); \ + vec6_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ + \ + SRARI_W4_SW(vec4_m, vec5_m, vec6_m, vec7_m, DCT_CONST_BITS); \ + PCKEV_H4_SH(vec4_m, vec4_m, vec5_m, vec5_m, vec6_m, vec6_m, \ + vec7_m, vec7_m, out0, out2, out1, out3); \ +} + +#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) { \ + v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \ + v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \ + \ + UNPCK_R_SH_SW(in0, in0_r_m); \ + UNPCK_R_SH_SW(in1, in1_r_m); \ + UNPCK_R_SH_SW(in2, in2_r_m); \ + UNPCK_R_SH_SW(in3, in3_r_m); \ + \ + constant_m = __msa_fill_w(sinpi_4_9); \ + MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \ + \ + constant_m = __msa_fill_w(sinpi_1_9); \ + s0_m += in0_r_m * constant_m; \ + s1_m -= in1_r_m * constant_m; \ + \ + constant_m = __msa_fill_w(sinpi_2_9); \ + s0_m += in1_r_m * constant_m; \ + s1_m += in3_r_m * constant_m; \ + \ + s2_m = in0_r_m + in1_r_m - in3_r_m; \ + \ + constant_m = __msa_fill_w(sinpi_3_9); \ + MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \ + \ + in0_r_m = s0_m + s3_m; \ + s2_m = s1_m - s3_m; \ + s3_m = s1_m - s0_m + s3_m; \ + \ + SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \ + PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, \ + s3_m, s3_m, out0, out1, out2, out3); \ +} + +#define VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m; \ + v8i16 s7_m, x0_m, x1_m, x2_m, x3_m; \ + v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \ + cospi_24_64, cospi_4_64, cospi_28_64, \ + cospi_12_64, cospi_20_64 }; \ + \ + /* FDCT stage1 */ \ + BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \ + s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m); \ + BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ + ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ + ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ + SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ + x1_m = __msa_ilvev_h(x1_m, x0_m); \ + out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ + \ + SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ + x2_m = -x2_m; \ + x2_m = __msa_ilvev_h(x3_m, x2_m); \ + out6 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ + \ + out0 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ + x2_m = __msa_splati_h(coeff_m, 2); \ + x2_m = __msa_ilvev_h(x2_m, x3_m); \ + out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ + \ + /* stage2 */ \ + ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ + \ + s6_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ + s5_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ + \ + /* stage3 */ \ + BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ + \ + /* stage4 */ \ + ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ + ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ + \ + SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ + x1_m = __msa_ilvev_h(x0_m, x1_m); \ + out1 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ + \ + SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ + x2_m = 
__msa_ilvev_h(x3_m, x2_m); \ + out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ + \ + x1_m = __msa_splati_h(coeff_m, 5); \ + x0_m = -x0_m; \ + x0_m = __msa_ilvev_h(x1_m, x0_m); \ + out7 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ + \ + x2_m = __msa_splati_h(coeff_m, 6); \ + x3_m = -x3_m; \ + x2_m = __msa_ilvev_h(x2_m, x3_m); \ + out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ +} + +#define VP9_SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7) { \ + v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ + \ + SRLI_H4_SH(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m, 15); \ + SRLI_H4_SH(in4, in5, in6, in7, vec4_m, vec5_m, vec6_m, vec7_m, 15); \ + AVE_SH4_SH(vec0_m, in0, vec1_m, in1, vec2_m, in2, vec3_m, in3, \ + in0, in1, in2, in3); \ + AVE_SH4_SH(vec4_m, in4, vec5_m, in5, vec6_m, in6, vec7_m, in7, \ + in4, in5, in6, in7); \ +} + +#define VP9_FDCT8x16_EVEN(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ + v8i16 x0_m, x1_m, x2_m, x3_m; \ + v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ + cospi_4_64, cospi_28_64, cospi_12_64, cospi_20_64 }; \ + \ + /* FDCT stage1 */ \ + BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \ + s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m); \ + BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ + ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ + ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ + SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ + x1_m = __msa_ilvev_h(x1_m, x0_m); \ + out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ + \ + SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ + x2_m = -x2_m; \ + x2_m = __msa_ilvev_h(x3_m, x2_m); \ + out6 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ + \ + out0 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ + x2_m = __msa_splati_h(coeff_m, 2); \ + x2_m = __msa_ilvev_h(x2_m, x3_m); \ + out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ + \ + /* stage2 */ \ + ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ + \ + s6_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ + s5_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ + \ + /* stage3 */ \ + BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ + \ + /* stage4 */ \ + ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ + ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ + \ + SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ + x1_m = __msa_ilvev_h(x0_m, x1_m); \ + out1 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ + \ + SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ + x2_m = __msa_ilvev_h(x3_m, x2_m); \ + out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ + \ + x1_m = __msa_splati_h(coeff_m, 5); \ + x0_m = -x0_m; \ + x0_m = __msa_ilvev_h(x1_m, x0_m); \ + out7 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ + \ + x2_m = __msa_splati_h(coeff_m, 6); \ + x3_m = -x3_m; \ + x2_m = __msa_ilvev_h(x2_m, x3_m); \ + out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ +} + +#define VP9_FDCT8x16_ODD(input0, input1, input2, input3, \ + input4, input5, input6, input7, \ + out1, out3, out5, out7, \ + out9, out11, out13, out15) { \ + v8i16 stp21_m, stp22_m, stp23_m, stp24_m, stp25_m, stp26_m; \ + v8i16 stp30_m, stp31_m, stp32_m, stp33_m, stp34_m, stp35_m; \ + v8i16 stp36_m, stp37_m, vec0_m, vec1_m; \ + v8i16 vec2_m, vec3_m, vec4_m, vec5_m, vec6_m; \ + v8i16 cnst0_m, cnst1_m, cnst4_m, cnst5_m; \ + v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \ + cospi_24_64, -cospi_8_64, -cospi_24_64, \ + 
cospi_12_64, cospi_20_64 }; \ + v8i16 coeff1_m = { cospi_2_64, cospi_30_64, cospi_14_64, \ + cospi_18_64, cospi_10_64, cospi_22_64, \ + cospi_6_64, cospi_26_64 }; \ + v8i16 coeff2_m = { -cospi_2_64, -cospi_10_64, -cospi_18_64, \ + -cospi_26_64, 0, 0, 0, 0 }; \ + \ + /* stp 1 */ \ + ILVL_H2_SH(input2, input5, input3, input4, vec2_m, vec4_m); \ + ILVR_H2_SH(input2, input5, input3, input4, vec3_m, vec5_m); \ + \ + cnst4_m = __msa_splati_h(coeff_m, 0); \ + stp25_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst4_m); \ + \ + cnst5_m = __msa_splati_h(coeff_m, 1); \ + cnst5_m = __msa_ilvev_h(cnst5_m, cnst4_m); \ + stp22_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst5_m); \ + stp24_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst4_m); \ + stp23_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst5_m); \ + \ + /* stp2 */ \ + BUTTERFLY_4(input0, input1, stp22_m, stp23_m, \ + stp30_m, stp31_m, stp32_m, stp33_m); \ + BUTTERFLY_4(input7, input6, stp25_m, stp24_m, \ + stp37_m, stp36_m, stp35_m, stp34_m); \ + \ + ILVL_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec2_m, vec4_m); \ + ILVR_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec3_m, vec5_m); \ + \ + SPLATI_H2_SH(coeff_m, 2, 3, cnst0_m, cnst1_m); \ + cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ + stp26_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ + \ + cnst0_m = __msa_splati_h(coeff_m, 4); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + stp21_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ + \ + SPLATI_H2_SH(coeff_m, 5, 2, cnst0_m, cnst1_m); \ + cnst1_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ + stp25_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ + \ + cnst0_m = __msa_splati_h(coeff_m, 3); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + stp22_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ + \ + /* stp4 */ \ + BUTTERFLY_4(stp30_m, stp37_m, stp26_m, stp21_m, \ + vec6_m, vec2_m, vec4_m, vec5_m); \ + BUTTERFLY_4(stp33_m, stp34_m, stp25_m, stp22_m, \ + stp21_m, stp23_m, stp24_m, stp31_m); \ + \ + ILVRL_H2_SH(vec2_m, vec6_m, vec1_m, vec0_m); \ + SPLATI_H2_SH(coeff1_m, 0, 1, cnst0_m, cnst1_m); \ + cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ + \ + out1 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + \ + cnst0_m = __msa_splati_h(coeff2_m, 0); \ + cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + out15 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + \ + ILVRL_H2_SH(vec4_m, vec5_m, vec1_m, vec0_m); \ + SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + \ + out9 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ + \ + cnst1_m = __msa_splati_h(coeff2_m, 2); \ + cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ + out7 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + \ + ILVRL_H2_SH(stp23_m, stp21_m, vec1_m, vec0_m); \ + SPLATI_H2_SH(coeff1_m, 4, 5, cnst0_m, cnst1_m); \ + cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ + out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + \ + cnst0_m = __msa_splati_h(coeff2_m, 1); \ + cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + out11 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + \ + ILVRL_H2_SH(stp24_m, stp31_m, vec1_m, vec0_m); \ + SPLATI_H2_SH(coeff1_m, 6, 7, cnst0_m, cnst1_m); \ + cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ + \ + out13 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ + \ + cnst1_m = __msa_splati_h(coeff2_m, 3); \ + cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ + out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ +} + +#define 
VP9_FDCT32_POSTPROC_NEG_W(vec) { \ + v4i32 temp_m; \ + v4i32 one_m = __msa_ldi_w(1); \ + \ + temp_m = __msa_clti_s_w(vec, 0); \ + vec += 1; \ + temp_m = one_m & temp_m; \ + vec += temp_m; \ + vec >>= 2; \ +} + +#define VP9_FDCT32_POSTPROC_2V_POS_H(vec0, vec1) { \ + v8i16 tp0_m, tp1_m; \ + v8i16 one = __msa_ldi_h(1); \ + \ + tp0_m = __msa_clei_s_h(vec0, 0); \ + tp1_m = __msa_clei_s_h(vec1, 0); \ + tp0_m = (v8i16)__msa_xori_b((v16u8)tp0_m, 255); \ + tp1_m = (v8i16)__msa_xori_b((v16u8)tp1_m, 255); \ + vec0 += 1; \ + vec1 += 1; \ + tp0_m = one & tp0_m; \ + tp1_m = one & tp1_m; \ + vec0 += tp0_m; \ + vec1 += tp1_m; \ + vec0 >>= 2; \ + vec1 >>= 2; \ +} + +#define VP9_DOTP_CONST_PAIR_W(reg0_left, reg1_left, reg0_right, \ + reg1_right, const0, const1, \ + out0, out1, out2, out3) { \ + v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ + v2i64 tp0_m, tp1_m, tp2_m, tp3_m; \ + v4i32 k0_m = __msa_fill_w((int32_t) const0); \ + \ + s0_m = __msa_fill_w((int32_t) const1); \ + k0_m = __msa_ilvev_w(s0_m, k0_m); \ + \ + ILVRL_W2_SW(-reg1_left, reg0_left, s1_m, s0_m); \ + ILVRL_W2_SW(reg0_left, reg1_left, s3_m, s2_m); \ + ILVRL_W2_SW(-reg1_right, reg0_right, s5_m, s4_m); \ + ILVRL_W2_SW(reg0_right, reg1_right, s7_m, s6_m); \ + \ + DOTP_SW2_SD(s0_m, s1_m, k0_m, k0_m, tp0_m, tp1_m); \ + DOTP_SW2_SD(s4_m, s5_m, k0_m, k0_m, tp2_m, tp3_m); \ + tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ + tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ + tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ + tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ + out0 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ + out1 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ + \ + DOTP_SW2_SD(s2_m, s3_m, k0_m, k0_m, tp0_m, tp1_m); \ + DOTP_SW2_SD(s6_m, s7_m, k0_m, k0_m, tp2_m, tp3_m); \ + tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ + tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ + tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ + tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ + out2 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ + out3 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ +} +#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_subtract_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_subtract_msa.c new file mode 100644 index 00000000000..1b8b694ce38 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_subtract_msa.c @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +static void sub_blk_4x4_msa(const uint8_t *src_ptr, int32_t src_stride, + const uint8_t *pred_ptr, int32_t pred_stride, + int16_t *diff_ptr, int32_t diff_stride) { + uint32_t src0, src1, src2, src3; + uint32_t pred0, pred1, pred2, pred3; + v16i8 src = { 0 }; + v16i8 pred = { 0 }; + v16u8 src_l0, src_l1; + v8i16 diff0, diff1; + + LW4(src_ptr, src_stride, src0, src1, src2, src3); + LW4(pred_ptr, pred_stride, pred0, pred1, pred2, pred3); + INSERT_W4_SB(src0, src1, src2, src3, src); + INSERT_W4_SB(pred0, pred1, pred2, pred3, pred); + ILVRL_B2_UB(src, pred, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST8x4_UB(diff0, diff1, diff_ptr, (2 * diff_stride)); +} + +static void sub_blk_8x8_msa(const uint8_t *src_ptr, int32_t src_stride, + const uint8_t *pred_ptr, int32_t pred_stride, + int16_t *diff_ptr, int32_t diff_stride) { + uint32_t loop_cnt; + uint64_t src0, src1, pred0, pred1; + v16i8 src = { 0 }; + v16i8 pred = { 0 }; + v16u8 src_l0, src_l1; + v8i16 diff0, diff1; + + for (loop_cnt = 4; loop_cnt--;) { + LD2(src_ptr, src_stride, src0, src1); + src_ptr += (2 * src_stride); + LD2(pred_ptr, pred_stride, pred0, pred1); + pred_ptr += (2 * pred_stride); + + INSERT_D2_SB(src0, src1, src); + INSERT_D2_SB(pred0, pred1, pred); + ILVRL_B2_UB(src, pred, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff_ptr, diff_stride); + diff_ptr += (2 * diff_stride); + } +} + +static void sub_blk_16x16_msa(const uint8_t *src, int32_t src_stride, + const uint8_t *pred, int32_t pred_stride, + int16_t *diff, int32_t diff_stride) { + int8_t count; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7; + v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7; + v16u8 src_l0, src_l1; + v8i16 diff0, diff1; + + for (count = 2; count--;) { + LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); + src += (8 * src_stride); + + LD_SB8(pred, pred_stride, + pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7); + pred += (8 * pred_stride); + + ILVRL_B2_UB(src0, pred0, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + + ILVRL_B2_UB(src1, pred1, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + + ILVRL_B2_UB(src2, pred2, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + + ILVRL_B2_UB(src3, pred3, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + + ILVRL_B2_UB(src4, pred4, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + + ILVRL_B2_UB(src5, pred5, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + + ILVRL_B2_UB(src6, pred6, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + + ILVRL_B2_UB(src7, pred7, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + diff += diff_stride; + } +} + +static void sub_blk_32x32_msa(const uint8_t *src, int32_t src_stride, + const uint8_t *pred, int32_t pred_stride, + int16_t *diff, int32_t diff_stride) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7; + v16i8 pred0, pred1, pred2, pred3, 
pred4, pred5, pred6, pred7; + v16u8 src_l0, src_l1; + v8i16 diff0, diff1; + + for (loop_cnt = 8; loop_cnt--;) { + LD_SB2(src, 16, src0, src1); + src += src_stride; + LD_SB2(src, 16, src2, src3); + src += src_stride; + LD_SB2(src, 16, src4, src5); + src += src_stride; + LD_SB2(src, 16, src6, src7); + src += src_stride; + + LD_SB2(pred, 16, pred0, pred1); + pred += pred_stride; + LD_SB2(pred, 16, pred2, pred3); + pred += pred_stride; + LD_SB2(pred, 16, pred4, pred5); + pred += pred_stride; + LD_SB2(pred, 16, pred6, pred7); + pred += pred_stride; + + ILVRL_B2_UB(src0, pred0, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + ILVRL_B2_UB(src1, pred1, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 16, 8); + diff += diff_stride; + + ILVRL_B2_UB(src2, pred2, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + ILVRL_B2_UB(src3, pred3, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 16, 8); + diff += diff_stride; + + ILVRL_B2_UB(src4, pred4, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + ILVRL_B2_UB(src5, pred5, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 16, 8); + diff += diff_stride; + + ILVRL_B2_UB(src6, pred6, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + ILVRL_B2_UB(src7, pred7, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 16, 8); + diff += diff_stride; + } +} + +static void sub_blk_64x64_msa(const uint8_t *src, int32_t src_stride, + const uint8_t *pred, int32_t pred_stride, + int16_t *diff, int32_t diff_stride) { + uint32_t loop_cnt; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7; + v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7; + v16u8 src_l0, src_l1; + v8i16 diff0, diff1; + + for (loop_cnt = 32; loop_cnt--;) { + LD_SB4(src, 16, src0, src1, src2, src3); + src += src_stride; + LD_SB4(src, 16, src4, src5, src6, src7); + src += src_stride; + + LD_SB4(pred, 16, pred0, pred1, pred2, pred3); + pred += pred_stride; + LD_SB4(pred, 16, pred4, pred5, pred6, pred7); + pred += pred_stride; + + ILVRL_B2_UB(src0, pred0, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + ILVRL_B2_UB(src1, pred1, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 16, 8); + ILVRL_B2_UB(src2, pred2, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 32, 8); + ILVRL_B2_UB(src3, pred3, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 48, 8); + diff += diff_stride; + + ILVRL_B2_UB(src4, pred4, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff, 8); + ILVRL_B2_UB(src5, pred5, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 16, 8); + ILVRL_B2_UB(src6, pred6, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 32, 8); + ILVRL_B2_UB(src7, pred7, src_l0, src_l1); + HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); + ST_SH2(diff0, diff1, diff + 48, 8); + diff += diff_stride; + } +} + +void vp9_subtract_block_msa(int32_t rows, int32_t cols, + int16_t *diff_ptr, ptrdiff_t diff_stride, + const uint8_t *src_ptr, ptrdiff_t src_stride, + const uint8_t *pred_ptr, 
ptrdiff_t pred_stride) { + if (rows == cols) { + switch (rows) { + case 4: + sub_blk_4x4_msa(src_ptr, src_stride, pred_ptr, pred_stride, + diff_ptr, diff_stride); + break; + case 8: + sub_blk_8x8_msa(src_ptr, src_stride, pred_ptr, pred_stride, + diff_ptr, diff_stride); + break; + case 16: + sub_blk_16x16_msa(src_ptr, src_stride, pred_ptr, pred_stride, + diff_ptr, diff_stride); + break; + case 32: + sub_blk_32x32_msa(src_ptr, src_stride, pred_ptr, pred_stride, + diff_ptr, diff_stride); + break; + case 64: + sub_blk_64x64_msa(src_ptr, src_stride, pred_ptr, pred_stride, + diff_ptr, diff_stride); + break; + default: + vp9_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, + src_stride, pred_ptr, pred_stride); + break; + } + } else { + vp9_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride, + pred_ptr, pred_stride); + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c new file mode 100644 index 00000000000..4053bffaef2 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vp9/common/mips/msa/vp9_macros_msa.h" + +static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, + uint32_t stride, + uint8_t *frm2_ptr, + int32_t filt_sth, + int32_t filt_wgt, + uint32_t *acc, + uint16_t *cnt) { + uint32_t row; + uint64_t f0, f1, f2, f3; + v16i8 frm2, frm1 = { 0 }; + v16i8 frm4, frm3 = { 0 }; + v16u8 frm_r, frm_l; + v8i16 frm2_r, frm2_l; + v8i16 diff0, diff1, mod0_h, mod1_h; + v4i32 cnst3, cnst16, filt_wt, strength; + v4i32 mod0_w, mod1_w, mod2_w, mod3_w; + v4i32 diff0_r, diff0_l, diff1_r, diff1_l; + v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll; + v4i32 acc0, acc1, acc2, acc3; + v8i16 cnt0, cnt1; + + filt_wt = __msa_fill_w(filt_wgt); + strength = __msa_fill_w(filt_sth); + cnst3 = __msa_ldi_w(3); + cnst16 = __msa_ldi_w(16); + + for (row = 2; row--;) { + LD4(frm1_ptr, stride, f0, f1, f2, f3); + frm1_ptr += (4 * stride); + + LD_SB2(frm2_ptr, 16, frm2, frm4); + frm2_ptr += 32; + + LD_SW2(acc, 4, acc0, acc1); + LD_SW2(acc + 8, 4, acc2, acc3); + LD_SH2(cnt, 8, cnt0, cnt1); + + INSERT_D2_SB(f0, f1, frm1); + INSERT_D2_SB(f2, f3, frm3); + ILVRL_B2_UB(frm1, frm2, frm_r, frm_l); + HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); + UNPCK_SH_SW(diff0, diff0_r, diff0_l); + UNPCK_SH_SW(diff1, diff1_r, diff1_l); + MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, + diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, + mod0_w, mod1_w, mod2_w, mod3_w); + SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); + + diff0_r = (mod0_w < cnst16); + diff0_l = (mod1_w < cnst16); + diff1_r = (mod2_w < cnst16); + diff1_l = (mod3_w < cnst16); + + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, + mod0_w, mod1_w, mod2_w, mod3_w); + + mod0_w = diff0_r & mod0_w; + mod1_w = diff0_l & mod1_w; + mod2_w = diff1_r & mod2_w; + mod3_w = diff1_l & mod3_w; + + MUL4(mod0_w, 
filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, + mod0_w, mod1_w, mod2_w, mod3_w); + PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); + ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); + ST_SH2(mod0_h, mod1_h, cnt, 8); + cnt += 16; + + UNPCK_UB_SH(frm2, frm2_r, frm2_l); + UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); + UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); + MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, + mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, + mod0_w, mod1_w, mod2_w, mod3_w); + + ST_SW2(mod0_w, mod1_w, acc, 4); + acc += 8; + ST_SW2(mod2_w, mod3_w, acc, 4); + acc += 8; + + LD_SW2(acc, 4, acc0, acc1); + LD_SW2(acc + 8, 4, acc2, acc3); + LD_SH2(cnt, 8, cnt0, cnt1); + + ILVRL_B2_UB(frm3, frm4, frm_r, frm_l); + HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); + UNPCK_SH_SW(diff0, diff0_r, diff0_l); + UNPCK_SH_SW(diff1, diff1_r, diff1_l); + MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, + diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, + mod0_w, mod1_w, mod2_w, mod3_w); + SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); + + diff0_r = (mod0_w < cnst16); + diff0_l = (mod1_w < cnst16); + diff1_r = (mod2_w < cnst16); + diff1_l = (mod3_w < cnst16); + + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, + mod0_w, mod1_w, mod2_w, mod3_w); + + mod0_w = diff0_r & mod0_w; + mod1_w = diff0_l & mod1_w; + mod2_w = diff1_r & mod2_w; + mod3_w = diff1_l & mod3_w; + + MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, + mod0_w, mod1_w, mod2_w, mod3_w); + PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); + ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); + ST_SH2(mod0_h, mod1_h, cnt, 8); + cnt += 16; + UNPCK_UB_SH(frm4, frm2_r, frm2_l); + UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); + UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); + MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, + mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, + mod0_w, mod1_w, mod2_w, mod3_w); + + ST_SW2(mod0_w, mod1_w, acc, 4); + acc += 8; + ST_SW2(mod2_w, mod3_w, acc, 4); + acc += 8; + } +} + +static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, + uint32_t stride, + uint8_t *frm2_ptr, + int32_t filt_sth, + int32_t filt_wgt, + uint32_t *acc, + uint16_t *cnt) { + uint32_t row; + v16i8 frm1, frm2, frm3, frm4; + v16u8 frm_r, frm_l; + v16i8 zero = { 0 }; + v8u16 frm2_r, frm2_l; + v8i16 diff0, diff1, mod0_h, mod1_h; + v4i32 cnst3, cnst16, filt_wt, strength; + v4i32 mod0_w, mod1_w, mod2_w, mod3_w; + v4i32 diff0_r, diff0_l, diff1_r, diff1_l; + v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll; + v4i32 acc0, acc1, acc2, acc3; + v8i16 cnt0, cnt1; + + filt_wt = __msa_fill_w(filt_wgt); + strength = __msa_fill_w(filt_sth); + cnst3 = __msa_ldi_w(3); + cnst16 = __msa_ldi_w(16); + + for (row = 8; row--;) { + LD_SB2(frm1_ptr, stride, frm1, frm3); + frm1_ptr += stride; + + LD_SB2(frm2_ptr, 16, frm2, frm4); + frm2_ptr += 16; + + LD_SW2(acc, 4, acc0, acc1); + LD_SW2(acc, 4, acc2, acc3); + LD_SH2(cnt, 8, cnt0, cnt1); + + ILVRL_B2_UB(frm1, frm2, frm_r, frm_l); + HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); + UNPCK_SH_SW(diff0, diff0_r, diff0_l); + UNPCK_SH_SW(diff1, diff1_r, diff1_l); + MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, + mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, + mod0_w, 
mod1_w, mod2_w, mod3_w); + SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); + + diff0_r = (mod0_w < cnst16); + diff0_l = (mod1_w < cnst16); + diff1_r = (mod2_w < cnst16); + diff1_l = (mod3_w < cnst16); + + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, + mod0_w, mod1_w, mod2_w, mod3_w); + + mod0_w = diff0_r & mod0_w; + mod1_w = diff0_l & mod1_w; + mod2_w = diff1_r & mod2_w; + mod3_w = diff1_l & mod3_w; + + MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, + mod0_w, mod1_w, mod2_w, mod3_w); + PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); + ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); + ST_SH2(mod0_h, mod1_h, cnt, 8); + cnt += 16; + + ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l); + UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); + UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); + MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, + mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, + mod0_w, mod1_w, mod2_w, mod3_w); + + ST_SW2(mod0_w, mod1_w, acc, 4); + acc += 8; + ST_SW2(mod2_w, mod3_w, acc, 4); + acc += 8; + + LD_SW2(acc, 4, acc0, acc1); + LD_SW2(acc + 8, 4, acc2, acc3); + LD_SH2(cnt, 8, cnt0, cnt1); + + ILVRL_B2_UB(frm3, frm4, frm_r, frm_l); + HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); + UNPCK_SH_SW(diff0, diff0_r, diff0_l); + UNPCK_SH_SW(diff1, diff1_r, diff1_l); + MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, + mod0_w, mod1_w, mod2_w, mod3_w); + MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, + mod0_w, mod1_w, mod2_w, mod3_w); + SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); + + diff0_r = (mod0_w < cnst16); + diff0_l = (mod1_w < cnst16); + diff1_r = (mod2_w < cnst16); + diff1_l = (mod3_w < cnst16); + + SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, + mod0_w, mod1_w, mod2_w, mod3_w); + + mod0_w = diff0_r & mod0_w; + mod1_w = diff0_l & mod1_w; + mod2_w = diff1_r & mod2_w; + mod3_w = diff1_l & mod3_w; + + MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, + mod0_w, mod1_w, mod2_w, mod3_w); + PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); + ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); + ST_SH2(mod0_h, mod1_h, cnt, 8); + cnt += 16; + + ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l); + UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); + UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); + MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, + mod0_w, mod1_w, mod2_w, mod3_w); + ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, + mod0_w, mod1_w, mod2_w, mod3_w); + ST_SW2(mod0_w, mod1_w, acc, 4); + acc += 8; + ST_SW2(mod2_w, mod3_w, acc, 4); + acc += 8; + + frm1_ptr += stride; + frm2_ptr += 16; + } +} + +void vp9_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride, + uint8_t *frame2_ptr, uint32_t blk_w, + uint32_t blk_h, int32_t strength, + int32_t filt_wgt, uint32_t *accu, + uint16_t *cnt) { + if (8 == (blk_w * blk_h)) { + temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, + strength, filt_wgt, accu, cnt); + } else if (16 == (blk_w * blk_h)) { + temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, + strength, filt_wgt, accu, cnt); + } else { + vp9_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h, + strength, filt_wgt, accu, cnt); + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c index 
9622ba1d67e..bea7653d2a7 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -11,6 +11,7 @@ #include <limits.h> #include <math.h> +#include "vp9/encoder/vp9_aq_complexity.h" #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/common/vp9_seg_common.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h index c0dce6c5b7d..e9acb1ca504 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h @@ -16,6 +16,8 @@ extern "C" { #endif +#include "vp9/common/vp9_enums.h" + struct VP9_COMP; struct macroblock; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index 24b427df575..6270bf45293 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -39,6 +39,8 @@ struct CYCLIC_REFRESH { int rdmult; // Cyclic refresh map. signed char *map; + // Map of the last q a block was coded at. + uint8_t *last_coded_q_map; // Thresholds applied to the projected rate/distortion of the coding block, // when deciding whether block should be refreshed. int64_t thresh_rate_sb; @@ -48,12 +50,14 @@ struct CYCLIC_REFRESH { int16_t motion_thresh; // Rate target ratio to set q delta. double rate_ratio_qdelta; + // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2. + int rate_boost_fac; double low_content_avg; - int qindex_delta_seg1; - int qindex_delta_seg2; + int qindex_delta[3]; }; CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { + size_t last_coded_q_map_size; CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); if (cr == NULL) return NULL; @@ -63,12 +67,21 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { vpx_free(cr); return NULL; } + last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map); + cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size); + if (cr->last_coded_q_map == NULL) { + vpx_free(cr); + return NULL; + } + assert(MAXQ <= 255); + memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); return cr; } void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { vpx_free(cr->map); + vpx_free(cr->last_coded_q_map); vpx_free(cr); } @@ -116,7 +129,8 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, else if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb && is_inter_block(mbmi) && - mbmi->mv[0].as_int == 0) + mbmi->mv[0].as_int == 0 && + cr->rate_boost_fac > 10) // More aggressive delta-q for bigger blocks with zero motion. 
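      /* Annotation, not part of this change: rate_boost_fac is the new field
       * initialised in vp9_cyclic_refresh_update_parameters() further down in
       * this patch -- 17 by default, 10 for small (<= 352x288), low-bitrate
       * streams -- so the "rate_boost_fac > 10" test above effectively
       * disables the extra BOOST2 delta-q for large zero-motion inter blocks
       * in that low-resolution, low-bitrate case. */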
return CR_SEGMENT_ID_BOOST2; else @@ -157,11 +171,11 @@ int vp9_cyclic_refresh_estimate_bits_at_q(const VP9_COMP *cpi, correction_factor, cm->bit_depth) + weight_segment1 * vp9_estimate_bits_at_q(cm->frame_type, - cm->base_qindex + cr->qindex_delta_seg1, mbs, + cm->base_qindex + cr->qindex_delta[1], mbs, correction_factor, cm->bit_depth) + weight_segment2 * vp9_estimate_bits_at_q(cm->frame_type, - cm->base_qindex + cr->qindex_delta_seg2, mbs, + cm->base_qindex + cr->qindex_delta[2], mbs, correction_factor, cm->bit_depth)); return estimated_bits; } @@ -246,9 +260,16 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, // copy mbmi->segment_id into global segmentation map. for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) { - cr->map[block_index + y * cm->mi_cols + x] = new_map_value; - cpi->segmentation_map[block_index + y * cm->mi_cols + x] = - mbmi->segment_id; + int map_offset = block_index + y * cm->mi_cols + x; + cr->map[map_offset] = new_map_value; + cpi->segmentation_map[map_offset] = mbmi->segment_id; + // Inter skip blocks were clearly not coded at the current qindex, so + // don't update the map for them. For cases where motion is non-zero or + // the reference frame isn't the previous frame, the previous value in + // the map for this spatial location is not entirely correct. + if (!is_inter_block(mbmi) || !skip) + cr->last_coded_q_map[map_offset] = clamp( + cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ); } } @@ -357,7 +378,7 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) { // 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock. // Blocks labeled as BOOST1 may later get set to BOOST2 (during the // encoding of the superblock). -void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) { +static void cyclic_refresh_update_map(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; unsigned char *const seg_map = cpi->segmentation_map; @@ -382,6 +403,10 @@ void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) { int sb_col_index = i - sb_row_index * sb_cols; int mi_row = sb_row_index * MI_BLOCK_SIZE; int mi_col = sb_col_index * MI_BLOCK_SIZE; + int qindex_thresh = + cpi->oxcf.content == VP9E_CONTENT_SCREEN + ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) + : 0; assert(mi_row >= 0 && mi_row < cm->mi_rows); assert(mi_col >= 0 && mi_col < cm->mi_cols); bl_index = mi_row * cm->mi_cols + mi_col; @@ -397,7 +422,8 @@ void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) { // for possible boost/refresh (segment 1). The segment id may get // reset to 0 later if block gets coded anything other than ZEROMV. if (cr->map[bl_index2] == 0) { - sum_map++; + if (cr->last_coded_q_map[bl_index2] > qindex_thresh) + sum_map++; } else if (cr->map[bl_index2] < 0) { cr->map[bl_index2]++; } @@ -420,18 +446,30 @@ void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) { cr->sb_index = i; } -// Set/update global/frame level cyclic refresh parameters. +// Set cyclic refresh parameters. void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { const RATE_CONTROL *const rc = &cpi->rc; + const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; cr->percent_refresh = 10; + cr->max_qdelta_perc = 50; + cr->time_for_refresh = 0; // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4) - // periods of the refresh cycle, after a key frame. This corresponds to ~40 - // frames with cr->percent_refresh = 10. 
- if (rc->frames_since_key < 40) + // periods of the refresh cycle, after a key frame. + if (rc->frames_since_key < 4 * cr->percent_refresh) cr->rate_ratio_qdelta = 3.0; else cr->rate_ratio_qdelta = 2.0; + // Adjust some parameters for low resolutions at low bitrates. + if (cm->width <= 352 && + cm->height <= 288 && + rc->avg_frame_bandwidth < 3400) { + cr->motion_thresh = 4; + cr->rate_boost_fac = 10; + } else { + cr->motion_thresh = 32; + cr->rate_boost_fac = 17; + } } // Setup cyclic background refresh: set delta q and segmentation map. @@ -452,16 +490,17 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { unsigned char *const seg_map = cpi->segmentation_map; memset(seg_map, 0, cm->mi_rows * cm->mi_cols); vp9_disable_segmentation(&cm->seg); - if (cm->frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) { + memset(cr->last_coded_q_map, MAXQ, + cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); cr->sb_index = 0; + } return; } else { int qindex_delta = 0; int qindex2; const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth); vp9_clear_system_state(); - cr->max_qdelta_perc = 50; - cr->time_for_refresh = 0; // Set rate threshold to some multiple (set to 2 for now) of the target // rate (target is given by sb64_target_rate and scaled by 256). cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2; @@ -469,7 +508,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { // q will not exceed 457, so (q * q) is within 32bit; see: // vp9_convert_qindex_to_q(), vp9_ac_quant(), ac_qlookup*[]. cr->thresh_dist_sb = ((int64_t)(q * q)) << 2; - cr->motion_thresh = 32; + // Set up segmentation. // Clear down the segment map. vp9_enable_segmentation(&cm->seg); @@ -493,7 +532,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { // Set the q delta for segment BOOST1. qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta); - cr->qindex_delta_seg1 = qindex_delta; + cr->qindex_delta[1] = qindex_delta; // Compute rd-mult for segment BOOST1. qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); @@ -503,17 +542,24 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta); // Set a more aggressive (higher) q delta for segment BOOST2. - qindex_delta = compute_deltaq(cpi, cm->base_qindex, - MIN(CR_MAX_RATE_TARGET_RATIO, - CR_BOOST2_FAC * cr->rate_ratio_qdelta)); - cr->qindex_delta_seg2 = qindex_delta; + qindex_delta = compute_deltaq( + cpi, cm->base_qindex, MIN(CR_MAX_RATE_TARGET_RATIO, + 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta)); + cr->qindex_delta[2] = qindex_delta; vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta); // Update the segmentation and refresh map. 
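// Editorial aside, not part of the patch: the new last_coded_q_map ties the
// hunks above together. It is allocated next to cr->map and filled with MAXQ
// (and re-filled on key frames, as in the hunk just above); every coded block
// that is not an inter skip block records
//   clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ)
// in vp9_cyclic_refresh_update_segment(); and cyclic_refresh_update_map()
// below only counts a block as a refresh candidate when its last coded q is
// above qindex_thresh, which is non-zero only for VP9E_CONTENT_SCREEN, so
// screen-content blocks already coded at or below the BOOST2 q are skipped.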
- vp9_cyclic_refresh_update_map(cpi); + cyclic_refresh_update_map(cpi); } } int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { return cr->rdmult; } + +void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + memset(cr->map, 0, cm->mi_rows * cm->mi_cols); + cr->sb_index = 0; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h index 21f114b5e59..29d2a91bc69 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -27,9 +27,6 @@ extern "C" { // Maximum rate target ratio for setting segment delta-qp. #define CR_MAX_RATE_TARGET_RATIO 4.0 -// Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2. -#define CR_BOOST2_FAC 1.7 - struct VP9_COMP; struct CYCLIC_REFRESH; @@ -78,6 +75,8 @@ void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); +void vp9_cyclic_refresh_reset_resize(struct VP9_COMP *const cpi); + static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) { return segment_id == CR_SEGMENT_ID_BOOST1 || segment_id == CR_SEGMENT_ID_BOOST2; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c index be6f7e4ee53..f072717f1d9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c @@ -10,6 +10,8 @@ #include <math.h> +#include "vpx_ports/mem.h" + #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" @@ -80,6 +82,61 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { } } +/* TODO(agrange, paulwilkins): The block_variance calls the unoptimized versions + * of variance() and highbd_8_variance(). It should not. 
+ */ +static void aq_variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void aq_highbd_variance64(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, uint64_t *sse, uint64_t *sum) { + int i, j; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + a += a_stride; + b += b_stride; + } +} + +static void aq_highbd_8_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; +} +#endif // CONFIG_VP9_HIGHBITDEPTH static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { @@ -96,18 +153,18 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, int avg; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_variance(x->plane[0].src.buf, x->plane[0].src.stride, - CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, - &sse, &avg); + aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride, + CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, + &sse, &avg); sse >>= 2 * (xd->bd - 8); avg >>= (xd->bd - 8); } else { - variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, bw, bh, &sse, &avg); + aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, bw, bh, &sse, &avg); } #else - variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, bw, bh, &sse, &avg); + aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, bw, bh, &sse, &avg); #endif // CONFIG_VP9_HIGHBITDEPTH var = sse - (((int64_t)avg * avg) / (bw * bh)); return (256 * var) / (bw * bh); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c index 95b13bb7718..3ef3882d280 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c @@ -7,6 +7,7 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" #include "vpx_ports/mem.h" @@ -28,6 +29,8 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) { return (sum + 8) >> 4; } +// src_diff: first pass, 9 bit, dynamic range [-255, 255] +// second pass, 12 bit, dynamic range [-2040, 2040] static void hadamard_col8(const int16_t *src_diff, int src_stride, int16_t *coeff) { int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; @@ -64,15 +67,18 @@ void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride, int16_t buffer[64]; int16_t *tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { - hadamard_col8(src_diff, src_stride, tmp_buf); + hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit + // dynamic range [-255, 255] tmp_buf += 8; ++src_diff; } tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { - hadamard_col8(tmp_buf, 8, coeff); - coeff += 8; + hadamard_col8(tmp_buf, 8, coeff); // tmp_buf: 12 bit + // dynamic range [-2040, 2040] + coeff += 8; // coeff: 15 bit + // dynamic range [-16320, 16320] ++tmp_buf; } } @@ -82,58 +88,68 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { + // src_diff: 9 bit, dynamic range [-255, 255] int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); } + // coeff: 15 bit, dynamic range [-16320, 16320] for (idx = 0; idx < 64; ++idx) { int16_t a0 = coeff[0]; int16_t a1 = coeff[64]; int16_t a2 = coeff[128]; int16_t a3 = coeff[192]; - int16_t b0 = a0 + a1; - int16_t b1 = a0 - a1; - int16_t b2 = a2 + a3; - int16_t b3 = a2 - a3; + int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] + int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range + int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320] + int16_t b3 = (a2 - a3) >> 1; - coeff[0] = (b0 + b2) >> 1; - coeff[64] = (b1 + b3) >> 1; - coeff[128] = (b0 - b2) >> 1; - coeff[192] = (b1 - b3) >> 1; + coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] + coeff[64] = b1 + b3; + coeff[128] = b0 - b2; + coeff[192] = b1 - b3; ++coeff; } } +// coeff: 16 bits, dynamic range [-32640, 32640]. +// length: value range {16, 64, 256, 1024}. int16_t vp9_satd_c(const int16_t *coeff, int length) { int i; int satd = 0; for (i = 0; i < length; ++i) satd += abs(coeff[i]); + // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024] return (int16_t)satd; } // Integer projection onto row vectors. -void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, +// height: value range {16, 32, 64}. +void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref, const int ref_stride, const int height) { int idx; - const int norm_factor = MAX(8, height >> 1); + const int norm_factor = height >> 1; for (idx = 0; idx < 16; ++idx) { int i; hbuf[idx] = 0; + // hbuf[idx]: 14 bit, dynamic range [0, 16320]. for (i = 0; i < height; ++i) hbuf[idx] += ref[i * ref_stride]; + // hbuf[idx]: 9 bit, dynamic range [0, 510]. hbuf[idx] /= norm_factor; ++ref; } } +// width: value range {16, 32, 64}. 
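// Editorial aside, not part of the patch: the new range comments in this file
// can be checked by hand. In vp9_int_pro_row_c with height = 64, each
// hbuf[idx] accumulates at most 64 * 255 = 16320 (14 bits) and is then divided
// by norm_factor = 64 >> 1 = 32, giving at most 510 (9 bits); since height is
// one of {16, 32, 64}, height >> 1 is always >= 8, so the old
// MAX(8, height >> 1) clamp was redundant. Likewise, moving the >> 1 into the
// first butterfly stage of vp9_hadamard_16x16_c keeps every partial sum within
// +/-32640: the old ordering formed b0 + b2 with b0 and b2 already up to
// +/-32640, i.e. up to +/-65280, which is harmless in C thanks to integer
// promotion but would overflow a signed 16-bit SIMD lane, presumably the
// motivation for the change.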
int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) { int idx; int16_t sum = 0; + // sum: 14 bit, dynamic range [0, 16320] for (idx = 0; idx < width; ++idx) sum += ref[idx]; return sum; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c index d20e067669f..1ebdd066bfe 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c @@ -93,7 +93,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm, static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd, int segment_id, const MODE_INFO *mi, vp9_writer *w) { - if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { const int skip = mi->mbmi.skip; @@ -207,10 +207,10 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd, // If segment level coding of this signal is disabled... // or the segment allows multiple reference frame options - if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { assert(!is_compound); assert(mbmi->ref_frame[0] == - vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME)); + get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME)); } else { // does the feature use compound prediction or not // (if not specified at the frame/segment level) @@ -264,7 +264,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, skip = write_skip(cm, xd, segment_id, mi, w); - if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd)); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && @@ -293,7 +293,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, write_ref_frames(cm, xd, w); // If segment skip is not enabled code the mode. 
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { + if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_8X8) { write_inter_mode(w, mode, inter_probs); } @@ -403,7 +403,7 @@ static void write_partition(const VP9_COMMON *const cm, int hbs, int mi_row, int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) { const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - const vp9_prob *const probs = get_partition_probs(cm, ctx); + const vp9_prob *const probs = xd->partition_probs[ctx]; const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; @@ -481,9 +481,12 @@ static void write_modes_sb(VP9_COMP *cpi, static void write_modes(VP9_COMP *cpi, const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) { + const VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; int mi_row, mi_col; + set_partition_probs(cm, xd); + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { vp9_zero(xd->left_seg_context); @@ -787,10 +790,10 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd, for (i = 0; i < MAX_SEGMENTS; i++) { for (j = 0; j < SEG_LVL_MAX; j++) { - const int active = vp9_segfeature_active(seg, i, j); + const int active = segfeature_active(seg, i, j); vp9_wb_write_bit(wb, active); if (active) { - const int data = vp9_get_segdata(seg, i, j); + const int data = get_segdata(seg, i, j); const int data_max = vp9_seg_feature_data_max(j); if (vp9_is_segfeature_signed(j)) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c index 9e6ca3d594c..414d2bb1502 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c index 0e74784e9b6..b4059752ee7 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -13,8 +13,10 @@ #include <stdio.h> #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vp9/common/vp9_common.h" @@ -360,7 +362,7 @@ static void get_variance(var *v) { ((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count); } -void sum_2_variances(const var *a, const var *b, var *r) { +static void sum_2_variances(const var *a, const var *b, var *r) { assert(a->log2_count == b->log2_count); fill_variance(a->sum_square_error + b->sum_square_error, a->sum_error + b->sum_error, a->log2_count + 1, r); @@ -368,6 +370,7 @@ void sum_2_variances(const var *a, const var *b, var *r) { static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { variance_node node; + memset(&node, 0, sizeof(node)); tree_to_node(data, bsize, &node); sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); @@ -462,46 +465,55 @@ static int set_vt_partitioning(VP9_COMP *cpi, return 0; } -void 
vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) { +// Set the variance split thresholds for following the block sizes: +// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, +// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is +// currently only used on key frame. +static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { + VP9_COMMON *const cm = &cpi->common; + const int is_key_frame = (cm->frame_type == KEY_FRAME); + const int threshold_multiplier = is_key_frame ? 20 : 1; + const int64_t threshold_base = (int64_t)(threshold_multiplier * + cpi->y_dequant[q][1]); + if (is_key_frame) { + thresholds[0] = threshold_base; + thresholds[1] = threshold_base >> 2; + thresholds[2] = threshold_base >> 2; + thresholds[3] = threshold_base << 2; + } else { + thresholds[1] = threshold_base; + if (cm->width <= 352 && cm->height <= 288) { + thresholds[0] = threshold_base >> 2; + thresholds[2] = threshold_base << 3; + } else { + thresholds[0] = threshold_base; + thresholds[1] = (5 * threshold_base) >> 2; + if (cm->width >= 1920 && cm->height >= 1080) + thresholds[1] = (7 * threshold_base) >> 2; + thresholds[2] = threshold_base << cpi->oxcf.speed; + } + } +} + +void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) { + VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; + const int is_key_frame = (cm->frame_type == KEY_FRAME); if (sf->partition_search_type != VAR_BASED_PARTITION && sf->partition_search_type != REFERENCE_PARTITION) { return; } else { - VP9_COMMON *const cm = &cpi->common; - const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int threshold_multiplier = is_key_frame ? 20 : 1; - const int64_t threshold_base = (int64_t)(threshold_multiplier * - cpi->y_dequant[q][1]); - - // TODO(marpan): Allow 4x4 partitions for inter-frames. - // use_4x4_partition = (variance4x4downsample[i2 + j] == 1); - // If 4x4 partition is not used, then 8x8 partition will be selected - // if variance of 16x16 block is very high, so use larger threshold - // for 16x16 (threshold_bsize_min) in that case. - - // Array index: 0 - threshold_64x64; 1 - threshold_32x32; - // 2 - threshold_16x16; 3 - vbp_threshold_8x8; + set_vbp_thresholds(cpi, cpi->vbp_thresholds, q); + // The thresholds below are not changed locally. if (is_key_frame) { - cpi->vbp_thresholds[0] = threshold_base; - cpi->vbp_thresholds[1] = threshold_base >> 2; - cpi->vbp_thresholds[2] = threshold_base >> 2; - cpi->vbp_thresholds[3] = threshold_base << 2; cpi->vbp_threshold_sad = 0; cpi->vbp_bsize_min = BLOCK_8X8; } else { - cpi->vbp_thresholds[1] = threshold_base; - if (cm->width <= 352 && cm->height <= 288) { - cpi->vbp_thresholds[0] = threshold_base >> 2; - cpi->vbp_thresholds[2] = threshold_base << 3; + if (cm->width <= 352 && cm->height <= 288) cpi->vbp_threshold_sad = 100; - } else { - cpi->vbp_thresholds[0] = threshold_base; - cpi->vbp_thresholds[1] = (5 * threshold_base) >> 2; - cpi->vbp_thresholds[2] = threshold_base << cpi->oxcf.speed; + else cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ? 
(cpi->y_dequant[q][1] << 1) : 1000; - } cpi->vbp_bsize_min = BLOCK_16X16; } cpi->vbp_threshold_minmax = 15 + (q >> 3); @@ -550,23 +562,6 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, return (minmax_max - minmax_min); } -static void modify_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { - VP9_COMMON *const cm = &cpi->common; - const int64_t threshold_base = (int64_t)(cpi->y_dequant[q][1]); - - // Array index: 0 - threshold_64x64; 1 - threshold_32x32; - // 2 - threshold_16x16; 3 - vbp_threshold_8x8; - thresholds[1] = threshold_base; - if (cm->width <= 352 && cm->height <= 288) { - thresholds[0] = threshold_base >> 2; - thresholds[2] = threshold_base << 3; - } else { - thresholds[0] = threshold_base; - thresholds[1] = (5 * threshold_base) >> 2; - thresholds[2] = threshold_base << cpi->oxcf.speed; - } -} - static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, int dp, int x8_idx, int y8_idx, v8x8 *vst, #if CONFIG_VP9_HIGHBITDEPTH @@ -679,7 +674,7 @@ static int choose_partitioning(VP9_COMP *cpi, if (cyclic_refresh_segment_id_boosted(segment_id)) { int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); - modify_vbp_thresholds(cpi, thresholds, q); + set_vbp_thresholds(cpi, thresholds, q); } } @@ -693,17 +688,28 @@ static int choose_partitioning(VP9_COMP *cpi, s = x->plane[0].src.buf; sp = x->plane[0].src.stride; - if (!is_key_frame) { + if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) && + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + // In the case of spatial/temporal scalable coding, the assumption here is + // that the temporal reference frame will always be of type LAST_FRAME. + // TODO(marpan): If that assumption is broken, we need to revisit this code. MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; unsigned int uv_sad; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + const YV12_BUFFER_CONFIG *yv12_g = NULL; unsigned int y_sad, y_sad_g; const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); assert(yv12 != NULL); + + if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) { + // For now, GOLDEN will not be used for non-zero spatial layers, since + // it may not be a temporal reference. + yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + } + if (yv12_g && yv12_g != yv12) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); @@ -723,7 +729,7 @@ static int choose_partitioning(VP9_COMP *cpi, mbmi->mv[0].as_int = 0; mbmi->interp_filter = BILINEAR; - y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize); + y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); if (y_sad_g < y_sad) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); @@ -838,7 +844,9 @@ static int choose_partitioning(VP9_COMP *cpi, } } } - if (is_key_frame || (low_res && + // TODO(marpan): There is an issue with variance based on 4x4 average in + // svc mode, don't allow it for now. 
+ if (is_key_frame || (low_res && !cpi->use_svc && vt.split[i].split[j].part_variances.none.variance > (thresholds[1] << 1))) { force_split[split_index] = 0; @@ -1043,7 +1051,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, if (!output_enabled) return; - if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + if (!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { for (i = 0; i < TX_MODES; i++) rdc->tx_select_diff[i] += ctx->tx_rd_diff[i]; } @@ -1240,7 +1248,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); } else { if (bsize >= BLOCK_8X8) { - if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, ctx, best_rd); else @@ -1283,8 +1291,8 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) { if (!frame_is_intra_only(cm)) { FRAME_COUNTS *const counts = td->counts; const int inter_block = is_inter_block(mbmi); - const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id, - SEG_LVL_REF_FRAME); + const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id, + SEG_LVL_REF_FRAME); if (!seg_ref_active) { counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++; // If the segment reference feature is enabled we have only a single @@ -1309,7 +1317,7 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) { } } if (inter_block && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]]; if (bsize >= BLOCK_8X8) { const PREDICTION_MODE mode = mbmi->mode; @@ -1556,7 +1564,7 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } -const struct { +static const struct { int row; int col; } coord_lookup[16] = { @@ -2212,66 +2220,6 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, *max_block_size = max_size; } -static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, - MACROBLOCKD *const xd, - int mi_row, int mi_col, - BLOCK_SIZE *min_block_size, - BLOCK_SIZE *max_block_size) { - VP9_COMMON *const cm = &cpi->common; - MODE_INFO **mi_8x8 = xd->mi; - const int left_in_image = xd->left_available && mi_8x8[-1]; - const int above_in_image = xd->up_available && mi_8x8[-xd->mi_stride]; - int row8x8_remaining = tile->mi_row_end - mi_row; - int col8x8_remaining = tile->mi_col_end - mi_col; - int bh, bw; - BLOCK_SIZE min_size = BLOCK_32X32; - BLOCK_SIZE max_size = BLOCK_8X8; - int bsl = mi_width_log2_lookup[BLOCK_64X64]; - const int search_range_ctrl = (((mi_row + mi_col) >> bsl) + - get_chessboard_index(cm->current_video_frame)) & 0x1; - // Trap case where we do not have a prediction. - if (search_range_ctrl && - (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) { - int block; - MODE_INFO **mi; - BLOCK_SIZE sb_type; - - // Find the min and max partition sizes used in the left SB64. - if (left_in_image) { - MODE_INFO *cur_mi; - mi = &mi_8x8[-1]; - for (block = 0; block < MI_BLOCK_SIZE; ++block) { - cur_mi = mi[block * xd->mi_stride]; - sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0; - min_size = MIN(min_size, sb_type); - max_size = MAX(max_size, sb_type); - } - } - // Find the min and max partition sizes used in the above SB64. 
- if (above_in_image) { - mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; - for (block = 0; block < MI_BLOCK_SIZE; ++block) { - sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0; - min_size = MIN(min_size, sb_type); - max_size = MAX(max_size, sb_type); - } - } - - min_size = min_partition_size[min_size]; - max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, - &bh, &bw); - min_size = MIN(min_size, max_size); - min_size = MAX(min_size, BLOCK_8X8); - max_size = MIN(max_size, BLOCK_32X32); - } else { - min_size = BLOCK_8X8; - max_size = BLOCK_32X32; - } - - *min_block_size = min_size; - *max_block_size = max_size; -} - // TODO(jingning) refactor functions setting partition search range static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -2841,7 +2789,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); - seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP); + seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); } x->source_variance = UINT_MAX; @@ -2901,7 +2849,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; - if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { + if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { return 0; } else { return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) @@ -2937,8 +2885,7 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) { if (xd->lossless) return ONLY_4X4; if (cpi->common.frame_type == KEY_FRAME && - cpi->sf.use_nonrd_pick_mode && - cpi->sf.partition_search_type == VAR_BASED_PARTITION) + cpi->sf.use_nonrd_pick_mode) return ALLOW_16X16; if (cpi->sf.tx_size_search_method == USE_LARGESTALL) return ALLOW_32X32; @@ -2976,7 +2923,7 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); - else if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + else if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize); else if (bsize >= BLOCK_8X8) vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, @@ -3591,7 +3538,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); - seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP); + seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); if (seg_skip) { partition_search_type = FIXED_PARTITION; } @@ -3624,15 +3571,26 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && xd->mi[0]->mbmi.segment_id) { - x->max_partition_size = BLOCK_64X64; + // Use lower max_partition_size for low resoultions. 
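// Editorial aside, not part of the patch: this is the same CIF-and-below gate
// (width <= 352 && height <= 288) that the patch adds in
// vp9_cyclic_refresh_update_parameters() (together with a bitrate check) and
// in set_vbp_thresholds() / vp9_set_variance_partition_thresholds(), so small
// streams get a lower motion threshold and no BOOST2 rate boost, different
// variance-partition and SAD thresholds, and here a BLOCK_32X32 cap on the
// partition size for blocks in a cyclic-refresh segment.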
+ if (cm->width <= 352 && cm->height <= 288) + x->max_partition_size = BLOCK_32X32; + else + x->max_partition_size = BLOCK_64X64; x->min_partition_size = BLOCK_8X8; nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, td->pc_root); } else { choose_partitioning(cpi, tile_info, x, mi_row, mi_col); - nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, 1, &dummy_rdc, td->pc_root); + // TODO(marpan): Seems like nonrd_select_partition does not support + // 4x4 partition. Since 4x4 is used on key frame, use this switch + // for now. + if (cm->frame_type == KEY_FRAME) + nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rdc, td->pc_root); + else + nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rdc, td->pc_root); } break; @@ -3671,15 +3629,15 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: - vp9_highbd_get16x16var(src, src_stride, last_src, last_stride, + vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); break; case VPX_BITS_10: - vp9_highbd_10_get16x16var(src, src_stride, last_src, last_stride, + vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); break; case VPX_BITS_12: - vp9_highbd_12_get16x16var(src, src_stride, last_src, last_stride, + vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); break; default: @@ -3688,11 +3646,11 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { return -1; } } else { - vp9_get16x16var(src, src_stride, last_src, last_stride, + vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); } #else - vp9_get16x16var(src, src_stride, last_src, last_stride, + vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); #endif // CONFIG_VP9_HIGHBITDEPTH var16->var = var16->sse - @@ -3778,9 +3736,13 @@ void vp9_init_tile_data(VP9_COMP *cpi) { TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; int tile_tok = 0; - if (cpi->tile_data == NULL) { + if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { + if (cpi->tile_data != NULL) + vpx_free(cpi->tile_data); CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data))); + cpi->allocated_tiles = tile_cols * tile_rows; + for (tile_row = 0; tile_row < tile_rows; ++tile_row) for (tile_col = 0; tile_col < tile_cols; ++tile_col) { TileDataEnc *tile_data = @@ -4149,8 +4111,8 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, MODE_INFO **mi_8x8 = xd->mi; MODE_INFO *mi = mi_8x8[0]; MB_MODE_INFO *mbmi = &mi->mbmi; - const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id, - SEG_LVL_SKIP); + const int seg_skip = segfeature_active(&cm->seg, mbmi->segment_id, + SEG_LVL_SKIP); const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h index 1acde0283e9..6aaa56463b0 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h @@ -40,7 +40,7 @@ void vp9_init_tile_data(struct VP9_COMP *cpi); void 
vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col); -void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q); +void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c index 9a4e61ec882..2829365e533 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c @@ -13,10 +13,12 @@ #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_encodemb.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c index af73fcbdcc3..22759983ffa 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c @@ -22,7 +22,7 @@ static struct vp9_token mv_class_encodings[MV_CLASSES]; static struct vp9_token mv_fp_encodings[MV_FP_SIZE]; static struct vp9_token mv_class0_encodings[CLASS0_SIZE]; -void vp9_entropy_mv_init() { +void vp9_entropy_mv_init(void) { vp9_tokens_from_tree(mv_joint_encodings, vp9_mv_joint_tree); vp9_tokens_from_tree(mv_class_encodings, vp9_mv_class_tree); vp9_tokens_from_tree(mv_class0_encodings, vp9_mv_class0_tree); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h index 0ae473749ab..e8ee5ab6641 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h @@ -18,7 +18,7 @@ extern "C" { #endif -void vp9_entropy_mv_init(); +void vp9_entropy_mv_init(void); void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w, nmv_context_counts *const counts); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c index a1018adb88f..d708b83197b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c @@ -17,6 +17,7 @@ #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx/internal/vpx_psnr.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vp9/common/vp9_alloccommon.h" @@ -111,7 +112,7 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { // Mark all inactive blocks as active. Other segmentation features may be set // so memset cannot be used, instead only inactive blocks should be reset. 
-void vp9_suppress_active_map(VP9_COMP *cpi) { +static void suppress_active_map(VP9_COMP *cpi) { unsigned char *const seg_map = cpi->segmentation_map; int i; if (cpi->active_map.enabled || cpi->active_map.update) @@ -120,7 +121,7 @@ void vp9_suppress_active_map(VP9_COMP *cpi) { seg_map[i] = AM_SEGMENT_ID_ACTIVE; } -void vp9_apply_active_map(VP9_COMP *cpi) { +static void apply_active_map(VP9_COMP *cpi) { struct segmentation *const seg = &cpi->common.seg; unsigned char *const seg_map = cpi->segmentation_map; const unsigned char *const active_map = cpi->active_map.map; @@ -685,6 +686,29 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vp9_setup_pc_tree(&cpi->common, &cpi->td); } +void vp9_new_framerate(VP9_COMP *cpi, double framerate) { + cpi->framerate = framerate < 0.1 ? 30 : framerate; + vp9_rc_update_framerate(cpi); +} + +static void set_tile_limits(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + + int min_log2_tile_cols, max_log2_tile_cols; + vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); + + if (is_two_pass_svc(cpi) && + (cpi->svc.encode_empty_frame_state == ENCODING || + cpi->svc.number_spatial_layers > 1)) { + cm->log2_tile_cols = 0; + cm->log2_tile_rows = 0; + } else { + cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns, + min_log2_tile_cols, max_log2_tile_cols); + cm->log2_tile_rows = cpi->oxcf.tile_rows; + } +} + static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; @@ -693,6 +717,8 @@ static void update_frame_size(VP9_COMP *cpi) { vp9_init_context_buffers(cm); init_macroblockd(cm, xd); + set_tile_limits(cpi); + if (is_two_pass_svc(cpi)) { if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, cm->width, cm->height, @@ -707,27 +733,6 @@ static void update_frame_size(VP9_COMP *cpi) { } } -void vp9_new_framerate(VP9_COMP *cpi, double framerate) { - cpi->framerate = framerate < 0.1 ? 30 : framerate; - vp9_rc_update_framerate(cpi); -} - -static void set_tile_limits(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - - int min_log2_tile_cols, max_log2_tile_cols; - vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - - if (is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING) { - cm->log2_tile_cols = 0; - cm->log2_tile_rows = 0; - } else { - cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns, - min_log2_tile_cols, max_log2_tile_cols); - cm->log2_tile_rows = cpi->oxcf.tile_rows; - } -} - static void init_buffer_indices(VP9_COMP *cpi) { cpi->lst_fb_idx = 0; cpi->gld_fb_idx = 1; @@ -751,6 +756,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cm->height = oxcf->height; vp9_alloc_compressor_data(cpi); + cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode; + // Single thread case: use counts in common. 
cpi->td.counts = &cm->counts; @@ -995,7 +1002,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8, vpx_highbd_sad32x16_avg_bits8, - vp9_highbd_variance32x16, + vpx_highbd_8_variance32x16, vp9_highbd_sub_pixel_variance32x16, vp9_highbd_sub_pixel_avg_variance32x16, NULL, @@ -1005,7 +1012,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8, vpx_highbd_sad16x32_avg_bits8, - vp9_highbd_variance16x32, + vpx_highbd_8_variance16x32, vp9_highbd_sub_pixel_variance16x32, vp9_highbd_sub_pixel_avg_variance16x32, NULL, @@ -1015,7 +1022,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8, vpx_highbd_sad64x32_avg_bits8, - vp9_highbd_variance64x32, + vpx_highbd_8_variance64x32, vp9_highbd_sub_pixel_variance64x32, vp9_highbd_sub_pixel_avg_variance64x32, NULL, @@ -1025,7 +1032,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8, vpx_highbd_sad32x64_avg_bits8, - vp9_highbd_variance32x64, + vpx_highbd_8_variance32x64, vp9_highbd_sub_pixel_variance32x64, vp9_highbd_sub_pixel_avg_variance32x64, NULL, @@ -1035,7 +1042,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8, vpx_highbd_sad32x32_avg_bits8, - vp9_highbd_variance32x32, + vpx_highbd_8_variance32x32, vp9_highbd_sub_pixel_variance32x32, vp9_highbd_sub_pixel_avg_variance32x32, vpx_highbd_sad32x32x3_bits8, @@ -1045,7 +1052,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8, vpx_highbd_sad64x64_avg_bits8, - vp9_highbd_variance64x64, + vpx_highbd_8_variance64x64, vp9_highbd_sub_pixel_variance64x64, vp9_highbd_sub_pixel_avg_variance64x64, vpx_highbd_sad64x64x3_bits8, @@ -1055,7 +1062,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8, vpx_highbd_sad16x16_avg_bits8, - vp9_highbd_variance16x16, + vpx_highbd_8_variance16x16, vp9_highbd_sub_pixel_variance16x16, vp9_highbd_sub_pixel_avg_variance16x16, vpx_highbd_sad16x16x3_bits8, @@ -1065,7 +1072,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8, vpx_highbd_sad16x8_avg_bits8, - vp9_highbd_variance16x8, + vpx_highbd_8_variance16x8, vp9_highbd_sub_pixel_variance16x8, vp9_highbd_sub_pixel_avg_variance16x8, vpx_highbd_sad16x8x3_bits8, @@ -1075,7 +1082,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8, vpx_highbd_sad8x16_avg_bits8, - vp9_highbd_variance8x16, + vpx_highbd_8_variance8x16, vp9_highbd_sub_pixel_variance8x16, vp9_highbd_sub_pixel_avg_variance8x16, vpx_highbd_sad8x16x3_bits8, @@ -1085,7 +1092,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8, - vp9_highbd_variance8x8, + vpx_highbd_8_variance8x8, vp9_highbd_sub_pixel_variance8x8, vp9_highbd_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x3_bits8, @@ -1095,7 +1102,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8, - vp9_highbd_variance8x4, + vpx_highbd_8_variance8x4, vp9_highbd_sub_pixel_variance8x4, vp9_highbd_sub_pixel_avg_variance8x4, NULL, @@ -1105,7 +1112,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8, - 
vp9_highbd_variance4x8, + vpx_highbd_8_variance4x8, vp9_highbd_sub_pixel_variance4x8, vp9_highbd_sub_pixel_avg_variance4x8, NULL, @@ -1115,7 +1122,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8, - vp9_highbd_variance4x4, + vpx_highbd_8_variance4x4, vp9_highbd_sub_pixel_variance4x4, vp9_highbd_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x3_bits8, @@ -1127,7 +1134,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10, vpx_highbd_sad32x16_avg_bits10, - vp9_highbd_10_variance32x16, + vpx_highbd_10_variance32x16, vp9_highbd_10_sub_pixel_variance32x16, vp9_highbd_10_sub_pixel_avg_variance32x16, NULL, @@ -1137,7 +1144,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10, vpx_highbd_sad16x32_avg_bits10, - vp9_highbd_10_variance16x32, + vpx_highbd_10_variance16x32, vp9_highbd_10_sub_pixel_variance16x32, vp9_highbd_10_sub_pixel_avg_variance16x32, NULL, @@ -1147,7 +1154,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10, vpx_highbd_sad64x32_avg_bits10, - vp9_highbd_10_variance64x32, + vpx_highbd_10_variance64x32, vp9_highbd_10_sub_pixel_variance64x32, vp9_highbd_10_sub_pixel_avg_variance64x32, NULL, @@ -1157,7 +1164,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10, vpx_highbd_sad32x64_avg_bits10, - vp9_highbd_10_variance32x64, + vpx_highbd_10_variance32x64, vp9_highbd_10_sub_pixel_variance32x64, vp9_highbd_10_sub_pixel_avg_variance32x64, NULL, @@ -1167,7 +1174,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10, vpx_highbd_sad32x32_avg_bits10, - vp9_highbd_10_variance32x32, + vpx_highbd_10_variance32x32, vp9_highbd_10_sub_pixel_variance32x32, vp9_highbd_10_sub_pixel_avg_variance32x32, vpx_highbd_sad32x32x3_bits10, @@ -1177,7 +1184,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10, vpx_highbd_sad64x64_avg_bits10, - vp9_highbd_10_variance64x64, + vpx_highbd_10_variance64x64, vp9_highbd_10_sub_pixel_variance64x64, vp9_highbd_10_sub_pixel_avg_variance64x64, vpx_highbd_sad64x64x3_bits10, @@ -1187,7 +1194,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10, vpx_highbd_sad16x16_avg_bits10, - vp9_highbd_10_variance16x16, + vpx_highbd_10_variance16x16, vp9_highbd_10_sub_pixel_variance16x16, vp9_highbd_10_sub_pixel_avg_variance16x16, vpx_highbd_sad16x16x3_bits10, @@ -1197,7 +1204,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10, vpx_highbd_sad16x8_avg_bits10, - vp9_highbd_10_variance16x8, + vpx_highbd_10_variance16x8, vp9_highbd_10_sub_pixel_variance16x8, vp9_highbd_10_sub_pixel_avg_variance16x8, vpx_highbd_sad16x8x3_bits10, @@ -1207,7 +1214,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10, vpx_highbd_sad8x16_avg_bits10, - vp9_highbd_10_variance8x16, + vpx_highbd_10_variance8x16, vp9_highbd_10_sub_pixel_variance8x16, vp9_highbd_10_sub_pixel_avg_variance8x16, vpx_highbd_sad8x16x3_bits10, @@ -1217,7 +1224,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10, vpx_highbd_sad8x8_avg_bits10, - vp9_highbd_10_variance8x8, + vpx_highbd_10_variance8x8, 
vp9_highbd_10_sub_pixel_variance8x8, vp9_highbd_10_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x3_bits10, @@ -1227,7 +1234,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10, vpx_highbd_sad8x4_avg_bits10, - vp9_highbd_10_variance8x4, + vpx_highbd_10_variance8x4, vp9_highbd_10_sub_pixel_variance8x4, vp9_highbd_10_sub_pixel_avg_variance8x4, NULL, @@ -1237,7 +1244,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10, vpx_highbd_sad4x8_avg_bits10, - vp9_highbd_10_variance4x8, + vpx_highbd_10_variance4x8, vp9_highbd_10_sub_pixel_variance4x8, vp9_highbd_10_sub_pixel_avg_variance4x8, NULL, @@ -1247,7 +1254,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10, vpx_highbd_sad4x4_avg_bits10, - vp9_highbd_10_variance4x4, + vpx_highbd_10_variance4x4, vp9_highbd_10_sub_pixel_variance4x4, vp9_highbd_10_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x3_bits10, @@ -1259,7 +1266,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12, vpx_highbd_sad32x16_avg_bits12, - vp9_highbd_12_variance32x16, + vpx_highbd_12_variance32x16, vp9_highbd_12_sub_pixel_variance32x16, vp9_highbd_12_sub_pixel_avg_variance32x16, NULL, @@ -1269,7 +1276,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12, vpx_highbd_sad16x32_avg_bits12, - vp9_highbd_12_variance16x32, + vpx_highbd_12_variance16x32, vp9_highbd_12_sub_pixel_variance16x32, vp9_highbd_12_sub_pixel_avg_variance16x32, NULL, @@ -1279,7 +1286,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12, vpx_highbd_sad64x32_avg_bits12, - vp9_highbd_12_variance64x32, + vpx_highbd_12_variance64x32, vp9_highbd_12_sub_pixel_variance64x32, vp9_highbd_12_sub_pixel_avg_variance64x32, NULL, @@ -1289,7 +1296,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12, vpx_highbd_sad32x64_avg_bits12, - vp9_highbd_12_variance32x64, + vpx_highbd_12_variance32x64, vp9_highbd_12_sub_pixel_variance32x64, vp9_highbd_12_sub_pixel_avg_variance32x64, NULL, @@ -1299,7 +1306,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12, vpx_highbd_sad32x32_avg_bits12, - vp9_highbd_12_variance32x32, + vpx_highbd_12_variance32x32, vp9_highbd_12_sub_pixel_variance32x32, vp9_highbd_12_sub_pixel_avg_variance32x32, vpx_highbd_sad32x32x3_bits12, @@ -1309,7 +1316,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12, vpx_highbd_sad64x64_avg_bits12, - vp9_highbd_12_variance64x64, + vpx_highbd_12_variance64x64, vp9_highbd_12_sub_pixel_variance64x64, vp9_highbd_12_sub_pixel_avg_variance64x64, vpx_highbd_sad64x64x3_bits12, @@ -1319,7 +1326,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12, vpx_highbd_sad16x16_avg_bits12, - vp9_highbd_12_variance16x16, + vpx_highbd_12_variance16x16, vp9_highbd_12_sub_pixel_variance16x16, vp9_highbd_12_sub_pixel_avg_variance16x16, vpx_highbd_sad16x16x3_bits12, @@ -1329,7 +1336,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12, vpx_highbd_sad16x8_avg_bits12, - vp9_highbd_12_variance16x8, + vpx_highbd_12_variance16x8, vp9_highbd_12_sub_pixel_variance16x8, vp9_highbd_12_sub_pixel_avg_variance16x8, 
vpx_highbd_sad16x8x3_bits12, @@ -1339,7 +1346,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12, vpx_highbd_sad8x16_avg_bits12, - vp9_highbd_12_variance8x16, + vpx_highbd_12_variance8x16, vp9_highbd_12_sub_pixel_variance8x16, vp9_highbd_12_sub_pixel_avg_variance8x16, vpx_highbd_sad8x16x3_bits12, @@ -1349,7 +1356,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12, vpx_highbd_sad8x8_avg_bits12, - vp9_highbd_12_variance8x8, + vpx_highbd_12_variance8x8, vp9_highbd_12_sub_pixel_variance8x8, vp9_highbd_12_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x3_bits12, @@ -1359,7 +1366,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12, vpx_highbd_sad8x4_avg_bits12, - vp9_highbd_12_variance8x4, + vpx_highbd_12_variance8x4, vp9_highbd_12_sub_pixel_variance8x4, vp9_highbd_12_sub_pixel_avg_variance8x4, NULL, @@ -1369,7 +1376,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12, vpx_highbd_sad4x8_avg_bits12, - vp9_highbd_12_variance4x8, + vpx_highbd_12_variance4x8, vp9_highbd_12_sub_pixel_variance4x8, vp9_highbd_12_sub_pixel_avg_variance4x8, NULL, @@ -1379,7 +1386,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12, vpx_highbd_sad4x4_avg_bits12, - vp9_highbd_12_variance4x4, + vpx_highbd_12_variance4x4, vp9_highbd_12_sub_pixel_variance4x4, vp9_highbd_12_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x3_bits12, @@ -1589,6 +1596,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, sizeof(*cm->frame_contexts))); cpi->use_svc = 0; + cpi->resize_state = 0; + cpi->resize_avg_qp = 0; + cpi->resize_buffer_underflow = 0; cpi->common.buffer_pool = pool; init_config(cpi, oxcf); @@ -1802,61 +1812,61 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->fn_ptr[BT].sdx4df = SDX4DF; BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, - vp9_variance32x16, vp9_sub_pixel_variance32x16, + vpx_variance32x16, vp9_sub_pixel_variance32x16, vp9_sub_pixel_avg_variance32x16, NULL, NULL, vpx_sad32x16x4d) BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, - vp9_variance16x32, vp9_sub_pixel_variance16x32, + vpx_variance16x32, vp9_sub_pixel_variance16x32, vp9_sub_pixel_avg_variance16x32, NULL, NULL, vpx_sad16x32x4d) BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, - vp9_variance64x32, vp9_sub_pixel_variance64x32, + vpx_variance64x32, vp9_sub_pixel_variance64x32, vp9_sub_pixel_avg_variance64x32, NULL, NULL, vpx_sad64x32x4d) BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, - vp9_variance32x64, vp9_sub_pixel_variance32x64, + vpx_variance32x64, vp9_sub_pixel_variance32x64, vp9_sub_pixel_avg_variance32x64, NULL, NULL, vpx_sad32x64x4d) BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, - vp9_variance32x32, vp9_sub_pixel_variance32x32, + vpx_variance32x32, vp9_sub_pixel_variance32x32, vp9_sub_pixel_avg_variance32x32, vpx_sad32x32x3, vpx_sad32x32x8, vpx_sad32x32x4d) BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, - vp9_variance64x64, vp9_sub_pixel_variance64x64, + vpx_variance64x64, vp9_sub_pixel_variance64x64, vp9_sub_pixel_avg_variance64x64, vpx_sad64x64x3, vpx_sad64x64x8, vpx_sad64x64x4d) BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, - vp9_variance16x16, vp9_sub_pixel_variance16x16, + vpx_variance16x16, vp9_sub_pixel_variance16x16, vp9_sub_pixel_avg_variance16x16, vpx_sad16x16x3, vpx_sad16x16x8, vpx_sad16x16x4d) BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, - 
vp9_variance16x8, vp9_sub_pixel_variance16x8, + vpx_variance16x8, vp9_sub_pixel_variance16x8, vp9_sub_pixel_avg_variance16x8, vpx_sad16x8x3, vpx_sad16x8x8, vpx_sad16x8x4d) BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, - vp9_variance8x16, vp9_sub_pixel_variance8x16, + vpx_variance8x16, vp9_sub_pixel_variance8x16, vp9_sub_pixel_avg_variance8x16, vpx_sad8x16x3, vpx_sad8x16x8, vpx_sad8x16x4d) BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, - vp9_variance8x8, vp9_sub_pixel_variance8x8, + vpx_variance8x8, vp9_sub_pixel_variance8x8, vp9_sub_pixel_avg_variance8x8, vpx_sad8x8x3, vpx_sad8x8x8, vpx_sad8x8x4d) BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, - vp9_variance8x4, vp9_sub_pixel_variance8x4, + vpx_variance8x4, vp9_sub_pixel_variance8x4, vp9_sub_pixel_avg_variance8x4, NULL, vpx_sad8x4x8, vpx_sad8x4x4d) BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, - vp9_variance4x8, vp9_sub_pixel_variance4x8, + vpx_variance4x8, vp9_sub_pixel_variance4x8, vp9_sub_pixel_avg_variance4x8, NULL, vpx_sad4x8x8, vpx_sad4x8x4d) BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, - vp9_variance4x4, vp9_sub_pixel_variance4x4, + vpx_variance4x4, vp9_sub_pixel_variance4x4, vp9_sub_pixel_avg_variance4x4, vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d) @@ -2049,6 +2059,65 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif } +/* TODO(yaowu): The block_variance calls the unoptimized versions of variance() + * and highbd_8_variance(). It should not. + */ +static void encoder_variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void encoder_highbd_variance64(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, uint64_t *sse, + uint64_t *sum) { + int i, j; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + a += a_stride; + b += b_stride; + } +} + +static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, + unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h, + &sse_long, &sum_long); + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static int64_t get_sse(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height) { @@ -2060,15 +2129,15 @@ static int64_t get_sse(const uint8_t *a, int a_stride, int x, y; if (dw > 0) { - variance(&a[width - dw], a_stride, &b[width - dw], b_stride, - dw, height, &sse, &sum); + encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); total_sse += sse; } if (dh > 0) { - variance(&a[(height - dh) * a_stride], a_stride, - &b[(height - dh) * b_stride], b_stride, - width - dw, dh, &sse, &sum); + encoder_variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); total_sse += sse; } @@ -2076,7 +2145,7 @@ static int64_t get_sse(const uint8_t *a, int a_stride, const uint8_t *pa = a; const uint8_t *pb = b; for (x = 0; x < width / 16; ++x) { - vp9_mse16x16(pa, a_stride, pb, 
b_stride, &sse); + vpx_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; pa += 16; @@ -2121,21 +2190,22 @@ static int64_t highbd_get_sse(const uint8_t *a, int a_stride, unsigned int sse = 0; int sum = 0; if (dw > 0) { - highbd_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, - dw, height, &sse, &sum); + encoder_highbd_8_variance(&a[width - dw], a_stride, + &b[width - dw], b_stride, + dw, height, &sse, &sum); total_sse += sse; } if (dh > 0) { - highbd_variance(&a[(height - dh) * a_stride], a_stride, - &b[(height - dh) * b_stride], b_stride, - width - dw, dh, &sse, &sum); + encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); total_sse += sse; } for (y = 0; y < height / 16; ++y) { const uint8_t *pa = a; const uint8_t *pb = b; for (x = 0; x < width / 16; ++x) { - vp9_highbd_mse16x16(pa, a_stride, pb, b_stride, &sse); + vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; pa += 16; pb += 16; @@ -2260,8 +2330,9 @@ static void generate_psnr_packet(VP9_COMP *cpi) { pkt.data.psnr.psnr[i] = psnr.psnr[i]; } pkt.kind = VPX_CODEC_PSNR_PKT; - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].psnr_pkt = pkt.data.psnr; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].psnr_pkt = pkt.data.psnr; else vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } @@ -2711,7 +2782,10 @@ void vp9_scale_references(VP9_COMP *cpi) { #if CONFIG_VP9_HIGHBITDEPTH if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); - RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb]; + RefCntBuffer *new_fb_ptr = NULL; + if (cm->new_fb_idx == INVALID_IDX) + return; + new_fb_ptr = &pool->frame_bufs[new_fb]; cm->cur_frame = &pool->frame_bufs[new_fb]; vp9_realloc_frame_buffer(&pool->frame_bufs[new_fb].buf, cm->width, cm->height, @@ -2723,7 +2797,10 @@ void vp9_scale_references(VP9_COMP *cpi) { #else if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); - RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb]; + RefCntBuffer *new_fb_ptr = NULL; + if (cm->new_fb_idx == INVALID_IDX) + return; + new_fb_ptr = &pool->frame_bufs[new_fb]; vp9_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -2792,19 +2869,25 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10u %dx%d %10d %10d %10d %10d" - "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d " - "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" + fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d" + "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" " + "%10"PRId64" %10"PRId64" %10d " + "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" "%6d %6d %5d %5d %5d " "%10"PRId64" %10.3lf" - "%10lf %8u %10"PRId64" %10d %10d\n", + "%10lf %8u %10"PRId64" %10d %10d %10d\n", cpi->common.current_video_frame, cm->width, cm->height, + cpi->rc.source_alt_ref_pending, + cpi->rc.source_alt_ref_active, cpi->rc.this_frame_target, cpi->rc.projected_frame_size, cpi->rc.projected_frame_size / cpi->common.MBs, (cpi->rc.projected_frame_size - cpi->rc.this_frame_target), cpi->rc.vbr_bits_off_target, + cpi->rc.vbr_bits_off_target_fast, + cpi->twopass.extend_minq, + cpi->twopass.extend_minq_fast, cpi->rc.total_target_vs_actual, 
(cpi->rc.starting_buffer_level - cpi->rc.bits_off_target), cpi->rc.total_actual_bits, cm->base_qindex, @@ -2821,7 +2904,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { cpi->twopass.bits_left / (1 + cpi->twopass.total_left_stats.coded_error), cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost, - cpi->twopass.kf_zeromotion_pct); + cpi->twopass.kf_zeromotion_pct, + cpi->twopass.fr_content_type); fclose(f); @@ -2933,7 +3017,7 @@ static void init_motion_estimation(VP9_COMP *cpi) { } } -void set_frame_size(VP9_COMP *cpi) { +static void set_frame_size(VP9_COMP *cpi) { int ref_frame; VP9_COMMON *const cm = &cpi->common; VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -2951,6 +3035,31 @@ void set_frame_size(VP9_COMP *cpi) { oxcf->scaled_frame_height); } + if (oxcf->pass == 0 && + oxcf->rc_mode == VPX_CBR && + !cpi->use_svc && + oxcf->resize_mode == RESIZE_DYNAMIC) { + if (cpi->resize_state == 1) { + oxcf->scaled_frame_width = + (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den; + oxcf->scaled_frame_height = + (cm->height * cpi->resize_scale_num) /cpi->resize_scale_den; + } else if (cpi->resize_state == -1) { + // Go back up to original size. + oxcf->scaled_frame_width = oxcf->width; + oxcf->scaled_frame_height = oxcf->height; + } + if (cpi->resize_state != 0) { + // There has been a change in frame size. + vp9_set_size_literal(cpi, + oxcf->scaled_frame_width, + oxcf->scaled_frame_height); + + // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. + set_mv_search_params(cpi); + } + } + if ((oxcf->pass == 2) && (!cpi->use_svc || (is_two_pass_svc(cpi) && @@ -3026,11 +3135,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi) { set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); vp9_set_quantizer(cm, q); - vp9_set_vbp_thresholds(cpi, q); + vp9_set_variance_partition_thresholds(cpi, q); setup_frame(cpi); - vp9_suppress_active_map(cpi); + suppress_active_map(cpi); // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { @@ -3040,7 +3149,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi) { } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { vp9_cyclic_refresh_setup(cpi); } - vp9_apply_active_map(cpi); + apply_active_map(cpi); // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -3394,7 +3503,7 @@ static void set_arf_sign_bias(VP9_COMP *cpi) { cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias; } -int setup_interp_filter_search_mask(VP9_COMP *cpi) { +static int setup_interp_filter_search_mask(VP9_COMP *cpi) { INTERP_FILTER ifilter; int ref_total[MAX_REF_FRAMES] = {0}; MV_REFERENCE_FRAME ref; @@ -3471,34 +3580,41 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) { - // Use the last frame context for the empty frame. + // Use context 0 for intra only empty frame, but the last frame context + // for other empty frames. + if (cpi->svc.encode_empty_frame_state == ENCODING) { + if (cpi->svc.encode_intra_empty_frame != 0) + cm->frame_context_idx = 0; + else + cm->frame_context_idx = FRAME_CONTEXTS - 1; + } else { cm->frame_context_idx = - (cpi->svc.encode_empty_frame_state == ENCODING) ? 
FRAME_CONTEXTS - 1 : cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id; + } + + cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode; // The probs will be updated based on the frame type of its previous // frame if frame_parallel_decoding_mode is 0. The type may vary for // the frame after a key frame in base layer since we may drop enhancement // layers. So set frame_parallel_decoding_mode to 1 in this case. - if (cpi->svc.number_temporal_layers == 1) { - if (cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) - cm->frame_parallel_decoding_mode = 1; - else - cm->frame_parallel_decoding_mode = 0; - } else if (cpi->svc.spatial_layer_id == 0) { - // Find the 2nd frame in temporal base layer and 1st frame in temporal - // enhancement layers from the key frame. - int i; - for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { - if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { + if (cm->frame_parallel_decoding_mode == 0) { + if (cpi->svc.number_temporal_layers == 1) { + if (cpi->svc.spatial_layer_id == 0 && + cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) cm->frame_parallel_decoding_mode = 1; - break; + } else if (cpi->svc.spatial_layer_id == 0) { + // Find the 2nd frame in temporal base layer and 1st frame in temporal + // enhancement layers from the key frame. + int i; + for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { + if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { + cm->frame_parallel_decoding_mode = 1; + break; + } } } - if (i == cpi->svc.number_temporal_layers) - cm->frame_parallel_decoding_mode = 0; } } @@ -3643,9 +3759,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } cm->prev_frame = cm->cur_frame; - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type = - cm->frame_type; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].last_frame_type = + cm->frame_type; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -3788,8 +3906,8 @@ static int frame_is_reference(const VP9_COMP *cpi) { cm->seg.update_data; } -void adjust_frame_rate(VP9_COMP *cpi, - const struct lookahead_entry *source) { +static void adjust_frame_rate(VP9_COMP *cpi, + const struct lookahead_entry *source) { int64_t this_duration; int step = 0; @@ -3872,15 +3990,16 @@ static void check_src_altref(VP9_COMP *cpi, extern double vp9_get_blockiness(const unsigned char *img1, int img1_pitch, const unsigned char *img2, int img2_pitch, int width, int height); -#endif -void adjust_image_stat(double y, double u, double v, double all, ImageStat *s) { +static void adjust_image_stat(double y, double u, double v, double all, + ImageStat *s) { s->stat[Y] += y; s->stat[U] += u; s->stat[V] += v; s->stat[ALL] += all; s->worst = MIN(s->worst, all); } +#endif // CONFIG_INTERNAL_STATS int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, @@ -3905,6 +4024,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif if (oxcf->pass == 2) vp9_restore_layer_context(cpi); + } else if (is_one_pass_cbr_svc(cpi)) { + vp9_one_pass_cbr_svc_start_layer(cpi); } vpx_usec_timer_start(&cmptimer); @@ -3923,9 +4044,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Normal defaults cm->reset_frame_context = 0; cm->refresh_frame_context = 1; - cpi->refresh_last_frame = 1; - 
cpi->refresh_golden_frame = 0; - cpi->refresh_alt_ref_frame = 0; + if (!is_one_pass_cbr_svc(cpi)) { + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + } // Should we encode an arf frame. arf_src_index = get_arf_src_index(cpi); @@ -3962,6 +4085,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } cm->show_frame = 0; + cm->intra_only = 0; cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; @@ -3980,12 +4104,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Read in the source frame. -#if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi)) + if (cpi->use_svc) source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); else -#endif source = vp9_lookahead_pop(cpi->lookahead, flush); + if (source != NULL) { cm->show_frame = 1; cm->intra_only = 0; @@ -4034,8 +4157,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, adjust_frame_rate(cpi, source); } - if (cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -4071,7 +4193,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, (is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state != ENCODING))) { vp9_rc_get_second_pass_params(cpi); - } else { + } else if (oxcf->pass == 1) { set_frame_size(cpi); } @@ -4117,11 +4239,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Save layer specific state. - if ((cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) || - ((cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - oxcf->pass == 2)) { + if (is_one_pass_cbr_svc(cpi) || + ((cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + oxcf->pass == 2)) { vp9_save_layer_context(cpi); } @@ -4180,7 +4301,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, vp9_clear_system_state(); #if CONFIG_VP9_HIGHBITDEPTH - calc_highbd_psnr(orig, pp, &psnr, cpi->td.mb.e_mbd.bd, + calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else calc_psnr(orig, pp, &psnr2); @@ -4231,31 +4352,38 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } } if (cpi->b_calculate_blockiness) { - double frame_blockiness = vp9_get_blockiness( - cpi->Source->y_buffer, cpi->Source->y_stride, - cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, - cpi->Source->y_width, cpi->Source->y_height); - cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness); - cpi->total_blockiness += frame_blockiness; +#if CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif + { + double frame_blockiness = vp9_get_blockiness( + cpi->Source->y_buffer, cpi->Source->y_stride, + cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, + cpi->Source->y_width, cpi->Source->y_height); + cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness); + cpi->total_blockiness += frame_blockiness; + } } if (cpi->b_calculate_consistency) { - double this_inconsistency = vp9_get_ssim_metrics( - cpi->Source->y_buffer, cpi->Source->y_stride, - cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, - cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars, - &cpi->metrics, 1); - - const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1); - - - double consistency = vpx_sse_to_psnr(samples, peak, +#if 
CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif + { + double this_inconsistency = vp9_get_ssim_metrics( + cpi->Source->y_buffer, cpi->Source->y_stride, + cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, + cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars, + &cpi->metrics, 1); + + const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1); + double consistency = vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency); - - if (consistency > 0.0) - cpi->worst_consistency = MIN(cpi->worst_consistency, - consistency); - cpi->total_inconsistency += this_inconsistency; + if (consistency > 0.0) + cpi->worst_consistency = MIN(cpi->worst_consistency, + consistency); + cpi->total_inconsistency += this_inconsistency; + } } if (cpi->b_calculate_ssimg) { @@ -4273,6 +4401,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif // CONFIG_VP9_HIGHBITDEPTH adjust_image_stat(y, u, v, frame_all, &cpi->ssimg); } +#if CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif { double y, u, v, frame_all; frame_all = vp9_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u, @@ -4280,6 +4411,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, adjust_image_stat(y, u, v, frame_all, &cpi->fastssim); /* TODO(JBB): add 10/12 bit support */ } +#if CONFIG_VP9_HIGHBITDEPTH + if (!cm->use_highbitdepth) +#endif { double y, u, v, frame_all; frame_all = vp9_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v); @@ -4291,8 +4425,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif if (is_two_pass_svc(cpi)) { - if (cpi->svc.encode_empty_frame_state == ENCODING) + if (cpi->svc.encode_empty_frame_state == ENCODING) { cpi->svc.encode_empty_frame_state = ENCODED; + cpi->svc.encode_intra_empty_frame = 0; + } if (cm->show_frame) { ++cpi->svc.spatial_layer_to_encode; @@ -4302,6 +4438,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // May need the empty frame after an visible frame. cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE; } + } else if (is_one_pass_cbr_svc(cpi)) { + if (cm->show_frame) { + ++cpi->svc.spatial_layer_to_encode; + if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) + cpi->svc.spatial_layer_to_encode = 0; + } } return 0; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h index 41f1c13d493..2b0da103ffe 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h @@ -194,10 +194,10 @@ typedef struct VP9EncoderConfig { int ss_number_layers; // Number of spatial layers. int ts_number_layers; // Number of temporal layers. // Bitrate allocation for spatial layers. + int layer_target_bitrate[VPX_MAX_LAYERS]; int ss_target_bitrate[VPX_SS_MAX_LAYERS]; int ss_enable_auto_arf[VPX_SS_MAX_LAYERS]; // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. 
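/*
 * Illustrative sketch (editorial, not part of this patch): the
 * is_one_pass_cbr_svc() branch added to vp9_get_compressed_data() above
 * advances svc.spatial_layer_to_encode once per shown frame and wraps it back
 * to 0, so spatial layers are visited round-robin. A minimal standalone model
 * of just that counter update, with hypothetical parameter names:
 */
int next_spatial_layer_to_encode(int current_layer, int num_spatial_layers,
                                 int frame_is_shown) {
  if (!frame_is_shown)
    return current_layer;            /* hidden (e.g. alt-ref) frames: no change */
  ++current_layer;                   /* move on to the next spatial layer       */
  if (current_layer >= num_spatial_layers)
    current_layer = 0;               /* wrap after the top spatial layer        */
  return current_layer;
}
/* With 3 spatial layers the shown frames therefore cycle 0, 1, 2, 0, 1, 2, ... */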
- int ts_target_bitrate[VPX_TS_MAX_LAYERS]; int ts_rate_decimator[VPX_TS_MAX_LAYERS]; int enable_auto_arf; @@ -237,6 +237,7 @@ typedef struct VP9EncoderConfig { int use_highbitdepth; #endif vpx_color_space_t color_space; + VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } VP9EncoderConfig; static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { @@ -305,6 +306,7 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG scaled_last_source; TileDataEnc *tile_data; + int allocated_tiles; // Keep track of memory allocated for tiles. // For a still frame, this flag is set to 1 to skip partition search. int partition_search_skippable_frame; @@ -477,6 +479,12 @@ typedef struct VP9_COMP { #endif int resize_pending; + int resize_state; + int resize_scale_num; + int resize_scale_den; + int resize_avg_qp; + int resize_buffer_underflow; + int resize_count; // VAR_BASED_PARTITION thresholds // 0 - threshold_64x64; 1 - threshold_32x32; @@ -611,9 +619,11 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { - return cpi->use_svc && - ((cpi->svc.number_spatial_layers > 1) || - (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.pass != 0)); + return cpi->use_svc && cpi->oxcf.pass != 0; +} + +static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { + return (cpi->use_svc && cpi->oxcf.pass == 0); } static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { @@ -642,6 +652,8 @@ static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) { void vp9_new_framerate(VP9_COMP *cpi, double framerate); +#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) + #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c index 8700ccdaecd..4ae3fbc5409 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c @@ -54,6 +54,18 @@ static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) { return 0; } +static int get_max_tile_cols(VP9_COMP *cpi) { + const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2); + int mi_cols = aligned_width >> MI_SIZE_LOG2; + int min_log2_tile_cols, max_log2_tile_cols; + int log2_tile_cols; + + vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); + log2_tile_cols = clamp(cpi->oxcf.tile_columns, + min_log2_tile_cols, max_log2_tile_cols); + return (1 << log2_tile_cols); +} + void vp9_encode_tiles_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; @@ -65,20 +77,30 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { // Only run once to create threads and allocate thread data. if (cpi->num_workers == 0) { + int allocated_workers = num_workers; + + // While using SVC, we need to allocate threads according to the highest + // resolution. 
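/*
 * Illustrative sketch (editorial, not part of this patch): LAYER_IDS_TO_IDX,
 * introduced in vp9_encoder.h above and used throughout this change to index
 * svc.layer_context[], lays the contexts out spatial-major with
 * number_temporal_layers entries per spatial layer. A standalone check of the
 * index math for a hypothetical 2-spatial x 3-temporal configuration:
 */
#include <assert.h>

#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))  /* as above */

int main(void) {
  const int num_tl = 3;                         /* temporal layers per spatial layer */
  assert(LAYER_IDS_TO_IDX(0, 0, num_tl) == 0);  /* SL0/TL0 -> first context          */
  assert(LAYER_IDS_TO_IDX(0, 2, num_tl) == 2);  /* SL0/TL2                           */
  assert(LAYER_IDS_TO_IDX(1, 0, num_tl) == 3);  /* SL1/TL0 starts the next block     */
  assert(LAYER_IDS_TO_IDX(1, 2, num_tl) == 5);  /* SL1/TL2 -> last of 2*3 contexts   */
  return 0;
}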
+ if (cpi->use_svc) { + int max_tile_cols = get_max_tile_cols(cpi); + allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols); + } + CHECK_MEM_ERROR(cm, cpi->workers, - vpx_malloc(num_workers * sizeof(*cpi->workers))); + vpx_malloc(allocated_workers * sizeof(*cpi->workers))); CHECK_MEM_ERROR(cm, cpi->tile_thr_data, - vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data))); + vpx_calloc(allocated_workers, + sizeof(*cpi->tile_thr_data))); - for (i = 0; i < num_workers; i++) { + for (i = 0; i < allocated_workers; i++) { VP9Worker *const worker = &cpi->workers[i]; EncWorkerData *thread_data = &cpi->tile_thr_data[i]; ++cpi->num_workers; winterface->init(worker); - if (i < num_workers - 1) { + if (i < allocated_workers - 1) { thread_data->cpi = cpi; // Allocate thread data. @@ -154,7 +176,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { // Set the starting tile for each thread. thread_data->start = i; - if (i == num_workers - 1) + if (i == cpi->num_workers - 1) winterface->execute(worker); else winterface->launch(worker); @@ -171,7 +193,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; // Accumulate counters. - if (i < num_workers - 1) { + if (i < cpi->num_workers - 1) { vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c index 96f3598b1dc..6e1ed365dab 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c @@ -9,6 +9,7 @@ */ #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_extend.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c index 9752668b15d..3d7843ea731 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -12,9 +12,11 @@ #include <math.h> #include <stdio.h> +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" @@ -111,8 +113,8 @@ static void output_stats(FIRSTPASS_STATS *stats, fpfile = fopen("firstpass.stt", "a"); fprintf(fpfile, "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf" - "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf" - "%12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n", + "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf" + "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n", stats->frame, stats->weight, stats->intra_error, @@ -122,6 +124,8 @@ static void output_stats(FIRSTPASS_STATS *stats, stats->pcnt_motion, stats->pcnt_second_ref, stats->pcnt_neutral, + stats->intra_skip_pct, + stats->inactive_zone_rows, stats->MVr, stats->mvr_abs, stats->MVc, @@ -158,7 +162,9 @@ static void zero_stats(FIRSTPASS_STATS *section) { section->pcnt_motion = 0.0; section->pcnt_second_ref = 0.0; section->pcnt_neutral = 0.0; - section->MVr = 0.0; + section->intra_skip_pct = 0.0; + section->inactive_zone_rows = 0.0; + section->MVr = 0.0; section->mvr_abs = 0.0; section->MVc = 0.0; section->mvc_abs = 0.0; @@ -183,7 +189,9 @@ static void accumulate_stats(FIRSTPASS_STATS *section, section->pcnt_motion += 
frame->pcnt_motion; section->pcnt_second_ref += frame->pcnt_second_ref; section->pcnt_neutral += frame->pcnt_neutral; - section->MVr += frame->MVr; + section->intra_skip_pct += frame->intra_skip_pct; + section->inactive_zone_rows += frame->inactive_zone_rows; + section->MVr += frame->MVr; section->mvr_abs += frame->mvr_abs; section->MVc += frame->MVc; section->mvc_abs += frame->mvc_abs; @@ -206,7 +214,9 @@ static void subtract_stats(FIRSTPASS_STATS *section, section->pcnt_motion -= frame->pcnt_motion; section->pcnt_second_ref -= frame->pcnt_second_ref; section->pcnt_neutral -= frame->pcnt_neutral; - section->MVr -= frame->MVr; + section->intra_skip_pct -= frame->intra_skip_pct; + section->inactive_zone_rows -= frame->inactive_zone_rows; + section->MVr -= frame->MVr; section->mvr_abs -= frame->mvr_abs; section->MVc -= frame->MVc; section->mvc_abs -= frame->mvc_abs; @@ -266,13 +276,13 @@ void vp9_end_first_pass(VP9_COMP *cpi) { static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_8X8: - return vp9_mse8x8; + return vpx_mse8x8; case BLOCK_16X8: - return vp9_mse16x8; + return vpx_mse16x8; case BLOCK_8X16: - return vp9_mse8x16; + return vpx_mse8x16; default: - return vp9_mse16x16; + return vpx_mse16x16; } } @@ -292,37 +302,37 @@ static vp9_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize, default: switch (bsize) { case BLOCK_8X8: - return vp9_highbd_mse8x8; + return vpx_highbd_8_mse8x8; case BLOCK_16X8: - return vp9_highbd_mse16x8; + return vpx_highbd_8_mse16x8; case BLOCK_8X16: - return vp9_highbd_mse8x16; + return vpx_highbd_8_mse8x16; default: - return vp9_highbd_mse16x16; + return vpx_highbd_8_mse16x16; } break; case 10: switch (bsize) { case BLOCK_8X8: - return vp9_highbd_10_mse8x8; + return vpx_highbd_10_mse8x8; case BLOCK_16X8: - return vp9_highbd_10_mse16x8; + return vpx_highbd_10_mse16x8; case BLOCK_8X16: - return vp9_highbd_10_mse8x16; + return vpx_highbd_10_mse8x16; default: - return vp9_highbd_10_mse16x16; + return vpx_highbd_10_mse16x16; } break; case 12: switch (bsize) { case BLOCK_8X8: - return vp9_highbd_12_mse8x8; + return vpx_highbd_12_mse8x8; case BLOCK_16X8: - return vp9_highbd_12_mse16x8; + return vpx_highbd_12_mse16x8; case BLOCK_8X16: - return vp9_highbd_12_mse8x16; + return vpx_highbd_12_mse8x16; default: - return vp9_highbd_12_mse16x16; + return vpx_highbd_12_mse16x16; } break; } @@ -451,6 +461,8 @@ static void set_first_pass_params(VP9_COMP *cpi) { cpi->rc.frames_to_key = INT_MAX; } +#define UL_INTRA_THRESH 50 +#define INVALID_ROW -1 void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { int mb_row, mb_col; MACROBLOCK *const x = &cpi->td.mb; @@ -475,6 +487,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { int second_ref_count = 0; const int intrapenalty = INTRA_MODE_PENALTY; double neutral_count; + int intra_skip_count = 0; + int image_data_start_row = INVALID_ROW; int new_mv_count = 0; int sum_in_vectors = 0; MV lastmv = {0, 0}; @@ -633,7 +647,19 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { xd->mi[0]->mbmi.tx_size = use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; vp9_encode_intra_block_plane(x, bsize, 0); - this_error = vp9_get_mb_ss(x->plane[0].src_diff); + this_error = vpx_get_mb_ss(x->plane[0].src_diff); + + // Keep a record of blocks that have almost no intra error residual + // (i.e. are in effect completely flat and untextured in the intra + // domain). 
In natural videos this is uncommon, but it is much more + // common in animations, graphics and screen content, so may be used + // as a signal to detect these types of content. + if (this_error < UL_INTRA_THRESH) { + ++intra_skip_count; + } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) { + image_data_start_row = mb_row; + } + #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { switch (cm->bit_depth) { @@ -961,6 +987,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { vp9_clear_system_state(); } + // Clamp the image start to rows/2. This number of rows is discarded top + // and bottom as dead data so rows / 2 means the frame is blank. + if ((image_data_start_row > cm->mb_rows / 2) || + (image_data_start_row == INVALID_ROW)) { + image_data_start_row = cm->mb_rows / 2; + } + // Exclude any image dead zone + if (image_data_start_row > 0) { + intra_skip_count = + MAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2)); + } + { FIRSTPASS_STATS fps; // The minimum error here insures some bit allocation to frames even @@ -985,6 +1023,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { fps.pcnt_inter = (double)intercount / num_mbs; fps.pcnt_second_ref = (double)second_ref_count / num_mbs; fps.pcnt_neutral = (double)neutral_count / num_mbs; + fps.intra_skip_pct = (double)intra_skip_count / num_mbs; + fps.inactive_zone_rows = (double)image_data_start_row; if (mvcount > 0) { fps.MVr = (double)sum_mvr / mvcount; @@ -1106,21 +1146,25 @@ static double calc_correction_factor(double err_per_mb, static int get_twopass_worst_quality(const VP9_COMP *cpi, const double section_err, + double inactive_zone, int section_target_bandwidth, double group_weight_factor) { const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; + inactive_zone = fclamp(inactive_zone, 0.0, 1.0); + if (section_target_bandwidth <= 0) { return rc->worst_quality; // Highest value allowed } else { const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs : cpi->common.MBs; - const double err_per_mb = section_err / num_mbs; + const int active_mbs = MAX(1, num_mbs - (int)(num_mbs * inactive_zone)); + const double av_err_per_mb = section_err / active_mbs; const double speed_term = 1.0 + 0.04 * oxcf->speed; - const double ediv_size_correction = num_mbs / EDIV_SIZE_FACTOR; + const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR; const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth << - BPER_MB_NORMBITS) / num_mbs; + BPER_MB_NORMBITS) / active_mbs; int q; int is_svc_upper_layer = 0; @@ -1133,7 +1177,7 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi, // content at the given rate. for (q = rc->best_quality; q < rc->worst_quality; ++q) { const double factor = - calc_correction_factor(err_per_mb, + calc_correction_factor(av_err_per_mb, ERR_DIVISOR - ediv_size_correction, is_svc_upper_layer ? SVC_FACTOR_PT_LOW : FACTOR_PT_LOW, FACTOR_PT_HIGH, q, @@ -1246,8 +1290,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->modified_error_left = modified_error_total; } - // Reset the vbr bits off target counter + // Reset the vbr bits off target counters cpi->rc.vbr_bits_off_target = 0; + cpi->rc.vbr_bits_off_target_fast = 0; cpi->rc.rate_error_estimate = 0; @@ -1411,6 +1456,8 @@ static double calc_frame_boost(VP9_COMP *cpi, const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? 
cpi->initial_mbs : cpi->common.MBs; + // TODO(paulwilkins): correct for dead zone + // Underlying boost factor is based on inter error ratio. frame_boost = (BASELINE_ERR_PER_MB * num_mbs) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error); @@ -1695,7 +1742,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1; // Allocate bits to the other frames in the group. - for (i = 0; i < rc->baseline_gf_interval - 1; ++i) { + for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) { int arf_idx = 0; if (EOF == input_stats(twopass, &frame_stats)) break; @@ -1776,6 +1823,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { #if GROUP_ADAPTIVE_MAXQ double gf_group_raw_error = 0.0; #endif + double gf_group_skip_pct = 0.0; + double gf_group_inactive_zone_rows = 0.0; double gf_first_frame_err = 0.0; double mod_frame_err = 0.0; @@ -1825,6 +1874,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { #if GROUP_ADAPTIVE_MAXQ gf_group_raw_error -= this_frame->coded_error; #endif + gf_group_skip_pct -= this_frame->intra_skip_pct; + gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows; } // Motion breakout threshold for loop below depends on image size. @@ -1869,6 +1920,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { #if GROUP_ADAPTIVE_MAXQ gf_group_raw_error += this_frame->coded_error; #endif + gf_group_skip_pct += this_frame->intra_skip_pct; + gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; if (EOF == input_stats(twopass, &next_frame)) break; @@ -1933,8 +1986,26 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Was the group length constrained by the requirement for a new KF? rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0; + // Should we use the alternate reference frame. + if (allow_alt_ref && + (i < cpi->oxcf.lag_in_frames) && + (i >= rc->min_gf_interval)) { + // Calculate the boost for alt ref. + rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, + &b_boost); + rc->source_alt_ref_pending = 1; + + // Test to see if multi arf is appropriate. + cpi->multi_arf_enabled = + (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) && + (zero_motion_accumulator < 0.995)) ? 1 : 0; + } else { + rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST); + rc->source_alt_ref_pending = 0; + } + // Set the interval until the next gf. - if (is_key_frame || rc->source_alt_ref_active) + if (is_key_frame || rc->source_alt_ref_pending) rc->baseline_gf_interval = i - 1; else rc->baseline_gf_interval = i; @@ -1953,30 +2024,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { #if GROUP_ADAPTIVE_MAXQ gf_group_raw_error += this_frame->coded_error; #endif + gf_group_skip_pct += this_frame->intra_skip_pct; + gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; } rc->baseline_gf_interval = new_gf_interval; } rc->frames_till_gf_update_due = rc->baseline_gf_interval; - // Should we use the alternate reference frame. - if (allow_alt_ref && - (i < cpi->oxcf.lag_in_frames) && - (i >= rc->min_gf_interval)) { - // Calculate the boost for alt ref. - rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, - &b_boost); - rc->source_alt_ref_pending = 1; - - // Test to see if multi arf is appropriate. - cpi->multi_arf_enabled = - (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) && - (zero_motion_accumulator < 0.995)) ? 
1 : 0; - } else { - rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST); - rc->source_alt_ref_pending = 0; - } - // Reset the file position. reset_fpf_position(twopass, start_pos); @@ -1993,6 +2048,12 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { const int vbr_group_bits_per_frame = (int)(gf_group_bits / rc->baseline_gf_interval); const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval; + const double group_av_skip_pct = + gf_group_skip_pct / rc->baseline_gf_interval; + const double group_av_inactive_zone = + ((gf_group_inactive_zone_rows * 2) / + (rc->baseline_gf_interval * (double)cm->mb_rows)); + int tmp_q; // rc factor is a weight factor that corrects for local rate control drift. double rc_factor = 1.0; @@ -2004,7 +2065,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { (double)(100 - rc->rate_error_estimate) / 100.0); } tmp_q = - get_twopass_worst_quality(cpi, group_av_err, vbr_group_bits_per_frame, + get_twopass_worst_quality(cpi, group_av_err, + (group_av_skip_pct + group_av_inactive_zone), + vbr_group_bits_per_frame, twopass->kfgroup_inter_fraction * rc_factor); twopass->active_worst_quality = MAX(tmp_q, twopass->active_worst_quality >> 1); @@ -2413,7 +2476,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } // Define the reference buffers that will be updated post encode. -void configure_buffer_updates(VP9_COMP *cpi) { +static void configure_buffer_updates(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; cpi->rc.is_src_frame_alt_ref = 0; @@ -2460,7 +2523,7 @@ void configure_buffer_updates(VP9_COMP *cpi) { } } -int is_skippable_frame(const VP9_COMP *cpi) { +static int is_skippable_frame(const VP9_COMP *cpi) { // If the current frame does not have non-zero motion vector detected in the // first pass, and so do its previous and forward frames, then this frame // can be skipped for partition check, and the partition size is assigned @@ -2543,10 +2606,17 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); + const double section_length = twopass->total_left_stats.count; const double section_error = - twopass->total_left_stats.coded_error / twopass->total_left_stats.count; + twopass->total_left_stats.coded_error / section_length; + const double section_intra_skip = + twopass->total_left_stats.intra_skip_pct / section_length; + const double section_inactive_zone = + (twopass->total_left_stats.inactive_zone_rows * 2) / + ((double)cm->mb_rows * section_length); const int tmp_q = get_twopass_worst_quality(cpi, section_error, + section_intra_skip + section_inactive_zone, section_target_bandwidth, DEFAULT_GRP_WEIGHT); twopass->active_worst_quality = tmp_q; @@ -2562,6 +2632,12 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (EOF == input_stats(twopass, &this_frame)) return; + // Set the frame content type flag. + if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH) + twopass->fr_content_type = FC_GRAPHICS_ANIMATION; + else + twopass->fr_content_type = FC_NORMAL; + // Keyframe and section processing. if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) { FIRSTPASS_STATS this_frame_copy; @@ -2580,9 +2656,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); lc->frames_from_key_frame = 0; - // Reset the empty frame resolution since we have a key frame. 
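/*
 * Illustrative sketch (editorial, not part of this patch): the new
 * inactive_zone argument to get_twopass_worst_quality() discounts macroblocks
 * that carry no real image data (the intra-skip percentage plus the dead rows
 * accumulated for the group above) before the per-MB error and bit budget are
 * derived. A simplified standalone model of that adjustment; the
 * BPER_MB_NORMBITS scaling used by the real code is omitted:
 */
void active_mb_rates(double section_err, double inactive_zone, int num_mbs,
                     int target_bits, double *av_err_per_mb,
                     double *bits_per_active_mb) {
  int active_mbs;
  if (inactive_zone < 0.0) inactive_zone = 0.0;   /* fclamp(inactive_zone, 0, 1) */
  if (inactive_zone > 1.0) inactive_zone = 1.0;
  active_mbs = num_mbs - (int)(num_mbs * inactive_zone);
  if (active_mbs < 1) active_mbs = 1;             /* MAX(1, ...) guard           */
  *av_err_per_mb = section_err / active_mbs;      /* error spread over live MBs  */
  *bits_per_active_mb = (double)target_bits / active_mbs;
}
/*
 * E.g. a 30% inactive zone leaves 70% of the MBs sharing the same bit budget,
 * so bits-per-MB rises and the q loop above typically settles on a lower
 * (better) worst quality.
 */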
- cpi->svc.empty_frame_width = cm->width; - cpi->svc.empty_frame_height = cm->height; + // Encode an intra only empty frame since we have a key frame. + cpi->svc.encode_intra_empty_frame = 1; } } else { cm->frame_type = INTER_FRAME; @@ -2649,6 +2724,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { #define MINQ_ADJ_LIMIT 48 #define MINQ_ADJ_LIMIT_CQ 20 +#define HIGH_UNDERSHOOT_RATIO 2 void vp9_twopass_postencode_update(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; RATE_CONTROL *const rc = &cpi->rc; @@ -2715,5 +2791,32 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit); twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit); + + // If there is a big and undexpected undershoot then feed the extra + // bits back in quickly. One situation where this may happen is if a + // frame is unexpectedly almost perfectly predicted by the ARF or GF + // but not very well predcited by the previous frame. + if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) { + int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO; + if (rc->projected_frame_size < fast_extra_thresh) { + rc->vbr_bits_off_target_fast += + fast_extra_thresh - rc->projected_frame_size; + rc->vbr_bits_off_target_fast = + MIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth)); + + // Fast adaptation of minQ if necessary to use up the extra bits. + if (rc->avg_frame_bandwidth) { + twopass->extend_minq_fast = + (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth); + } + twopass->extend_minq_fast = MIN(twopass->extend_minq_fast, + minq_adj_limit - twopass->extend_minq); + } else if (rc->vbr_bits_off_target_fast) { + twopass->extend_minq_fast = MIN(twopass->extend_minq_fast, + minq_adj_limit - twopass->extend_minq); + } else { + twopass->extend_minq_fast = 0; + } + } } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h index 08e7a8bf114..00479322d31 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h @@ -51,6 +51,8 @@ typedef struct { double pcnt_motion; double pcnt_second_ref; double pcnt_neutral; + double intra_skip_pct; + double inactive_zone_rows; // Image mask rows top and bottom. 
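/*
 * Illustrative sketch (editorial, not part of this patch): the fast-undershoot
 * path added to vp9_twopass_postencode_update() above banks the bits left over
 * when a frame lands far under target (below base_frame_target /
 * HIGH_UNDERSHOOT_RATIO), caps the bank at four average frames, and widens minQ
 * in proportion. A worked standalone example with hypothetical numbers; the
 * final clamp against (minq_adj_limit - extend_minq) is omitted:
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int base_frame_target = 40000;     /* bits planned for this frame     */
  const int projected_frame_size = 12000;  /* bits the frame actually used    */
  const int avg_frame_bandwidth = 30000;   /* average per-frame bit budget    */
  int64_t vbr_bits_off_target_fast = 0;
  int extend_minq_fast = 0;

  const int fast_extra_thresh = base_frame_target / 2;  /* HIGH_UNDERSHOOT_RATIO */
  if (projected_frame_size < fast_extra_thresh) {
    vbr_bits_off_target_fast += fast_extra_thresh - projected_frame_size; /* 8000 */
    if (vbr_bits_off_target_fast > 4 * (int64_t)avg_frame_bandwidth)
      vbr_bits_off_target_fast = 4 * (int64_t)avg_frame_bandwidth;        /* cap  */
    if (avg_frame_bandwidth)
      extend_minq_fast =
          (int)(vbr_bits_off_target_fast * 8 / avg_frame_bandwidth);      /* 2    */
  }
  printf("banked=%lld extend_minq_fast=%d\n",
         (long long)vbr_bits_off_target_fast, extend_minq_fast);
  return 0;
}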
double MVr; double mvr_abs; double MVc; @@ -73,6 +75,13 @@ typedef enum { FRAME_UPDATE_TYPES = 5 } FRAME_UPDATE_TYPE; +#define FC_ANIMATION_THRESH 0.15 +typedef enum { + FC_NORMAL = 0, + FC_GRAPHICS_ANIMATION = 1, + FRAME_CONTENT_TYPES = 2 +} FRAME_CONTENT_TYPE; + typedef struct { unsigned char index; RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1]; @@ -103,6 +112,8 @@ typedef struct { uint8_t *this_frame_mb_stats; FIRSTPASS_MB_STATS firstpass_mb_stats; #endif + // An indication of the content type of the current frame + FRAME_CONTENT_TYPE fr_content_type; // Projected total bits available for a key frame group of frames int64_t kf_group_bits; @@ -122,6 +133,7 @@ typedef struct { int baseline_active_worst_quality; int extend_minq; int extend_maxq; + int extend_minq_fast; GF_GROUP gf_group; } TWO_PASS; @@ -135,6 +147,7 @@ void vp9_end_first_pass(struct VP9_COMP *cpi); void vp9_init_second_pass(struct VP9_COMP *cpi); void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); +void vp9_twopass_postencode_update(struct VP9_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c index 80c509a1b49..081b99f9f1f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -13,10 +13,13 @@ #include <stdio.h> #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_mcomp.h" @@ -159,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { error_per_bit + 4096) >> 13 : 0) -// convert motion vector component to offset for svf calc +// convert motion vector component to offset for sv[a]f calc static INLINE int sp(int x) { - return (x & 7) << 1; + return x & 7; } static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { @@ -283,32 +286,32 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { bestmv->row *= 8; \ bestmv->col *= 8; -static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd, - const MV *bestmv, - const MV *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - const uint8_t *const src, - const int src_stride, - const uint8_t *const y, - int y_stride, - const uint8_t *second_pred, - int w, int h, int offset, - int *mvjcost, int *mvcost[2], - unsigned int *sse1, - int *distortion) { +static unsigned int setup_center_error(const MACROBLOCKD *xd, + const MV *bestmv, + const MV *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + const uint8_t *const src, + const int src_stride, + const uint8_t *const y, + int y_stride, + const uint8_t *second_pred, + int w, int h, int offset, + int *mvjcost, int *mvcost[2], + unsigned int *sse1, + int *distortion) { unsigned int besterr; #if CONFIG_VP9_HIGHBITDEPTH if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); - vp9_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, + vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { 
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); - vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } } else { @@ -320,7 +323,7 @@ static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd, (void) xd; if (second_pred != NULL) { DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); - vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } else { besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); @@ -607,7 +610,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, return besterr; } -const MV search_step_table[12] = { +static const MV search_step_table[12] = { // left, right, up, down {0, -4}, {0, 4}, {-4, 0}, {4, 0}, {0, -2}, {0, 2}, {-2, 0}, {2, 0}, @@ -676,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, tc = bc + search_step[idx].col; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); - int row_offset = (tr & 0x07) << 1; - int col_offset = (tc & 0x07) << 1; MV this_mv; this_mv.row = tr; this_mv.col = tc; if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); else - thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -706,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, tr = br + (cost_array[2] < cost_array[3] ? 
-hstep : hstep); if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); - int row_offset = (tr & 0x07) << 1; - int col_offset = (tc & 0x07) << 1; MV this_mv = {tr, tc}; if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); else - thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -1788,8 +1787,11 @@ static const MV search_pos[4] = { }; unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, + int mi_row, int mi_col) { MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; DECLARE_ALIGNED(16, int16_t, hbuf[128]); DECLARE_ALIGNED(16, int16_t, vbuf[128]); DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); @@ -1806,12 +1808,34 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, unsigned int best_sad, tmp_sad, this_sad[4]; MV this_mv; const int norm_factor = 3 + (bw >> 5); + const YV12_BUFFER_CONFIG *scaled_ref_frame = + vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]); + + if (scaled_ref_frame) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. + for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[0]; + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + } #if CONFIG_VP9_HIGHBITDEPTH - tmp_mv->row = 0; - tmp_mv->col = 0; - return cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, - xd->plane[0].pre[0].buf, ref_stride); + { + unsigned int this_sad; + tmp_mv->row = 0; + tmp_mv->col = 0; + this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, + xd->plane[0].pre[0].buf, ref_stride); + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; + } + return this_sad; + } #endif // Set up prediction 1-D reference set @@ -1889,6 +1913,12 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, tmp_mv->row *= 8; tmp_mv->col *= 8; + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; + } + return best_sad; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h index dd8a4607942..99c1afa28ff 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -83,7 +83,8 @@ int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, // Perform integral projection based motion estimation. 
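/*
 * Illustrative sketch (editorial, not part of this patch): motion vector
 * components in the sub-pixel search above are in 1/8-pel units, so each
 * component splits into a whole-pixel offset (x >> 3) used to address the
 * reference block and a 0..7 sub-pel phase (x & 7) handed to the svf/svaf
 * functions; the earlier "<< 1" doubling of that phase is dropped by this
 * change. A standalone model of the split:
 */
void split_mv_component(int mv_eighth_pel, int *full_pel, int *subpel_phase) {
  *full_pel = mv_eighth_pel >> 3;     /* whole-pixel part of the offset        */
  *subpel_phase = mv_eighth_pel & 7;  /* 1/8-pel phase, 0..7, as sp() returns  */
}
/* E.g. tc = 21 (2 full pixels + 5/8 pel) gives full_pel = 2, subpel_phase = 5. */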
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize); + BLOCK_SIZE bsize, + int mi_row, int mi_col); typedef int (integer_mv_pattern_search_fn) ( const MACROBLOCK *x, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c index 5eb5d542b78..8e191038514 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c @@ -14,6 +14,7 @@ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c index 9fb7cfba7bf..3eaa99054ce 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -14,8 +14,10 @@ #include <stdio.h> #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" @@ -23,6 +25,7 @@ #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_scan.h" #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encoder.h" @@ -214,7 +217,7 @@ static void block_variance(const uint8_t *src, int src_stride, for (i = 0; i < h; i += block_size) { for (j = 0; j < w; j += block_size) { - vp9_get8x8var(src + src_stride * i + j, src_stride, + vpx_get8x8var(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse8x8[k], &sum8x8[k]); *sse += sse8x8[k]; @@ -294,13 +297,11 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, else tx_size = TX_8X8; - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && - cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) - tx_size = TX_8X8; - else if (tx_size > TX_16X16) - tx_size = TX_16X16; - } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) + tx_size = TX_8X8; + else if (tx_size > TX_16X16) + tx_size = TX_16X16; } else { tx_size = MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); @@ -478,13 +479,11 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, else xd->mi[0]->mbmi.tx_size = TX_8X8; - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && - cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) - xd->mi[0]->mbmi.tx_size = TX_8X8; - else if (xd->mi[0]->mbmi.tx_size > TX_16X16) - xd->mi[0]->mbmi.tx_size = TX_16X16; - } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id)) + xd->mi[0]->mbmi.tx_size = TX_8X8; + else if (xd->mi[0]->mbmi.tx_size > TX_16X16) + xd->mi[0]->mbmi.tx_size = TX_16X16; } else { xd->mi[0]->mbmi.tx_size = MIN(max_txsize_lookup[bsize], @@ -659,7 +658,8 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, block = 0; *rate = 0; *dist = 0; - *sse = (*sse << 6) >> shift; + if (*sse < INT64_MAX) + *sse = (*sse << 6) >> shift; for (r = 0; r < max_blocks_high; r += block_step) { for (c = 0; c < 
num_4x4_w; c += block_step) { if (c < max_blocks_wide) { @@ -1059,6 +1059,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; + SPEED_FEATURES *const sf = &cpi->sf; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; @@ -1078,9 +1079,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, unsigned int var_y = UINT_MAX; unsigned int sse_y = UINT_MAX; // Reduce the intra cost penalty for small blocks (<=16x16). - const int reduction_fac = - (cpi->sf.partition_search_type == VAR_BASED_PARTITION && - bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0; + const int reduction_fac = (bsize <= BLOCK_16X16) ? + ((bsize <= BLOCK_8X8) ? 4 : 2) : 0; const int intra_cost_penalty = vp9_get_intra_cost_penalty( cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac; const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, @@ -1180,7 +1180,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cm->use_prev_frame_mvs) vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame, - candidates, mi_row, mi_col, NULL, NULL); + candidates, mi_row, mi_col, NULL, NULL, + xd->mi[0]->mbmi.mode_context); else const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info, xd->mi[0], @@ -1219,7 +1220,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; - if (cpi->ref_frame_flags & flag_list[i]) + if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking) if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) ref_frame_skip_mask |= (1 << ref_frame); if (ref_frame_skip_mask & (1 << ref_frame)) @@ -1247,7 +1248,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (bsize < BLOCK_16X16) continue; - tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize); + tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue; @@ -1594,7 +1595,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->sf.adaptive_rd_thresh) { THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mbmi->mode)]; - PREDICTION_MODE this_mode; if (best_ref_frame == INTRA_FRAME) { // Only consider the modes that are included in the intra_mode_list. @@ -1604,12 +1604,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // TODO(yunqingwang): Check intra mode mask and only update freq_fact // for those valid modes. 
for (i = 0; i < intra_modes; i++) { - PREDICTION_MODE this_mode = intra_mode_list[i]; update_thresh_freq_fact(cpi, tile_data, bsize, INTRA_FRAME, - best_mode_idx, this_mode); + best_mode_idx, intra_mode_list[i]); } } else { for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { + PREDICTION_MODE this_mode; if (best_ref_frame != ref_frame) continue; for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { update_thresh_freq_fact(cpi, tile_data, bsize, ref_frame, @@ -1660,7 +1660,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame, - candidates, mi_row, mi_col, NULL, NULL); + candidates, mi_row, mi_col, NULL, NULL, + xd->mi[0]->mbmi.mode_context); vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, &dummy_mv[0], &dummy_mv[1]); @@ -1692,8 +1693,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && - vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) + if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && + get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) continue; mbmi->ref_frame[0] = ref_frame; @@ -1734,7 +1735,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, b_mv[NEWMV].as_int = INVALID_MV; vp9_append_sub8x8_mvs_for_idx(cm, xd, tile_info, i, 0, mi_row, mi_col, &b_mv[NEARESTMV], - &b_mv[NEARMV]); + &b_mv[NEARMV], + xd->mi[0]->mbmi.mode_context); for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int b_rate = 0; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c index 3c07e2c2437..e6e17c073e3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c @@ -11,6 +11,7 @@ #include <math.h> #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_seg_common.h" @@ -677,7 +678,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1]; } - x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); + x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); x->q_index = qindex; x->errorperbit = rdmult >> 6; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c index 4c33ffd977b..85003f65ef1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -16,6 +16,7 @@ #include <string.h> #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/encoder/vp9_aq_cyclicrefresh.h" @@ -136,7 +137,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m, } } -void vp9_rc_init_minq_luts() { +void vp9_rc_init_minq_luts(void) { init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8, arfgf_low_motion_minq_8, arfgf_high_motion_minq_8, inter_minq_8, rtc_minq_8, VPX_BITS_8); @@ -233,13 +234,16 @@ int vp9_rc_clamp_iframe_target_size(const 
VP9_COMP *const cpi, int target) { return target; } -// Update the buffer level for higher layers, given the encoded current layer. +// Update the buffer level for higher temporal layers, given the encoded current +// temporal layer. static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { - int temporal_layer = 0; + int i = 0; int current_temporal_layer = svc->temporal_layer_id; - for (temporal_layer = current_temporal_layer + 1; - temporal_layer < svc->number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; + for (i = current_temporal_layer + 1; + i < svc->number_temporal_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -267,7 +271,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; - if (cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } @@ -491,7 +495,10 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, i = active_best_quality; do { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cm->seg.enabled && + cpi->svc.temporal_layer_id == 0 && + cpi->svc.spatial_layer_id == 0) { bits_per_mb_at_this_q = (int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor); } else { @@ -1057,10 +1064,12 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (frame_is_intra_only(cm) || (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { - active_best_quality -= cpi->twopass.extend_minq; + active_best_quality -= + (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); active_worst_quality += (cpi->twopass.extend_maxq / 2); } else { - active_best_quality -= cpi->twopass.extend_minq / 2; + active_best_quality -= + (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; active_worst_quality += cpi->twopass.extend_maxq; } } @@ -1203,11 +1212,9 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user rc->frames_since_golden = 0; - if (cpi->oxcf.pass == 2) { - if (!rc->source_alt_ref_pending && - cpi->twopass.gf_group.rf_level[0] == GF_ARF_STD) - rc->source_alt_ref_active = 0; - } else if (!rc->source_alt_ref_pending) { + // If we are not using alt ref in the up and coming group clear the arf + // active flag. + if (!rc->source_alt_ref_pending) { rc->source_alt_ref_active = 0; } @@ -1414,13 +1421,14 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { } else { target = rc->avg_frame_bandwidth; } - if (svc->number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { // Note that for layers, avg_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
- int current_temporal_layer = svc->temporal_layer_id; - const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, + svc->temporal_layer_id, svc->number_temporal_layers); + const LAYER_CONTEXT *lc = &svc->layer_context[layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1455,7 +1463,9 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { if (svc->number_temporal_layers > 1 && oxcf->rc_mode == VPX_CBR) { // Use the layer framerate for temporal layers CBR mode. - const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id]; + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, + svc->temporal_layer_id, svc->number_temporal_layers); + const LAYER_CONTEXT *lc = &svc->layer_context[layer]; framerate = lc->framerate; } kf_boost = MAX(kf_boost, (int)(2 * framerate - 16)); @@ -1468,10 +1478,27 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { return vp9_rc_clamp_iframe_target_size(cpi, target); } +// Reset information needed to set proper reference frames and buffer updates +// for temporal layering. This is called when a key frame is encoded. +static void reset_temporal_layer_to_zero(VP9_COMP *cpi) { + int sl; + LAYER_CONTEXT *lc = NULL; + cpi->svc.temporal_layer_id = 0; + + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; + lc->current_video_frame_in_layer = 0; + lc->frames_from_key_frame = 0; + } +} + void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target = rc->avg_frame_bandwidth; + const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, + cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); + if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && (rc->frames_since_key % @@ -1480,30 +1507,39 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { rc->source_alt_ref_active = 0; if (is_two_pass_svc(cpi)) { - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1; + cpi->svc.layer_context[layer].is_key_frame = 1; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); - } - - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { + } else if (is_one_pass_cbr_svc(cpi)) { + cpi->svc.layer_context[layer].is_key_frame = 1; + reset_temporal_layer_to_zero(cpi); + cpi->ref_frame_flags &= + (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); + // Assumption here is that LAST_FRAME is being updated for a keyframe. + // Thus no change in update flags. 
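The LAYER_IDS_TO_IDX() lookups above flatten a (spatial layer, temporal layer) pair into a single index into svc->layer_context[]. A minimal sketch of that mapping, assuming the row-major layout implied by the explicit spatial_layer_id * number_temporal_layers + temporal_layer_id expressions used later in this patch (the macro itself lives in vp9_svc_layercontext.h and is not shown here):

/* Assumed shape of the macro referenced above; illustrative only. */
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))

/* Example: spatial layer 1, temporal layer 2, with 3 temporal layers per
 * spatial layer, selects layer_context[1 * 3 + 2] == layer_context[5]. */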
target = calc_iframe_target_size_one_pass_cbr(cpi); } } else { cm->frame_type = INTER_FRAME; - if (is_two_pass_svc(cpi)) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = 0; } else { - lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; + lc->is_key_frame = + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG); } cpi->ref_frame_flags &= (~VP9_ALT_FLAG); - } - - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { + } else if (is_one_pass_cbr_svc(cpi)) { + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + if (cpi->svc.spatial_layer_id == 0) { + lc->is_key_frame = 0; + } else { + lc->is_key_frame = + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; + } target = calc_pframe_target_size_one_pass_cbr(cpi); } } @@ -1560,6 +1596,10 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { target = calc_pframe_target_size_one_pass_cbr(cpi); vp9_rc_set_frame_target(cpi, target); + if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC) + cpi->resize_state = vp9_resize_one_pass_cbr(cpi); + else + cpi->resize_state = 0; } int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget, @@ -1669,9 +1709,9 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { #define VBR_PCT_ADJUSTMENT_LIMIT 50 // For VBR...adjustment to the frame target based on error from previous frames -static void vbr_rate_correction(VP9_COMP *cpi, - int *this_frame_target, - int64_t vbr_bits_off_target) { +static void vbr_rate_correction(VP9_COMP *cpi, int *this_frame_target) { + RATE_CONTROL *const rc = &cpi->rc; + int64_t vbr_bits_off_target = rc->vbr_bits_off_target; int max_delta; double position_factor = 1.0; @@ -1695,6 +1735,20 @@ static void vbr_rate_correction(VP9_COMP *cpi, (vbr_bits_off_target < -max_delta) ? max_delta : (int)-vbr_bits_off_target; } + + // Fast redistribution of bits arising from massive local undershoot. + // Dont do it for kf,arf,gf or overlay frames. + if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref && + rc->vbr_bits_off_target_fast) { + int one_frame_bits = MAX(rc->avg_frame_bandwidth, *this_frame_target); + int fast_extra_bits; + fast_extra_bits = + (int)MIN(rc->vbr_bits_off_target_fast, one_frame_bits); + fast_extra_bits = (int)MIN(fast_extra_bits, + MAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8)); + *this_frame_target += (int)fast_extra_bits; + rc->vbr_bits_off_target_fast -= fast_extra_bits; + } } void vp9_set_target_rate(VP9_COMP *cpi) { @@ -1703,6 +1757,95 @@ void vp9_set_target_rate(VP9_COMP *cpi) { // Correction to rate target based on prior over or under shoot. if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ) - vbr_rate_correction(cpi, &target_rate, rc->vbr_bits_off_target); + vbr_rate_correction(cpi, &target_rate); vp9_rc_set_frame_target(cpi, target_rate); } + +// Check if we should resize, based on average QP from past x frames. +// Only allow for resize at most one scale down for now, scaling factor is 2. +int vp9_resize_one_pass_cbr(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; + int resize_now = 0; + cpi->resize_scale_num = 1; + cpi->resize_scale_den = 1; + // Don't resize on key frame; reset the counters on key frame. 
+  if (cm->frame_type == KEY_FRAME) {
+    cpi->resize_avg_qp = 0;
+    cpi->resize_count = 0;
+    return 0;
+  }
+  // Resize based on average QP over some window.
+  // Ignore samples close to key frame, since QP is usually high after key.
+  if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
+    const int window = (int)(5 * cpi->framerate);
+    cpi->resize_avg_qp += cm->base_qindex;
+    if (cpi->rc.buffer_level < 0)
+      ++cpi->resize_buffer_underflow;
+    ++cpi->resize_count;
+    // Check for resize action every "window" frames.
+    if (cpi->resize_count >= window) {
+      int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
+      // Resize down if buffer level has underflowed by a sufficient amount
+      // in the past window, and we are at the original resolution.
+      // Resize back up if average QP is low, and we are currently in a
+      // resized-down state.
+      if (cpi->resize_state == 0 &&
+          cpi->resize_buffer_underflow > (cpi->resize_count >> 3)) {
+        resize_now = 1;
+      } else if (cpi->resize_state == 1 &&
+                 avg_qp < 40 * cpi->rc.worst_quality / 100) {
+        resize_now = -1;
+      }
+      // Reset for next window measurement.
+      cpi->resize_avg_qp = 0;
+      cpi->resize_count = 0;
+      cpi->resize_buffer_underflow = 0;
+    }
+  }
+  // If the decision is to resize, reset some quantities, and check if we
+  // should reduce the rate correction factor.
+  if (resize_now != 0) {
+    int target_bits_per_frame;
+    int active_worst_quality;
+    int qindex;
+    int tot_scale_change;
+    // For now, resize is by 1/2 x 1/2.
+    cpi->resize_scale_num = 1;
+    cpi->resize_scale_den = 2;
+    tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) /
+        (cpi->resize_scale_num * cpi->resize_scale_num);
+    // Reset buffer level to optimal, update target size.
+    rc->buffer_level = rc->optimal_buffer_level;
+    rc->bits_off_target = rc->optimal_buffer_level;
+    rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi);
+    // Reset cyclic refresh parameters.
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
+      vp9_cyclic_refresh_reset_resize(cpi);
+    // Get the projected qindex, based on the scaled target frame size (scaled
+    // so target_bits_per_mb in vp9_rc_regulate_q will be the correct target).
+    target_bits_per_frame = (resize_now == 1) ?
+        rc->this_frame_target * tot_scale_change :
+        rc->this_frame_target / tot_scale_change;
+    active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
+    qindex = vp9_rc_regulate_q(cpi,
+                               target_bits_per_frame,
+                               rc->best_quality,
+                               active_worst_quality);
+    // If resize is down, check if the projected q index is close to
+    // worst_quality, and if so, reduce the rate correction factor (since we
+    // can likely afford a lower q for the resized frame).
+    if (resize_now == 1 &&
+        qindex > 90 * cpi->rc.worst_quality / 100) {
+      rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
+    }
+    // If resize is back up, check if the projected q index is too far above
+    // the current base_qindex, and if so, reduce the rate correction factor
+    // (since we prefer to keep q for the resized frame at least close to the
+    // previous q).
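To put numbers on the scaling above: with the 1/2 x 1/2 resize, tot_scale_change evaluates to 4, so the target passed to vp9_rc_regulate_q() is multiplied by 4 when resizing down and divided by 4 when scaling back up, which (per the comment above) keeps target_bits_per_mb correct for the new frame size. A small sketch, with a made-up frame target:

/* Illustrative only; the frame target value is invented for the example. */
static void scaled_target_example(void) {
  const int tot_scale_change = (2 * 2) / (1 * 1);           /* den^2 / num^2 = 4 */
  const int frame_target = 40000;                           /* example target, in bits */
  const int down_target = frame_target * tot_scale_change;  /* 160000 when resizing down */
  const int up_target = frame_target / tot_scale_change;    /* 10000 when sizing back up */
  (void)down_target;
  (void)up_target;
}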
+ if (resize_now == -1 && + qindex > 130 * cm->base_qindex / 100) { + rc->rate_correction_factors[INTER_NORMAL] *= 0.9; + } + } + return resize_now; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h index 869f6e59e97..a10836c7444 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h @@ -100,6 +100,7 @@ typedef struct { int64_t buffer_level; int64_t bits_off_target; int64_t vbr_bits_off_target; + int64_t vbr_bits_off_target_fast; int decimation_factor; int decimation_count; @@ -152,7 +153,7 @@ int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs, double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth); -void vp9_rc_init_minq_luts(); +void vp9_rc_init_minq_luts(void); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -244,6 +245,8 @@ void vp9_rc_set_gf_interval_range(const struct VP9_COMP *const cpi, void vp9_set_target_rate(struct VP9_COMP *cpi); +int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c index 194001c51a2..90ee1e44a1b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c @@ -15,6 +15,7 @@ #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" @@ -128,7 +129,7 @@ static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range, } } -void vp9_init_me_luts() { +void vp9_init_me_luts(void) { init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE, VPX_BITS_8); #if CONFIG_VP9_HIGHBITDEPTH @@ -264,6 +265,7 @@ static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) { void vp9_initialize_rd_consts(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->td.mb; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; RD_OPT *const rd = &cpi->rd; int i; @@ -279,6 +281,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { cm->frame_type != KEY_FRAME) ? 
0 : 1; set_block_thresholds(cm, rd); + set_partition_probs(cm, xd); if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) fill_token_costs(x->token_costs, cm->fc->coef_probs); @@ -286,7 +289,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || cm->frame_type == KEY_FRAME) { for (i = 0; i < PARTITION_CONTEXTS; ++i) - vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i), + vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i), vp9_partition_tree); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h index 4d247342b0a..7ba2568fe68 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h @@ -150,7 +150,7 @@ int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi, int ref_frame); -void vp9_init_me_luts(); +void vp9_init_me_luts(void); void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c index 73825623748..162d4de5f5d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -14,6 +14,7 @@ #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" @@ -24,6 +25,7 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_systemdependent.h" @@ -1802,7 +1804,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][frame].as_int = 0; vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame], - &frame_mv[NEARMV][frame]); + &frame_mv[NEARMV][frame], + xd->mi[0]->mbmi.mode_context); } // search for the best motion vector on this segment @@ -2117,8 +2120,8 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm, unsigned int *ref_costs_single, unsigned int *ref_costs_comp, vp9_prob *comp_mode_p) { - int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id, - SEG_LVL_REF_FRAME); + int seg_ref_active = segfeature_active(&cm->seg, segment_id, + SEG_LVL_REF_FRAME); if (seg_ref_active) { memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); @@ -2218,7 +2221,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // Gets an initial list of candidate vectors from neighbours and orders them vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col, - NULL, NULL); + NULL, NULL, xd->mi[0]->mbmi.mode_context); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, @@ -3004,8 +3007,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, } // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. 
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && - vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { + if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && + get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { ref_frame_skip_mask[0] |= (1 << ref_frame); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; } @@ -3014,7 +3017,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. - if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { + if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, // unless ARNR filtering is enabled in which case we want // an unfiltered alternative. We allow near/nearest as well @@ -3193,7 +3196,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, // Do not allow compound prediction if the segment level reference frame // feature is in use as in this case there can only be one reference. - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && @@ -3635,7 +3638,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, rd_cost->rate = INT_MAX; - assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); + assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); mbmi->mode = ZEROMV; mbmi->uv_mode = DC_PRED; @@ -3847,7 +3850,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, continue; // Do not allow compound prediction if the segment level reference frame // feature is in use as in this case there can only be one reference. - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && @@ -3872,13 +3875,13 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && - vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { + if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && + get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. - } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { + } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, // unless ARNR filtering is enabled in which case we want // an unfiltered alternative. 
We allow near/nearest as well diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c index 2ebdff291d6..f46cad80491 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include <string.h> +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_resize.h" @@ -28,7 +29,7 @@ typedef int16_t interp_kernel[INTERP_TAPS]; // Filters for interpolation (0.5-band) - note this also filters integer pels. -const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = { +static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = { {-3, 0, 35, 64, 35, 0, -3, 0}, {-3, -1, 34, 64, 36, 1, -3, 0}, {-3, -1, 32, 64, 38, 1, -3, 0}, @@ -64,7 +65,7 @@ const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = { }; // Filters for interpolation (0.625-band) - note this also filters integer pels. -const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = { +static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = { {-1, -8, 33, 80, 33, -8, -1, 0}, {-1, -8, 30, 80, 35, -8, -1, 1}, {-1, -8, 28, 80, 37, -7, -2, 1}, @@ -100,7 +101,7 @@ const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = { }; // Filters for interpolation (0.75-band) - note this also filters integer pels. -const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = { +static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = { {2, -11, 25, 96, 25, -11, 2, 0}, {2, -11, 22, 96, 28, -11, 2, 0}, {2, -10, 19, 95, 31, -11, 2, 0}, @@ -136,7 +137,7 @@ const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = { }; // Filters for interpolation (0.875-band) - note this also filters integer pels. 
-const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = { +static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = { {3, -8, 13, 112, 13, -8, 3, 0}, {3, -7, 10, 112, 17, -9, 3, -1}, {2, -6, 7, 111, 21, -9, 3, -1}, @@ -172,7 +173,7 @@ const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = { }; // Filters for interpolation (full-band) - no filtering for integer pixels -const interp_kernel vp9_filteredinterp_filters1000[(1 << SUBPEL_BITS)] = { +static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = { {0, 0, 0, 128, 0, 0, 0, 0}, {0, 1, -3, 128, 3, -1, 0, 0}, {-1, 2, -6, 127, 7, -2, 1, 0}, @@ -214,15 +215,15 @@ static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3}; static const interp_kernel *choose_interp_filter(int inlength, int outlength) { int outlength16 = outlength * 16; if (outlength16 >= inlength * 16) - return vp9_filteredinterp_filters1000; + return filteredinterp_filters1000; else if (outlength16 >= inlength * 13) - return vp9_filteredinterp_filters875; + return filteredinterp_filters875; else if (outlength16 >= inlength * 11) - return vp9_filteredinterp_filters750; + return filteredinterp_filters750; else if (outlength16 >= inlength * 9) - return vp9_filteredinterp_filters625; + return filteredinterp_filters625; else - return vp9_filteredinterp_filters500; + return filteredinterp_filters500; } static void interpolate(const uint8_t *const input, int inlength, @@ -427,7 +428,7 @@ static int get_down2_length(int length, int steps) { return length; } -int get_down2_steps(int in_length, int out_length) { +static int get_down2_steps(int in_length, int out_length) { int steps = 0; int proj_in_length; while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c index 4f93578326b..6c6c4ed3022 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -19,9 +19,33 @@ static int frame_is_boosted(const VP9_COMP *cpi) { return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi); } -static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm, +// Sets a partition size down to which the auto partition code will always +// search (can go lower), based on the image dimensions. The logic here +// is that the extent to which ringing artefacts are offensive, depends +// partly on the screen area that over which they propogate. Propogation is +// limited by transform block size but the screen area take up by a given block +// size will be larger for a small image format stretched to full screen. +static BLOCK_SIZE set_partition_min_limit(VP9_COMMON *const cm) { + unsigned int screen_area = (cm->width * cm->height); + + // Select block size based on image format size. + if (screen_area < 1280 * 720) { + // Formats smaller in area than 720P + return BLOCK_4X4; + } else if (screen_area < 1920 * 1080) { + // Format >= 720P and < 1080P + return BLOCK_8X8; + } else { + // Formats 1080P and up + return BLOCK_16X16; + } +} + +static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed) { + VP9_COMMON *const cm = &cpi->common; + if (speed >= 1) { if (MIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT @@ -45,6 +69,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm, sf->partition_search_breakout_dist_thr = (1 << 22); sf->partition_search_breakout_rate_thr = 100; } + sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); } if (speed >= 3) { @@ -62,6 +87,13 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm, } } + // If this is a two pass clip that fits the criteria for animated or + // graphics content then reset disable_split_mask for speeds 1-4. + if ((speed >= 1) && (cpi->oxcf.pass == 2) && + (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)) { + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + } + if (speed >= 4) { if (MIN(cm->width, cm->height) >= 720) { sf->partition_search_breakout_dist_thr = (1 << 26); @@ -72,29 +104,6 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm, } } -// Sets a partition size down to which the auto partition code will always -// search (can go lower), based on the image dimensions. The logic here -// is that the extent to which ringing artefacts are offensive, depends -// partly on the screen area that over which they propogate. Propogation is -// limited by transform block size but the screen area take up by a given block -// size will be larger for a small image format stretched to full screen. -static BLOCK_SIZE set_partition_min_limit(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - unsigned int screen_area = (cm->width * cm->height); - - // Select block size based on image format size. - if (screen_area < 1280 * 720) { - // Formats smaller in area than 720P - return BLOCK_4X4; - } else if (screen_area < 1920 * 1080) { - // Format >= 720P and < 1080P - return BLOCK_8X8; - } else { - // Formats 1080P and up - return BLOCK_16X16; - } -} - static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { const int boosted = frame_is_boosted(cpi); @@ -139,7 +148,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->disable_filter_search_var_thresh = 100; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->rd_auto_partition_min_limit = set_partition_min_limit(cpi); sf->allow_partition_search_skip = 1; } @@ -260,8 +268,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, FLAG_SKIP_INTRA_LOWVAR; sf->adaptive_pred_interp_filter = 2; - // Reference masking is not supported in dynamic scaling mode. - sf->reference_masking = cpi->oxcf.resize_mode != RESIZE_DYNAMIC ? 1 : 0; + // Disable reference masking if using spatial scaling since + // pred_mv_sad will not be set (since vp9_mv_pred will not + // be called). + // TODO(marpan/agrange): Fix this condition. + sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC && + cpi->svc.number_spatial_layers == 1) ? 1 : 0; sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -337,6 +349,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->coeff_prob_appx_step = 4; sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; + sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; if (!is_keyframe) { int i; @@ -360,7 +373,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, // Turn on this to use non-RD key frame coding mode. 
sf->use_nonrd_pick_mode = 1; sf->mv.search_method = NSTEP; - sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; } @@ -379,7 +391,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; - VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; RD_OPT *const rd = &cpi->rd; int i; @@ -387,7 +398,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { if (oxcf->mode == REALTIME) { set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); } else if (oxcf->mode == GOOD) { - set_good_speed_feature_framesize_dependent(cm, sf, oxcf->speed); + set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); } if (sf->disable_split_mask == DISABLE_ALL_SPLIT) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c index 88db5dda06d..172de5d1daa 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c @@ -10,6 +10,7 @@ #include <math.h> #include "./vp9_rtcd.h" +#include "vpx_ports/mem.h" #include "vp9/encoder/vp9_ssim.h" void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c index cfdc90d15fb..b345b162cdb 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c @@ -12,6 +12,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/encoder/vp9_cost.h" +#include "vp9/encoder/vp9_subexp.h" #include "vp9/encoder/vp9_writer.h" #define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd))) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h index ac54893cf45..6fbb747e7d3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h @@ -16,11 +16,15 @@ extern "C" { #endif -void vp9_write_prob_diff_update(vp9_writer *w, +#include "vp9/common/vp9_prob.h" + +struct vp9_writer; + +void vp9_write_prob_diff_update(struct vp9_writer *w, vp9_prob newp, vp9_prob oldp); -void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp, - unsigned int *ct); +void vp9_cond_prob_diff_update(struct vp9_writer *w, vp9_prob *oldp, + const unsigned int ct[2]); int vp9_prob_diff_update_savings_search(const unsigned int *ct, vp9_prob oldp, vp9_prob *bestp, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c index b3491a27a4a..1b35ac9b61e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -15,89 +15,85 @@ #include "vp9/encoder/vp9_extend.h" #define SMALL_FRAME_FB_IDX 7 +#define SMALL_FRAME_WIDTH 16 +#define SMALL_FRAME_HEIGHT 16 void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - int layer; - int layer_end; + int sl, tl; int alt_ref_idx = 
svc->number_spatial_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; - } else { - layer_end = svc->number_spatial_layers; - - if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { - if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img, - cpi->common.width, cpi->common.height, - cpi->common.subsampling_x, - cpi->common.subsampling_y, + if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { + if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img, + SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT, + cpi->common.subsampling_x, + cpi->common.subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cpi->common.use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cpi->common.byte_alignment, NULL, NULL, NULL)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate empty frame for multiple frame " - "contexts"); - - memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80, - cpi->svc.empty_frame.img.buffer_alloc_sz); - cpi->svc.empty_frame_width = cpi->common.width; - cpi->svc.empty_frame_height = cpi->common.height; - } + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate empty frame for multiple frame " + "contexts"); + + memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80, + cpi->svc.empty_frame.img.buffer_alloc_sz); } - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; - int i; - lc->current_video_frame_in_layer = 0; - lc->layer_size = 0; - lc->frames_from_key_frame = 0; - lc->last_frame_type = FRAME_TYPES; - lrc->ni_av_qi = oxcf->worst_allowed_q; - lrc->total_actual_bits = 0; - lrc->total_target_vs_actual = 0; - lrc->ni_tot_qi = 0; - lrc->tot_q = 0.0; - lrc->avg_q = 0.0; - lrc->ni_frames = 0; - lrc->decimation_count = 0; - lrc->decimation_factor = 0; - - for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { - lrc->rate_correction_factors[i] = 1.0; - } + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + int i; + lc->current_video_frame_in_layer = 0; + lc->layer_size = 0; + lc->frames_from_key_frame = 0; + lc->last_frame_type = FRAME_TYPES; + lrc->ni_av_qi = oxcf->worst_allowed_q; + lrc->total_actual_bits = 0; + lrc->total_target_vs_actual = 0; + lrc->ni_tot_qi = 0; + lrc->tot_q = 0.0; + lrc->avg_q = 0.0; + lrc->ni_frames = 0; + lrc->decimation_count = 0; + lrc->decimation_factor = 0; + + for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { + lrc->rate_correction_factors[i] = 1.0; + } - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; - lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; - lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; - lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; - } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; - lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; - lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; - lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; - lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + if (cpi->oxcf.rc_mode == VPX_CBR) { + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + lrc->last_q[INTER_FRAME] = 
oxcf->worst_allowed_q; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; + } else { + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; + lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; + lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; - if (oxcf->ss_enable_auto_arf[layer]) - lc->alt_ref_idx = alt_ref_idx++; - else - lc->alt_ref_idx = INVALID_IDX; - lc->gold_ref_idx = INVALID_IDX; - } + lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + if (oxcf->ss_enable_auto_arf[sl]) + lc->alt_ref_idx = alt_ref_idx++; + else + lc->alt_ref_idx = INVALID_IDX; + lc->gold_ref_idx = INVALID_IDX; + } - lrc->buffer_level = oxcf->starting_buffer_level_ms * - lc->target_bandwidth / 1000; - lrc->bits_off_target = lrc->buffer_level; + lrc->buffer_level = oxcf->starting_buffer_level_ms * + lc->target_bandwidth / 1000; + lrc->bits_off_target = lrc->buffer_level; + } } // Still have extra buffer for base layer golden frame @@ -112,53 +108,98 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; - int layer; - int layer_end; + int sl, tl, layer = 0, spatial_layer_target; float bitrate_alloc = 1.0; - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; - } else { - layer_end = svc->number_spatial_layers; - } + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + spatial_layer_target = 0; - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + svc->layer_context[layer].target_bandwidth = + oxcf->layer_target_bitrate[layer]; + } - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; - } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; + layer = LAYER_IDS_TO_IDX(sl, ((oxcf->ts_number_layers - 1) < 0 ? 
+ 0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers); + spatial_layer_target = + svc->layer_context[layer].target_bandwidth = + oxcf->layer_target_bitrate[layer]; + + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + LAYER_CONTEXT *const lc = + &svc->layer_context[sl * oxcf->ts_number_layers + tl]; + RATE_CONTROL *const lrc = &lc->rc; + + lc->spatial_layer_target_bandwidth = spatial_layer_target; + bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target; + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = + MIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; + lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; + } } - bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; - // Update buffer-related quantities. - lrc->starting_buffer_level = - (int64_t)(rc->starting_buffer_level * bitrate_alloc); - lrc->optimal_buffer_level = - (int64_t)(rc->optimal_buffer_level * bitrate_alloc); - lrc->maximum_buffer_size = - (int64_t)(rc->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); - // Update framerate-related quantities. + } else { + int layer_end; + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + layer_end = svc->number_temporal_layers; } else { - lc->framerate = cpi->framerate; + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; + // Update buffer-related quantities. + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = MIN(lrc->bits_off_target, + lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + // Update framerate-related quantities. + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = cpi->framerate; + } + lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + // Update qp-related quantities. + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; } - lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = rc->max_frame_bandwidth; - // Update qp-related quantities. 
- lrc->worst_quality = rc->worst_quality; - lrc->best_quality = rc->best_quality; } } static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) { - return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? - &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + if (is_one_pass_cbr_svc(cpi)) + return &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id]; + else + return (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.rc_mode == VPX_CBR) ? + &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : + &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; } void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { @@ -166,18 +207,22 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(cpi); RATE_CONTROL *const lrc = &lc->rc; - const int layer = svc->temporal_layer_id; + // Index into spatial+temporal arrays. + const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers + + svc->temporal_layer_id; + const int tl = svc->temporal_layer_id; - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; // Update the average layer frame size (non-cumulative per-frame-bw). - if (layer == 0) { + if (tl == 0) { lc->avg_frame_size = lrc->avg_frame_bandwidth; } else { const double prev_layer_framerate = - cpi->framerate / oxcf->ts_rate_decimator[layer - 1]; - const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1]; + cpi->framerate / oxcf->ts_rate_decimator[tl - 1]; + const int prev_layer_target_bandwidth = + oxcf->layer_target_bitrate[st_idx - 1]; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); @@ -243,9 +288,8 @@ void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = - (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? - &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers]; ++lc->current_video_frame_in_layer; ++lc->frames_from_key_frame; } @@ -253,10 +297,11 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 && - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame; + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].is_key_frame; } -#if CONFIG_SPATIAL_SVC static void get_layer_resolution(const int width_org, const int height_org, const int num, const int den, int *width_out, int *height_out) { @@ -276,6 +321,201 @@ static void get_layer_resolution(const int width_org, const int height_org, *height_out = h; } +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering +// scheme. 
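A compact sketch of the period-4 pattern described above, using the same derivation the function below performs (the helper name is illustrative, not part of the source):

/* Frame 0 -> TL0, frame 1 -> TL2, frame 2 -> TL1, frame 3 -> TL2, repeating. */
static int temporal_id_for_mode3(int frame_num_in_layer) {
  const int f = frame_num_in_layer % 4;
  return (f & 1) ? 2 : (f >> 1);
}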
+static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { + int frame_num_within_temporal_struct = 0; + int spatial_id, temporal_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + frame_num_within_temporal_struct = + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].current_video_frame_in_layer % 4; + temporal_id = cpi->svc.temporal_layer_id = + (frame_num_within_temporal_struct & 1) ? 2 : + (frame_num_within_temporal_struct >> 1); + cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = + cpi->ext_refresh_alt_ref_frame = 0; + if (!temporal_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { + // base layer is a key frame. + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else if (temporal_id == 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else { + if (frame_num_within_temporal_struct == 1) { + // the first tl2 picture + if (!spatial_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } else { // Top layer + cpi->ext_refresh_frame_flags_pending = 0; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else { + // The second tl2 picture + if (!spatial_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG; + cpi->ext_refresh_last_frame = 1; + } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + cpi->ext_refresh_last_frame = 1; + } else { // top layer + cpi->ext_refresh_frame_flags_pending = 0; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } + } + if (temporal_id == 0) { + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; + cpi->alt_fb_idx = 0; + } else if (temporal_id == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } else if (frame_num_within_temporal_struct == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } else { + cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = 0; + } +} + +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 2 - that does 0-1-0-1 temporal layering +// scheme. 
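Mode 2 simply alternates between the base and the enhancement temporal layer on every other frame; a one-line sketch mirroring the derivation in the function below (helper name again illustrative):

/* 0-1-0-1 pattern: even frames -> TL0, odd frames -> TL1. */
static int temporal_id_for_mode2(int frame_num_in_layer) {
  return frame_num_in_layer & 1;
}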
+static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { + int spatial_id, temporal_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + temporal_id = cpi->svc.temporal_layer_id = + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].current_video_frame_in_layer & 1; + cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = + cpi->ext_refresh_alt_ref_frame = 0; + if (!temporal_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { + // base layer is a key frame. + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else if (temporal_id == 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } + + if (temporal_id == 0) { + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; + cpi->alt_fb_idx = 0; + } else if (temporal_id == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } +} + +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 0 - that has no temporal layering. +static void set_flags_and_fb_idx_for_temporal_mode_noLayering( + VP9_COMP *const cpi) { + int spatial_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + cpi->ext_refresh_last_frame = + cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0; + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[0].is_key_frame) { + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; +} + +int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { + int width = 0, height = 0; + LAYER_CONTEXT *lc = NULL; + + if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { + set_flags_and_fb_idx_for_temporal_mode3(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_0101) { + set_flags_and_fb_idx_for_temporal_mode2(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + // VP9E_TEMPORAL_LAYERING_MODE_BYPASS : + // if the code goes here, it means the encoder will be relying on the + // flags from outside for layering. + // However, since when spatial+temporal layering is used, the buffer indices + // cannot be derived automatically, the bypass mode will only work when the + // number of spatial layers equals 1. 
+ assert(cpi->svc.number_spatial_layers == 1); + } + + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id]; + + get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, + lc->scaling_factor_num, lc->scaling_factor_den, + &width, &height); + + if (vp9_set_size_literal(cpi, width, height) != 0) + return VPX_CODEC_INVALID_PARAM; + + return 0; +} + +#if CONFIG_SPATIAL_SVC int vp9_svc_start_frame(VP9_COMP *const cpi) { int width = 0, height = 0; LAYER_CONTEXT *lc; @@ -362,20 +602,11 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) { cpi->lst_fb_idx = cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX; - // Gradually make the empty frame smaller to save bits. Make it half of - // its previous size because of the scaling factor restriction. - cpi->svc.empty_frame_width >>= 1; - cpi->svc.empty_frame_width = (cpi->svc.empty_frame_width + 1) & ~1; - if (cpi->svc.empty_frame_width < 16) - cpi->svc.empty_frame_width = 16; + if (cpi->svc.encode_intra_empty_frame != 0) + cpi->common.intra_only = 1; - cpi->svc.empty_frame_height >>= 1; - cpi->svc.empty_frame_height = (cpi->svc.empty_frame_height + 1) & ~1; - if (cpi->svc.empty_frame_height < 16) - cpi->svc.empty_frame_height = 16; - - width = cpi->svc.empty_frame_width; - height = cpi->svc.empty_frame_height; + width = SMALL_FRAME_WIDTH; + height = SMALL_FRAME_HEIGHT; } } } @@ -395,11 +626,12 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) { return 0; } +#endif + struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = vp9_lookahead_peek(ctx, 0); if (buf != NULL) { @@ -409,7 +641,5 @@ struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, } } } - return buf; } -#endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h index e9645ce9f24..b6a5ea54835 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -22,6 +22,7 @@ extern "C" { typedef struct { RATE_CONTROL rc; int target_bandwidth; + int spatial_layer_target_bandwidth; // Target for the spatial layer. double framerate; int avg_frame_size; int max_q; @@ -57,17 +58,18 @@ typedef struct { NEED_TO_ENCODE }encode_empty_frame_state; struct lookahead_entry empty_frame; - int empty_frame_width; - int empty_frame_height; + int encode_intra_empty_frame; // Store scaled source frames to be used for temporal filter to generate // a alt ref frame. YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; // Layer context used for rate control in one pass temporal CBR mode or - // two pass spatial mode. Defined for temporal or spatial layers for now. - // Does not support temporal combined with spatial RC. - LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; + // two pass spatial mode. + LAYER_CONTEXT layer_context[VPX_MAX_LAYERS]; + // Indicates what sort of temporal layering is used. + // Currently, this only works for CBR mode. 
+ VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } SVC; struct VP9_COMP; @@ -111,6 +113,8 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, // Start a frame and initialize svc parameters int vp9_svc_start_frame(struct VP9_COMP *const cpi); +int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c index d7979ab53a5..24b6203cb66 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -23,7 +23,9 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_temporal_filter.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" @@ -109,7 +111,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, kernel, mv_precision_uv, x, y); } -void vp9_temporal_filter_init() { +void vp9_temporal_filter_init(void) { int i; fixed_divide[0] = 0; @@ -680,7 +682,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { if (frames_to_blur > 0) { // Setup scaling factors. Scaling on each of the arnr frames is not // supported. - if (is_two_pass_svc(cpi)) { + if (cpi->use_svc) { // In spatial svc the scaling factors might be less then 1/2. // So we will use non-normative scaling. int frame_used = 0; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h index a971e0ae365..f537b8870a6 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h @@ -15,7 +15,7 @@ extern "C" { #endif -void vp9_temporal_filter_init(); +void vp9_temporal_filter_init(void); void vp9_temporal_filter(VP9_COMP *cpi, int distance); #ifdef __cplusplus diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c index 862be4d384a..181a99ce880 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c @@ -17,6 +17,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_cost.h" @@ -483,7 +484,7 @@ static INLINE void add_token_no_extra(TOKENEXTRA **t, static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id, TX_SIZE tx_size) { const int eob_max = 16 << (tx_size << 1); - return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max; + return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 
0 : eob_max; } static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -614,8 +615,8 @@ void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_skip_context(xd); - const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, - SEG_LVL_SKIP); + const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id, + SEG_LVL_SKIP); struct tokenize_b_args arg = {cpi, td, t}; if (mbmi->skip) { if (!dry_run) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c index f38f96d6c26..c571b7c9545 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c @@ -9,6 +9,7 @@ */ #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" @@ -18,25 +19,16 @@ #include "vp9/encoder/vp9_variance.h" -void variance(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, unsigned int *sse, int *sum) { - int i, j; - - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - - a += a_stride; - b += b_stride; - } -} +static uint8_t bilinear_filters[8][2] = { + { 128, 0, }, + { 112, 16, }, + { 96, 32, }, + { 80, 48, }, + { 64, 64, }, + { 48, 80, }, + { 32, 96, }, + { 16, 112, }, +}; // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal // or vertical direction to produce the filtered output block. Used to implement @@ -52,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -84,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -100,25 +92,6 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, } } -unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { - unsigned int i, sum = 0; - - for (i = 0; i < 256; ++i) { - sum += src_ptr[i] * src_ptr[i]; - } - - return sum; -} - -#define VAR(W, H) \ -unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - #define SUBPIX_VAR(W, H) \ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src, int src_stride, \ @@ -129,11 +102,11 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ uint8_t temp2[H * W]; \ \ var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ - BILINEAR_FILTERS_2TAP(xoffset)); \ + bilinear_filters[xoffset]); \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ + return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ } #define SUBPIX_AVG_VAR(W, H) \ @@ -148,182 +121,55 
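The new bilinear_filters[] table above holds eight 2-tap pairs that each sum to 128, replacing the BILINEAR_FILTERS_2TAP() lookup on a 16-entry table. A standalone sketch of how one pair is applied per sample, assuming the usual rounded shift by 7 since the taps sum to 1 << 7 (this mirrors the shape of the filter-pass loops, not the library code itself):

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    static const uint8_t bilinear_filters[8][2] = {
      { 128,   0 }, { 112,  16 }, { 96, 32 }, { 80, 48 },
      {  64,  64 }, {  48,  80 }, { 32, 96 }, { 16, 112 },
    };

    /* One output sample at sub-pel offset 'offset' (0..7); pixel_step is 1
     * for the horizontal pass and the row stride for the vertical pass. */
    static uint8_t bilinear_sample(const uint8_t *src, int pixel_step,
                                   int offset) {
      const uint8_t *f = bilinear_filters[offset];
      const int acc = (int)src[0] * f[0] + (int)src[pixel_step] * f[1];
      return (uint8_t)ROUND_POWER_OF_TWO(acc, 7);
    }

The smaller table also reflects that only eight sub-pel offsets are indexed by these paths, so the odd 1/16-pel entries are no longer needed.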
@@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ \ var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ - BILINEAR_FILTERS_2TAP(xoffset)); \ + bilinear_filters[xoffset]); \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ + vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ \ - return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ -} - -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); -} - -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); -} - -unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum); - return *sse; + return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } -VAR(4, 4) SUBPIX_VAR(4, 4) SUBPIX_AVG_VAR(4, 4) -VAR(4, 8) SUBPIX_VAR(4, 8) SUBPIX_AVG_VAR(4, 8) -VAR(8, 4) SUBPIX_VAR(8, 4) SUBPIX_AVG_VAR(8, 4) -VAR(8, 8) SUBPIX_VAR(8, 8) SUBPIX_AVG_VAR(8, 8) -VAR(8, 16) SUBPIX_VAR(8, 16) SUBPIX_AVG_VAR(8, 16) -VAR(16, 8) SUBPIX_VAR(16, 8) SUBPIX_AVG_VAR(16, 8) -VAR(16, 16) SUBPIX_VAR(16, 16) SUBPIX_AVG_VAR(16, 16) -VAR(16, 32) SUBPIX_VAR(16, 32) SUBPIX_AVG_VAR(16, 32) -VAR(32, 16) SUBPIX_VAR(32, 16) SUBPIX_AVG_VAR(32, 16) -VAR(32, 32) SUBPIX_VAR(32, 32) SUBPIX_AVG_VAR(32, 32) -VAR(32, 64) SUBPIX_VAR(32, 64) SUBPIX_AVG_VAR(32, 64) -VAR(64, 32) SUBPIX_VAR(64, 32) SUBPIX_AVG_VAR(64, 32) -VAR(64, 64) SUBPIX_VAR(64, 64) SUBPIX_AVG_VAR(64, 64) -void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} - #if CONFIG_VP9_HIGHBITDEPTH -void highbd_variance64(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, uint64_t *sse, - uint64_t *sum) { - int i, j; - - uint16_t *a = CONVERT_TO_SHORTPTR(a8); - uint16_t *b = CONVERT_TO_SHORTPTR(b8); - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - a += a_stride; - b += b_stride; - } -} - -void highbd_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, 
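The VAR() wrappers and mse helpers removed in this stretch (now provided by vpx_dsp) all reduce to one identity: for an N-pixel block, the reported variance is the sum of squared differences minus the squared sum divided by N. A compact generic version of that computation, kept size-agnostic purely for illustration:

    #include <stdint.h>

    static unsigned int block_variance(const uint8_t *a, int a_stride,
                                       const uint8_t *b, int b_stride,
                                       int w, int h, unsigned int *sse) {
      int64_t sum = 0;
      int64_t sse64 = 0;
      int i, j;
      for (i = 0; i < h; ++i) {
        for (j = 0; j < w; ++j) {
          const int diff = a[j] - b[j];
          sum += diff;
          sse64 += (int64_t)diff * diff;
        }
        a += a_stride;
        b += b_stride;
      }
      *sse = (unsigned int)sse64;
      /* The 64-bit product keeps sum * sum exact even for 64x64 blocks. */
      return (unsigned int)(sse64 - (sum * sum) / (w * h));
    }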
int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sse = (unsigned int)sse_long; - *sum = (int)sum_long; -} - -void highbd_10_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); - *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); -} - -void highbd_12_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); - *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8); -} - static void highbd_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, @@ -331,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass( int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); for (i = 0; i < output_height; i++) { @@ -357,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass( unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const int16_t *vp9_filter) { + const uint8_t *vp9_filter) { unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -374,35 +220,6 @@ static void highbd_var_filter_block2d_bil_second_pass( } } -#define HIGHBD_VAR(W, H) \ -unsigned int vp9_highbd_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_highbd_10_variance##W##x##H##_c(const uint8_t *a, \ - int a_stride, \ - const uint8_t *b, \ - int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_highbd_12_variance##W##x##H##_c(const uint8_t *a, \ - int a_stride, \ - const uint8_t *b, \ - int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - #define HIGHBD_SUBPIX_VAR(W, H) \ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src, int src_stride, \ @@ -413,11 +230,11 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ + return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ dst_stride, sse); \ } \ \ @@ -430,11 +247,11 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, 
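The removed highbd_10_variance()/highbd_12_variance() wrappers above scale the accumulated totals back to 8-bit-equivalent units so the same thresholds can be reused across bit depths. A hedged sketch of that normalization (function name and the bd parameter are invented for the example):

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    static void normalize_to_8bit(int bd, uint64_t sse_long, uint64_t sum_long,
                                  unsigned int *sse, int *sum) {
      if (bd == 8) {                 /* 8-bit input needs no rescaling */
        *sse = (unsigned int)sse_long;
        *sum = (int)sum_long;
        return;
      }
      /* 10-bit differences span up to 4x the 8-bit range, 12-bit up to 16x,
       * so the sum shrinks by (bd - 8) bits and the sum of squares by twice
       * that: >>2 / >>4 for 10-bit, >>4 / >>8 for 12-bit. */
      *sum = (int)ROUND_POWER_OF_TWO(sum_long, bd - 8);
      *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 2 * (bd - 8));
    }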
src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ } \ \ @@ -447,11 +264,11 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ } @@ -467,14 +284,14 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ dst_stride, sse); \ } \ \ @@ -489,14 +306,14 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ } \ \ @@ -511,141 +328,53 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, BILINEAR_FILTERS_2TAP(xoffset)); \ + W, bilinear_filters[xoffset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - BILINEAR_FILTERS_2TAP(yoffset)); \ + bilinear_filters[yoffset]); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ } -#define HIGHBD_GET_VAR(S) \ -void vp9_highbd_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void 
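The ..._sub_pixel_avg_variance macros above average a second predictor with the filtered block before measuring variance, via vpx_comp_avg_pred()/vpx_highbd_comp_avg_pred(). A minimal 8-bit sketch of that rounded average (illustrative only; the real helpers live in vpx_dsp):

    #include <stdint.h>

    static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred,
                              int width, int height,
                              const uint8_t *ref, int ref_stride) {
      int i, j;
      for (i = 0; i < height; ++i) {
        for (j = 0; j < width; ++j)
          comp_pred[j] = (uint8_t)((pred[j] + ref[j] + 1) >> 1);  /* round half up */
        comp_pred += width;   /* pred and comp_pred are packed W-wide blocks */
        pred += width;
        ref += ref_stride;    /* only ref keeps its own stride */
      }
    }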
vp9_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void vp9_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} - -#define HIGHBD_MSE(W, H) \ -unsigned int vp9_highbd_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_highbd_10_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_highbd_12_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} - -HIGHBD_GET_VAR(8) -HIGHBD_GET_VAR(16) - -HIGHBD_MSE(16, 16) -HIGHBD_MSE(16, 8) -HIGHBD_MSE(8, 16) -HIGHBD_MSE(8, 8) - -HIGHBD_VAR(4, 4) HIGHBD_SUBPIX_VAR(4, 4) HIGHBD_SUBPIX_AVG_VAR(4, 4) -HIGHBD_VAR(4, 8) HIGHBD_SUBPIX_VAR(4, 8) HIGHBD_SUBPIX_AVG_VAR(4, 8) -HIGHBD_VAR(8, 4) HIGHBD_SUBPIX_VAR(8, 4) HIGHBD_SUBPIX_AVG_VAR(8, 4) -HIGHBD_VAR(8, 8) HIGHBD_SUBPIX_VAR(8, 8) HIGHBD_SUBPIX_AVG_VAR(8, 8) -HIGHBD_VAR(8, 16) HIGHBD_SUBPIX_VAR(8, 16) HIGHBD_SUBPIX_AVG_VAR(8, 16) -HIGHBD_VAR(16, 8) HIGHBD_SUBPIX_VAR(16, 8) HIGHBD_SUBPIX_AVG_VAR(16, 8) -HIGHBD_VAR(16, 16) HIGHBD_SUBPIX_VAR(16, 16) HIGHBD_SUBPIX_AVG_VAR(16, 16) -HIGHBD_VAR(16, 32) HIGHBD_SUBPIX_VAR(16, 32) HIGHBD_SUBPIX_AVG_VAR(16, 32) -HIGHBD_VAR(32, 16) HIGHBD_SUBPIX_VAR(32, 16) HIGHBD_SUBPIX_AVG_VAR(32, 16) -HIGHBD_VAR(32, 32) HIGHBD_SUBPIX_VAR(32, 32) HIGHBD_SUBPIX_AVG_VAR(32, 32) -HIGHBD_VAR(32, 64) HIGHBD_SUBPIX_VAR(32, 64) HIGHBD_SUBPIX_AVG_VAR(32, 64) -HIGHBD_VAR(64, 32) HIGHBD_SUBPIX_VAR(64, 32) HIGHBD_SUBPIX_AVG_VAR(64, 32) -HIGHBD_VAR(64, 64) HIGHBD_SUBPIX_VAR(64, 64) HIGHBD_SUBPIX_AVG_VAR(64, 64) - -void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, - int width, int height, const uint8_t *ref8, - int ref_stride) { - int i, j; - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h index 53148f23c56..0a8739510f4 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h @@ -12,33 +12,12 @@ #define VP9_ENCODER_VP9_VARIANCE_H_ #include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif -void variance(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, - unsigned int *sse, int *sum); - -#if CONFIG_VP9_HIGHBITDEPTH -void highbd_variance(const uint8_t *a8, 
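The HIGHBD_MSE() wrappers deleted above make the relationship between the two metrics explicit: the mse functions return *sse unchanged, while the variance functions subtract the squared-mean term. Both come from the same (sse, sum) accumulation pass; a small illustrative helper:

    #include <stdint.h>

    static void sse_to_metrics(unsigned int sse, int sum, int n_pixels,
                               unsigned int *mse, unsigned int *var) {
      *mse = sse;                                                    /* raw SSE */
      *var = sse - (unsigned int)(((int64_t)sum * sum) / n_pixels);  /* variance */
    }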
int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, - unsigned int *sse, int *sum); - -void highbd_10_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, - unsigned int *sse, int *sum); - -void highbd_12_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, - unsigned int *sse, int *sum); -#endif - typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -95,15 +74,6 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred, - int width, int height, - const uint8_t *ref, int ref_stride); -#endif - #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h index 9d161f95cf6..e347ea41441 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h @@ -19,7 +19,7 @@ extern "C" { #endif -typedef struct { +typedef struct vp9_writer { unsigned int lowvalue; unsigned int range; int count; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c index 4672aa6b8cf..4531d794a9c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c @@ -9,6 +9,8 @@ */ #include <emmintrin.h> + +#include "./vp9_rtcd.h" #include "vpx_ports/mem.h" void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, @@ -262,17 +264,18 @@ void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, __m128i b2 = _mm_add_epi16(coeff2, coeff3); __m128i b3 = _mm_sub_epi16(coeff2, coeff3); + b0 = _mm_srai_epi16(b0, 1); + b1 = _mm_srai_epi16(b1, 1); + b2 = _mm_srai_epi16(b2, 1); + b3 = _mm_srai_epi16(b3, 1); + coeff0 = _mm_add_epi16(b0, b2); coeff1 = _mm_add_epi16(b1, b3); - coeff0 = _mm_srai_epi16(coeff0, 1); - coeff1 = _mm_srai_epi16(coeff1, 1); _mm_store_si128((__m128i *)coeff, coeff0); _mm_store_si128((__m128i *)(coeff + 64), coeff1); coeff2 = _mm_sub_epi16(b0, b2); coeff3 = _mm_sub_epi16(b1, b3); - coeff2 = _mm_srai_epi16(coeff2, 1); - coeff3 = _mm_srai_epi16(coeff3, 1); _mm_store_si128((__m128i *)(coeff + 128), coeff2); _mm_store_si128((__m128i *)(coeff + 192), coeff3); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h index 66827ad8037..ae6bfe5fa2d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h @@ -9,6 +9,8 @@ */ #include <immintrin.h> // AVX2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi constants #include "vpx_ports/mem.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h index 099993aa6a0..003ebd13fe3 100644 --- 
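The vp9_hadamard_16x16_sse2 hunk above moves the >> 1 from the combined outputs onto b0..b3 before the final butterfly. Since _mm_add_epi16/_mm_sub_epi16 wrap on overflow, halving first keeps the second-level sums inside the signed 16-bit lanes, which is the likely motivation for the reordering. The scalar shape of the new ordering, as a sketch rather than the intrinsics code:

    #include <stdint.h>

    static void combine_halved(int16_t c0, int16_t c1, int16_t c2, int16_t c3,
                               int16_t out[4]) {
      /* Halve the butterfly terms first so the following adds/subs stay
       * within the 16-bit range of the SIMD lanes they model. */
      const int16_t b0 = (int16_t)((c0 + c1) >> 1);
      const int16_t b1 = (int16_t)((c0 - c1) >> 1);
      const int16_t b2 = (int16_t)((c2 + c3) >> 1);
      const int16_t b3 = (int16_t)((c2 - c3) >> 1);
      out[0] = (int16_t)(b0 + b2);
      out[1] = (int16_t)(b1 + b3);
      out[2] = (int16_t)(b0 - b2);
      out[3] = (int16_t)(b1 - b3);
    }

Shifting before combining trades a small amount of rounding precision for the overflow headroom; the overall scale of the outputs is unchanged.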
a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h @@ -9,6 +9,8 @@ */ #include <emmintrin.h> // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi constants #include "vp9/encoder/x86/vp9_dct_sse2.h" #include "vp9/encoder/vp9_dct.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c index 3a19f52746c..8f3b61ad86d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c @@ -15,12 +15,12 @@ #define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2 #define FDCT32x32_HIGH_PRECISION 0 -#include "vp9/encoder/x86/vp9_dct32x32_avx2.c" +#include "vp9/encoder/x86/vp9_dct32x32_avx2_impl.h" #undef FDCT32x32_2D_AVX2 #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D_AVX2 vp9_fdct32x32_avx2 #define FDCT32x32_HIGH_PRECISION 1 -#include "vp9/encoder/x86/vp9_dct32x32_avx2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_avx2_impl.h" // NOLINT #undef FDCT32x32_2D_AVX2 #undef FDCT32x32_HIGH_PRECISION diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c index 564b7955e5b..cff4fcbdce0 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c @@ -10,6 +10,8 @@ #include <assert.h> #include <emmintrin.h> // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi constants #include "vp9/encoder/vp9_dct.h" #include "vp9/encoder/x86/vp9_dct_sse2.h" @@ -96,7 +98,7 @@ static INLINE void transpose_4x4(__m128i *res) { res[3] = _mm_unpackhi_epi64(res[2], res[2]); } -void fdct4_sse2(__m128i *in) { +static void fdct4_sse2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); @@ -129,7 +131,7 @@ void fdct4_sse2(__m128i *in) { transpose_4x4(in); } -void fadst4_sse2(__m128i *in) { +static void fadst4_sse2(__m128i *in) { const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); @@ -831,7 +833,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { // 07 17 27 37 47 57 67 77 } -void fdct8_sse2(__m128i *in) { +static void fdct8_sse2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); @@ -971,7 +973,7 @@ void fdct8_sse2(__m128i *in) { array_transpose_8x8(in, in); } -void fadst8_sse2(__m128i *in) { +static void fadst8_sse2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -1353,7 +1355,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) { right_shift_8x8(res1 + 8, 2); } -void fdct16_8col(__m128i *in) { +static void fdct16_8col(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; const 
__m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); @@ -1675,7 +1677,7 @@ void fdct16_8col(__m128i *in) { in[15] = _mm_packs_epi32(v[14], v[15]); } -void fadst16_8col(__m128i *in) { +static void fadst16_8col(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -2145,13 +2147,13 @@ void fadst16_8col(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -void fdct16_sse2(__m128i *in0, __m128i *in1) { +static void fdct16_sse2(__m128i *in0, __m128i *in1) { fdct16_8col(in0); fdct16_8col(in1); array_transpose_16x16(in0, in1); } -void fadst16_sse2(__m128i *in0, __m128i *in1) { +static void fadst16_sse2(__m128i *in0, __m128i *in1) { fadst16_8col(in0); fadst16_8col(in1); array_transpose_16x16(in0, in1); @@ -2334,7 +2336,7 @@ void vp9_highbd_fht8x8_sse2(const int16_t *input, tran_low_t *output, } } -void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, +void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { vp9_highbd_fdct16x16_sse2(input, output, stride); @@ -2368,8 +2370,8 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, /* * The DCTnxn functions are defined using the macros below. The main code for - * them is in separate files (vp9/encoder/x86/vp9_dct_impl_sse2.c & - * vp9/encoder/x86/vp9_dct32x32_sse2.c) which are used by both the 8 bit code + * them is in separate files (vp9/encoder/x86/vp9_dct_sse2_impl.h & + * vp9/encoder/x86/vp9_dct32x32_sse2_impl.h) which are used by both the 8 bit code * and the high bit depth code. */ @@ -2378,20 +2380,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, #define FDCT4x4_2D vp9_fdct4x4_sse2 #define FDCT8x8_2D vp9_fdct8x8_sse2 #define FDCT16x16_2D vp9_fdct16x16_sse2 -#include "vp9/encoder/x86/vp9_dct_impl_sse2.c" +#include "vp9/encoder/x86/vp9_dct_sse2_impl.h" #undef FDCT4x4_2D #undef FDCT8x8_2D #undef FDCT16x16_2D #define FDCT32x32_2D vp9_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vp9_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION @@ -2405,20 +2407,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, #define FDCT4x4_2D vp9_highbd_fdct4x4_sse2 #define FDCT8x8_2D vp9_highbd_fdct8x8_sse2 #define FDCT16x16_2D vp9_highbd_fdct16x16_sse2 -#include "vp9/encoder/x86/vp9_dct_impl_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct_sse2_impl.h" // NOLINT #undef FDCT4x4_2D #undef FDCT8x8_2D #undef FDCT16x16_2D #define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 -#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT +#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c 
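The renames above (vp9_dct32x32_sse2.c to vp9_dct32x32_sse2_impl.h, and similarly for the AVX2 and generic DCT bodies) formalize an include-as-template idiom: one shared body is compiled several times under different function names and precision settings, selected by macros defined immediately before each #include. A stripped-down illustration of the pattern; the file and macro names here are invented, not the libvpx ones:

    /* fdct_impl.inc -- shared body, included more than once. */
    void FDCT_NAME(const int16_t *in, int16_t *out, int stride) {
    #if HIGH_PRECISION
      /* full-precision rounding path */
    #else
      /* reduced-precision ("rd") path */
    #endif
    }

    /* fdct.c -- instantiate two variants from the one body. */
    #include <stdint.h>

    #define FDCT_NAME       fdct32x32_rd
    #define HIGH_PRECISION  0
    #include "fdct_impl.inc"
    #undef FDCT_NAME
    #undef HIGH_PRECISION

    #define FDCT_NAME       fdct32x32
    #define HIGH_PRECISION  1
    #include "fdct_impl.inc"
    #undef FDCT_NAME
    #undef HIGH_PRECISION

Giving the shared body a header-style extension presumably keeps the build systems from treating it as an independent translation unit, which is what the accompanying BUILD.gn/libvpx_srcs.gni updates elsewhere in this patch reflect.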
b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h index e03a76d2e89..11bf5a25e62 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h @@ -9,6 +9,8 @@ */ #include <emmintrin.h> // SSE2 + +#include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" // for cospi constants #include "vp9/encoder/vp9_dct.h" #include "vp9/encoder/x86/vp9_dct_sse2.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c index 1c1005aeeda..96038fee16b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c @@ -15,6 +15,8 @@ #include <math.h> #endif #include <tmmintrin.h> // SSSE3 + +#include "./vp9_rtcd.h" #include "vp9/common/x86/vp9_idct_intrin_sse2.h" void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c index c67490fad34..dfebaab0ac6 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c @@ -9,8 +9,9 @@ */ #include <immintrin.h> // AVX2 -#include "vpx/vpx_integer.h" +#include "./vp9_rtcd.h" +#include "vpx/vpx_integer.h" int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c index ffa43b65a59..cbdd1c93e1c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c @@ -10,6 +10,7 @@ #include <emmintrin.h> +#include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #if CONFIG_VP9_HIGHBITDEPTH diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm index 987729f962c..4594bb1aabd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm @@ -14,35 +14,19 @@ SECTION_RODATA pw_8: times 8 dw 8 bilin_filter_m_sse2: times 8 dw 16 times 8 dw 0 - times 8 dw 15 - times 8 dw 1 times 8 dw 14 times 8 dw 2 - times 8 dw 13 - times 8 dw 3 times 8 dw 12 times 8 dw 4 - times 8 dw 11 - times 8 dw 5 times 8 dw 10 times 8 dw 6 - times 8 dw 9 - times 8 dw 7 times 16 dw 8 - times 8 dw 7 - times 8 dw 9 times 8 dw 6 times 8 dw 10 - times 8 dw 5 - times 8 dw 11 times 8 dw 4 times 8 dw 12 - times 8 dw 3 - times 8 dw 13 times 8 dw 2 times 8 dw 14 - times 8 dw 1 - times 8 dw 15 SECTION .text diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c index 4bc3e7e2d15..29b7b278217 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c +++ 
b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c @@ -13,237 +13,6 @@ #include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - uint32_t *sse, int *sum); - -uint32_t vp9_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - uint32_t *sse, int *sum); - -uint32_t vp9_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - uint32_t *sse, int *sum); - -static void highbd_variance_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - int w, int h, uint32_t *sse, int *sum, - high_variance_fn_t var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } -} - -static void highbd_10_variance_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - int w, int h, uint32_t *sse, int *sum, - high_variance_fn_t var_fn, int block_size) { - int i, j; - uint64_t sse_long = 0; - int64_t sum_long = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - sse_long += sse0; - sum_long += sum0; - } - } - *sum = ROUND_POWER_OF_TWO(sum_long, 2); - *sse = ROUND_POWER_OF_TWO(sse_long, 4); -} - -static void highbd_12_variance_sse2(const uint16_t *src, int src_stride, - const uint16_t *ref, int ref_stride, - int w, int h, uint32_t *sse, int *sum, - high_variance_fn_t var_fn, int block_size) { - int i, j; - uint64_t sse_long = 0; - int64_t sum_long = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - sse_long += sse0; - sum_long += sum0; - } - } - *sum = ROUND_POWER_OF_TWO(sum_long, 4); - *sse = ROUND_POWER_OF_TWO(sse_long, 8); -} - - -#define HIGH_GET_VAR(S) \ -void vp9_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, \ - uint32_t *sse, int *sum) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ - sse, sum); \ -} \ -\ -void vp9_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, \ - uint32_t *sse, int *sum) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ - sse, sum); \ - *sum = ROUND_POWER_OF_TWO(*sum, 2); \ - *sse = ROUND_POWER_OF_TWO(*sse, 4); \ -} \ -\ -void vp9_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, \ - uint32_t *sse, int *sum) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ - sse, sum); \ - *sum = ROUND_POWER_OF_TWO(*sum, 4); \ - *sse = ROUND_POWER_OF_TWO(*sse, 8); \ -} - 
-HIGH_GET_VAR(16); -HIGH_GET_VAR(8); - -#undef HIGH_GET_VAR - -#define VAR_FN(w, h, block_size, shift) \ -uint32_t vp9_highbd_variance##w##x##h##_sse2( \ - const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ - int sum; \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - highbd_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \ - vp9_highbd_calc##block_size##x##block_size##var_sse2, \ - block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ -} \ -\ -uint32_t vp9_highbd_10_variance##w##x##h##_sse2( \ - const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ - int sum; \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - highbd_10_variance_sse2( \ - src, src_stride, ref, ref_stride, w, h, sse, &sum, \ - vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ -} \ -\ -uint32_t vp9_highbd_12_variance##w##x##h##_sse2( \ - const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ - int sum; \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ - highbd_12_variance_sse2( \ - src, src_stride, ref, ref_stride, w, h, sse, &sum, \ - vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ -} - -VAR_FN(64, 64, 16, 12); -VAR_FN(64, 32, 16, 11); -VAR_FN(32, 64, 16, 11); -VAR_FN(32, 32, 16, 10); -VAR_FN(32, 16, 16, 9); -VAR_FN(16, 32, 16, 9); -VAR_FN(16, 16, 16, 8); -VAR_FN(16, 8, 8, 7); -VAR_FN(8, 16, 8, 7); -VAR_FN(8, 8, 8, 6); - -#undef VAR_FN - -unsigned int vp9_highbd_mse16x16_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_highbd_calc16x16var_sse2, 16); - return *sse; -} - -unsigned int vp9_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_highbd_calc16x16var_sse2, 16); - return *sse; -} - -unsigned int vp9_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_highbd_calc16x16var_sse2, 16); - return *sse; -} - -unsigned int vp9_highbd_mse8x8_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, - sse, &sum, vp9_highbd_calc8x8var_sse2, 8); - return *sse; -} - -unsigned int vp9_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_10_variance_sse2(src, src_stride, ref, 
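The VAR_FN() wrappers removed above pass a precomputed shift instead of dividing: every block area here is a power of two, so sum*sum/(w*h) can be taken as an arithmetic shift by log2(w*h). A short illustration (the helper name is made up):

    #include <stdint.h>

    /* shift = log2(w * h): 6 for 8x8, 8 for 16x16, 12 for 64x64, etc. */
    static unsigned int var_from_sums(unsigned int sse, int sum, int shift) {
      return sse - (unsigned int)(((int64_t)sum * sum) >> shift);
    }

    /* e.g. a 32x16 block uses shift 9, matching VAR_FN(32, 16, 16, 9). */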
ref_stride, 8, 8, - sse, &sum, vp9_highbd_calc8x8var_sse2, 8); - return *sse; -} - -unsigned int vp9_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, - const uint8_t *ref8, int ref_stride, - unsigned int *sse) { - int sum; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, - sse, &sum, vp9_highbd_calc8x8var_sse2, 8); - return *sse; -} - #define DECL(w, opt) \ int vp9_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \ ptrdiff_t src_stride, \ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c index 00abd3c4962..71fdfd71624 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c @@ -11,6 +11,7 @@ #include <emmintrin.h> #include <xmmintrin.h> +#include "./vp9_rtcd.h" #include "vpx/vpx_integer.h" void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm index 06b8b034a5e..292cf34d1a2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm @@ -14,52 +14,28 @@ SECTION_RODATA pw_8: times 8 dw 8 bilin_filter_m_sse2: times 8 dw 16 times 8 dw 0 - times 8 dw 15 - times 8 dw 1 times 8 dw 14 times 8 dw 2 - times 8 dw 13 - times 8 dw 3 times 8 dw 12 times 8 dw 4 - times 8 dw 11 - times 8 dw 5 times 8 dw 10 times 8 dw 6 - times 8 dw 9 - times 8 dw 7 times 16 dw 8 - times 8 dw 7 - times 8 dw 9 times 8 dw 6 times 8 dw 10 - times 8 dw 5 - times 8 dw 11 times 8 dw 4 times 8 dw 12 - times 8 dw 3 - times 8 dw 13 times 8 dw 2 times 8 dw 14 - times 8 dw 1 - times 8 dw 15 bilin_filter_m_ssse3: times 8 db 16, 0 - times 8 db 15, 1 times 8 db 14, 2 - times 8 db 13, 3 times 8 db 12, 4 - times 8 db 11, 5 times 8 db 10, 6 - times 8 db 9, 7 times 16 db 8 - times 8 db 7, 9 times 8 db 6, 10 - times 8 db 5, 11 times 8 db 4, 12 - times 8 db 3, 13 times 8 db 2, 14 - times 8 db 1, 15 SECTION .text diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c index a441cadaf70..b1c79752076 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c @@ -9,42 +9,28 @@ */ #include <immintrin.h> // AVX2 + +#include "./vp9_rtcd.h" #include "vpx_ports/mem.h" #include "vp9/encoder/vp9_variance.h" DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, - 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, - 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, - 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, - 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, - 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, - 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, - 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, - 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, - 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, - 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, - 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, - 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, - 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, - 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, - 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15 }; #define FILTER_SRC(filter) \ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c index ea09b959e12..8cd071de5e2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c @@ -7,23 +7,12 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -typedef void (*get_var_avx2)(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - -void vp9_get16x16var_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - -void vp9_get32x32var_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum); - unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride, int x_offset, int y_offset, const uint8_t *dst, int dst_stride, @@ -41,81 +30,6 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src, int height, unsigned int *sseptr); -static void variance_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - int w, int h, unsigned int *sse, int *sum, - get_var_avx2 var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += 16) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(&src[src_stride * i + j], src_stride, - &ref[ref_stride * i + j], ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } -} - - -unsigned int vp9_variance16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 16, 16, - sse, &sum, vp9_get16x16var_avx2, 16); - return *sse - (((unsigned int)sum * sum) >> 8); -} - -unsigned int vp9_mse16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse; -} - -unsigned int vp9_variance32x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 16, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 9); -} - -unsigned int vp9_variance32x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 32, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 10); -} - -unsigned int vp9_variance64x64_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 64, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 12); -} - -unsigned int vp9_variance64x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 32, - sse, &sum, vp9_get32x32var_avx2, 32); - return *sse - (((int64_t)sum * sum) >> 11); -} - unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src, int src_stride, int x_offset, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c index 8490bbbdc2e..961efe34ee6 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c @@ -10,310 +10,12 @@ #include <emmintrin.h> // SSE2 +#include "./vp9_rtcd.h" #include "./vpx_config.h" 
#include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse, int *sum); - -unsigned int vp9_get_mb_ss_sse2(const int16_t *src) { - __m128i vsum = _mm_setzero_si128(); - int i; - - for (i = 0; i < 32; ++i) { - const __m128i v = _mm_loadu_si128((const __m128i *)src); - vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); - src += 8; - } - - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); - return _mm_cvtsi128_si32(vsum); -} - -#define READ64(p, stride, i) \ - _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \ - _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride))) - -unsigned int vp9_get4x4var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero); - const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero); - const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero); - const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - // sum - __m128i vsum = _mm_add_epi16(diff0, diff1); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); - *sum = (int16_t)_mm_extract_epi16(vsum, 0); - - // sse - vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0), - _mm_madd_epi16(diff1, diff1)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); - *sse = _mm_cvtsi128_si32(vsum); - - return 0; -} - -unsigned int vp9_get8x8var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); - int i; - - for (i = 0; i < 8; i += 2) { - const __m128i src0 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const __m128i *)(src + i * src_stride)), zero); - const __m128i ref0 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const __m128i *)(ref + i * ref_stride)), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - - const __m128i src1 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const __m128i *)(src + (i + 1) * src_stride)), zero); - const __m128i ref1 = _mm_unpacklo_epi8(_mm_loadl_epi64( - (const __m128i *)(ref + (i + 1) * ref_stride)), zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); - } - - // sum - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); - *sum = (int16_t)_mm_extract_epi16(vsum, 0); - - // sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); - - return 0; -} - -unsigned int vp9_get16x16var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - const __m128i zero = 
_mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); - int i; - - for (i = 0; i < 16; ++i) { - const __m128i s = _mm_loadu_si128((const __m128i *)src); - const __m128i r = _mm_loadu_si128((const __m128i *)ref); - - const __m128i src0 = _mm_unpacklo_epi8(s, zero); - const __m128i ref0 = _mm_unpacklo_epi8(r, zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - - const __m128i src1 = _mm_unpackhi_epi8(s, zero); - const __m128i ref1 = _mm_unpackhi_epi8(r, zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); - - src += src_stride; - ref += ref_stride; - } - - // sum - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - *sum = (int16_t)_mm_extract_epi16(vsum, 0) + - (int16_t)_mm_extract_epi16(vsum, 1); - - // sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); - - return 0; -} - - -static void variance_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - int w, int h, unsigned int *sse, int *sum, - variance_fn_t var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, - ref + ref_stride * i + j, ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } -} - -unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 4); -} - -unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 4, - sse, &sum, vp9_get4x4var_sse2, 4); - return *sse - (((unsigned int)sum * sum) >> 5); -} - -unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 4, 8, - sse, &sum, vp9_get4x4var_sse2, 4); - return *sse - (((unsigned int)sum * sum) >> 5); -} - -unsigned int vp9_variance8x8_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - vp9_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 6); -} - -unsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 16, 8, - sse, &sum, vp9_get8x8var_sse2, 8); - return *sse - (((unsigned int)sum * sum) >> 7); -} - -unsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 16, - sse, &sum, vp9_get8x8var_sse2, 8); - return *sse - (((unsigned int)sum * sum) >> 7); -} - -unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int 
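The deleted get8x8var/get16x16var kernels in this file all end with the same horizontal-reduction idiom: the packed 16-bit sums are folded onto themselves with byte shifts until lane 0 holds the total. Isolated as a helper, the pattern looks like this (SSE2 only; the function name is illustrative):

    #include <emmintrin.h>
    #include <stdint.h>

    /* Sum the eight int16_t lanes of v; valid only when the true total fits
     * in 16 bits, as it does for an 8x8 block of pixel differences. */
    static int16_t hsum_epi16(__m128i v) {
      v = _mm_add_epi16(v, _mm_srli_si128(v, 8));  /* fold in the upper 64 bits */
      v = _mm_add_epi16(v, _mm_srli_si128(v, 4));  /* ...the upper 32 bits      */
      v = _mm_add_epi16(v, _mm_srli_si128(v, 2));  /* ...the upper 16 bits      */
      return (int16_t)_mm_extract_epi16(v, 0);
    }

The 16x16 kernel stops one fold earlier and adds two extracted lanes in scalar code, since its total can exceed the 16-bit range.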
ref_stride, - unsigned int *sse) { - int sum; - vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse - (((unsigned int)sum * sum) >> 8); -} - -unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 32, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 10); -} - -unsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 16, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 9); -} - -unsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 16, 32, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 9); -} - -unsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 64, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 12); -} - -unsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 32, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 11); -} - -unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 64, - sse, &sum, vp9_get16x16var_sse2, 16); - return *sse - (((int64_t)sum * sum) >> 11); -} - -unsigned int vp9_mse8x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance8x8_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - -unsigned int vp9_mse8x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance8x16_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - -unsigned int vp9_mse16x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance16x8_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - -unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); - return *sse; -} - // The 2 unused parameters are place holders for PIC enabled build. 
#define DECL(w, opt) \ int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk index c9326eeea69..6f091eefb63 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk @@ -69,6 +69,7 @@ VP9_COMMON_SRCS-yes += common/vp9_common_data.h VP9_COMMON_SRCS-yes += common/vp9_scan.c VP9_COMMON_SRCS-yes += common/vp9_scan.h +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_loopfilter_intrin_avx2.c @@ -131,14 +132,29 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_vert_loopfilter_ds # common (msa) VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_macros_msa.h +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_horiz_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_vert_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_horiz_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_vert_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_avg_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_msa.h + +ifeq ($(CONFIG_VP9_POSTPROC),yes) +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c +endif VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h @@ -197,8 +213,9 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon.c # TODO(johannkoenig): re-enable when chromium build is fixed # # https://code.google.com/p/chromium/issues/detail?id=443839 #VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_8_neon.c -VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon.c endif # HAVE_NEON endif # HAVE_NEON_ASM +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon.c + $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c index cba15e693e9..9462be9faf1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c @@ -176,15 +176,23 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); 
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); + if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS) + ERROR("ss_number_layers * ts_number_layers is out of range"); if (cfg->ts_number_layers > 1) { - unsigned int i; - for (i = 1; i < cfg->ts_number_layers; ++i) - if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1]) + unsigned int sl, tl; + for (sl = 1; sl < cfg->ss_number_layers; ++sl) { + for (tl = 1; tl < cfg->ts_number_layers; ++tl) { + const int layer = + LAYER_IDS_TO_IDX(sl, tl, cfg->ts_number_layers); + if (cfg->layer_target_bitrate[layer] < + cfg->layer_target_bitrate[layer - 1]) ERROR("ts_target_bitrate entries are not increasing"); + } + } RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); - for (i = cfg->ts_number_layers - 2; i > 0; --i) - if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i]) + for (tl = cfg->ts_number_layers - 2; tl > 0; --tl) + if (cfg->ts_rate_decimator[tl - 1] != 2 * cfg->ts_rate_decimator[tl]) ERROR("ts_rate_decimator factors are not powers of 2"); } @@ -360,6 +368,7 @@ static vpx_codec_err_t set_encoder_config( const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { const int is_vbr = cfg->rc_end_usage == VPX_VBR; + int sl, tl; oxcf->profile = cfg->g_profile; oxcf->max_threads = (int)cfg->g_threads; oxcf->width = cfg->g_w; @@ -460,35 +469,33 @@ static vpx_codec_err_t set_encoder_config( oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost; oxcf->ss_number_layers = cfg->ss_number_layers; + oxcf->ts_number_layers = cfg->ts_number_layers; + oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode) + cfg->temporal_layering_mode; - if (oxcf->ss_number_layers > 1) { - int i; - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { - oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i]; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { #if CONFIG_SPATIAL_SVC - oxcf->ss_enable_auto_arf[i] = cfg->ss_enable_auto_alt_ref[i]; + oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl]; #endif + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] = + 1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl]; } - } else if (oxcf->ss_number_layers == 1) { + } + if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; #if CONFIG_SPATIAL_SVC oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref; #endif } - - oxcf->ts_number_layers = cfg->ts_number_layers; - if (oxcf->ts_number_layers > 1) { - int i; - for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) { - oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i]; - oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i]; + for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) { + oxcf->ts_rate_decimator[tl] = cfg->ts_rate_decimator[tl] ? 
+ cfg->ts_rate_decimator[tl] : 1; } } else if (oxcf->ts_number_layers == 1) { - oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth; oxcf->ts_rate_decimator[0] = 1; } - /* printf("Current VP9 Settings: \n"); printf("target_bandwidth: %d\n", oxcf->target_bandwidth); @@ -902,11 +909,12 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, unsigned int lib_flags) { vpx_codec_frame_flags_t flags = lib_flags << 16; - if (lib_flags & FRAMEFLAGS_KEY -#if CONFIG_SPATIAL_SVC - || (is_two_pass_svc(cpi) && cpi->svc.layer_context[0].is_key_frame) -#endif - ) + if (lib_flags & FRAMEFLAGS_KEY || + (cpi->use_svc && + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].is_key_frame) + ) flags |= VPX_FRAME_IS_KEY; if (cpi->droppable) @@ -1022,16 +1030,15 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_cx_pkt_t pkt; #if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].layer_size += size; #endif // Pack invisible frames with the next visible frame - if (!cpi->common.show_frame -#if CONFIG_SPATIAL_SVC - || (is_two_pass_svc(cpi) && - cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) -#endif + if (!cpi->common.show_frame || + (cpi->use_svc && + cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) ) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; @@ -1089,24 +1096,27 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, pkt.data.frame.partition_id = -1; if(ctx->output_cx_pkt_cb.output_cx_pkt) - ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, ctx->output_cx_pkt_cb.user_priv); + ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, + ctx->output_cx_pkt_cb.user_priv); else vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); cx_data += size; cx_data_sz -= size; +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) #if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi) && !ctx->output_cx_pkt_cb.output_cx_pkt) { + if (cpi->use_svc && !ctx->output_cx_pkt_cb.output_cx_pkt) { vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr; - int i; + int sl; vp9_zero(pkt_sizes); vp9_zero(pkt_psnr); pkt_sizes.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES; pkt_psnr.kind = VPX_CODEC_SPATIAL_SVC_LAYER_PSNR; - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[i]; - pkt_sizes.data.layer_sizes[i] = lc->layer_size; - pkt_psnr.data.layer_psnr[i] = lc->psnr_pkt; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + LAYER_CONTEXT *lc = + &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; + pkt_sizes.data.layer_sizes[sl] = lc->layer_size; + pkt_psnr.data.layer_psnr[sl] = lc->psnr_pkt; lc->layer_size = 0; } @@ -1115,6 +1125,12 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr); } #endif +#endif + if (is_one_pass_cbr_svc(cpi) && + (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + // Encoded all spatial layers; exit loop. + break; + } } } } @@ -1292,16 +1308,20 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) { int data = va_arg(args, int); const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; + // Both one-pass and two-pass RC are supported now. 
+ // User setting this has to make sure of the following. + // In two-pass setting: either (but not both) + // cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1 + // In one-pass setting: + // either or both cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1 vp9_set_svc(ctx->cpi, data); - // CBR or two pass mode for SVC with both temporal and spatial layers - // not yet supported. + if (data == 1 && - (cfg->rc_end_usage == VPX_CBR || - cfg->g_pass == VPX_RC_FIRST_PASS || + (cfg->g_pass == VPX_RC_FIRST_PASS || cfg->g_pass == VPX_RC_LAST_PASS) && - cfg->ss_number_layers > 1 && - cfg->ts_number_layers > 1) { + cfg->ss_number_layers > 1 && + cfg->ts_number_layers > 1) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; @@ -1313,9 +1333,7 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) svc->spatial_layer_id = data->spatial_layer_id; -#endif svc->temporal_layer_id = data->temporal_layer_id; // Checks on valid layer_id input. if (svc->temporal_layer_id < 0 || @@ -1335,9 +1353,7 @@ static vpx_codec_err_t ctrl_get_svc_layer_id(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) data->spatial_layer_id = svc->spatial_layer_id; -#endif data->temporal_layer_id = svc->temporal_layer_id; return VPX_CODEC_OK; @@ -1347,15 +1363,21 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_extra_cfg_t *const params = va_arg(args, vpx_svc_extra_cfg_t *); - int i; - - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[i]; - - lc->max_q = params->max_quantizers[i]; - lc->min_q = params->min_quantizers[i]; - lc->scaling_factor_num = params->scaling_factor_num[i]; - lc->scaling_factor_den = params->scaling_factor_den[i]; + int sl, tl; + + // Number of temporal layers and number of spatial layers have to be set + // properly before calling this control function. 
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + for (tl = 0; tl < cpi->svc.number_temporal_layers; ++tl) { + const int layer = + LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers); + LAYER_CONTEXT *lc = + &cpi->svc.layer_context[layer]; + lc->max_q = params->max_quantizers[sl]; + lc->min_q = params->min_quantizers[sl]; + lc->scaling_factor_num = params->scaling_factor_num[sl]; + lc->scaling_factor_den = params->scaling_factor_den[sl]; + } } return VPX_CODEC_OK; @@ -1416,10 +1438,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP9E_SET_AQ_MODE, ctrl_set_aq_mode}, {VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost}, {VP9E_SET_SVC, ctrl_set_svc}, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) {VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters}, {VP9E_REGISTER_CX_CALLBACK, ctrl_register_cx_callback}, -#endif {VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id}, {VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content}, {VP9E_SET_COLOR_SPACE, ctrl_set_color_space}, @@ -1429,9 +1449,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer}, {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64}, {VP9_GET_REFERENCE, ctrl_get_reference}, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) {VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id}, -#endif {VP9E_GET_ACTIVEMAP, ctrl_get_active_map}, { -1, NULL}, @@ -1495,6 +1513,8 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { {0}, // ts_rate_decimator 0, // ts_periodicity {0}, // ts_layer_id + {0}, // layer_taget_bitrate + 0 // temporal_layering_mode } }, }; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c index ff76204d822..4080d64c170 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c @@ -55,6 +55,7 @@ struct vpx_codec_alg_priv { int invert_tile_order; int last_show_frame; // Index of last output frame. int byte_alignment; + int skip_loop_filter; // Frame parallel related. int frame_parallel_decode; // frame-based threading. 
@@ -285,6 +286,7 @@ static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { cm->new_fb_idx = INVALID_IDX; cm->byte_alignment = ctx->byte_alignment; + cm->skip_loop_filter = ctx->skip_loop_filter; if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { pool->get_fb_cb = ctx->get_ext_fb_cb; @@ -937,7 +939,8 @@ static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, frame_worker_data->pbi->common.buffer_pool->frame_bufs; if (frame_worker_data->pbi->common.frame_to_show == NULL) return VPX_CODEC_ERROR; - *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; + if (ctx->last_show_frame >= 0) + *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; @@ -1058,6 +1061,19 @@ static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->skip_loop_filter = va_arg(args, int); + + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter; + } + + return VPX_CODEC_OK; +} + static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { {VP8_COPY_REFERENCE, ctrl_copy_reference}, @@ -1071,6 +1087,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, {VPXD_SET_DECRYPTOR, ctrl_set_decryptor}, {VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment}, + {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter}, // Getters {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h index e585aa14725..58bb7d5d648 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h @@ -10,6 +10,8 @@ #ifndef VP9_VP9_IFACE_COMMON_H_ #define VP9_VP9_IFACE_COMMON_H_ +#include "vpx_ports/mem.h" + static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, void *user_priv) { /** vpx_img_wrap() doesn't allow specifying independent strides for diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk index 7359b2de05d..e78c111f085 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk @@ -102,13 +102,11 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h -VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_quantize_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c endif @@ -134,14 +132,14 @@ VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c 
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.h -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2.c -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_impl_sse2.c +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2_impl.h +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2_impl.h ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c endif -VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct32x32_avx2.c +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct32x32_avx2_impl.h VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c @@ -154,4 +152,14 @@ VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_subtract_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_avg_msa.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct32x32_msa.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_subtract_msa.c +VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_temporal_filter_msa.c + VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec b/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec index 3ce1499b77d..c694ebae128 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec +++ b/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec @@ -1,10 +1,8 @@ text vpx_codec_dec_init_ver text vpx_codec_decode text vpx_codec_get_frame -text vpx_codec_get_mem_map text vpx_codec_get_stream_info text vpx_codec_peek_stream_info text vpx_codec_register_put_frame_cb text vpx_codec_register_put_slice_cb text vpx_codec_set_frame_buffer_functions -text vpx_codec_set_mem_map diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c index e711cf909ba..9844ace54dc 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c @@ -302,31 +302,79 @@ void assign_layer_bitrates(const SvcContext *svc_ctx, vpx_codec_enc_cfg_t *const enc_cfg) { int i; const SvcInternal_t *const si = get_const_svc_internal(svc_ctx); + int sl, tl, spatial_layer_target; + + if (svc_ctx->temporal_layering_mode != 0) { + if (si->bitrates[0] != 0) { + enc_cfg->rc_target_bitrate = 0; + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] = 0; + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] + += (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl]; + enc_cfg->layer_target_bitrate[sl*svc_ctx->temporal_layers + tl] + = si->bitrates[sl * svc_ctx->temporal_layers + tl]; + } + } + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = {0}; + + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + if (si->svc_params.scaling_factor_den[sl] > 0) { + 
alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] * + 1.0 / si->svc_params.scaling_factor_den[sl]); + total += alloc_ratio[sl]; + } + } - if (si->bitrates[0] != 0) { - enc_cfg->rc_target_bitrate = 0; - for (i = 0; i < svc_ctx->spatial_layers; ++i) { - enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; - enc_cfg->rc_target_bitrate += si->bitrates[i]; + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl] = spatial_layer_target = + (unsigned int)(enc_cfg->rc_target_bitrate * + alloc_ratio[sl] / total); + if (svc_ctx->temporal_layering_mode == 3) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + spatial_layer_target >> 1; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + (spatial_layer_target >> 1) + (spatial_layer_target >> 2); + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = + spatial_layer_target; + } else if (svc_ctx->temporal_layering_mode == 2) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + spatial_layer_target * 2 / 3; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + spatial_layer_target; + } else { + // User should explicitly assign bitrates in this case. + assert(0); + } + } } } else { - float total = 0; - float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; + if (si->bitrates[0] != 0) { + enc_cfg->rc_target_bitrate = 0; + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; + enc_cfg->rc_target_bitrate += si->bitrates[i]; + } + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = {0}; - for (i = 0; i < svc_ctx->spatial_layers; ++i) { - if (si->svc_params.scaling_factor_den[i] > 0) { - alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / - si->svc_params.scaling_factor_den[i]); + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + if (si->svc_params.scaling_factor_den[i] > 0) { + alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / + si->svc_params.scaling_factor_den[i]); - alloc_ratio[i] *= alloc_ratio[i]; - total += alloc_ratio[i]; + alloc_ratio[i] *= alloc_ratio[i]; + total += alloc_ratio[i]; + } } - } - - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { - if (total > 0) { - enc_cfg->ss_target_bitrate[i] = (unsigned int) - (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { + if (total > 0) { + enc_cfg->layer_target_bitrate[i] = (unsigned int) + (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); + } } } } @@ -365,6 +413,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, return VPX_CODEC_INVALID_PARAM; } + // Note: temporal_layering_mode only applies to one-pass CBR + // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode; + if (svc_ctx->temporal_layering_mode == 3) { + svc_ctx->temporal_layers = 3; + } else if (svc_ctx->temporal_layering_mode == 2) { + svc_ctx->temporal_layers = 2; + } + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { si->svc_params.max_quantizers[i] = MAX_QUANTIZER; si->svc_params.min_quantizers[i] = 0; @@ -387,6 +443,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS) svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS; + if (svc_ctx->temporal_layers * svc_ctx->spatial_layers > VPX_MAX_LAYERS) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "spatial layers * temporal layers exceeds the maximum number of " + "allowed layers of %d\n", + 
svc_ctx->spatial_layers * svc_ctx->temporal_layers, + (int) VPX_MAX_LAYERS); + return VPX_CODEC_INVALID_PARAM; + } assign_layer_bitrates(svc_ctx, enc_cfg); #if CONFIG_SPATIAL_SVC @@ -403,10 +467,24 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, } } - // modify encoder configuration + if (svc_ctx->threads) + enc_cfg->g_threads = svc_ctx->threads; + + // Modify encoder configuration enc_cfg->ss_number_layers = svc_ctx->spatial_layers; enc_cfg->ts_number_layers = svc_ctx->temporal_layers; + if (enc_cfg->rc_end_usage == VPX_CBR) { + enc_cfg->rc_resize_allowed = 0; + enc_cfg->rc_min_quantizer = 2; + enc_cfg->rc_max_quantizer = 63; + enc_cfg->rc_undershoot_pct = 50; + enc_cfg->rc_overshoot_pct = 50; + enc_cfg->rc_buf_initial_sz = 20; + enc_cfg->rc_buf_optimal_sz = 600; + enc_cfg->rc_buf_sz = 1000; + } + if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) enc_cfg->g_error_resilient = 1; @@ -451,6 +529,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, iter = NULL; while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { switch (cx_pkt->kind) { +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) #if CONFIG_SPATIAL_SVC case VPX_CODEC_SPATIAL_SVC_LAYER_PSNR: { int i; @@ -489,6 +568,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, break; } #endif +#endif default: { break; } @@ -554,7 +634,7 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { mse[1], mse[2], mse[3]); bytes_total += si->bytes_sum[i]; - // clear sums for next time + // Clear sums for next time. si->bytes_sum[i] = 0; for (j = 0; j < COMPONENTS; ++j) { si->psnr_sum[i][j] = 0; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h b/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h index cf791bdeb56..a09651cc991 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h @@ -33,10 +33,13 @@ typedef struct { // public interface to svc_command options int spatial_layers; // number of spatial layers int temporal_layers; // number of temporal layers + int temporal_layering_mode; SVC_LOG_LEVEL log_level; // amount of information to display int log_print; // when set, printf log messages instead of returning the // message with svc_get_message - + int output_rc_stat; // for outputting rc stats + int speed; // speed setting for codec + int threads; // private storage for vpx_svc_encode void *internal; } SvcContext; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h index 0e8adc134c5..19bc4bdcce1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h @@ -327,6 +327,8 @@ enum vp8e_enc_control_id { /*!\brief Codec control function to set encoder screen content mode. * + * 0: off, 1: On, 2: On with more aggressive rate control. + * * Supported in codecs: VP8 */ VP8E_SET_SCREEN_CONTENT_MODE, @@ -448,7 +450,6 @@ enum vp8e_enc_control_id { */ VP9E_SET_SVC, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) /*!\brief Codec control function to set parameters for SVC. * \note Parameters contain min_q, max_q, scaling factor for each of the * SVC layers. @@ -456,7 +457,6 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_SET_SVC_PARAMETERS, -#endif /*!\brief Codec control function to set svc layer for spatial and temporal. 
* \note Valid ranges: 0..#vpx_codec_enc_cfg::ss_number_layers for spatial @@ -476,7 +476,6 @@ enum vp8e_enc_control_id { */ VP9E_SET_TUNE_CONTENT, -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) /*!\brief Codec control function to get svc layer ID. * \note The layer ID returned is for the data packet from the registered * callback function. @@ -492,7 +491,6 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_REGISTER_CX_CALLBACK, -#endif /*!\brief Codec control function to set color space info. * \note Valid ranges: 0..7, default is "UNKNOWN". @@ -509,6 +507,17 @@ enum vp8e_enc_control_id { */ VP9E_SET_COLOR_SPACE, + /*!\brief Codec control function to set temporal layering mode. + * \note Valid ranges: 0..3, default is "0" (VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING). + * 0 = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING + * 1 = VP9E_TEMPORAL_LAYERING_MODE_BYPASS + * 2 = VP9E_TEMPORAL_LAYERING_MODE_0101 + * 3 = VP9E_TEMPORAL_LAYERING_MODE_0212 + * + * Supported in codecs: VP9 + */ + VP9E_SET_TEMPORAL_LAYERING_MODE, + /*!\brief Codec control function to get an Active map back from the encoder. * * Supported in codecs: VP9 @@ -527,6 +536,32 @@ typedef enum vpx_scaling_mode_1d { VP8E_ONETWO = 3 } VPX_SCALING_MODE; +/*!\brief Temporal layering mode enum for VP9 SVC. + * + * This set of macros define the different temporal layering modes. + * Supported codecs: VP9 (in SVC mode) + * + */ +typedef enum vp9e_temporal_layering_mode { + /*!\brief No temporal layering. + * Used when only spatial layering is used. + */ + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING = 0, + + /*!\brief Bypass mode. + * Used when application needs to control temporal layering. + * This will only work when the number of spatial layers equals 1. + */ + VP9E_TEMPORAL_LAYERING_MODE_BYPASS = 1, + + /*!\brief 0-1-0-1... temporal layering scheme with two temporal layers. + */ + VP9E_TEMPORAL_LAYERING_MODE_0101 = 2, + + /*!\brief 0-2-1-2... temporal layering scheme with three temporal layers. + */ + VP9E_TEMPORAL_LAYERING_MODE_0212 = 3 +} VP9E_TEMPORAL_LAYERING_MODE; /*!\brief vpx region of interest map * @@ -602,7 +637,6 @@ typedef enum { VP8_TUNE_SSIM } vp8e_tuning; -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) /*!\brief vp9 svc layer parameters * * This defines the spatial and temporal layer id numbers for svc encoding. @@ -614,18 +648,6 @@ typedef struct vpx_svc_layer_id { int spatial_layer_id; /**< Spatial layer id number. */ int temporal_layer_id; /**< Temporal layer id number. */ } vpx_svc_layer_id_t; -#else -/*!\brief vp9 svc layer parameters - * - * This defines the temporal layer id numbers for svc encoding. - * This is used with the #VP9E_SET_SVC_LAYER_ID control to set the - * temporal layer id for the current frame. - * - */ -typedef struct vpx_svc_layer_id { - int temporal_layer_id; /**< Temporal layer id number. 
*/ -} vpx_svc_layer_id_t; -#endif /*!\brief VP8 encoder control function parameter type * @@ -649,10 +671,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int) -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, void *) VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK, void *) -#endif VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *) VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int) @@ -673,9 +693,7 @@ VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *) -#endif VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int) diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h index 83898bf8496..bc9cb1a62fc 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h @@ -106,6 +106,13 @@ enum vp8_dec_control_id { */ VP9_INVERT_TILE_DECODE_ORDER, + /** control function to set the skip loop filter flag. Valid values are + * integers. The decoder will skip the loop filter when its value is set to + * nonzero. If the loop filter is skipped the decoder may accumulate decode + * artifacts. The default value is 0. + */ + VP9_SET_SKIP_LOOP_FILTER, + VP8_DECODER_CTRL_ID_MAX }; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk index a1ad3c5312c..ccdef040c3a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk +++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk @@ -31,17 +31,17 @@ API_DOC_SRCS-yes += vpx_encoder.h API_DOC_SRCS-yes += vpx_frame_buffer.h API_DOC_SRCS-yes += vpx_image.h -API_SRCS-yes += src/vpx_decoder.c -API_SRCS-yes += vpx_decoder.h -API_SRCS-yes += src/vpx_encoder.c -API_SRCS-yes += vpx_encoder.h -API_SRCS-yes += internal/vpx_codec_internal.h -API_SRCS-yes += internal/vpx_psnr.h -API_SRCS-yes += src/vpx_codec.c -API_SRCS-yes += src/vpx_image.c -API_SRCS-yes += src/vpx_psnr.c -API_SRCS-yes += vpx_codec.h -API_SRCS-yes += vpx_codec.mk -API_SRCS-yes += vpx_frame_buffer.h -API_SRCS-yes += vpx_image.h -API_SRCS-$(BUILD_LIBVPX) += vpx_integer.h +API_SRCS-yes += src/vpx_decoder.c +API_SRCS-yes += vpx_decoder.h +API_SRCS-yes += src/vpx_encoder.c +API_SRCS-yes += vpx_encoder.h +API_SRCS-yes += internal/vpx_codec_internal.h +API_SRCS-yes += internal/vpx_psnr.h +API_SRCS-yes += src/vpx_codec.c +API_SRCS-yes += src/vpx_image.c +API_SRCS-yes += src/vpx_psnr.c +API_SRCS-yes += vpx_codec.h +API_SRCS-yes += vpx_codec.mk +API_SRCS-yes += vpx_frame_buffer.h +API_SRCS-yes += vpx_image.h +API_SRCS-yes += vpx_integer.h diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h index bf75584d589..2b17f98a231 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h @@ -42,8 +42,11 @@ extern "C" { /*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */ #define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY - /*!\deprecated Use #VPX_TS_MAX_LAYERS instead. 
*/ -#define MAX_LAYERS VPX_TS_MAX_LAYERS +/*! Temporal+Spatial Scalability: Maximum number of coding layers */ +#define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. + +/*!\deprecated Use #VPX_MAX_LAYERS instead. */ +#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed. /*! Spatial Scalability: Maximum number of coding layers */ #define VPX_SS_MAX_LAYERS 5 @@ -59,7 +62,7 @@ extern "C" { * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ -#define VPX_ENCODER_ABI_VERSION (4 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ +#define VPX_ENCODER_ABI_VERSION (5 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ /*! \brief Encoder capabilities bitfield @@ -163,7 +166,7 @@ extern "C" { VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ // Spatial SVC is still experimental and may be removed before the next ABI // bump. -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/ VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/ #endif @@ -205,7 +208,7 @@ extern "C" { vpx_fixed_buf_t raw; /**< data for arbitrary packets */ // Spatial SVC is still experimental and may be removed before the next // ABI bump. -#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) size_t layer_sizes[VPX_SS_MAX_LAYERS]; struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS]; #endif @@ -729,6 +732,22 @@ extern "C" { * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). */ unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; + + /*!\brief Target bitrate for each spatial/temporal layer. + * + * These values specify the target coding bitrate to be used for each + * spatial/temporal layer. + * + */ + unsigned int layer_target_bitrate[VPX_MAX_LAYERS]; + + /*!\brief Temporal layering mode indicating which temporal layering scheme to use. + * + * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the + * temporal layering mode to use. + * + */ + int temporal_layering_mode; } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ /*!\brief vp9 svc extra configure parameters @@ -737,10 +756,11 @@ extern "C" { * */ typedef struct vpx_svc_parameters { - int max_quantizers[VPX_SS_MAX_LAYERS]; /**< Max Q for each layer */ - int min_quantizers[VPX_SS_MAX_LAYERS]; /**< Min Q for each layer */ - int scaling_factor_num[VPX_SS_MAX_LAYERS]; /**< Scaling factor-numerator*/ - int scaling_factor_den[VPX_SS_MAX_LAYERS]; /**< Scaling factor-denominator*/ + int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */ + int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */ + int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */ + int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ + int temporal_layering_mode; /**< Temporal layering mode */ } vpx_svc_extra_cfg_t; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm new file mode 100644 index 00000000000..f7f9e14b0a7 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm @@ -0,0 +1,358 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. 
+; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vpx_variance16x16_media| + EXPORT |vpx_variance8x8_media| + EXPORT |vpx_mse16x16_media| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vpx_variance16x16_media| PROC + + stmfd sp!, {r4-r12, lr} + + pld [r0, r1, lsl #0] + pld [r2, r3, lsl #0] + + mov r8, #0 ; initialize sum = 0 + mov r11, #0 ; initialize sse = 0 + mov r12, #16 ; set loop counter to 16 (=block height) + +loop16x16 + ; 1st 4 pixels + ldr r4, [r0, #0] ; load 4 src pixels + ldr r5, [r2, #0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r6, r4, r5 ; calculate difference + pld [r0, r1, lsl #1] + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + pld [r2, r3, lsl #1] + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + ; calculate total sum + adds r8, r8, r4 ; add positive differences to sum + subs r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r4, [r0, #4] ; load 4 src pixels + ldr r5, [r2, #4] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r4, [r0, #8] ; load 4 src pixels + ldr r5, [r2, #8] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 
; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r4, [r0, #12] ; load 4 src pixels + ldr r5, [r2, #12] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + + subs r12, r12, #1 + + bne loop16x16 + + ; return stuff + ldr r6, [sp, #40] ; get address of sse + mul r0, r8, r8 ; sum * sum + str r11, [r6] ; store sse + sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8)) + + ldmfd sp!, {r4-r12, pc} + + ENDP + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vpx_variance8x8_media| PROC + + push {r4-r10, lr} + + pld [r0, r1, lsl #0] + pld [r2, r3, lsl #0] + + mov r12, #8 ; set loop counter to 8 (=block height) + mov r4, #0 ; initialize sum = 0 + mov r5, #0 ; initialize sse = 0 + +loop8x8 + ; 1st 4 pixels + ldr r6, [r0, #0x0] ; load 4 src pixels + ldr r7, [r2, #0x0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r8, r6, r7 ; calculate difference + pld [r0, r1, lsl #1] + sel r10, r8, lr ; select bytes with positive difference + usub8 r9, r7, r6 ; calculate difference with reversed operands + pld [r2, r3, lsl #1] + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r6, r10, lr ; calculate sum of positive differences + usad8 r7, r8, lr ; calculate sum of negative differences + orr r8, r8, r10 ; differences of all 4 pixels + ; calculate total sum + add r4, r4, r6 ; add positive differences to sum + sub r4, r4, r7 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r7, r8 ; byte (two pixels) to halfwords + uxtb16 r10, r8, ror #8 ; another two pixels to halfwords + smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r6, [r0, #0x4] ; load 4 src pixels + ldr r7, [r2, #0x4] ; load 4 ref pixels + smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r6, r7 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r10, r8, lr ; select bytes with positive difference + usub8 r9, r7, r6 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r6, r10, lr ; calculate sum of positive differences + usad8 r7, r8, lr ; calculate sum of negative differences + orr r8, r8, r10 ; differences of all 4 pixels + + ; calculate total sum + add r4, r4, r6 ; add positive differences to sum + sub r4, r4, r7 ; subtract negative differences from 
sum + + ; calculate sse + uxtb16 r7, r8 ; byte (two pixels) to halfwords + uxtb16 r10, r8, ror #8 ; another two pixels to halfwords + smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) + subs r12, r12, #1 ; next row + smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) + + bne loop8x8 + + ; return stuff + ldr r8, [sp, #32] ; get address of sse + mul r1, r4, r4 ; sum * sum + str r5, [r8] ; store sse + sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6)) + + pop {r4-r10, pc} + + ENDP + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +; +;note: Based on vpx_variance16x16_media. In this function, sum is never used. +; So, we can remove this part of calculation. + +|vpx_mse16x16_media| PROC + + push {r4-r9, lr} + + pld [r0, r1, lsl #0] + pld [r2, r3, lsl #0] + + mov r12, #16 ; set loop counter to 16 (=block height) + mov r4, #0 ; initialize sse = 0 + +loopmse + ; 1st 4 pixels + ldr r5, [r0, #0x0] ; load 4 src pixels + ldr r6, [r2, #0x0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r8, r5, r6 ; calculate difference + pld [r0, r1, lsl #1] + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + pld [r2, r3, lsl #1] + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + ldr r5, [r0, #0x4] ; load 4 src pixels + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r6, [r2, #0x4] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + ldr r5, [r0, #0x8] ; load 4 src pixels + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r6, [r2, #0x8] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; calculate difference + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + ldr r5, [r0, #0xc] ; load 4 src pixels + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r6, [r2, #0xc] ; load 4 ref pixels + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r5, r6 ; 
calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r8, lr ; select bytes with positive difference + usub8 r9, r6, r5 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r5, r7, lr ; calculate sum of positive differences + usad8 r6, r8, lr ; calculate sum of negative differences + orr r8, r8, r7 ; differences of all 4 pixels + + subs r12, r12, #1 ; next row + + ; calculate sse + uxtb16 r6, r8 ; byte (two pixels) to halfwords + uxtb16 r7, r8, ror #8 ; another two pixels to halfwords + smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) + smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) + + bne loopmse + + ; return stuff + ldr r1, [sp, #28] ; get address of sse + mov r0, r4 ; return sse + str r4, [r1] ; store sse + + pop {r4-r9, pc} + + ENDP + + END diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c new file mode 100644 index 00000000000..ede6e7bbb03 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { + const int32x4_t a = vpaddlq_s16(v_16x8); + const int64x2_t b = vpaddlq_s32(a); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) { + const int64x2_t b = vpaddlq_s32(v_32x4); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +// w * h must be less than 2048 or local variable v_sum may overflow. 
+static void variance_neon_w8(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, uint32_t *sse, int *sum) { + int i, j; + int16x8_t v_sum = vdupq_n_s16(0); + int32x4_t v_sse_lo = vdupq_n_s32(0); + int32x4_t v_sse_hi = vdupq_n_s32(0); + + for (i = 0; i < h; ++i) { + for (j = 0; j < w; j += 8) { + const uint8x8_t v_a = vld1_u8(&a[j]); + const uint8x8_t v_b = vld1_u8(&b[j]); + const uint16x8_t v_diff = vsubl_u8(v_a, v_b); + const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); + v_sum = vaddq_s16(v_sum, sv_diff); + v_sse_lo = vmlal_s16(v_sse_lo, + vget_low_s16(sv_diff), + vget_low_s16(sv_diff)); + v_sse_hi = vmlal_s16(v_sse_hi, + vget_high_s16(sv_diff), + vget_high_s16(sv_diff)); + } + a += a_stride; + b += b_stride; + } + + *sum = horizontal_add_s16x8(v_sum); + *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi)); +} + +void vpx_get8x8var_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse, int *sum) { + variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, sum); +} + +void vpx_get16x16var_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse, int *sum) { + variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, sum); +} + +unsigned int vpx_variance8x8_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum; + variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum); + return *sse - (((int64_t)sum * sum) >> 6); // >> 6 = / 8 * 8 +} + +unsigned int vpx_variance16x16_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum; + variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &sum); + return *sse - (((int64_t)sum * sum) >> 8); // >> 8 = / 16 * 16 +} + +unsigned int vpx_variance32x32_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum; + variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &sum); + return *sse - (((int64_t)sum * sum) >> 10); // >> 10 = / 32 * 32 +} + +unsigned int vpx_variance32x64_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum1, sum2; + uint32_t sse1, sse2; + variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1); + variance_neon_w8(a + (32 * a_stride), a_stride, + b + (32 * b_stride), b_stride, 32, 32, + &sse2, &sum2); + *sse = sse1 + sse2; + sum1 += sum2; + return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 +} + +unsigned int vpx_variance64x32_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum1, sum2; + uint32_t sse1, sse2; + variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); + variance_neon_w8(a + (16 * a_stride), a_stride, + b + (16 * b_stride), b_stride, 64, 16, + &sse2, &sum2); + *sse = sse1 + sse2; + sum1 += sum2; + return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64 +} + +unsigned int vpx_variance64x64_neon(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse) { + int sum1, sum2; + uint32_t sse1, sse2; + + variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); + variance_neon_w8(a + (16 * a_stride), a_stride, + b + (16 * b_stride), b_stride, 64, 16, + &sse2, &sum2); + sse1 += sse2; + sum1 += sum2; + + variance_neon_w8(a + (16 * 2 * a_stride), a_stride, + b + (16 * 2 * b_stride), b_stride, + 64, 16, &sse2, &sum2); + sse1 += sse2; + sum1 += sum2; + + variance_neon_w8(a + (16 * 3 * 
a_stride), a_stride, + b + (16 * 3 * b_stride), b_stride, + 64, 16, &sse2, &sum2); + *sse = sse1 + sse2; + sum1 += sum2; + return *sse - (((int64_t)sum1 * sum1) >> 12); // >> 12 = / 64 * 64 +} + +unsigned int vpx_variance16x8_neon( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) { + int i; + int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; + uint32x2_t d0u32, d10u32; + int64x1_t d0s64, d1s64; + uint8x16_t q0u8, q1u8, q2u8, q3u8; + uint16x8_t q11u16, q12u16, q13u16, q14u16; + int32x4_t q8s32, q9s32, q10s32; + int64x2_t q0s64, q1s64, q5s64; + + q8s32 = vdupq_n_s32(0); + q9s32 = vdupq_n_s32(0); + q10s32 = vdupq_n_s32(0); + + for (i = 0; i < 4; i++) { + q0u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + q1u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + __builtin_prefetch(src_ptr); + + q2u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + q3u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + __builtin_prefetch(ref_ptr); + + q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); + q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); + q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); + q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); + + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); + q9s32 = vmlal_s16(q9s32, d22s16, d22s16); + q10s32 = vmlal_s16(q10s32, d23s16, d23s16); + + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); + q9s32 = vmlal_s16(q9s32, d24s16, d24s16); + q10s32 = vmlal_s16(q10s32, d25s16, d25s16); + + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16)); + q9s32 = vmlal_s16(q9s32, d26s16, d26s16); + q10s32 = vmlal_s16(q10s32, d27s16, d27s16); + + d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); + d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16)); + q9s32 = vmlal_s16(q9s32, d28s16, d28s16); + q10s32 = vmlal_s16(q10s32, d29s16, d29s16); + } + + q10s32 = vaddq_s32(q10s32, q9s32); + q0s64 = vpaddlq_s32(q8s32); + q1s64 = vpaddlq_s32(q10s32); + + d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); + d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), + vreinterpret_s32_s64(d0s64)); + vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); + + d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); + d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); + + return vget_lane_u32(d0u32, 0); +} + +unsigned int vpx_variance8x16_neon( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) { + int i; + uint8x8_t d0u8, d2u8, d4u8, d6u8; + int16x4_t d22s16, d23s16, d24s16, d25s16; + uint32x2_t d0u32, d10u32; + int64x1_t d0s64, d1s64; + uint16x8_t q11u16, q12u16; + int32x4_t q8s32, q9s32, q10s32; + int64x2_t q0s64, q1s64, q5s64; + + q8s32 = vdupq_n_s32(0); + q9s32 = vdupq_n_s32(0); + q10s32 = vdupq_n_s32(0); + + for (i = 0; i < 8; i++) { + d0u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d2u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + __builtin_prefetch(src_ptr); + + 
d4u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d6u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + __builtin_prefetch(ref_ptr); + + q11u16 = vsubl_u8(d0u8, d4u8); + q12u16 = vsubl_u8(d2u8, d6u8); + + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16)); + q9s32 = vmlal_s16(q9s32, d22s16, d22s16); + q10s32 = vmlal_s16(q10s32, d23s16, d23s16); + + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16)); + q9s32 = vmlal_s16(q9s32, d24s16, d24s16); + q10s32 = vmlal_s16(q10s32, d25s16, d25s16); + } + + q10s32 = vaddq_s32(q10s32, q9s32); + q0s64 = vpaddlq_s32(q8s32); + q1s64 = vpaddlq_s32(q10s32); + + d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64)); + d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), + vreinterpret_s32_s64(d0s64)); + vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); + + d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7); + d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); + + return vget_lane_u32(d0u32, 0); +} + +unsigned int vpx_mse16x16_neon( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) { + int i; + int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; + int64x1_t d0s64; + uint8x16_t q0u8, q1u8, q2u8, q3u8; + int32x4_t q7s32, q8s32, q9s32, q10s32; + uint16x8_t q11u16, q12u16, q13u16, q14u16; + int64x2_t q1s64; + + q7s32 = vdupq_n_s32(0); + q8s32 = vdupq_n_s32(0); + q9s32 = vdupq_n_s32(0); + q10s32 = vdupq_n_s32(0); + + for (i = 0; i < 8; i++) { // mse16x16_neon_loop + q0u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + q1u8 = vld1q_u8(src_ptr); + src_ptr += source_stride; + q2u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + q3u8 = vld1q_u8(ref_ptr); + ref_ptr += recon_stride; + + q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); + q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); + q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); + q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); + + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + q7s32 = vmlal_s16(q7s32, d22s16, d22s16); + q8s32 = vmlal_s16(q8s32, d23s16, d23s16); + + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + q9s32 = vmlal_s16(q9s32, d24s16, d24s16); + q10s32 = vmlal_s16(q10s32, d25s16, d25s16); + + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + q7s32 = vmlal_s16(q7s32, d26s16, d26s16); + q8s32 = vmlal_s16(q8s32, d27s16, d27s16); + + d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); + d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); + q9s32 = vmlal_s16(q9s32, d28s16, d28s16); + q10s32 = vmlal_s16(q10s32, d29s16, d29s16); + } + + q7s32 = vaddq_s32(q7s32, q8s32); + q9s32 = vaddq_s32(q9s32, q10s32); + q10s32 = vaddq_s32(q7s32, q9s32); + + q1s64 = vpaddlq_s32(q10s32); + d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0); + return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); +} + +unsigned int vpx_get4x4sse_cs_neon( + const unsigned char *src_ptr, + int source_stride, + 
const unsigned char *ref_ptr, + int recon_stride) { + int16x4_t d22s16, d24s16, d26s16, d28s16; + int64x1_t d0s64; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; + int32x4_t q7s32, q8s32, q9s32, q10s32; + uint16x8_t q11u16, q12u16, q13u16, q14u16; + int64x2_t q1s64; + + d0u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d4u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d1u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d5u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d2u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d6u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + d3u8 = vld1_u8(src_ptr); + src_ptr += source_stride; + d7u8 = vld1_u8(ref_ptr); + ref_ptr += recon_stride; + + q11u16 = vsubl_u8(d0u8, d4u8); + q12u16 = vsubl_u8(d1u8, d5u8); + q13u16 = vsubl_u8(d2u8, d6u8); + q14u16 = vsubl_u8(d3u8, d7u8); + + d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16)); + d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16)); + d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16)); + d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16)); + + q7s32 = vmull_s16(d22s16, d22s16); + q8s32 = vmull_s16(d24s16, d24s16); + q9s32 = vmull_s16(d26s16, d26s16); + q10s32 = vmull_s16(d28s16, d28s16); + + q7s32 = vaddq_s32(q7s32, q8s32); + q9s32 = vaddq_s32(q9s32, q10s32); + q9s32 = vaddq_s32(q7s32, q9s32); + + q1s64 = vpaddlq_s32(q9s32); + d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + + return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c index 9db312fbe05..c0c3ff99645 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c @@ -14,13 +14,7 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" - -#if CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif // CONFIG_VP9_HIGHBITDEPTH -// Temporary ... -#define ROUND_POWER_OF_TWO(value, n) \ - (((value) + (1 << ((n) - 1))) >> (n)) +#include "vpx_ports/mem.h" /* Sum the difference between every corresponding element of the buffers. */ static INLINE unsigned int sad(const uint8_t *a, int a_stride, @@ -39,6 +33,7 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride, return sad; } +// TODO(johannkoenig): this moved to vpx_dsp, should be able to clean this up. /* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred. * The function averages every corresponding element of the buffers and stores * the value in a third buffer, comp_pred. diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/variance.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/variance.c new file mode 100644 index 00000000000..084dd7b7ead --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/variance.c @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_ports/mem.h" +#include "vpx/vpx_integer.h" + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride) { + int distortion = 0; + int r, c; + + for (r = 0; r < 4; r++) { + for (c = 0; c < 4; c++) { + int diff = a[c] - b[c]; + distortion += diff * diff; + } + + a += a_stride; + b += b_stride; + } + + return distortion; +} + +unsigned int vpx_get_mb_ss_c(const int16_t *a) { + unsigned int i, sum = 0; + + for (i = 0; i < 256; ++i) { + sum += a[i] * a[i]; + } + + return sum; +} + +static void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } +} + +#define VAR(W, H) \ +unsigned int vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + */ +#define GET_VAR(W, H) \ +void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse, int *sum) { \ + variance(a, a_stride, b, b_stride, W, H, sse, sum); \ +} + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in + * variable. 
+ */ +#define MSE(W, H) \ +unsigned int vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse; \ +} + +VAR(64, 64) +VAR(64, 32) +VAR(32, 64) +VAR(32, 32) +VAR(32, 16) +VAR(16, 32) +VAR(16, 16) +VAR(16, 8) +VAR(8, 16) +VAR(8, 8) +VAR(8, 4) +VAR(4, 8) +VAR(4, 4) + +GET_VAR(16, 16) +GET_VAR(8, 8) + +MSE(16, 16) +MSE(16, 8) +MSE(8, 16) +MSE(8, 8) + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_variance64(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, uint64_t *sse, uint64_t *sum) { + int i, j; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + a += a_stride; + b += b_stride; + } +} + +static void highbd_8_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; +} + +static void highbd_10_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); + *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); +} + +static void highbd_12_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8); + *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); +} + +#define HIGHBD_VAR(W, H) \ +unsigned int vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, \ + int a_stride, \ + const uint8_t *b, \ + int b_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} \ +\ +unsigned int vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, \ + int a_stride, \ + const uint8_t *b, \ + int b_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} \ +\ +unsigned int vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \ + int a_stride, \ + const uint8_t *b, \ + int b_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} + +#define HIGHBD_GET_VAR(S) \ +void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + unsigned int *sse, int *sum) { \ + highbd_8_variance(src, src_stride, ref, 
ref_stride, S, S, sse, sum); \ +} \ +\ +void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + unsigned int *sse, int *sum) { \ + highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ +} \ +\ +void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + unsigned int *sse, int *sum) { \ + highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ +} + +#define HIGHBD_MSE(W, H) \ +unsigned int vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, \ + int src_stride, \ + const uint8_t *ref, \ + int ref_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ + return *sse; \ +} \ +\ +unsigned int vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, \ + int src_stride, \ + const uint8_t *ref, \ + int ref_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ + return *sse; \ +} \ +\ +unsigned int vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, \ + int src_stride, \ + const uint8_t *ref, \ + int ref_stride, \ + unsigned int *sse) { \ + int sum; \ + highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ + return *sse; \ +} + +HIGHBD_GET_VAR(8) +HIGHBD_GET_VAR(16) + +HIGHBD_MSE(16, 16) +HIGHBD_MSE(16, 8) +HIGHBD_MSE(8, 16) +HIGHBD_MSE(8, 8) + +HIGHBD_VAR(64, 64) +HIGHBD_VAR(64, 32) +HIGHBD_VAR(32, 64) +HIGHBD_VAR(32, 32) +HIGHBD_VAR(32, 16) +HIGHBD_VAR(16, 32) +HIGHBD_VAR(16, 16) +HIGHBD_VAR(16, 8) +HIGHBD_VAR(8, 16) +HIGHBD_VAR(8, 8) +HIGHBD_VAR(8, 4) +HIGHBD_VAR(4, 8) +HIGHBD_VAR(4, 4) + +void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, + int width, int height, const uint8_t *ref8, + int ref_stride) { + int i, j; + uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk index 606515d2c19..f23534adc15 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk @@ -17,6 +17,7 @@ DSP_SRCS-$(HAVE_MEDIA) += arm/sad_media$(ASM) DSP_SRCS-$(HAVE_NEON) += arm/sad4d_neon.c DSP_SRCS-$(HAVE_NEON) += arm/sad_neon.c + DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm @@ -29,9 +30,28 @@ DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm + endif # CONFIG_VP9_HIGHBITDEPTH endif # CONFIG_ENCODERS +ifneq ($(filter yes,$(CONFIG_ENCODERS) $(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),) +DSP_SRCS-yes += variance.c + +DSP_SRCS-$(HAVE_MEDIA) += arm/variance_media$(ASM) +DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c + +DSP_SRCS-$(HAVE_MMX) += x86/variance_mmx.c +DSP_SRCS-$(HAVE_MMX) += x86/variance_impl_mmx.asm +DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c +DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c +DSP_SRCS-$(HAVE_AVX2) += x86/variance_impl_avx2.c + +ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) 
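The CONFIG_VP9_HIGHBITDEPTH sources added under this conditional pair with the highbd_10_variance / highbd_12_variance helpers in the new variance.c above: 10-bit pixels make each diff up to four times larger than in the 8-bit path, so the accumulated sum grows by 2^2 and the SSE by 2^4 (2^4 and 2^8 for 12-bit), and the helpers shift those factors back out so the returned statistics stay on the familiar 8-bit scale. The rounding macro they use is the one the patch now takes from vpx_ports/mem.h; its definition is visible in the temporary copy deleted from sad.c earlier in this diff. A small sketch of the 10-bit case, with a hypothetical helper name:

```c
#include <stdint.h>

/* Round-to-nearest right shift, as defined in the copy removed from sad.c. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

/* Hypothetical helper mirroring highbd_10_variance(): rescale the wide
 * accumulators so the caller sees 8-bit-scale sum and SSE. */
static void normalize_10bit_stats(int64_t sum_long, uint64_t sse_long,
                                  int *sum, unsigned int *sse) {
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);          /* divide by 4  */
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); /* divide by 16 */
}
```

The 12-bit path does the same with shifts of 4 and 8, and the SSE2 wrappers in highbd_variance_sse2.c below repeat the pattern after summing the per-block results returned by the assembly kernels.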
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c +DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm +endif # CONFIG_VP9_HIGHBITDEPTH +endif # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC + DSP_SRCS-no += $(DSP_SRCS_REMOVE-yes) DSP_SRCS-yes += vpx_dsp_rtcd.c diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl index ebec9ec0660..55271cf9c14 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -392,4 +392,212 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_ENCODERS +if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { + +add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance64x64 sse2 avx2 neon/; + +add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance64x32 sse2 avx2 neon/; + +add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance32x64 sse2 neon/; + +add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance32x32 sse2 avx2 neon/; + +add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance32x16 sse2 avx2/; + +add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance16x32 sse2/; + +add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon/; + +add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance16x8 mmx sse2 neon/; + +add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance8x16 mmx sse2 neon/; + +add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance8x8 mmx sse2 media neon/; + +add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance8x4 sse2/; + +add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance4x8 sse2/; + +add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_variance4x4 mmx sse2/; + + +add_proto qw/void vpx_get16x16var/, "const uint8_t 
*src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_get16x16var sse2 avx2 neon/; + +add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_get8x8var mmx sse2 neon/; + +add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon/; + +add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_mse16x8 sse2/; + +add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_mse8x16 sse2/; + +add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_mse8x8 sse2/; + +add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; + specialize qw/vpx_get_mb_ss mmx sse2/; + +add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; + specialize qw/vpx_get4x4sse_cs neon/; + +add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; + +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance64x64 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance64x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance32x64 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance32x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance32x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance16x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance16x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance16x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance8x16 sse2/; + + add_proto qw/unsigned int 
vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_variance8x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + + add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance64x64 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance64x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance32x64 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance32x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance32x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance16x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance16x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance16x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance8x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_variance8x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + + add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance64x64 sse2/; + + add_proto qw/unsigned int 
vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance64x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance32x64 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance32x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance32x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance16x32 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance16x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance16x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance8x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_variance8x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + + add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + + add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + + add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + + add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_mse16x16 sse2/; + + add_proto 
qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_highbd_8_mse8x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_mse16x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_highbd_10_mse8x8 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_mse16x16 sse2/; + + add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + specialize qw/vpx_highbd_12_mse8x8 sse2/; + + add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride"; +} # CONFIG_VP9_HIGHBITDEPTH +} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC + 1; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm index 821dd0660bc..923418a9921 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm @@ -11,7 +11,7 @@ %include "vpx_ports/x86_abi_support.asm" -;unsigned int vp9_highbd_calc16x16var_sse2 +;unsigned int vpx_highbd_calc16x16var_sse2 ;( ; unsigned char * src_ptr, ; int source_stride, @@ -20,8 +20,8 @@ ; unsigned int * SSE, ; int * Sum ;) -global sym(vp9_highbd_calc16x16var_sse2) PRIVATE -sym(vp9_highbd_calc16x16var_sse2): +global sym(vpx_highbd_calc16x16var_sse2) PRIVATE +sym(vpx_highbd_calc16x16var_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -164,7 +164,7 @@ sym(vp9_highbd_calc16x16var_sse2): ret -;unsigned int vp9_highbd_calc8x8var_sse2 +;unsigned int vpx_highbd_calc8x8var_sse2 ;( ; unsigned char * src_ptr, ; int source_stride, @@ -173,8 +173,8 @@ sym(vp9_highbd_calc16x16var_sse2): ; unsigned int * SSE, ; int * Sum ;) -global sym(vp9_highbd_calc8x8var_sse2) PRIVATE -sym(vp9_highbd_calc8x8var_sse2): +global sym(vpx_highbd_calc8x8var_sse2) PRIVATE +sym(vpx_highbd_calc8x8var_sse2): push rbp mov 
rbp, rsp SHADOW_ARGS_TO_STACK 6 diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_sse2.c new file mode 100644 index 00000000000..343c0478b9a --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_sse2.c @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" + +#include "vp9/encoder/vp9_variance.h" +#include "vpx_ports/mem.h" + +typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + uint32_t *sse, int *sum); + +uint32_t vpx_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + uint32_t *sse, int *sum); + +uint32_t vpx_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + uint32_t *sse, int *sum); + +static void highbd_8_variance_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + int w, int h, uint32_t *sse, int *sum, + high_variance_fn_t var_fn, int block_size) { + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + +static void highbd_10_variance_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + int w, int h, uint32_t *sse, int *sum, + high_variance_fn_t var_fn, int block_size) { + int i, j; + uint64_t sse_long = 0; + int64_t sum_long = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + sse_long += sse0; + sum_long += sum0; + } + } + *sum = ROUND_POWER_OF_TWO(sum_long, 2); + *sse = ROUND_POWER_OF_TWO(sse_long, 4); +} + +static void highbd_12_variance_sse2(const uint16_t *src, int src_stride, + const uint16_t *ref, int ref_stride, + int w, int h, uint32_t *sse, int *sum, + high_variance_fn_t var_fn, int block_size) { + int i, j; + uint64_t sse_long = 0; + int64_t sum_long = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + sse_long += sse0; + sum_long += sum0; + } + } + *sum = ROUND_POWER_OF_TWO(sum_long, 4); + *sse = ROUND_POWER_OF_TWO(sse_long, 8); +} + + +#define HIGH_GET_VAR(S) \ +void vpx_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, \ + uint32_t *sse, int *sum) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ + sse, sum); \ +} \ +\ +void vpx_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ + const uint8_t 
*ref8, int ref_stride, \ + uint32_t *sse, int *sum) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ + sse, sum); \ + *sum = ROUND_POWER_OF_TWO(*sum, 2); \ + *sse = ROUND_POWER_OF_TWO(*sse, 4); \ +} \ +\ +void vpx_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, \ + uint32_t *sse, int *sum) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \ + sse, sum); \ + *sum = ROUND_POWER_OF_TWO(*sum, 4); \ + *sse = ROUND_POWER_OF_TWO(*sse, 8); \ +} + +HIGH_GET_VAR(16); +HIGH_GET_VAR(8); + +#undef HIGH_GET_VAR + +#define VAR_FN(w, h, block_size, shift) \ +uint32_t vpx_highbd_8_variance##w##x##h##_sse2( \ + const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ + int sum; \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + highbd_8_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \ + vpx_highbd_calc##block_size##x##block_size##var_sse2, \ + block_size); \ + return *sse - (((int64_t)sum * sum) >> shift); \ +} \ +\ +uint32_t vpx_highbd_10_variance##w##x##h##_sse2( \ + const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ + int sum; \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + highbd_10_variance_sse2( \ + src, src_stride, ref, ref_stride, w, h, sse, &sum, \ + vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ + return *sse - (((int64_t)sum * sum) >> shift); \ +} \ +\ +uint32_t vpx_highbd_12_variance##w##x##h##_sse2( \ + const uint8_t *src8, int src_stride, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ + int sum; \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ + highbd_12_variance_sse2( \ + src, src_stride, ref, ref_stride, w, h, sse, &sum, \ + vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ + return *sse - (((int64_t)sum * sum) >> shift); \ +} + +VAR_FN(64, 64, 16, 12); +VAR_FN(64, 32, 16, 11); +VAR_FN(32, 64, 16, 11); +VAR_FN(32, 32, 16, 10); +VAR_FN(32, 16, 16, 9); +VAR_FN(16, 32, 16, 9); +VAR_FN(16, 16, 16, 8); +VAR_FN(16, 8, 8, 7); +VAR_FN(8, 16, 8, 7); +VAR_FN(8, 8, 8, 6); + +#undef VAR_FN + +unsigned int vpx_highbd_8_mse16x16_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_highbd_calc16x16var_sse2, 16); + return *sse; +} + +unsigned int vpx_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_highbd_calc16x16var_sse2, 16); + return *sse; +} + +unsigned int vpx_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_12_variance_sse2(src, 
src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_highbd_calc16x16var_sse2, 16); + return *sse; +} + +unsigned int vpx_highbd_8_mse8x8_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vpx_highbd_calc8x8var_sse2, 8); + return *sse; +} + +unsigned int vpx_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vpx_highbd_calc8x8var_sse2, 8); + return *sse; +} + +unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, + const uint8_t *ref8, int ref_stride, + unsigned int *sse) { + int sum; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vpx_highbd_calc8x8var_sse2, 8); + return *sse; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c index 4128f2ac37c..793658f9ea9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c @@ -8,18 +8,19 @@ * be found in the AUTHORS file in the root of the source tree. */ #include <immintrin.h> // AVX2 +#include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" -void vpx_sad32x32x4d_avx2(uint8_t *src, +void vpx_sad32x32x4d_avx2(const uint8_t *src, int src_stride, - uint8_t *ref[4], + const uint8_t *const ref[4], int ref_stride, uint32_t res[4]) { __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; __m256i sum_mlow, sum_mhigh; int i; - uint8_t *ref0, *ref1, *ref2, *ref3; + const uint8_t *ref0, *ref1, *ref2, *ref3; ref0 = ref[0]; ref1 = ref[1]; @@ -31,11 +32,11 @@ void vpx_sad32x32x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 32 ; i++) { // load src and all refs - src_reg = _mm256_loadu_si256((__m256i *)(src)); - ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); - ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); - ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); - ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + src_reg = _mm256_loadu_si256((const __m256i *)src); + ref0_reg = _mm256_loadu_si256((const __m256i *)ref0); + ref1_reg = _mm256_loadu_si256((const __m256i *)ref1); + ref2_reg = _mm256_loadu_si256((const __m256i *)ref2); + ref3_reg = _mm256_loadu_si256((const __m256i *)ref3); // sum of the absolute differences between every ref-i to src ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); @@ -80,9 +81,9 @@ void vpx_sad32x32x4d_avx2(uint8_t *src, } } -void vpx_sad64x64x4d_avx2(uint8_t *src, +void vpx_sad64x64x4d_avx2(const uint8_t *src, int src_stride, - uint8_t *ref[4], + const uint8_t *const ref[4], int ref_stride, uint32_t res[4]) { __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; @@ -91,7 +92,7 @@ void vpx_sad64x64x4d_avx2(uint8_t *src, __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; __m256i sum_mlow, sum_mhigh; int i; - uint8_t *ref0, *ref1, *ref2, *ref3; + const uint8_t *ref0, *ref1, *ref2, *ref3; ref0 = ref[0]; ref1 = 
ref[1]; @@ -103,16 +104,16 @@ void vpx_sad64x64x4d_avx2(uint8_t *src, sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 64 ; i++) { // load 64 bytes from src and all refs - src_reg = _mm256_loadu_si256((__m256i *)(src)); - srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32)); - ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); - ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); - ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); - ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32)); - ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); - ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); - ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); - ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32)); + src_reg = _mm256_loadu_si256((const __m256i *)src); + srcnext_reg = _mm256_loadu_si256((const __m256i *)(src + 32)); + ref0_reg = _mm256_loadu_si256((const __m256i *)ref0); + ref0next_reg = _mm256_loadu_si256((const __m256i *)(ref0 + 32)); + ref1_reg = _mm256_loadu_si256((const __m256i *)ref1); + ref1next_reg = _mm256_loadu_si256((const __m256i *)(ref1 + 32)); + ref2_reg = _mm256_loadu_si256((const __m256i *)ref2); + ref2next_reg = _mm256_loadu_si256((const __m256i *)(ref2 + 32)); + ref3_reg = _mm256_loadu_si256((const __m256i *)ref3); + ref3next_reg = _mm256_loadu_si256((const __m256i *)(ref3 + 32)); // sum of the absolute differences between every ref-i to src ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c index 78536a47218..ce9ad8f780c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ #include <immintrin.h> +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #define FSAD64_H(h) \ diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c new file mode 100644 index 00000000000..82cef4af0af --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "./vpx_dsp_rtcd.h" + +typedef void (*get_var_avx2)(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum); + +void vpx_get32x32var_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum); + +static void variance_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + int w, int h, unsigned int *sse, int *sum, + get_var_avx2 var_fn, int block_size) { + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += 16) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(&src[src_stride * i + j], src_stride, + &ref[ref_stride * i + j], ref_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + + +unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vpx_get16x16var_avx2, 16); + return *sse - (((unsigned int)sum * sum) >> 8); +} + +unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + vpx_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse; +} + +unsigned int vpx_variance32x16_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 32, 16, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance32x32_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 32, 32, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 10); +} + +unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 64, 64, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 12); +} + +unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_avx2(src, src_stride, ref, ref_stride, 64, 32, + sse, &sum, vpx_get32x32var_avx2, 32); + return *sse - (((int64_t)sum * sum) >> 11); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c index f9923280a34..0e40959aa9d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c @@ -10,7 +10,9 @@ #include <immintrin.h> // AVX2 -void vp9_get16x16var_avx2(const unsigned char *src_ptr, +#include "./vpx_dsp_rtcd.h" + +void vpx_get16x16var_avx2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, @@ -121,7 +123,7 @@ void vp9_get16x16var_avx2(const unsigned char *src_ptr, } } -void vp9_get32x32var_avx2(const unsigned char *src_ptr, +void vpx_get32x32var_avx2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm 
b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm index 7d5e6810bf0..a8d7d99dbc0 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm @@ -11,9 +11,9 @@ %include "vpx_ports/x86_abi_support.asm" -;unsigned int vp8_get_mb_ss_mmx( short *src_ptr ) -global sym(vp8_get_mb_ss_mmx) PRIVATE -sym(vp8_get_mb_ss_mmx): +;unsigned int vpx_get_mb_ss_mmx( short *src_ptr ) +global sym(vpx_get_mb_ss_mmx) PRIVATE +sym(vpx_get_mb_ss_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 @@ -63,7 +63,7 @@ sym(vp8_get_mb_ss_mmx): ret -;unsigned int vp8_get8x8var_mmx +;void vpx_get8x8var_mmx ;( ; unsigned char *src_ptr, ; int source_stride, @@ -72,8 +72,8 @@ sym(vp8_get_mb_ss_mmx): ; unsigned int *SSE, ; int *Sum ;) -global sym(vp8_get8x8var_mmx) PRIVATE -sym(vp8_get8x8var_mmx): +global sym(vpx_get8x8var_mmx) PRIVATE +sym(vpx_get8x8var_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -310,8 +310,8 @@ sym(vp8_get8x8var_mmx): -;unsigned int -;vp8_get4x4var_mmx +;void +;vpx_get4x4var_mmx ;( ; unsigned char *src_ptr, ; int source_stride, @@ -320,8 +320,8 @@ sym(vp8_get8x8var_mmx): ; unsigned int *SSE, ; int *Sum ;) -global sym(vp8_get4x4var_mmx) PRIVATE -sym(vp8_get4x4var_mmx): +global sym(vpx_get4x4var_mmx) PRIVATE +sym(vpx_get4x4var_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -422,430 +422,3 @@ sym(vp8_get4x4var_mmx): UNSHADOW_ARGS pop rbp ret - - - -;unsigned int -;vp8_get4x4sse_cs_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride -;) -global sym(vp8_get4x4sse_cs_mmx) PRIVATE -sym(vp8_get4x4sse_cs_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - push rbx - ; end prolog - - - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - ; Row 1 - movd mm0, [rax] ; Copy eight bytes to mm0 - movd mm1, [rbx] ; Copy eight bytes to mm1 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 2 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 3 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm1, mm6 - punpcklbw mm0, mm6 ; unpack to higher prrcision - psubsw mm0, mm1 ; A-B (low order) to MM0 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 4 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - paddd mm7, mm0 ; accumulate in mm7 - - movq mm0, mm7 ; - psrlq mm7, 32 - - paddd mm0, mm7 - 
movq rax, mm0 - - - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -%define mmx_filter_shift 7 - -;void vp8_filter_block2d_bil4x4_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE -sym(vp8_filter_block2d_bil4x4_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - - mov rax, arg(4) ;HFilter ; - mov rdx, arg(5) ;VFilter ; - - mov rsi, arg(0) ;ref_ptr ; - mov rdi, arg(2) ;src_ptr ; - - mov rcx, 4 ; - pxor mm0, mm0 ; - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm5, mm1 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - add rsi, r8 -%endif - -.filter_block2d_bil4x4_var_mmx_loop: - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm3, mm5 ; - - movq mm5, mm1 ; - pmullw mm3, [rdx] ; - - pmullw mm1, [rdx+8] ; - paddw mm1, mm3 ; - - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - movd mm3, [rdi] ; - punpcklbw mm3, mm0 ; - - psubw mm1, mm3 ; - paddw mm6, mm1 ; - - pmaddwd mm1, mm1 ; - paddd mm7, mm1 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil4x4_var_mmx_loop ; - - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(6) ;sum - mov rsi, arg(7) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - - -;void vp8_filter_block2d_bil_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE -sym(vp8_filter_block2d_bil_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - mov rax, arg(5) ;HFilter ; - - mov rdx, arg(6) ;VFilter ; - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - - pxor mm0, mm0 ; - movq mm1, [rsi] ; - - movq mm3, [rsi+1] ; - movq mm2, mm1 ; - - movq mm4, mm3 ; - punpcklbw mm1, mm0 ; - - punpckhbw mm2, mm0 ; - pmullw mm1, [rax] ; - - pmullw mm2, [rax] ; - punpcklbw mm3, mm0 ; - - punpckhbw mm4, mm0 ; - pmullw mm3, [rax+8] ; - - pmullw mm4, [rax+8] ; - paddw mm1, mm3 
; - - paddw mm2, mm4 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm2, mmx_filter_shift ; - movq mm5, mm1 - - packuswb mm5, mm2 ; -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - add rsi, r8 -%endif - -.filter_block2d_bil_var_mmx_loop: - - movq mm1, [rsi] ; - movq mm3, [rsi+1] ; - - movq mm2, mm1 ; - movq mm4, mm3 ; - - punpcklbw mm1, mm0 ; - punpckhbw mm2, mm0 ; - - pmullw mm1, [rax] ; - pmullw mm2, [rax] ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - pmullw mm3, [rax+8] ; - pmullw mm4, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - psraw mm2, mmx_filter_shift ; - - movq mm3, mm5 ; - movq mm4, mm5 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - movq mm5, mm1 ; - packuswb mm5, mm2 ; - - pmullw mm3, [rdx] ; - pmullw mm4, [rdx] ; - - pmullw mm1, [rdx+8] ; - pmullw mm2, [rdx+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - psraw mm2, mmx_filter_shift ; - - movq mm3, [rdi] ; - movq mm4, mm3 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - psubw mm1, mm3 ; - psubw mm2, mm4 ; - - paddw mm6, mm1 ; - pmaddwd mm1, mm1 ; - - paddw mm6, mm2 ; - pmaddwd mm2, mm2 ; - - paddd mm7, mm1 ; - paddd mm7, mm2 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - movsxd r9, dword ptr arg(3) ;src_pixels_per_line ; - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil_var_mmx_loop ; - - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(7) ;sum - mov rsi, arg(8) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -SECTION_RODATA -;short mmx_bi_rd[4] = { 64, 64, 64, 64}; -align 16 -mmx_bi_rd: - times 4 dw 64 diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c new file mode 100644 index 00000000000..99dd741bca5 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vpx_dsp_rtcd.h" + +extern void vpx_get4x4var_mmx(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + unsigned int *sse, int *sum); + +unsigned int vpx_variance4x4_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + vpx_get4x4var_mmx(a, a_stride, b, b_stride, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 4)); +} + +unsigned int vpx_variance8x8_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &var, &avg); + *sse = var; + + return (var - (((unsigned int)avg * avg) >> 6)); +} + +unsigned int vpx_mse16x16_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, sse2, sse3, var; + int sum0, sum1, sum2, sum3; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); + vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, + b + 8 * b_stride, b_stride, &sse2, &sum2); + vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, + b + 8 * b_stride + 8, b_stride, &sse3, &sum3); + + var = sse0 + sse1 + sse2 + sse3; + *sse = var; + return var; +} + +unsigned int vpx_variance16x16_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, sse2, sse3, var; + int sum0, sum1, sum2, sum3, avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); + vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, + b + 8 * b_stride, b_stride, &sse2, &sum2); + vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, + b + 8 * b_stride + 8, b_stride, &sse3, &sum3); + + var = sse0 + sse1 + sse2 + sse3; + avg = sum0 + sum1 + sum2 + sum3; + *sse = var; + return (var - (((unsigned int)avg * avg) >> 8)); +} + +unsigned int vpx_variance16x8_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, var; + int sum0, sum1, avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); + + var = sse0 + sse1; + avg = sum0 + sum1; + *sse = var; + return (var - (((unsigned int)avg * avg) >> 7)); +} + +unsigned int vpx_variance8x16_mmx(const unsigned char *a, int a_stride, + const unsigned char *b, int b_stride, + unsigned int *sse) { + unsigned int sse0, sse1, var; + int sum0, sum1, avg; + + vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); + vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, + b + 8 * b_stride, b_stride, &sse1, &sum1); + + var = sse0 + sse1; + avg = sum0 + sum1; + *sse = var; + + return (var - (((unsigned int)avg * avg) >> 7)); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c new file mode 100644 index 00000000000..6256bc53621 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <emmintrin.h> // SSE2 + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_ports/mem.h" + +typedef void (*getNxMvar_fn_t) (const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +unsigned int vpx_get_mb_ss_sse2(const int16_t *src) { + __m128i vsum = _mm_setzero_si128(); + int i; + + for (i = 0; i < 32; ++i) { + const __m128i v = _mm_loadu_si128((const __m128i *)src); + vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); + src += 8; + } + + vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); + return _mm_cvtsi128_si32(vsum); +} + +#define READ64(p, stride, i) \ + _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \ + _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride))) + +static void get4x4var_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + const __m128i zero = _mm_setzero_si128(); + const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero); + const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero); + const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero); + const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero); + const __m128i diff0 = _mm_sub_epi16(src0, ref0); + const __m128i diff1 = _mm_sub_epi16(src1, ref1); + + // sum + __m128i vsum = _mm_add_epi16(diff0, diff1); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); + *sum = (int16_t)_mm_extract_epi16(vsum, 0); + + // sse + vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0), + _mm_madd_epi16(diff1, diff1)); + vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); + *sse = _mm_cvtsi128_si32(vsum); +} + +void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + const __m128i zero = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); + __m128i vsse = _mm_setzero_si128(); + int i; + + for (i = 0; i < 8; i += 2) { + const __m128i src0 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(src + i * src_stride)), zero); + const __m128i ref0 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(ref + i * ref_stride)), zero); + const __m128i diff0 = _mm_sub_epi16(src0, ref0); + + const __m128i src1 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(src + (i + 1) * src_stride)), zero); + const __m128i ref1 = _mm_unpacklo_epi8(_mm_loadl_epi64( + (const __m128i *)(ref + (i + 1) * ref_stride)), zero); + const __m128i diff1 = _mm_sub_epi16(src1, ref1); + + vsum = _mm_add_epi16(vsum, diff0); + vsum = _mm_add_epi16(vsum, diff1); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); + } + + // sum + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); + *sum = (int16_t)_mm_extract_epi16(vsum, 0); + + // sse + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); + *sse = 
_mm_cvtsi128_si32(vsse); +} + +void vpx_get16x16var_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + const __m128i zero = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); + __m128i vsse = _mm_setzero_si128(); + int i; + + for (i = 0; i < 16; ++i) { + const __m128i s = _mm_loadu_si128((const __m128i *)src); + const __m128i r = _mm_loadu_si128((const __m128i *)ref); + + const __m128i src0 = _mm_unpacklo_epi8(s, zero); + const __m128i ref0 = _mm_unpacklo_epi8(r, zero); + const __m128i diff0 = _mm_sub_epi16(src0, ref0); + + const __m128i src1 = _mm_unpackhi_epi8(s, zero); + const __m128i ref1 = _mm_unpackhi_epi8(r, zero); + const __m128i diff1 = _mm_sub_epi16(src1, ref1); + + vsum = _mm_add_epi16(vsum, diff0); + vsum = _mm_add_epi16(vsum, diff1); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); + + src += src_stride; + ref += ref_stride; + } + + // sum + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + *sum = (int16_t)_mm_extract_epi16(vsum, 0) + + (int16_t)_mm_extract_epi16(vsum, 1); + + // sse + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); + *sse = _mm_cvtsi128_si32(vsse); +} + + +static void variance_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + int w, int h, unsigned int *sse, int *sum, + getNxMvar_fn_t var_fn, int block_size) { + int i, j; + + *sse = 0; + *sum = 0; + + for (i = 0; i < h; i += block_size) { + for (j = 0; j < w; j += block_size) { + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); + *sse += sse0; + *sum += sum0; + } + } +} + +unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 4); +} + +unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 4, + sse, &sum, get4x4var_sse2, 4); + return *sse - (((unsigned int)sum * sum) >> 5); +} + +unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 4, 8, + sse, &sum, get4x4var_sse2, 4); + return *sse - (((unsigned int)sum * sum) >> 5); +} + +unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 6); +} + +unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 8, + sse, &sum, vpx_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 7); +} + +unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 16, + sse, &sum, vpx_get8x8var_sse2, 8); + return 
*sse - (((unsigned int)sum * sum) >> 7); +} + +unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 8); +} + +unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 32, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 10); +} + +unsigned int vpx_variance32x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 16, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 32, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 64, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 12); +} + +unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 32, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); +} + +unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 64, + sse, &sum, vpx_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); +} + +unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + vpx_variance8x8_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} + +unsigned int vpx_mse8x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + vpx_variance8x16_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} + +unsigned int vpx_mse16x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + vpx_variance16x8_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} + +unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + vpx_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); + return *sse; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h b/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h index 0106a45d6e4..7502f906325 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h @@ -38,4 +38,16 @@ #define __builtin_prefetch(x) #endif +/* Shift down with rounding */ +#define ROUND_POWER_OF_TWO(value, n) \ + (((value) + (1 << ((n) - 1))) >> (n)) + +#define ALIGN_POWER_OF_TWO(value, n) \ + (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +#if CONFIG_VP9_HIGHBITDEPTH +#define 
CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) +#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1)) +#endif // CONFIG_VP9_HIGHBITDEPTH + #endif // VPX_PORTS_MEM_H_ diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/msvc.h b/chromium/third_party/libvpx/source/libvpx/vpx_ports/msvc.h new file mode 100644 index 00000000000..43a36e76184 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/msvc.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_MSVC_H_ +#define VPX_PORTS_MSVC_H_ +#ifdef _MSC_VER + +#include "./vpx_config.h" + +# if _MSC_VER < 1900 // VS2015 provides snprintf +# define snprintf _snprintf +# endif // _MSC_VER < 1900 + +#endif // _MSC_VER +#endif // VPX_PORTS_MSVC_H_ diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk b/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk index a7275431fe9..ab7fc4ac7a3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk +++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk @@ -11,13 +11,14 @@ PORTS_SRCS-yes += vpx_ports.mk -PORTS_SRCS-$(BUILD_LIBVPX) += mem.h -PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h +PORTS_SRCS-yes += mem.h +PORTS_SRCS-yes += msvc.h +PORTS_SRCS-yes += vpx_timer.h ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) -PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm -PORTS_SRCS-$(BUILD_LIBVPX) += x86.h -PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm +PORTS_SRCS-yes += emms.asm +PORTS_SRCS-yes += x86.h +PORTS_SRCS-yes += x86_abi_support.asm endif PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c index 995c45b6ab6..dab324edfcc 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
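Aside on the new variance kernels a few hunks above (variance_mmx.c and variance_sse2.c): each vpx_varianceWxH_* function returns the sum of squared differences minus the squared sum divided by the pixel count, i.e. variance = SSE - sum*sum/N with N = W*H, which is why the final shift is 4 for 4x4, 6 for 8x8, 7 for 16x8 and 8x16, 8 for 16x16, and 12 for 64x64. The plain-C sketch below only restates that identity for reference; it is not part of the diff and the helper name is hypothetical.

#include <stdint.h>

/* Reference form of the identity used by the SIMD kernels above:
 * accumulate the sum and the sum of squares of (src - ref), then remove
 * the squared-mean term.  N = w * h is a power of two in the real code,
 * so the division becomes the right shift seen in the diff. */
static unsigned int variance_ref_sketch(const uint8_t *src, int src_stride,
                                        const uint8_t *ref, int ref_stride,
                                        int w, int h, unsigned int *sse) {
  int64_t sum = 0;
  uint64_t sq = 0;
  int r, c;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int d = src[r * src_stride + c] - ref[r * ref_stride + c];
      sum += d;
      sq += (uint64_t)(d * d);
    }
  }
  *sse = (unsigned int)sq;
  return (unsigned int)(sq - (uint64_t)((sum * sum) / (w * h)));
}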
*/ - +#include "./vpx_scale_rtcd.h" #include "vpx_scale/vpx_scale.h" #include "vpx_mem/vpx_mem.h" /**************************************************************************** diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c index 089e673757c..15e4ba87e72 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c @@ -22,6 +22,7 @@ ****************************************************************************/ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" typedef struct { diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c index 169c2ab2d73..7582792d939 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c @@ -12,9 +12,7 @@ #include "vpx_scale/yv12config.h" #include "vpx_mem/vpx_mem.h" -#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif +#include "vpx_ports/mem.h" /**************************************************************************** * Exports diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c index 6214a12189c..086e2f398fb 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c @@ -10,8 +10,10 @@ #include <assert.h> #include "./vpx_config.h" +#include "./vpx_scale_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH #include "vp9/common/vp9_common.h" diff --git a/chromium/third_party/libvpx/source/libvpx/vpxdec.c b/chromium/third_party/libvpx/source/libvpx/vpxdec.c index 8c938df8de6..baa61151ca8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpxdec.c +++ b/chromium/third_party/libvpx/source/libvpx/vpxdec.c @@ -106,24 +106,25 @@ static const arg_def_t *all_args[] = { }; #if CONFIG_VP8_DECODER -static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1, - "Enable VP8 postproc add noise"); -static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0, - "Enable VP8 deblocking"); -static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1, - "Enable VP8 demacroblocking, w/ level"); -static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1, - "Enable VP8 visible debug info"); -static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1, - "Display only selected reference frame per macro block"); -static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1, - "Display only selected macro block modes"); -static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1, - "Display only selected block modes"); -static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1, - "Draw only selected motion vectors"); -static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0, - "Enable multiframe quality enhancement"); +static const arg_def_t addnoise_level = ARG_DEF( + NULL, "noise-level", 1, "Enable VP8 postproc add noise"); 
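The VP8 post-processing options being reflowed in this vpxdec.c hunk feed the VP8_SET_POSTPROC control call that appears further down in the same file. A rough sketch of how such options end up applied, assuming the vp8_postproc_cfg_t layout from vpx/vp8.h and purely illustrative strength values:

#include "vpx/vp8.h"
#include "vpx/vpx_decoder.h"

/* Illustrative only: turn --deblock / --demacroblock-level / --noise-level
 * style options into a single post-processing control call. */
static vpx_codec_err_t enable_vp8_postproc_sketch(vpx_codec_ctx_t *decoder) {
  vp8_postproc_cfg_t cfg;
  cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_ADDNOISE;
  cfg.deblocking_level = 5;  /* illustrative strength */
  cfg.noise_level = 1;       /* illustrative strength */
  return vpx_codec_control(decoder, VP8_SET_POSTPROC, &cfg);
}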
+static const arg_def_t deblock = ARG_DEF( + NULL, "deblock", 0, "Enable VP8 deblocking"); +static const arg_def_t demacroblock_level = ARG_DEF( + NULL, "demacroblock-level", 1, "Enable VP8 demacroblocking, w/ level"); +static const arg_def_t pp_debug_info = ARG_DEF( + NULL, "pp-debug-info", 1, "Enable VP8 visible debug info"); +static const arg_def_t pp_disp_ref_frame = ARG_DEF( + NULL, "pp-dbg-ref-frame", 1, + "Display only selected reference frame per macro block"); +static const arg_def_t pp_disp_mb_modes = ARG_DEF( + NULL, "pp-dbg-mb-modes", 1, "Display only selected macro block modes"); +static const arg_def_t pp_disp_b_modes = ARG_DEF( + NULL, "pp-dbg-b-modes", 1, "Display only selected block modes"); +static const arg_def_t pp_disp_mvs = ARG_DEF( + NULL, "pp-dbg-mvs", 1, "Draw only selected motion vectors"); +static const arg_def_t mfqe = ARG_DEF( + NULL, "mfqe", 0, "Enable multiframe quality enhancement"); static const arg_def_t *vp8_pp_args[] = { &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info, @@ -169,7 +170,7 @@ static INLINE int libyuv_scale(vpx_image_t *src, vpx_image_t *dst, } #endif -void usage_exit() { +void usage_exit(void) { int i; fprintf(stderr, "Usage: %s <options> filename\n\n" @@ -312,7 +313,7 @@ static void write_image_file(const vpx_image_t *img, const int planes[3], } } -int file_is_raw(struct VpxInputContext *input) { +static int file_is_raw(struct VpxInputContext *input) { uint8_t buf[32]; int is_raw = 0; vpx_codec_stream_info_t si; @@ -343,7 +344,7 @@ int file_is_raw(struct VpxInputContext *input) { return is_raw; } -void show_progress(int frame_in, int frame_out, uint64_t dx_time) { +static void show_progress(int frame_in, int frame_out, uint64_t dx_time) { fprintf(stderr, "%d decoded frames/%d showed frames in %"PRId64" us (%.2f fps)\r", frame_in, frame_out, dx_time, @@ -365,8 +366,8 @@ struct ExternalFrameBufferList { // Application private data passed into the set function. |min_size| is the // minimum size in bytes needed to decode the next frame. |fb| pointer to the // frame buffer. -int get_vp9_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb) { +static int get_vp9_frame_buffer(void *cb_priv, size_t min_size, + vpx_codec_frame_buffer_t *fb) { int i; struct ExternalFrameBufferList *const ext_fb_list = (struct ExternalFrameBufferList *)cb_priv; @@ -403,8 +404,8 @@ int get_vp9_frame_buffer(void *cb_priv, size_t min_size, // Callback used by libvpx when there are no references to the frame buffer. // |cb_priv| user private data passed into the set function. |fb| pointer // to the frame buffer. 
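The two callbacks in this part of the hunk implement libvpx's external frame buffer interface for VP9: get_vp9_frame_buffer hands the decoder application-owned memory of at least min_size bytes, and release_vp9_frame_buffer is invoked once the decoder drops its last reference. A minimal, self-contained sketch of the same pattern using the public vpx_codec_set_frame_buffer_functions API follows; the per-frame calloc here is an assumption for brevity, whereas vpxdec itself recycles buffers from a small list:

#include <stdlib.h>
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_frame_buffer.h"

/* Give the decoder an application-owned, zero-initialized buffer of at
 * least |min_size| bytes.  Returns 0 on success, negative on failure,
 * as the callback contract requires. */
static int app_get_frame_buffer(void *priv, size_t min_size,
                                vpx_codec_frame_buffer_t *fb) {
  (void)priv;
  fb->data = (uint8_t *)calloc(min_size, 1);
  if (!fb->data) return -1;
  fb->size = min_size;
  fb->priv = fb->data;  /* remembered so the release callback can free it */
  return 0;
}

/* Called when the decoder no longer references the buffer. */
static int app_release_frame_buffer(void *priv, vpx_codec_frame_buffer_t *fb) {
  (void)priv;
  free(fb->priv);
  fb->priv = NULL;
  return 0;
}

/* Registered once after vpx_codec_dec_init():
 *   vpx_codec_set_frame_buffer_functions(&decoder, app_get_frame_buffer,
 *                                        app_release_frame_buffer, NULL);
 */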
-int release_vp9_frame_buffer(void *cb_priv, - vpx_codec_frame_buffer_t *fb) { +static int release_vp9_frame_buffer(void *cb_priv, + vpx_codec_frame_buffer_t *fb) { struct ExternalFrameBuffer *const ext_fb = (struct ExternalFrameBuffer *)fb->priv; (void)cb_priv; @@ -412,9 +413,9 @@ int release_vp9_frame_buffer(void *cb_priv, return 0; } -void generate_filename(const char *pattern, char *out, size_t q_len, - unsigned int d_w, unsigned int d_h, - unsigned int frame_in) { +static void generate_filename(const char *pattern, char *out, size_t q_len, + unsigned int d_w, unsigned int d_h, + unsigned int frame_in) { const char *p = pattern; char *q = out; @@ -536,7 +537,7 @@ static int img_shifted_realloc_required(const vpx_image_t *img, } #endif -int main_loop(int argc, const char **argv_) { +static int main_loop(int argc, const char **argv_) { vpx_codec_ctx_t decoder; char *fn = NULL; int i; @@ -813,34 +814,42 @@ int main_loop(int argc, const char **argv_) { fprintf(stderr, "%s\n", decoder.name); #if CONFIG_VP8_DECODER - if (vp8_pp_cfg.post_proc_flag && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) { - fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder)); + fprintf(stderr, "Failed to configure postproc: %s\n", + vpx_codec_error(&decoder)); return EXIT_FAILURE; } if (vp8_dbg_color_ref_frame - && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) { - fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder)); + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, + vp8_dbg_color_ref_frame)) { + fprintf(stderr, "Failed to configure reference block visualizer: %s\n", + vpx_codec_error(&decoder)); return EXIT_FAILURE; } if (vp8_dbg_color_mb_modes - && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) { - fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder)); + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, + vp8_dbg_color_mb_modes)) { + fprintf(stderr, "Failed to configure macro block visualizer: %s\n", + vpx_codec_error(&decoder)); return EXIT_FAILURE; } if (vp8_dbg_color_b_modes - && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) { - fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder)); + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, + vp8_dbg_color_b_modes)) { + fprintf(stderr, "Failed to configure block visualizer: %s\n", + vpx_codec_error(&decoder)); return EXIT_FAILURE; } if (vp8_dbg_display_mv - && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) { - fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder)); + && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, + vp8_dbg_display_mv)) { + fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", + vpx_codec_error(&decoder)); return EXIT_FAILURE; } #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vpxenc.c b/chromium/third_party/libvpx/source/libvpx/vpxenc.c index 851d43291cd..8bbb9fc6a20 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpxenc.c +++ b/chromium/third_party/libvpx/source/libvpx/vpxenc.c @@ -99,7 +99,7 @@ static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal, va_end(ap); } -int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { +static int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { FILE *f = input_ctx->file; 
y4m_input *y4m = &input_ctx->y4m; int shortread = 0; @@ -114,14 +114,14 @@ int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { return !shortread; } -int file_is_y4m(const char detect[4]) { +static int file_is_y4m(const char detect[4]) { if (memcmp(detect, "YUV4", 4) == 0) { return 1; } return 0; } -int fourcc_is_ivf(const char detect[4]) { +static int fourcc_is_ivf(const char detect[4]) { if (memcmp(detect, "DKIF", 4) == 0) { return 1; } @@ -330,10 +330,6 @@ static const arg_def_t sharpness = ARG_DEF( NULL, "sharpness", 1, "Loop filter sharpness (0..7)"); static const arg_def_t static_thresh = ARG_DEF( NULL, "static-thresh", 1, "Motion detection threshold"); -static const arg_def_t cpu_used_vp8 = ARG_DEF( - NULL, "cpu-used", 1, "CPU Used (-16..16)"); -static const arg_def_t cpu_used_vp9 = ARG_DEF( - NULL, "cpu-used", 1, "CPU Used (-8..8)"); static const arg_def_t auto_altref = ARG_DEF( NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames"); static const arg_def_t arnr_maxframes = ARG_DEF( @@ -353,17 +349,14 @@ static const arg_def_t cq_level = ARG_DEF( NULL, "cq-level", 1, "Constant/Constrained Quality level"); static const arg_def_t max_intra_rate_pct = ARG_DEF( NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)"); -static const arg_def_t max_inter_rate_pct = ARG_DEF( - NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)"); -static const arg_def_t gf_cbr_boost_pct = ARG_DEF( - NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)"); - -static const arg_def_t screen_content_mode = ARG_DEF(NULL, "screen-content-mode", 1, - "Screen content mode"); #if CONFIG_VP8_ENCODER +static const arg_def_t cpu_used_vp8 = ARG_DEF( + NULL, "cpu-used", 1, "CPU Used (-16..16)"); static const arg_def_t token_parts = ARG_DEF( NULL, "token-parts", 1, "Number of token partitions to use, log2"); +static const arg_def_t screen_content_mode = ARG_DEF( + NULL, "screen-content-mode", 1, "Screen content mode"); static const arg_def_t *vp8_args[] = { &cpu_used_vp8, &auto_altref, &noise_sens, &sharpness, &static_thresh, &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, @@ -382,6 +375,8 @@ static const int vp8_arg_ctrl_map[] = { #endif #if CONFIG_VP9_ENCODER +static const arg_def_t cpu_used_vp9 = ARG_DEF( + NULL, "cpu-used", 1, "CPU Used (-8..8)"); static const arg_def_t tile_cols = ARG_DEF( NULL, "tile-columns", 1, "Number of tile columns to use, log2"); static const arg_def_t tile_rows = ARG_DEF( @@ -397,6 +392,10 @@ static const arg_def_t aq_mode = ARG_DEF( static const arg_def_t frame_periodic_boost = ARG_DEF( NULL, "frame-boost", 1, "Enable frame periodic boost (0: off (default), 1: on)"); +static const arg_def_t gf_cbr_boost_pct = ARG_DEF( + NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)"); +static const arg_def_t max_inter_rate_pct = ARG_DEF( + NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)"); static const struct arg_enum_list color_space_enum[] = { { "unknown", VPX_CS_UNKNOWN }, @@ -467,8 +466,9 @@ static const int vp9_arg_ctrl_map[] = { static const arg_def_t *no_args[] = { NULL }; -void usage_exit() { +void usage_exit(void) { int i; + const int num_encoder = get_vpx_encoder_count(); fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n", exec_name); @@ -496,11 +496,15 @@ void usage_exit() { " in fractional seconds. 
Default is 1/1000.\n"); fprintf(stderr, "\nIncluded encoders:\n\n"); - for (i = 0; i < get_vpx_encoder_count(); ++i) { + for (i = 0; i < num_encoder; ++i) { const VpxInterface *const encoder = get_vpx_encoder_by_index(i); - fprintf(stderr, " %-6s - %s\n", - encoder->name, vpx_codec_iface_name(encoder->codec_interface())); + const char* defstr = (i == (num_encoder - 1)) ? "(default)" : ""; + fprintf(stderr, " %-6s - %s %s\n", + encoder->name, vpx_codec_iface_name(encoder->codec_interface()), + defstr); } + fprintf(stderr, "\n "); + fprintf(stderr, "Use --codec to switch to a non-default encoder.\n\n"); exit(EXIT_FAILURE); } @@ -793,8 +797,8 @@ struct stream_state { }; -void validate_positive_rational(const char *msg, - struct vpx_rational *rat) { +static void validate_positive_rational(const char *msg, + struct vpx_rational *rat) { if (rat->den < 0) { rat->num *= -1; rat->den *= -1; @@ -811,10 +815,14 @@ void validate_positive_rational(const char *msg, static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { char **argi, **argj; struct arg arg; + const int num_encoder = get_vpx_encoder_count(); + + if (num_encoder < 1) + die("Error: no valid encoder available\n"); /* Initialize default parameters */ memset(global, 0, sizeof(*global)); - global->codec = get_vpx_encoder_by_index(0); + global->codec = get_vpx_encoder_by_index(num_encoder - 1); global->passes = 0; global->color_type = I420; /* Assign default deadline to good quality */ @@ -919,7 +927,7 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { } -void open_input_file(struct VpxInputContext *input) { +static void open_input_file(struct VpxInputContext *input) { /* Parse certain options from the input file, if possible */ input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") : set_binary_mode(stdin); @@ -935,6 +943,10 @@ void open_input_file(struct VpxInputContext *input) { rewind(input->file); } + /* Default to 1:1 pixel aspect ratio. */ + input->pixel_aspect_ratio.numerator = 1; + input->pixel_aspect_ratio.denominator = 1; + /* For RAW input sources, these bytes will applied on the first frame * in read_frame(). 
*/ @@ -948,6 +960,8 @@ void open_input_file(struct VpxInputContext *input) { input->file_type = FILE_TYPE_Y4M; input->width = input->y4m.pic_w; input->height = input->y4m.pic_h; + input->pixel_aspect_ratio.numerator = input->y4m.par_n; + input->pixel_aspect_ratio.denominator = input->y4m.par_d; input->framerate.numerator = input->y4m.fps_n; input->framerate.denominator = input->y4m.fps_d; input->fmt = input->y4m.vpx_fmt; @@ -1381,7 +1395,8 @@ static void show_stream_config(struct stream_state *stream, static void open_output_file(struct stream_state *stream, - struct VpxEncoderConfig *global) { + struct VpxEncoderConfig *global, + const struct VpxRational *pixel_aspect_ratio) { const char *fn = stream->config.out_fn; const struct vpx_codec_enc_cfg *const cfg = &stream->config.cfg; @@ -1402,7 +1417,8 @@ static void open_output_file(struct stream_state *stream, write_webm_file_header(&stream->ebml, cfg, &global->framerate, stream->config.stereo_fmt, - global->codec->fourcc); + global->codec->fourcc, + pixel_aspect_ratio); } #endif @@ -2035,7 +2051,8 @@ int main(int argc, const char **argv_) { } FOREACH_STREAM(setup_pass(stream, &global, pass)); - FOREACH_STREAM(open_output_file(stream, &global)); + FOREACH_STREAM(open_output_file(stream, &global, + &input.pixel_aspect_ratio)); FOREACH_STREAM(initialize_encoder(stream, &global)); #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH diff --git a/chromium/third_party/libvpx/source/libvpx/webmenc.cc b/chromium/third_party/libvpx/source/libvpx/webmenc.cc index a0e542b17cf..8212ee36cf6 100644 --- a/chromium/third_party/libvpx/source/libvpx/webmenc.cc +++ b/chromium/third_party/libvpx/source/libvpx/webmenc.cc @@ -24,7 +24,8 @@ void write_webm_file_header(struct EbmlGlobal *glob, const vpx_codec_enc_cfg_t *cfg, const struct vpx_rational *fps, stereo_format_t stereo_fmt, - unsigned int fourcc) { + unsigned int fourcc, + const struct VpxRational *par) { mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(glob->stream); mkvmuxer::Segment *const segment = new mkvmuxer::Segment(); segment->Init(writer); @@ -49,6 +50,15 @@ void write_webm_file_header(struct EbmlGlobal *glob, segment->GetTrackByNumber(video_track_id)); video_track->SetStereoMode(stereo_fmt); video_track->set_codec_id(fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9"); + if (par->numerator > 1 || par->denominator > 1) { + // TODO(fgalligan): Add support of DisplayUnit, Display Aspect Ratio type + // to WebM format. + const uint64_t display_width = + static_cast<uint64_t>(((cfg->g_w * par->numerator * 1.0) / + par->denominator) + .5); + video_track->set_display_width(display_width); + video_track->set_display_height(cfg->g_h); + } if (glob->debug) { video_track->set_uid(kDebugTrackUid); } diff --git a/chromium/third_party/libvpx/source/libvpx/webmenc.h b/chromium/third_party/libvpx/source/libvpx/webmenc.h index 0ac606be483..c255d3de669 100644 --- a/chromium/third_party/libvpx/source/libvpx/webmenc.h +++ b/chromium/third_party/libvpx/source/libvpx/webmenc.h @@ -42,7 +42,8 @@ void write_webm_file_header(struct EbmlGlobal *glob, const vpx_codec_enc_cfg_t *cfg, const struct vpx_rational *fps, stereo_format_t stereo_fmt, - unsigned int fourcc); + unsigned int fourcc, + const struct VpxRational *par); void write_webm_block(struct EbmlGlobal *glob, const vpx_codec_enc_cfg_t *cfg, |