Diffstat (limited to 'chromium/third_party/blink/tools/blinkpy/web_tests')
17 files changed, 324 insertions, 249 deletions
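The headline change in this patch is how `LayoutTestFinder._split_into_chunks` assigns tests to shards: instead of slicing an alphabetically sorted list into contiguous ranges, each test is bucketed by `hash(test_name) % count`, so a test's shard depends only on its own name and the shard count, not on the rest of the list. A minimal standalone sketch of the new scheme (the free function and the integer "test names" are illustrative only; the real code is the `layout_test_finder.py` hunk below, which also logs a debug summary):

```python
# Sketch of hash-bucket sharding, assuming the same scheme as the
# LayoutTestFinder._split_into_chunks hunk below.
def split_into_chunks(test_names, index, count):
    tests_and_indices = [(name, hash(name) % count) for name in test_names]
    tests_to_run = [name for name, i in tests_and_indices if i == index]
    other_tests = [name for name, i in tests_and_indices if i != index]
    return tests_to_run, other_tests

# hash(int) == int in CPython, so these buckets match the updated
# unit-test expectations.
assert split_into_chunks([1, 2, 3, 4], 0, 2) == ([2, 4], [1, 3])
assert split_into_chunks([1, 2, 3, 4], 1, 2) == ([1, 3], [2, 4])
assert split_into_chunks([1, 2, 3, 4, 5], 2, 3) == ([2, 5], [1, 3, 4])
```

Because real test paths hash unpredictably, the updated tests in `layout_test_finder_unittest.py` and `run_webkit_tests_unittest.py` patch `__builtin__.hash` with `int` or `len` via `mock.patch` to make the expected shard contents deterministic. Relatedly, `manager.py` now splits into chunks before applying `--order`, since chunks no longer need to be alphabetically contiguous.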
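Within each shard, `layout_test_runner.py` additionally groups tests that need identical per-test command-line arguments (obtained from the new `Port.args_for_test` helper added in `base.py`) so they run back to back. A rough standalone equivalent of `_reorder_tests_by_args`; the `args_for_test` stand-in and the `--enable-gpu` / `virtual/gpu` example names are made up for illustration:

```python
import collections
import itertools

def reorder_tests_by_args(test_names, args_for_test):
    # Group tests by their argument tuple, keeping first-seen group order,
    # then flatten back into a single list.
    tests_by_args = collections.OrderedDict()
    for name in test_names:
        key = tuple(args_for_test(name))
        tests_by_args.setdefault(key, []).append(name)
    return list(itertools.chain(*tests_by_args.values()))

def args_for_test(name):  # stand-in for Port.args_for_test
    return ['--enable-gpu'] if name.startswith('virtual/') else []

names = ['virtual/gpu/a.html', 'b.html', 'virtual/gpu/c.html', 'd.html']
print(reorder_tests_by_args(names, args_for_test))
# -> ['virtual/gpu/a.html', 'virtual/gpu/c.html', 'b.html', 'd.html']
```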
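The other sizeable cleanup is the removal of the `REBASELINE` / `NEEDS_MANUAL_REBASELINE` expectation machinery: `TestExpectations.result_was_expected` loses its `test_needs_rebaselining` parameter and reduces to a small piece of set logic. A minimal sketch of the resulting behaviour (the integer constants are stand-ins for the enum values defined at the top of `test_expectations.py`; the real code is the staticmethod in the hunk below):

```python
# Stand-in constants; the real module defines these in its expectation enum.
(PASS, FAIL, TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO,
 TIMEOUT, CRASH, LEAK, SKIP, WONTFIX, SLOW, MISSING) = range(13)

def result_was_expected(result, expected_results):
    local_expected = set(expected_results)
    if WONTFIX in local_expected:
        # A WontFix line is treated as if it also expected a Skip.
        local_expected.add(SKIP)
    # SLOW and WONTFIX are not outcomes; if nothing real remains, expect PASS.
    local_expected.discard(WONTFIX)
    local_expected.discard(SLOW)
    if not local_expected:
        local_expected = {PASS}
    if result in local_expected:
        return True
    # Any concrete failure type satisfies a generic FAIL expectation.
    return result in (TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO) and FAIL in local_expected

# The first four mirror the rewritten assertions in
# test_expectations_unittest.py; the last follows from the WontFix handling.
assert result_was_expected(PASS, {PASS})
assert not result_was_expected(FAIL, {PASS})
assert not result_was_expected(SKIP, {CRASH})
assert not result_was_expected(MISSING, {PASS})
assert result_was_expected(SKIP, {WONTFIX})
```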
diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/breakpad/dump_reader_multipart.py b/chromium/third_party/blink/tools/blinkpy/web_tests/breakpad/dump_reader_multipart.py index e83025d3383..ee47da7635d 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/breakpad/dump_reader_multipart.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/breakpad/dump_reader_multipart.py @@ -175,7 +175,7 @@ class DumpReaderLinux(DumpReaderMultipart): """Linux breakpad dump reader.""" def _binaries_to_symbolize(self): - return ['content_shell', 'libosmesa.so'] + return ['content_shell'] def _file_extension(self): return 'dmp' diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder.py b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder.py index 165d2595aa4..df0a9bbab63 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder.py @@ -196,14 +196,20 @@ class LayoutTestFinder(object): @staticmethod def _split_into_chunks(test_names, index, count): - chunk_size = int(math.ceil(len(test_names) * 1.0 / count)) - - chunk_start = index * chunk_size - chunk_end = (index + 1) * chunk_size - - tests_to_run = test_names[chunk_start:chunk_end] - other_tests = test_names[:chunk_start] + test_names[chunk_end:] - - _log.debug('chunk slice [%d:%d] of %d is %d tests', chunk_start, chunk_end, len(test_names), len(tests_to_run)) + tests_and_indices = [ + (test_name, hash(test_name) % count) + for test_name in test_names] + + tests_to_run = [ + test_name + for test_name, test_index in tests_and_indices + if test_index == index] + other_tests = [ + test_name + for test_name, test_index in tests_and_indices + if test_index != index] + + _log.debug('chunk %d of %d contains %d tests of %d', + index, count, len(tests_to_run), len(test_names)) return tests_to_run, other_tests diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder_unittest.py b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder_unittest.py index a9a19ac5b7a..a3474ac48e5 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder_unittest.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_finder_unittest.py @@ -2,11 +2,19 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. 
+import os +import sys import unittest +from blinkpy.common import path_finder from blinkpy.common.host_mock import MockHost from blinkpy.web_tests.controllers import layout_test_finder +_MOCK_ROOT = os.path.join( + path_finder.get_chromium_src_dir(), 'third_party', 'pymock') +sys.path.append(_MOCK_ROOT) +import mock + class LayoutTestFinderTests(unittest.TestCase): @@ -89,32 +97,34 @@ class LayoutTestFinderTests(unittest.TestCase): def test_split_chunks(self): split = layout_test_finder.LayoutTestFinder._split_into_chunks # pylint: disable=protected-access - tests = [1, 2, 3, 4] - self.assertEqual(([1, 2, 3, 4], []), split(tests, 0, 1)) + with mock.patch('__builtin__.hash', int): + + tests = [1, 2, 3, 4] + self.assertEqual(([1, 2, 3, 4], []), split(tests, 0, 1)) - self.assertEqual(([1, 2], [3, 4]), split(tests, 0, 2)) - self.assertEqual(([3, 4], [1, 2]), split(tests, 1, 2)) + self.assertEqual(([2, 4], [1, 3]), split(tests, 0, 2)) + self.assertEqual(([1, 3], [2, 4]), split(tests, 1, 2)) - self.assertEqual(([1, 2], [3, 4]), split(tests, 0, 3)) - self.assertEqual(([3, 4], [1, 2]), split(tests, 1, 3)) - self.assertEqual(([], [1, 2, 3, 4]), split(tests, 2, 3)) + self.assertEqual(([3], [1, 2, 4]), split(tests, 0, 3)) + self.assertEqual(([1, 4], [2, 3]), split(tests, 1, 3)) + self.assertEqual(([2], [1, 3, 4]), split(tests, 2, 3)) - tests = [1, 2, 3, 4, 5] - self.assertEqual(([1, 2, 3, 4, 5], []), split(tests, 0, 1)) + tests = [1, 2, 3, 4, 5] + self.assertEqual(([1, 2, 3, 4, 5], []), split(tests, 0, 1)) - self.assertEqual(([1, 2, 3], [4, 5]), split(tests, 0, 2)) - self.assertEqual(([4, 5], [1, 2, 3]), split(tests, 1, 2)) + self.assertEqual(([2, 4], [1, 3, 5]), split(tests, 0, 2)) + self.assertEqual(([1, 3, 5], [2, 4]), split(tests, 1, 2)) - self.assertEqual(([1, 2], [3, 4, 5]), split(tests, 0, 3)) - self.assertEqual(([3, 4], [1, 2, 5]), split(tests, 1, 3)) - self.assertEqual(([5], [1, 2, 3, 4]), split(tests, 2, 3)) + self.assertEqual(([3], [1, 2, 4, 5]), split(tests, 0, 3)) + self.assertEqual(([1, 4], [2, 3, 5]), split(tests, 1, 3)) + self.assertEqual(([2, 5], [1, 3, 4]), split(tests, 2, 3)) - tests = [1, 2, 3, 4, 5, 6] - self.assertEqual(([1, 2, 3, 4, 5, 6], []), split(tests, 0, 1)) + tests = [1, 2, 3, 4, 5, 6] + self.assertEqual(([1, 2, 3, 4, 5, 6], []), split(tests, 0, 1)) - self.assertEqual(([1, 2, 3], [4, 5, 6]), split(tests, 0, 2)) - self.assertEqual(([4, 5, 6], [1, 2, 3]), split(tests, 1, 2)) + self.assertEqual(([2, 4, 6], [1, 3, 5]), split(tests, 0, 2)) + self.assertEqual(([1, 3, 5], [2, 4, 6]), split(tests, 1, 2)) - self.assertEqual(([1, 2], [3, 4, 5, 6]), split(tests, 0, 3)) - self.assertEqual(([3, 4], [1, 2, 5, 6]), split(tests, 1, 3)) - self.assertEqual(([5, 6], [1, 2, 3, 4]), split(tests, 2, 3)) + self.assertEqual(([3, 6], [1, 2, 4, 5]), split(tests, 0, 3)) + self.assertEqual(([1, 4], [2, 3, 5, 6]), split(tests, 1, 3)) + self.assertEqual(([2, 5], [1, 3, 4, 6]), split(tests, 2, 3)) diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_runner.py b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_runner.py index c924d263f1d..1fdd62f649d 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_runner.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/layout_test_runner.py @@ -26,6 +26,8 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import collections +import itertools import logging import math import time @@ -96,6 +98,9 @@ class LayoutTestRunner(object): self._options.fully_parallel, self._options.batch_size == 1) + self._reorder_tests_by_args(locked_shards) + self._reorder_tests_by_args(unlocked_shards) + # We don't have a good way to coordinate the workers so that they don't # try to run the shards that need a lock. The easiest solution is to # run all of the locked shards first. @@ -135,6 +140,17 @@ class LayoutTestRunner(object): return test_run_results + def _reorder_tests_by_args(self, shards): + reordered_shards = [] + for shard in shards: + tests_by_args = collections.OrderedDict() + for test_input in shard.test_inputs: + args = tuple(self._port.args_for_test(test_input.test_name)) + if args not in tests_by_args: + tests_by_args[args] = [] + tests_by_args[args].append(test_input) + shard.test_inputs = list(itertools.chain(*tests_by_args.values())) + def _worker_factory(self, worker_connection): results_directory = self._results_directory if self._retry_attempt > 0: diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/manager.py b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/manager.py index 16e184f770d..9c72d6a15c4 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/manager.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/manager.py @@ -111,15 +111,13 @@ class Manager(object): # This is raised if --test-list doesn't exist return test_run_results.RunDetails(exit_code=exit_codes.NO_TESTS_EXIT_STATUS) - # Create a sorted list of test files so the subset chunk, - # if used, contains alphabetically consecutive tests. + test_names, tests_in_other_chunks = self._finder.split_into_chunks(all_test_names) + if self._options.order == 'natural': - all_test_names.sort(key=self._port.test_key) + test_names.sort(key=self._port.test_key) elif self._options.order == 'random': - all_test_names.sort() - random.Random(self._options.seed).shuffle(all_test_names) - - test_names, tests_in_other_chunks = self._finder.split_into_chunks(all_test_names) + test_names.sort() + random.Random(self._options.seed).shuffle(test_names) self._printer.write_update('Parsing expectations ...') self._expectations = test_expectations.TestExpectations(self._port, test_names) diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/single_test_runner.py b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/single_test_runner.py index aa2b3d32d13..9a1862cb5bd 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/single_test_runner.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/controllers/single_test_runner.py @@ -113,18 +113,8 @@ class SingleTestRunner(object): if self._should_fetch_expected_checksum(): image_hash = self._port.expected_checksum(self._test_name) - test_base = self._port.lookup_virtual_test_base(self._test_name) - if test_base: - # If the file actually exists under the virtual dir, we want to use it (largely for virtual references), - # but we want to use the extra command line args either way. 
- if self._filesystem.exists(self._port.abspath_for_test(self._test_name)): - test_name = self._test_name - else: - test_name = test_base - args = self._port.lookup_virtual_test_args(self._test_name) - else: - test_name = self._test_name - args = self._port.lookup_physical_test_args(self._test_name) + args = self._port.args_for_test(self._test_name) + test_name = self._port.name_for_test(self._test_name) return DriverInput(test_name, self._timeout_ms, image_hash, self._should_run_pixel_test, args) def run(self): @@ -189,19 +179,36 @@ class SingleTestRunner(object): if (test_failures.has_failure_type(test_failures.FailureTimeout, failures) or test_failures.has_failure_type(test_failures.FailureCrash, failures)): return + # We usually don't want to create a new baseline if there isn't one + # existing (which usually means this baseline isn't necessary, e.g. + # an image-first test without text expectation files). However, in the + # following cases, we do: + # 1. The failure is MISSING; a baseline is apparently needed. + # 2. A testharness.js test fails assertions: testharness.js tests + # without baselines are implicitly expected to pass all assertions; + # if there are failed assertions we need to create a new baseline. + # Note that the created baseline might be redundant, but users can + # optimize them later with optimize-baselines. self._save_baseline_data(driver_output.text, '.txt', - test_failures.has_failure_type(test_failures.FailureMissingResult, failures)) + test_failures.has_failure_type(test_failures.FailureMissingResult, failures) or + test_failures.has_failure_type(test_failures.FailureTestHarnessAssertion, failures)) self._save_baseline_data(driver_output.audio, '.wav', test_failures.has_failure_type(test_failures.FailureMissingAudio, failures)) self._save_baseline_data(driver_output.image, '.png', test_failures.has_failure_type(test_failures.FailureMissingImage, failures)) - def _save_baseline_data(self, data, extension, is_missing): + def _save_baseline_data(self, data, extension, force_create_new_baseline): if data is None: return + port = self._port fs = self._filesystem + # Do not create a new baseline unless we are specifically told so. + current_expected_path = port.expected_filename(self._test_name, extension) + if not fs.exists(current_expected_path) and not force_create_new_baseline: + return + flag_specific_dir = port.baseline_flag_specific_dir() if flag_specific_dir: output_dir = fs.join(flag_specific_dir, fs.dirname(self._test_name)) @@ -222,11 +229,9 @@ class SingleTestRunner(object): _log.info('Removing the current baseline "%s"', port.relative_test_filename(output_path)) fs.remove(output_path) + # Note that current_expected_path may change because of the above file removal. 
current_expected_path = port.expected_filename(self._test_name, extension) - if not fs.exists(current_expected_path): - if not is_missing or not self._options.reset_results: - return - elif fs.sha1(current_expected_path) == hashlib.sha1(data).hexdigest(): + if fs.exists(current_expected_path) and fs.sha1(current_expected_path) == hashlib.sha1(data).hexdigest(): if self._options.reset_results: _log.info('Not writing new expected result "%s" because it is the same as the current expected result', port.relative_test_filename(output_path)) diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/layout_package/bot_test_expectations.py b/chromium/third_party/blink/tools/blinkpy/web_tests/layout_package/bot_test_expectations.py index f7ce07b7233..81d036eb38b 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/layout_package/bot_test_expectations.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/layout_package/bot_test_expectations.py @@ -218,7 +218,7 @@ class BotTestExpectations(object): # Distinct resulting expectations. result_exp = map(string_to_exp, result_strings) - expected = lambda e: TestExpectations.result_was_expected(e, expectations, False) + expected = lambda e: TestExpectations.result_was_expected(e, expectations) additional_expectations = set(e for e in result_exp if not expected(e)) @@ -328,7 +328,7 @@ class BotTestExpectations(object): # individual runs' full_results.json, which would be slow and more complicated. # The only thing we lose by not fixing this is that a test that was flaky # and got fixed will still get printed out until 100 runs have passed. - if not TestExpectations.result_was_expected(result_enum, latest_expectations, test_needs_rebaselining=False): + if not TestExpectations.result_was_expected(result_enum, latest_expectations): has_unexpected_results = True break diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations.py b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations.py index ee549c340d0..70f1267627b 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations.py @@ -44,7 +44,7 @@ _log = logging.getLogger(__name__) # FIXME: range() starts with 0 which makes if expectation checks harder # as PASS is 0. (PASS, FAIL, TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO, TIMEOUT, CRASH, LEAK, SKIP, WONTFIX, - SLOW, REBASELINE, NEEDS_REBASELINE_UNUSED, NEEDS_MANUAL_REBASELINE, MISSING, FLAKY, NOW, NONE) = range(19) + SLOW, MISSING, FLAKY, NOW, NONE) = range(16) WEBKIT_BUG_PREFIX = 'webkit.org/b/' CHROMIUM_BUG_PREFIX = 'crbug.com/' @@ -73,7 +73,6 @@ class TestExpectationParser(object): # FIXME: Rename these to *_KEYWORD as in MISSING_KEYWORD above, but make # the case studdly-caps to match the actual file contents. - REBASELINE_MODIFIER = 'rebaseline' PASS_EXPECTATION = 'pass' SKIP_MODIFIER = 'skip' SLOW_MODIFIER = 'slow' @@ -160,12 +159,7 @@ class TestExpectationParser(object): expectations = [expectation.lower() for expectation in expectation_line.expectations] if not expectation_line.bugs and self.WONTFIX_MODIFIER not in expectations: expectation_line.warnings.append(self.MISSING_BUG_WARNING) - if self.REBASELINE_MODIFIER in expectations: - expectation_line.warnings.append('REBASELINE should only be used for running rebaseline.py. 
Cannot be checked in.') - specifiers = [specifier.lower() for specifier in expectation_line.specifiers] - if self.REBASELINE_MODIFIER in expectations and ('debug' in specifiers or 'release' in specifiers): - expectation_line.warnings.append('A test cannot be rebaselined for Debug/Release.') def _parse_expectations(self, expectation_line): result = set() @@ -296,7 +290,6 @@ class TestExpectationLine(object): 'Failure': 'FAIL', MISSING_KEYWORD: 'MISSING', 'Pass': 'PASS', - 'Rebaseline': 'REBASELINE', 'Skip': 'SKIP', 'Slow': 'SLOW', 'Timeout': 'TIMEOUT', @@ -714,7 +707,7 @@ class TestExpectationsModel(object): return ' '.join(retval) def remove_expectation_line(self, test): - if not self.has_test(test.name): + if not self.has_test(test): return self._clear_expectations_for_test(test) del self._test_to_expectation_line[test] @@ -904,7 +897,6 @@ class TestExpectations(object): TestExpectationParser.SKIP_MODIFIER: SKIP, TestExpectationParser.WONTFIX_MODIFIER: WONTFIX, TestExpectationParser.SLOW_MODIFIER: SLOW, - TestExpectationParser.REBASELINE_MODIFIER: REBASELINE, } EXPECTATIONS_TO_STRING = {k: v.upper() for (v, k) in EXPECTATIONS.iteritems()} @@ -924,8 +916,6 @@ class TestExpectations(object): MISSING: 'missing results', } - NON_TEST_OUTCOME_EXPECTATIONS = (REBASELINE, SKIP, SLOW, WONTFIX) - BUILD_TYPES = ('debug', 'release') TIMELINES = { @@ -955,25 +945,26 @@ class TestExpectations(object): raise ValueError(expectation) @staticmethod - def result_was_expected(result, expected_results, test_needs_rebaselining): + def result_was_expected(result, expected_results): """Returns whether we got a result we were expecting. Args: result: actual result of a test execution expected_results: set of results listed in test_expectations - test_needs_rebaselining: whether test was marked as REBASELINE """ - if not set(expected_results) - set(TestExpectations.NON_TEST_OUTCOME_EXPECTATIONS): - expected_results = set([PASS]) - - if result in expected_results: - return True - if result in (PASS, TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO, MISSING) and NEEDS_MANUAL_REBASELINE in expected_results: + local_expected = set(expected_results) + if WONTFIX in local_expected: + # WontFix should be treated as if we expected a Skip. + local_expected.add(SKIP) + + # Make sure we have at least one result type that may actually happen. + local_expected.discard(WONTFIX) + local_expected.discard(SLOW) + if not local_expected: + local_expected = {PASS} + + if result in local_expected: return True - if result in (TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO) and FAIL in expected_results: - return True - if result == MISSING and test_needs_rebaselining: - return True - if result == SKIP: + if result in (TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO) and FAIL in local_expected: return True return False @@ -1085,9 +1076,6 @@ class TestExpectations(object): def expectations(self): return self._expectations - def get_rebaselining_failures(self): - return self._model.get_test_set(REBASELINE) - # FIXME: Change the callsites to use TestExpectationsModel and remove. 
def get_expectations(self, test): return self._model.get_expectations(test) @@ -1113,10 +1101,7 @@ class TestExpectations(object): expected_results = self.remove_non_sanitizer_failures(expected_results) elif not pixel_tests_are_enabled: expected_results = self.remove_pixel_failures(expected_results) - return self.result_was_expected(result, expected_results, self.is_rebaselining(test)) - - def is_rebaselining(self, test): - return REBASELINE in self._model.get_expectations(test) + return self.result_was_expected(result, expected_results) def _shorten_filename(self, filename): finder = PathFinder(self._port.host.filesystem) diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations_unittest.py b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations_unittest.py index 18344a0042e..ff92d5a1cb4 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations_unittest.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_expectations_unittest.py @@ -36,7 +36,7 @@ from blinkpy.web_tests.models.test_configuration import TestConfiguration, TestC from blinkpy.web_tests.models.test_expectations import ( TestExpectationLine, TestExpectations, ParseError, TestExpectationParser, PASS, FAIL, TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO, - TIMEOUT, CRASH, LEAK, SKIP, WONTFIX, NEEDS_MANUAL_REBASELINE, MISSING + TIMEOUT, CRASH, LEAK, SKIP, WONTFIX, MISSING ) @@ -121,35 +121,15 @@ class MiscTests(Base): def test_result_was_expected(self): # test basics - self.assertEqual(TestExpectations.result_was_expected(PASS, set([PASS]), test_needs_rebaselining=False), True) - self.assertEqual(TestExpectations.result_was_expected(FAIL, set([PASS]), test_needs_rebaselining=False), False) + self.assertEqual(TestExpectations.result_was_expected(PASS, set([PASS])), True) + self.assertEqual(TestExpectations.result_was_expected(FAIL, set([PASS])), False) # test handling of SKIPped tests and results - self.assertEqual(TestExpectations.result_was_expected(SKIP, set([CRASH]), test_needs_rebaselining=False), True) - self.assertEqual(TestExpectations.result_was_expected(SKIP, set([LEAK]), test_needs_rebaselining=False), True) - - # test handling of MISSING results and the REBASELINE specifier - self.assertEqual(TestExpectations.result_was_expected(MISSING, set([PASS]), test_needs_rebaselining=True), True) - self.assertEqual(TestExpectations.result_was_expected(MISSING, set([PASS]), test_needs_rebaselining=False), False) - - self.assertTrue(TestExpectations.result_was_expected( - PASS, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) - self.assertTrue(TestExpectations.result_was_expected( - MISSING, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) - self.assertTrue(TestExpectations.result_was_expected( - TEXT, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) - self.assertTrue(TestExpectations.result_was_expected( - IMAGE, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) - self.assertTrue(TestExpectations.result_was_expected( - IMAGE_PLUS_TEXT, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) - self.assertTrue(TestExpectations.result_was_expected( - AUDIO, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) - self.assertFalse(TestExpectations.result_was_expected( - TIMEOUT, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) - self.assertFalse(TestExpectations.result_was_expected( - CRASH, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) 
- self.assertFalse(TestExpectations.result_was_expected( - LEAK, set([NEEDS_MANUAL_REBASELINE]), test_needs_rebaselining=False)) + self.assertEqual(TestExpectations.result_was_expected(SKIP, set([CRASH])), False) + self.assertEqual(TestExpectations.result_was_expected(SKIP, set([LEAK])), False) + + # test handling of MISSING results + self.assertEqual(TestExpectations.result_was_expected(MISSING, set([PASS])), False) def test_remove_pixel_failures(self): self.assertEqual(TestExpectations.remove_pixel_failures(set([FAIL])), set([FAIL])) @@ -776,17 +756,6 @@ Bug(y) [ Mac ] failures/expected/foo.html [ Crash ] """, actual_expectations) -class RebaseliningTest(Base): - - def test_get_rebaselining_failures(self): - # Make sure we find a test as needing a rebaseline even if it is not marked as a failure. - self.parse_exp('Bug(x) failures/expected/text.html [ Rebaseline ]\n') - self.assertEqual(len(self._exp.get_rebaselining_failures()), 1) - - self.parse_exp(self.get_basic_expectations()) - self.assertEqual(len(self._exp.get_rebaselining_failures()), 0) - - class TestExpectationsParserTests(unittest.TestCase): def __init__(self, testFunc): diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results.py b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results.py index 3e7668df311..ad6448ded52 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results.py @@ -245,16 +245,28 @@ def summarize_results(port_obj, expectations, initial_results, has_unexpected_pass = True else: has_expected = True - # A test is flaky if it has both expected and unexpected runs (NOT pass - # and failure). + + # TODO(crbug.com/855255): This code calls a test flaky if it has both + # expected and unexpected runs (NOT pass and failure); this is generally + # wrong (really it should just be if there are multiple kinds of results), + # but this works in the normal case because a test will only be retried + # if a result is unexpected, and if you get an expected result on the + # retry, then you did get multiple results. This fails if you get + # one kind of unexpected failure initially and another kind of + # unexpected failure on the retry (e.g., TIMEOUT CRASH), or if you + # explicitly run a test multiple times and get multiple expected results. is_flaky = has_expected and has_unexpected - if len(set(actual)) == 1: - actual = [actual[0]] - actual_types = [actual_types[0]] + test_dict = {} + test_dict['expected'] = expected + test_dict['actual'] = ' '.join(actual) + + # Fields below are optional. To avoid bloating the output results json + # too much, only add them when they are True or non-empty. if is_flaky: num_flaky += 1 + test_dict['is_flaky'] = True elif all_pass or has_unexpected_pass: # We count two situations as a "pass": # 1. All test runs pass (which is obviously non-flaky, but does not @@ -268,19 +280,10 @@ def summarize_results(port_obj, expectations, initial_results, num_passes += 1 if not has_stderr and only_include_failing: continue - elif has_unexpected and result.type != test_expectations.SKIP: + elif has_unexpected: # Either no retries or all retries failed unexpectedly. - # TODO(robertma): When will there be unexpected skip? Do we really - # want to ignore them when counting regressions? num_regressions += 1 - test_dict = {} - - test_dict['expected'] = expected - test_dict['actual'] = ' '.join(actual) - - # Fields below are optional. 
To avoid bloating the output results json - # too much, only add them when they are True or non-empty. rounded_run_time = round(initial_result.test_run_time, 1) if rounded_run_time: @@ -318,11 +321,15 @@ def summarize_results(port_obj, expectations, initial_results, port_obj.get_option('pixel_tests') or initial_result.reftest_type, port_obj.get_option('enable_sanitizer')) - # Note: is_unexpected is intended to capture the *last* result. In the - # normal use case (stop retrying failures once they pass), this is - # equivalent to checking if none of the results is expected. - if not any(is_expected(actual_result) for actual_result in actual_types): + # Note: is_unexpected and is_regression are intended to reflect the + # *last* result. In the normal use case (stop retrying failures + # once they pass), this is equivalent to saying that all of the + # results were unexpected failures. + last_result = actual_types[-1] + if not is_expected(last_result): test_dict['is_unexpected'] = True + if last_result != test_expectations.PASS: + test_dict['is_regression'] = True if initial_result.has_repaint_overlay: test_dict['has_repaint_overlay'] = True diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results_unittest.py b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results_unittest.py index b56872d7ae3..5c587d78f62 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results_unittest.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/models/test_run_results_unittest.py @@ -83,7 +83,7 @@ def summarized_results(port, expected, passing, flaky, only_include_failing=Fals elif passing: skipped_result = get_result('passes/skipped/skip.html') skipped_result.type = test_expectations.SKIP - initial_results.add(skipped_result, expected, test_is_slow) + initial_results.add(skipped_result, True, test_is_slow) initial_results.add(get_result('passes/text.html', run_time=1), expected, test_is_slow) initial_results.add(get_result('failures/expected/audio.html'), expected, test_is_slow) @@ -205,7 +205,6 @@ class SummarizedResultsTest(unittest.TestCase): 'TEXT': 1, 'IMAGE': 1, 'PASS': 0, - 'REBASELINE': 0, 'SKIP': 0, 'SLOW': 0, 'TIMEOUT': 3, @@ -225,7 +224,6 @@ class SummarizedResultsTest(unittest.TestCase): 'TEXT': 0, 'IMAGE': 0, 'PASS': 1, - 'REBASELINE': 0, 'SKIP': 0, 'SLOW': 0, 'TIMEOUT': 1, @@ -245,7 +243,6 @@ class SummarizedResultsTest(unittest.TestCase): 'TEXT': 0, 'IMAGE': 0, 'PASS': 5, - 'REBASELINE': 0, 'SKIP': 1, 'SLOW': 0, 'TIMEOUT': 0, @@ -285,7 +282,6 @@ class SummarizedResultsTest(unittest.TestCase): self.port._options.builder_name = 'dummy builder' summary = summarized_results(self.port, expected=False, passing=True, flaky=False) self.assertTrue(summary['tests']['passes']['text.html']) - self.assertTrue('is_unexpected' not in summary['tests']['passes']['text.html']) self.assertEqual(summary['num_passes'], 5) self.assertEqual(summary['num_regressions'], 0) self.assertEqual(summary['num_flaky'], 0) @@ -347,7 +343,6 @@ class SummarizedResultsTest(unittest.TestCase): def test_summarized_results_flaky(self): summary = summarized_results(self.port, expected=False, passing=False, flaky=True) - self.assertTrue('is_unexpected' not in summary['tests']['failures']['expected']['crash.html']) self.assertEquals(summary['tests']['failures']['expected']['crash.html']['expected'], 'CRASH') self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT AUDIO CRASH LEAK') @@ -425,15 +420,15 @@ class 
SummarizedResultsTest(unittest.TestCase): self.assertTrue(summary['tests']['passes']['text.html']['is_unexpected']) self.assertEquals(summary['tests']['passes']['text.html']['expected'], 'PASS') - self.assertEquals(summary['tests']['passes']['text.html']['actual'], 'TIMEOUT') + self.assertEquals(summary['tests']['passes']['text.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT') self.assertTrue(summary['tests']['failures']['expected']['crash.html']['is_unexpected']) self.assertEquals(summary['tests']['failures']['expected']['crash.html']['expected'], 'CRASH') - self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT') + self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT') self.assertTrue(summary['tests']['failures']['expected']['leak.html']['is_unexpected']) self.assertEquals(summary['tests']['failures']['expected']['leak.html']['expected'], 'LEAK') - self.assertEquals(summary['tests']['failures']['expected']['leak.html']['actual'], 'TIMEOUT') + self.assertEquals(summary['tests']['failures']['expected']['leak.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT') self.assertTrue(summary['tests']['failures']['expected']['audio.html']['is_unexpected']) self.assertEquals(summary['tests']['failures']['expected']['audio.html']['expected'], 'FAIL') diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/port/android.py b/chromium/third_party/blink/tools/blinkpy/web_tests/port/android.py index a94f8e13956..b388554b41f 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/port/android.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/port/android.py @@ -432,7 +432,7 @@ class AndroidPort(base.Port): device_errors.CommandTimeoutError, device_errors.DeviceUnreachableError) as error: with lock: - _log.warning('[%s] failed to prepare_device: %s', serial, error) + _log.warning('[%s] failed to prepare_device: %s', device.serial, error) devices = self._devices.usable_devices(self.host.executive) device_utils.DeviceUtils.parallel(devices).pMap(setup_device) diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/port/base.py b/chromium/third_party/blink/tools/blinkpy/web_tests/port/base.py index 09312ae5ff2..1e16870e803 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/port/base.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/port/base.py @@ -1067,6 +1067,20 @@ class Port(object): """ return self._filesystem.join(self.layout_tests_dir(), test_name) + @memoized + def args_for_test(self, test_name): + test_base = self.lookup_virtual_test_base(test_name) + if test_base: + return self.lookup_virtual_test_args(test_name) + return self.lookup_physical_test_args(test_name) + + @memoized + def name_for_test(self, test_name): + test_base = self.lookup_virtual_test_base(test_name) + if test_base and not self._filesystem.exists(self.abspath_for_test(test_name)): + return test_base + return test_name + def results_directory(self): """Returns the absolute path to the place to store the test results.""" if not self._results_directory: diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/run_webkit_tests_unittest.py b/chromium/third_party/blink/tools/blinkpy/web_tests/run_webkit_tests_unittest.py index 78f6c76e967..82220141a1e 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/run_webkit_tests_unittest.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/run_webkit_tests_unittest.py @@ -29,11 +29,14 @@ # OF THIS SOFTWARE, EVEN IF 
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import json +import os import re import StringIO +import sys import unittest from blinkpy.common import exit_codes +from blinkpy.common import path_finder from blinkpy.common.host import Host from blinkpy.common.host_mock import MockHost from blinkpy.common.system.path import abspath_to_uri @@ -44,6 +47,11 @@ from blinkpy.web_tests.models import test_expectations from blinkpy.web_tests.models import test_failures from blinkpy.web_tests.port import test +_MOCK_ROOT = os.path.join( + path_finder.get_chromium_src_dir(), 'third_party', 'pymock') +sys.path.append(_MOCK_ROOT) +import mock + def parse_args(extra_args=None, tests_included=False): extra_args = extra_args or [] @@ -521,25 +529,31 @@ class RunTest(unittest.TestCase, StreamTestingMixin): def test_sharding_even(self): # Test that we actually select the right part tests_to_run = ['passes/error.html', 'passes/image.html', 'passes/platform_image.html', 'passes/text.html'] - # Shard 0 of 2 - tests_run = get_tests_run(['--shard-index', '0', '--total-shards', '2', '--order', 'natural'] + tests_to_run) - self.assertEqual(tests_run, ['passes/error.html', 'passes/image.html']) - # Shard 1 of 2 - tests_run = get_tests_run(['--shard-index', '1', '--total-shards', '2', '--order', 'natural'] + tests_to_run) - self.assertEqual(tests_run, ['passes/platform_image.html', 'passes/text.html']) + + with mock.patch('__builtin__.hash', len): + + # Shard 0 of 2 + tests_run = get_tests_run(['--shard-index', '0', '--total-shards', '2', '--order', 'natural'] + tests_to_run) + self.assertEqual(tests_run, ['passes/platform_image.html', 'passes/text.html']) + # Shard 1 of 2 + tests_run = get_tests_run(['--shard-index', '1', '--total-shards', '2', '--order', 'natural'] + tests_to_run) + self.assertEqual(tests_run, ['passes/error.html', 'passes/image.html']) def test_sharding_uneven(self): tests_to_run = ['passes/error.html', 'passes/image.html', 'passes/platform_image.html', 'passes/text.html', 'perf/foo/test.html'] - # Shard 0 of 3 - tests_run = get_tests_run(['--shard-index', '0', '--total-shards', '3', '--order', 'natural'] + tests_to_run) - self.assertEqual(tests_run, ['passes/error.html', 'passes/image.html']) - # Shard 1 of 3 - tests_run = get_tests_run(['--shard-index', '1', '--total-shards', '3', '--order', 'natural'] + tests_to_run) - self.assertEqual(tests_run, ['passes/platform_image.html', 'passes/text.html']) - # Shard 2 of 3 - tests_run = get_tests_run(['--shard-index', '2', '--total-shards', '3', '--order', 'natural'] + tests_to_run) - self.assertEqual(tests_run, ['perf/foo/test.html']) + + with mock.patch('__builtin__.hash', len): + + # Shard 0 of 3 + tests_run = get_tests_run(['--shard-index', '0', '--total-shards', '3', '--order', 'natural'] + tests_to_run) + self.assertEqual(tests_run, ['perf/foo/test.html']) + # Shard 1 of 3 + tests_run = get_tests_run(['--shard-index', '1', '--total-shards', '3', '--order', 'natural'] + tests_to_run) + self.assertEqual(tests_run, ['passes/text.html']) + # Shard 2 of 3 + tests_run = get_tests_run(['--shard-index', '2', '--total-shards', '3', '--order', 'natural'] + tests_to_run) + self.assertEqual(tests_run, ['passes/error.html', 'passes/image.html', 'passes/platform_image.html']) def test_sharding_incorrect_arguments(self): with self.assertRaises(ValueError): @@ -553,15 +567,17 @@ class RunTest(unittest.TestCase, StreamTestingMixin): tests_to_run = ['passes/error.html', 'passes/image.html', 'passes/platform_image.html', 'passes/text.html'] host = MockHost() - 
host.environ['GTEST_SHARD_INDEX'] = '0' - host.environ['GTEST_TOTAL_SHARDS'] = '2' - shard_0_tests_run = get_tests_run(['--order', 'natural'] + tests_to_run, host=host) - self.assertEqual(shard_0_tests_run, ['passes/error.html', 'passes/image.html']) + with mock.patch('__builtin__.hash', len): + + host.environ['GTEST_SHARD_INDEX'] = '0' + host.environ['GTEST_TOTAL_SHARDS'] = '2' + shard_0_tests_run = get_tests_run(['--order', 'natural'] + tests_to_run, host=host) + self.assertEqual(shard_0_tests_run, ['passes/platform_image.html', 'passes/text.html']) - host.environ['GTEST_SHARD_INDEX'] = '1' - host.environ['GTEST_TOTAL_SHARDS'] = '2' - shard_1_tests_run = get_tests_run(['--order', 'natural'] + tests_to_run, host=host) - self.assertEqual(shard_1_tests_run, ['passes/platform_image.html', 'passes/text.html']) + host.environ['GTEST_SHARD_INDEX'] = '1' + host.environ['GTEST_TOTAL_SHARDS'] = '2' + shard_1_tests_run = get_tests_run(['--order', 'natural'] + tests_to_run, host=host) + self.assertEqual(shard_1_tests_run, ['passes/error.html', 'passes/image.html']) def test_smoke_test(self): host = MockHost() @@ -617,16 +633,27 @@ class RunTest(unittest.TestCase, StreamTestingMixin): 'failures/unexpected/text-image-checksum.html'], tests_included=True, host=host) self.assertEqual(details.exit_code, 2) - json_string = host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json') - self.assertTrue(json_string.find( - '"text-image-checksum.html":{' - '"expected":"PASS",' - '"text_mismatch":"general text mismatch",' - '"actual":"IMAGE+TEXT","is_unexpected":true') != -1) - self.assertTrue(json_string.find( - '"missing_text.html":{"expected":"PASS","is_missing_text":true,"actual":"MISSING","is_unexpected":true') != -1) - self.assertTrue(json_string.find('"num_regressions":2') != -1) - self.assertTrue(json_string.find('"num_flaky":0') != -1) + results = json.loads(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json')) + self.assertEqual( + results['tests']['failures']['unexpected']['text-image-checksum.html'], + { + 'expected': 'PASS', + 'actual': 'IMAGE+TEXT', + 'is_unexpected': True, + 'is_regression': True, + 'text_mismatch': 'general text mismatch', + }) + self.assertEqual( + results['tests']['failures']['unexpected']['missing_text.html'], + { + 'expected': 'PASS', + 'actual': 'MISSING', + 'is_unexpected': True, + 'is_regression': True, + 'is_missing_text': True, + }) + self.assertEqual(results['num_regressions'], 2) + self.assertEqual(results['num_flaky'], 0) def test_different_failure_on_retry(self): # This tests that if a test fails two different ways -- both unexpected @@ -659,8 +686,8 @@ class RunTest(unittest.TestCase, StreamTestingMixin): def test_crash_with_stderr(self): host = MockHost() logging_run(['failures/unexpected/crash-with-stderr.html'], tests_included=True, host=host) - self.assertTrue(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json').find( - '{"crash-with-stderr.html":{"expected":"PASS","actual":"CRASH","has_stderr":true,"is_unexpected":true') != -1) + full_results = json.loads(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json')) + self.assertEqual(full_results['tests']['failures']['unexpected']['crash-with-stderr.html']['has_stderr'], True) def test_no_image_failure_with_image_diff(self): host = MockHost() @@ -828,11 +855,15 @@ class RunTest(unittest.TestCase, StreamTestingMixin): host.filesystem.exists('/tmp/layout-test-results/retry_3/failures/unexpected/text-image-checksum-actual.png')) 
json_string = host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json') results = parse_full_results(json_string) - self.assertEqual(results['tests']['failures']['unexpected']['text-image-checksum.html'], - {'expected': 'PASS', - 'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT', - 'is_unexpected': True, - 'text_mismatch': 'general text mismatch'}) + self.assertEqual( + results['tests']['failures']['unexpected']['text-image-checksum.html'], + { + 'expected': 'PASS', + 'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT', + 'is_regression': True, + 'is_unexpected': True, + 'text_mismatch': 'general text mismatch', + }) self.assertFalse(results['pixel_tests_enabled']) self.assertTrue(details.enabled_pixel_tests_in_retry) @@ -925,7 +956,7 @@ class RunTest(unittest.TestCase, StreamTestingMixin): host = MockHost() logging_run(['--no-show-results', 'reftests/foo/'], tests_included=True, host=host) results = parse_full_results(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json')) - self.assertEqual(results['tests']['reftests']['foo']['unlistedtest.html']['actual'], 'MISSING') + self.assertEqual(results['tests']['reftests']['foo']['unlistedtest.html']['actual'], 'MISSING MISSING MISSING MISSING') self.assertEqual(results['num_regressions'], 5) self.assertEqual(results['num_flaky'], 0) @@ -1124,12 +1155,33 @@ class EndToEndTest(unittest.TestCase): self.assertTrue('multiple-mismatch-success.html' not in results['tests']['reftests']['foo']) self.assertTrue('multiple-both-success.html' not in results['tests']['reftests']['foo']) - self.assertEqual(results['tests']['reftests']['foo']['multiple-match-failure.html'], - {'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['=='], 'is_unexpected': True}) - self.assertEqual(results['tests']['reftests']['foo']['multiple-mismatch-failure.html'], - {'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['!='], 'is_unexpected': True}) - self.assertEqual(results['tests']['reftests']['foo']['multiple-both-failure.html'], - {'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['==', '!='], 'is_unexpected': True}) + self.assertEqual( + results['tests']['reftests']['foo']['multiple-match-failure.html'], + { + 'expected': 'PASS', + 'actual': 'IMAGE IMAGE IMAGE IMAGE', + 'reftest_type': ['=='], + 'is_regression': True, + 'is_unexpected': True, + }) + self.assertEqual( + results['tests']['reftests']['foo']['multiple-mismatch-failure.html'], + { + 'expected': 'PASS', + 'actual': 'IMAGE IMAGE IMAGE IMAGE', + 'reftest_type': ['!='], + 'is_regression': True, + 'is_unexpected': True, + }) + self.assertEqual( + results['tests']['reftests']['foo']['multiple-both-failure.html'], + { + 'expected': 'PASS', + 'actual': 'IMAGE IMAGE IMAGE IMAGE', + 'reftest_type': ['==', '!='], + 'is_regression': True, + 'is_unexpected': True, + }) class RebaselineTest(unittest.TestCase, StreamTestingMixin): @@ -1221,7 +1273,8 @@ class RebaselineTest(unittest.TestCase, StreamTestingMixin): expected_extensions=['.txt']) def test_reset_results_testharness_no_baseline(self): - # Tests that we don't create new result for a testharness test without baselines. + # Tests that we create new result for a failing testharness test without + # baselines, but don't create one for a passing one. 
host = MockHost() details, log_stream, _ = logging_run( [ @@ -1232,8 +1285,8 @@ class RebaselineTest(unittest.TestCase, StreamTestingMixin): tests_included=True, host=host) file_list = host.filesystem.written_files.keys() self.assertEqual(details.exit_code, 0) - self.assertEqual(len(file_list), 5) - self.assert_baselines(file_list, log_stream, 'failures/unexpected/testharness', []) + self.assertEqual(len(file_list), 6) + self.assert_baselines(file_list, log_stream, 'failures/unexpected/testharness', ['.txt']) self.assert_baselines(file_list, log_stream, 'passes/testharness', []) def test_reset_results_testharness_existing_baseline(self): @@ -1458,6 +1511,58 @@ class RebaselineTest(unittest.TestCase, StreamTestingMixin): 'virtual/virtual_failures/failures/unexpected/text-image-checksum', expected_extensions=['.png']) + def test_new_platform_baseline_with_fallback(self): + # Test that we update the existing baseline in the platform-specific + # directory if the new baseline is different, with existing fallback + # baseline (which should not matter). + host = MockHost() + host.filesystem.write_text_file( + test.LAYOUT_TEST_DIR + + '/platform/test-mac-mac10.10/failures/unexpected/text-image-checksum-expected.png', + 'wrong-png-baseline') + + details, log_stream, _ = logging_run( + [ + '--reset-results', + 'failures/unexpected/text-image-checksum.html' + ], + tests_included=True, host=host) + file_list = host.filesystem.written_files.keys() + self.assertEqual(details.exit_code, 0) + self.assertEqual(len(file_list), 7) + # We should reset the platform image baseline. + self.assert_baselines( + file_list, log_stream, + 'platform/test-mac-mac10.10/failures/unexpected/text-image-checksum', + expected_extensions=['.png']) + + def test_new_platform_baseline_without_fallback(self): + # Test that we update the existing baseline in the platform-specific + # directory if the new baseline is different, without existing fallback + # baseline (which should not matter). + host = MockHost() + host.filesystem.write_text_file( + test.LAYOUT_TEST_DIR + + '/platform/test-mac-mac10.10/failures/unexpected/text-image-checksum-expected.png', + 'wrong-png-baseline') + host.filesystem.remove( + test.LAYOUT_TEST_DIR + '/failures/unexpected/text-image-checksum-expected.png') + + details, log_stream, _ = logging_run( + [ + '--reset-results', + 'failures/unexpected/text-image-checksum.html' + ], + tests_included=True, host=host) + file_list = host.filesystem.written_files.keys() + self.assertEqual(details.exit_code, 0) + self.assertEqual(len(file_list), 8) + # We should reset the platform image baseline. + self.assert_baselines( + file_list, log_stream, + 'platform/test-mac-mac10.10/failures/unexpected/text-image-checksum', + expected_extensions=['.png']) + def test_new_virtual_baseline_optimize(self): # Test removing existing baselines under flag-specific directory if the # actual results are the same as the fallback baselines. 
diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/servers/apache_http.py b/chromium/third_party/blink/tools/blinkpy/web_tests/servers/apache_http.py index 9f96f043469..991ae4ab03c 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/servers/apache_http.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/servers/apache_http.py @@ -179,12 +179,11 @@ class ApacheHTTP(server_base.ServerBase): proc = self._executive.popen([self._port_obj.path_to_apache(), '-f', self._port_obj.path_to_apache_config_file(), '-c', 'PidFile "%s"' % self._pid_file, - '-k', 'stop'], stderr=self._executive.PIPE) - proc.wait() + '-k', 'stop']) + _, err = proc.communicate() retval = proc.returncode - err = proc.stderr.read() - if retval or len(err): - raise server_base.ServerError('Failed to stop %s: %s' % (self._name, err)) + if retval or (err and len(err)): + raise server_base.ServerError('Failed to stop %s: %s %s' % (self._name, err)) # For some reason apache isn't guaranteed to have actually stopped after # the stop command returns, so we wait a little while longer for the diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations.py b/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations.py index 526f4b43f08..d40f91d168f 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations.py @@ -230,8 +230,6 @@ class ExpectationsRemover(object): otherwise. """ unstrippable_expectations = ( - 'NEEDSMANUALREBASELINE', - 'REBASELINE', 'SKIP', 'SLOW', ) diff --git a/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations_unittest.py b/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations_unittest.py index 0ab4ee3a70b..3eba7079965 100644 --- a/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations_unittest.py +++ b/chromium/third_party/blink/tools/blinkpy/web_tests/update_expectations_unittest.py @@ -172,8 +172,7 @@ class UpdateTestExpectationsTest(LoggingTestCase): # expectations are flaky so we shouldn't remove any. Bug(test) test/a.html [ Pass ] Bug(test) test/b.html [ Timeout ] - Bug(test) test/c.html [ Failure Timeout ] - Bug(test) test/d.html [ Rebaseline ]""" + Bug(test) test/c.html [ Failure Timeout ]""" self._expectations_remover = ( self._create_expectations_remover(self.FLAKE_TYPE)) @@ -213,8 +212,7 @@ class UpdateTestExpectationsTest(LoggingTestCase): # expectations are failing so we shouldn't remove any. Bug(test) test/a.html [ Pass ] Bug(test) test/b.html [ Failure Pass ] - Bug(test) test/c.html [ Failure Pass Timeout ] - Bug(test) test/d.html [ Rebaseline ]""" + Bug(test) test/c.html [ Failure Pass Timeout ]""" self._expectations_remover = ( self._create_expectations_remover(self.FAIL_TYPE)) @@ -344,36 +342,6 @@ class UpdateTestExpectationsTest(LoggingTestCase): self._assert_expectations_match( updated_expectations, test_expectations_before) - def test_dont_remove_rebaselines(self): - """Tests that lines with rebaseline expectations are untouched.""" - test_expectations_before = """ - # Even though the results show all passing, none of the - # expectations are flaky or failing so we shouldn't remove any. 
- Bug(test) test/a.html [ Failure Pass Rebaseline ] - Bug(test) test/b.html [ Failure Rebaseline ]""" - - self._expectations_remover = self._create_expectations_remover() - self._define_builders({ - 'WebKit Linux Trusty': { - 'port_name': 'linux-trusty', - 'specifiers': ['Trusty', 'Release'] - }, - }) - self._port.all_build_types = ('release',) - self._port.all_systems = (('trusty', 'x86_64'),) - - self._parse_expectations(test_expectations_before) - self._expectation_factory.all_results_by_builder = { - 'WebKit Linux Trusty': { - 'test/a.html': ['PASS', 'PASS'], - 'test/b.html': ['PASS', 'PASS'], - } - } - updated_expectations = ( - self._expectations_remover.get_updated_test_expectations()) - self._assert_expectations_match( - updated_expectations, test_expectations_before) - def test_all_failure_result_types(self): """Tests that all failure types are treated as failure.""" test_expectations_before = ( |