Revert "Performance tests: recover a baseline from ancestor commits and CI results."

Unfortunately this has broken all future commits due to spurious(?) performance changes which I have been unable to work around. This reverts commit cc2261d42f6a954d88e355aaad41f001f65c95da.
author: Ben Gamari <ben@smart-cactus.org> 2019-01-31 19:47:53 -0500
committer: Ben Gamari <ben@smart-cactus.org> 2019-01-31 19:47:53 -0500
commit: ebe2d344899e2165182d3c41b0796ec022561790 (patch)
tree: 0771fe65262a7bbb7a465f1b9e9bb50b91649f29
parent: 21462a3a4edede710582b8038695667d2b9d0159 (diff)
download: haskell-ebe2d344899e2165182d3c41b0796ec022561790.tar.gz
7 files changed, 63 insertions, 353 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4a8cb62e3b..84976328f8 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,7 +6,6 @@ before_script:
   - git submodule sync --recursive
   - git submodule update --init --recursive
   - git checkout .gitmodules
-  - "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
 
 stages:
   - lint
@@ -76,7 +75,6 @@ validate-x86_64-linux-deb8-hadrian:
     - git submodule sync --recursive
     - git submodule update --init --recursive
     - git checkout .gitmodules
-    - "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
   tags:
     - x86_64-linux
 
@@ -99,16 +97,8 @@ validate-x86_64-linux-deb8-hadrian:
       make binary-dist TAR_COMP_OPTS="-1"
       mv ghc-*.tar.xz ghc.tar.xz
     - |
-      # Prepare to push git notes.
-      METRICS_FILE=$(mktemp)
-      git config user.email "ben+ghc-ci@smart-cactus.org"
-      git config user.name "GHC GitLab CI"
-    - |
       THREADS=`mk/detect-cpu-count.sh`
-      make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml METRICS_FILE=$METRICS_FILE
-    - |
-      # Push git notes.
-      METRICS_FILE=$METRICS_FILE .gitlab/push-test-metrics.sh
+      make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml
   dependencies: []
   artifacts:
     reports:
@@ -130,14 +120,12 @@ validate-x86_64-darwin:
     ac_cv_func_clock_gettime: "no"
     LANG: "en_US.UTF-8"
     CONFIGURE_ARGS: --with-intree-gmp
-    TEST_ENV: "x86_64-darwin"
   before_script:
     - git clean -xdf && git submodule foreach git clean -xdf
     - python3 .gitlab/fix-submodules.py
     - git submodule sync --recursive
     - git submodule update --init --recursive
     - git checkout .gitmodules
-    - "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
 
     - bash .gitlab/darwin-init.sh
     - PATH="`pwd`/toolchain/bin:$PATH"
@@ -162,7 +150,6 @@ validate-x86_64-darwin:
     - git submodule sync --recursive
     - git submodule update --init --recursive
     - git checkout .gitmodules
-    - "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
 
     - bash .circleci/prepare-system.sh
     # workaround for docker permissions
@@ -180,8 +167,6 @@ validate-aarch64-linux-deb9:
   stage: full-build
   image: ghcci/aarch64-linux-deb9:0.1
   allow_failure: true
-  variables:
-    TEST_ENV: "aarch64-linux-deb9"
   artifacts:
     when: always
     expire_in: 2 week
@@ -206,8 +191,6 @@ validate-i386-linux-deb9:
   stage: full-build
   image: ghcci/i386-linux-deb9:0.1
   allow_failure: true
-  variables:
-    TEST_ENV: "i386-linux-deb9"
   artifacts:
     when: always
     expire_in: 2 week
@@ -221,7 +204,6 @@ nightly-i386-linux-deb9:
   allow_failure: true
   variables:
     TEST_TYPE: slowtest
-    TEST_ENV: "i386-linux-deb9"
   artifacts:
     when: always
     expire_in: 2 week
@@ -235,8 +217,6 @@ validate-x86_64-linux-deb9:
   extends: .validate-linux
   stage: build
   image: ghcci/x86_64-linux-deb9:0.2
-  variables:
-    TEST_ENV: "x86_64-linux-deb9"
   artifacts:
     when: always
     expire_in: 2 week
@@ -261,7 +241,6 @@ validate-x86_64-linux-deb9-llvm:
   image: ghcci/x86_64-linux-deb9:0.2
   variables:
     BUILD_FLAVOUR: perf-llvm
-    TEST_ENV: "x86_64-linux-deb9-llvm"
   cache:
     key: linux-x86_64-deb9
 
@@ -269,8 +248,6 @@ validate-x86_64-linux-deb8:
   extends: .validate-linux
   stage: full-build
   image: ghcci/x86_64-linux-deb8:0.1
-  variables:
-    TEST_ENV: "x86_64-linux-deb8"
   cache:
     key: linux-x86_64-deb8
   artifacts:
@@ -281,8 +258,6 @@ validate-x86_64-linux-fedora27:
   extends: .validate-linux
   stage: full-build
   image: ghcci/x86_64-linux-fedora27:0.1
-  variables:
-    TEST_ENV: "x86_64-linux-fedora27"
   cache:
     key: linux-x86_64-fedora27
   artifacts:
@@ -294,7 +269,6 @@ validate-x86_64-linux-deb9-integer-simple:
   stage: full-build
   variables:
     INTEGER_LIBRARY: integer-simple
-    TEST_ENV: "x86_64-linux-deb9-integer-simple"
   image: ghcci/x86_64-linux-deb9:0.2
   cache:
     key: linux-x86_64-deb9
@@ -315,7 +289,6 @@ validate-x86_64-linux-deb9-unreg:
   stage: full-build
   variables:
     CONFIGURE_ARGS: --enable-unregisterised
-    TEST_ENV: "x86_64-linux-deb9-unreg"
   image: ghcci/x86_64-linux-deb9:0.2
   cache:
     key: linux-x86_64-deb9
@@ -341,7 +314,6 @@ validate-x86_64-linux-deb9-unreg:
     - git submodule sync --recursive
     - git submodule update --init --recursive
     - git checkout .gitmodules
-    - "git fetch origin refs/notes/perf:refs/notes/ci/perf || true"
     - bash .gitlab/win32-init.sh
   after_script:
     - rd /s /q tmp
diff --git a/.gitlab/push-test-metrics.sh b/.gitlab/push-test-metrics.sh
deleted file mode 100755
index 7ec2dfe955..0000000000
--- a/.gitlab/push-test-metrics.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env bash
-# vim: sw=2 et
-set -euo pipefail
-
-NOTES_ORIGIN="git@gitlab.haskell.org:ghc/ghc-performance-notes.git"
-REF="perf"
-
-fail() {
-  echo "ERROR: $*" >&2
-  exit 1
-}
-
-# Check that private key is available (Set on all GitLab protected branches).
-if ! [ -v PERF_NOTE_KEY ] || [ "$PERF_NOTE_KEY" = "" ]; then
-  echo "Not pushing performance git notes: PERF_NOTE_KEY is not set."
-  exit 0
-fi
-
-# TEST_ENV must be set.
-if ! [ -v TEST_ENV ] || [ "$TEST_ENV" = "" ]; then
-  fail "Not pushing performance git notes: TEST_ENV must be set."
-fi
-
-# Assert that the METRICS_FILE exists and can be read.
-if ! [ -v TEST_ENV ] || [ "$METRICS_FILE" = "" ]
-then
-  fail "\$METRICS_FILE not set."
-fi
-if ! [ -r $METRICS_FILE ]
-then
-  fail "Metrics file not found: $METRICS_FILE"
-fi
-
-# Add gitlab as a known host.
-mkdir -p ~/.ssh
-echo "|1|+AUrMGS1elvPeLNt+NHGa5+c6pU=|4XvfRsQftO1OgZD4c0JJ7oNaii8= ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDXilA5l4kOZPx0nM6xDATF+t4fS6te0eYPDwBI/jLWD9cJVtCnsrwMl5ar+/NfmcD0jnCYztUiVHuXyTaWPJYSQpwltfpTeqpo9/z/0MxkPtSl1uMP2cLbDiqA01OWveChktOXwU6hRQ+7MmO+dNRS/iXrRmYrGv/p1W811QgLBLS9fefEdF25n+0dP71L7Ov7riOawlDmd0C11FraE/R8HX6gs6lbXta1kisdxGyKojYSiCtobUaJxRoatMfUP0a9rwTAyl8tf56LgB+igjMky879VAbL7eQ/AmfHYPrSGJ/YlWP6Jj23Dnos5nOVlWL/rVTs9Y/NakLpPwMs75KTC0Pd74hdf2e3folDdAi2kLrQgO2SI6so7rOYZ+mFkCM751QdDVy4DzjmDvSgSIVf9SV7RQf7e7unE7pSZ/ILupZqz9KhR1MOwVO+ePa5qJMNSdC204PIsRWkIO5KP0QLl507NI9Ri84+aODoHD7gDIWNhU08J2P8/E6r0wcC8uWaxh+HaOjI9BkHjqRYsrgfn54BAuO9kw1cDvyi3c8n7VFlNtvQP15lANwim3gr9upV+r95KEPJCgZMYWJBDPIVtp4GdYxCfXxWj5oMXbA5pf0tNixwNJjAsY7I6RN2htHbuySH36JybOZk+gCj6mQkxpCT/tKaUn14hBJWLq7Q+Q==" >> ~/.ssh/known_hosts
-echo "|1|JZkdAPJmpX6SzGeqhmQLfMWLGQA=|4vTELroOlbFxbCr0WX+PK9EcpD0= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJknufU+I6A5Nm58lmse4/o11Ai2UzYbYe7782J1+kRk" >> ~/.ssh/known_hosts
-
-# Setup ssh keys.
-eval `ssh-agent`
-echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDJPR1vrZgeGTXmgJw2PsJfMjf22LcDnVVwt3l0rwTZ+8Q2J0bHaYxMRKBco1sON6LGcZepw0Hy76RQ87v057pTz18SXvnfE7U/B6v9qBk0ILJz+4BOX9sEhxu2XmScp/wMxkG9IoyruMlsxXzd1sz09o+rzzx24U2Rp27PRm08vG0oipve6BWLbYEqYrE4/nCufqOJmGd56fju7OTU0lTpEkGDEDWGMxutaX2CbTbDju7qy07Ld8BjSc9aHfvuQaslUbj3ex3EF8EXahURzGpHQn/UFFzVGMokFumiJCAagHQb7cj6jOkKseZLaysbA/mTBQsOzjWiRmkN23bQf1wF ben+ghc-ci@smart-cactus.org" > ~/.ssh/perf_rsa.pub
-touch ~/.ssh/perf_rsa
-chmod 0600 ~/.ssh/perf_rsa
-echo "$PERF_NOTE_KEY" >> ~/.ssh/perf_rsa
-ssh-add ~/.ssh/perf_rsa
-
-# Reset the git notes and append the metrics file to the notes, then push and return the result.
-# This is favoured over a git notes merge as it avoids potential data loss/duplication from the merge strategy.
-function reset_append_note_push {
-  git fetch -f $NOTES_ORIGIN refs/notes/$REF:refs/notes/$REF || true
-  echo "git notes --ref=$REF append -F $METRICS_FILE HEAD"
-  git notes --ref=$REF append -F $METRICS_FILE HEAD
-  echo "git push $NOTES_ORIGIN refs/notes/$REF"
-  git push $NOTES_ORIGIN refs/notes/$REF
-}
-
-# Push the metrics file as a git note. This may fail if another task pushes a note first. In that case
-# the latest note is fetched and appended.
-MAX_RETRY=20
-until reset_append_note_push || [ $MAX_RETRY -le 0 ]
-do
-  ((MAX_RETRY--))
-  echo ""
-  echo "Failed to push git notes. Fetching, appending, and retrying... $MAX_RETRY retries left."
-done
diff --git a/testsuite/driver/perf_notes.py b/testsuite/driver/perf_notes.py
index c5d09e04db..6d80e07e12 100644
--- a/testsuite/driver/perf_notes.py
+++ b/testsuite/driver/perf_notes.py
@@ -13,7 +13,6 @@ import argparse
 import re
 import subprocess
 import time
-import sys
 
 from collections import namedtuple
 from math import ceil, trunc
@@ -42,7 +41,7 @@ def is_worktree_dirty():
 # The metrics (a.k.a stats) are named tuples, PerfStat, in this form:
 #
 # ( test_env : 'val',      # Test environment.
-#   test     : 'val',      # Name of the test
+#   test     : 'val',      # Name of the test 
 #   way      : 'val',
 #   metric   : 'val',      # Metric being recorded
 #   value    : 'val',      # The statistic result e.g. runtime
@@ -74,21 +73,6 @@ def get_perf_stats(commit='HEAD', namespace='perf'):
     log = [parse_perf_stat(stat_str) for stat_str in log]
     return log
 
-# Check if a str is in a 40 character git commit hash.
-# str -> bool
-_commit_hash_re = re.compile('[0-9a-f]' * 40)
-def is_commit_hash(hash):
-    return _commit_hash_re.fullmatch(hash) != None
-
-# Convert a <ref> to a commit hash code.
-# str -> str
-def commit_hash(commit):
-    if is_commit_hash(commit):
-        return commit
-    return subprocess.check_output(['git', 'rev-parse', commit], \
-            stderr=subprocess.STDOUT) \
-            .decode() \
-            .strip()
 
 # Get allowed changes to performance. This is extracted from the commit message of
 # the given commit in this form:
@@ -99,20 +83,13 @@ def commit_hash(commit):
 #           'metrics': ['metricA', 'metricB', ...],
 #           'opts': {
 #                   'optionA': 'string value',
-#                   'optionB': 'string value',          # e.g. test_env: "x86_64-linux"
+#                   'optionB': 'string value',
 #                   ...
 #               }
 #   }
-_get_allowed_perf_changes_cache = {}
 def get_allowed_perf_changes(commit='HEAD'):
-    global _get_allowed_perf_changes_cache
-    commit =  commit_hash(commit)
-    if not commit in _get_allowed_perf_changes_cache:
-        commitByteStr = subprocess.check_output(\
-            ['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
-        _get_allowed_perf_changes_cache[commit] \
-            = parse_allowed_perf_changes(commitByteStr.decode())
-    return _get_allowed_perf_changes_cache[commit]
+    commitByteStr = subprocess.check_output(['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
+    return parse_allowed_perf_changes(commitByteStr.decode())
 
 def parse_allowed_perf_changes(commitMsg):
     # Helper regex. Non-capturing unless postfixed with Cap.
@@ -125,7 +102,7 @@ def parse_allowed_perf_changes(commitMsg):
     exp = (r"^Metric"
         +s+r"(Increase|Decrease)"
         +s+r"?("+qstr+r"|"+qstrList+r")?"                   # Metric or list of metrics.s..
-        +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?"      # Options surrounded in parenthesis. (allow parenthases in quoted strings)
+        +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?"      # Options surounded in parenthesis. (allow parenthases in quoted strings))
         +s+r"?:?"                                           # Optional ":"
         +s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)"           # Test names. Stop parsing on empty or non-indented new line.
         )
@@ -236,176 +213,11 @@ def append_perf_stat(stats, commit='HEAD', namespace='perf', max_tries=5):
         tries += 1
         time.sleep(1)
 
-    print("\nAn error occurred while writing the performance metrics to git notes.\n \
-            This is usually due to a lock-file existing somewhere in the git repo.")
+    print("\nAn error occured while writing the performance metrics to git notes.\n \
+	            This is usually due to a lock-file existing somewhere in the git repo.")
 
     return False
 
-#
-# Baseline calculation
-#
-
-# Max number of ancestor commits to search when compiling a baseline performance metric.
-BaselineSearchDepth = 75
-
-# The git notes name space for local results.
-LocalNamespace = "perf"
-
-# The git notes name space for ci results.
-CiNamespace = "ci/" + LocalNamespace
-
-# (isCalculated, best fit ci test_env or None)
-BestFitCiTestEnv = (False, None)
-
-# test_env string or None
-def best_fit_ci_test_env():
-    global BestFitCiTestEnv
-    if not BestFitCiTestEnv[0]:
-        platform = sys.platform
-        isArch64 = sys.maxsize > 2**32
-        arch = "x86_64" if isArch64 else "i386"
-
-        if platform.startswith("linux"):
-            test_env = arch + "-linux-deb9"
-        elif platform.startswith("win32"):
-            # There are no windows CI test results.
-            test_env = None
-        elif isArch64 and platform.startswith("darwin"):
-            test_env = arch + "-darwin"
-        elif isArch64 and platform.startswith("freebsd"):
-            test_env = arch + "-freebsd"
-        else:
-            test_env = None
-
-        BestFitCiTestEnv = (True, test_env)
-
-    return BestFitCiTestEnv[1]
-
-_baseline_depth_commit_log = {}
-
-# Get the commit hashes for the last BaselineSearchDepth commits from and
-# including the input commit. The output commits are all commit hashes.
-# str -> [str]
-def baseline_commit_log(commit):
-    global _baseline_depth_commit_log
-    commit = commit_hash(commit)
-    if not commit in _baseline_depth_commit_log:
-        _baseline_depth_commit_log[commit] = \
-            subprocess.check_output(['git', 'log', '--format=%H', \
-                             '-n' + str(BaselineSearchDepth)]) \
-                .decode().split('\n')
-    return _baseline_depth_commit_log[commit]
-
-# Cache of baseline values. This is a dict of dicts indexed on:
-# (useCiNamespace, commit) -> (test_env, test, metric, way) -> baseline
-# (bool          , str   ) -> (str     , str , str   , str) -> float
-_commit_metric_cache = {}
-
-# Get the baseline (expected value) of a test at a given commit. This searches
-# git notes from older commits for recorded metrics (locally and from ci). More
-# recent commits are favoured, then local results over ci results are favoured.
-#
-# commit: str - must be a commit hash (see commit_has())
-# name: str - test name
-# test_env: str - test environment (note a best fit test_env will be used
-#                      instead when looking for ci results)
-# metric: str - test metric
-# way: str - test way
-# returns: the baseline float or None if no metric was found within
-#          BaselineSearchDepth commits and since the last expected change.
-def baseline_metric(commit, name, test_env, metric, way):
-    # For performance reasons (in order to avoid calling commit_hash), we assert
-    # commit is already a commit hash.
-    assert is_commit_hash(commit)
-
-    # Get all recent commit hashes.
-    commit_hashes = baseline_commit_log(commit)
-
-    # TODO PERF use git log to get hashes of all BaselineSearchDepth commits
-    def depth_to_commit(depth):
-        return commit_hashes[depth]
-
-    def has_expected_change(commit):
-        return get_allowed_perf_changes(commit).get(name) \
-                != None
-
-    # Bool -> String
-    def namespace(useCiNamespace):
-        return CiNamespace if useCiNamespace else LocalNamespace
-
-    ci_test_env = best_fit_ci_test_env()
-
-    # gets the metric of a given commit
-    # (Bool, Int) -> (float | None)
-    def commit_metric(useCiNamespace, currentCommit):
-        global _commit_metric_cache
-
-        # Get test environment.
-        effective_test_env = ci_test_env if useCiNamespace else test_env
-        if effective_test_env == None:
-            # This can happen when no best fit ci test is found.
-            return None
-
-        # Check for cached value.
-        cacheKeyA = (useCiNamespace, currentCommit)
-        cacheKeyB = (effective_test_env, name, metric, way)
-        if cacheKeyA in _commit_metric_cache:
-            return _commit_metric_cache[cacheKeyA].get(cacheKeyB)
-
-        # Cache miss.
-        # Calculate baselines from the current commit's git note.
-        # Note that the git note may contain data for other tests. All tests'
-        # baselines will be collected and cached for future use.
-        allCommitMetrics = get_perf_stats(
-                                currentCommit,
-                                namespace(useCiNamespace))
-
-        # Collect recorded values by cacheKeyB.
-        values_by_cache_key_b = {}
-        for perfStat in allCommitMetrics:
-            currentCacheKey = (perfStat.test_env, perfStat.test, \
-                               perfStat.metric, perfStat.way)
-            currentValues = values_by_cache_key_b.setdefault(currentCacheKey, [])
-            currentValues.append(float(perfStat.value))
-
-        # Calculate and baseline (average of values) by cacheKeyB.
-        baseline_by_cache_key_b = {}
-        for currentCacheKey, currentValues in values_by_cache_key_b.items():
-            baseline_by_cache_key_b[currentCacheKey] = \
-                    sum(currentValues) / len(currentValues)
-
-        # Save baselines to the cache.
-        _commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
-        return baseline_by_cache_key_b.get(cacheKeyB)
-
-    # Searches through previous commits trying local then ci for each commit in.
-    def search(useCiNamespace, depth):
-        # Stop if reached the max search depth, or if
-        # there is an expected change at the child commit (depth-1). This is a
-        # subtlety: Metrics recorded on commit x incorporate the expected
-        # changes for commit x. Hence metrics from x are still a valid baseline,
-        # while older commits are not. This is why we check for expected changes
-        # on depth-1 rather than depth.
-        if depth >= BaselineSearchDepth or has_expected_change( \
-                        depth_to_commit(depth - 1)):
-            return None
-
-        # Check for a metric on this commit.
-        current_metric = commit_metric(useCiNamespace, depth_to_commit(depth))
-        if current_metric != None:
-            return current_metric
-
-        # Metric is not available.
-        # If tried local, now try CI. Else move to the parent commit.
-        if not useCiNamespace:
-            return search(True, depth)
-        else:
-            return search(False, depth + 1)
-
-    # Start search from parent commit using local name space.
-    return search(False, 1)
-
-
 # Check test stats. This prints the results for the user.
 # actual: the PerfStat with actual value.
 # expected_val: the expected value (this should generally be derived from get_perf_stats())
diff --git a/testsuite/driver/runtests.py b/testsuite/driver/runtests.py
index 73297dae46..247a5cc330 100644
--- a/testsuite/driver/runtests.py
+++ b/testsuite/driver/runtests.py
@@ -379,37 +379,18 @@ else:
     new_metrics = [metric for (change, metric) in t.metrics if change == MetricChange.NewMetric]
     if any(new_metrics):
         if canGitStatus:
-            reason = 'a baseline (expected value) cannot be recovered from' + \
-                ' previous git commits. This may be due to HEAD having' + \
-                ' new tests or having expected changes, the presence of' + \
-                ' expected changes since the last run of the tests, and/or' + \
-                ' the latest test run being too old.'
-            fix = 'If the tests exist on the previous' + \
-                ' commit (And are configured to run with the same ways),' + \
-                ' then check out that commit and run the tests to generate' + \
-                ' the missing metrics. Alternatively, a baseline may be' + \
-                ' recovered from ci results once fetched (where origin' + \
-                ' is the official ghc git repo):\n\n' + \
-                spacing + 'git fetch ' + \
-                  'https://gitlab.haskell.org/ghc/ghc-performance-notes.git' + \
-                  ' refs/notes/perf:refs/notes/' + Perf.CiNamespace
+            reason = 'the previous git commit doesn\'t have recorded metrics for the following tests.' + \
+                  ' If the tests exist on the previous commit, then check it out and run the tests to generate the missing metrics.'
         else:
-            reason = "this is not a git repo so the previous git commit's" + \
-                     " metrics cannot be loaded from git notes:"
-            fix = ""
+            reason = 'this is not a git repo so the previous git commit\'s metrics cannot be loaded from git notes:'
         print()
-        print(str_warn('Missing Baseline Metrics') + \
-                ' these metrics trivially pass because ' + reason)
-        print(spacing + (' ').join(set([metric.test for metric in new_metrics])))
-        if fix != "":
-            print()
-            print(fix)
+        print(str_warn('New Metrics') + ' these metrics trivially pass because ' + reason)
+        print(spacing + ('\n' + spacing).join(set([metric.test for metric in new_metrics])))
 
     # Inform of how to accept metric changes.
     if (len(t.unexpected_stat_failures) > 0):
         print()
-        print(str_info("Some stats have changed") + " If this is expected, " + \
-            "allow changes by appending the git commit message with this:")
+        print(str_info("Some stats have changed") + " If this is expected, allow changes by appending the git commit message with this:")
         print('-' * 25)
         print(Perf.allow_changes_string(t.metrics))
         print('-' * 25)
@@ -425,9 +406,8 @@ else:
     elif canGitStatus and any(stats):
         if is_worktree_dirty():
             print()
-            print(str_warn('Performance Metrics NOT Saved') + \
-                ' working tree is dirty. Commit changes or use ' + \
-                '--metrics-file to save metrics to a file.')
+            print(str_warn('Working Tree is Dirty') + ' performance metrics will not be saved.' + \
+                                    ' Commit changes or use --metrics-file to save metrics to a file.')
         else:
             Perf.append_perf_stat(stats)
 
diff --git a/testsuite/driver/testglobals.py b/testsuite/driver/testglobals.py
index 423925e87e..0e0240db8e 100644
--- a/testsuite/driver/testglobals.py
+++ b/testsuite/driver/testglobals.py
@@ -235,17 +235,11 @@ class TestOptions:
        # extra files to copy to the testdir
        self.extra_files = []
 
-       # Map from metric to (fuction from way to baseline value, allowed percentage deviation) e.g.
-       #     { 'bytes allocated': (
-       #              lambda way: if way1: return None ... elif way2:return 9300000000 ...,
-       #              10) }
-       # This means no baseline is available for way1. For way 2, allow a 10%
-       # deviation from 9300000000.
+       # Map from metric to expectected value and allowed percentage deviation. e.g.
+       #     { 'bytes allocated': (9300000000, 10) }
+       # To allow a 10% deviation from 9300000000 for the 'bytes allocated' metric.
        self.stats_range_fields = {}
 
-       # Is the test testing performance?
-       self.is_stats_test = False
-
        # Does this test the compiler's performance as opposed to the generated code.
        self.is_compiler_stats_test = False
 
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index b637b1992d..040e674312 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -65,7 +65,7 @@ def isCompilerStatsTest():
 
 def isStatsTest():
     opts = getTestOpts()
-    return opts.is_stats_test
+    return bool(opts.stats_range_fields)
 
 
 # This can be called at the top of a file of tests, to set default test options
@@ -348,18 +348,29 @@ def testing_metrics():
 # measures the performance numbers of the compiler.
 # As this is a fairly rare case in the testsuite, it defaults to false to
 # indicate that it is a 'normal' performance test.
-def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False):
+def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
     if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
         failBecause('This test has an invalid name.')
 
-    # Normalize metrics to a list of strings.
-    if isinstance(metrics, str):
-        if metrics == 'all':
-            metrics = testing_metrics()
-        else:
-            metrics = [metrics]
+    tests = Perf.get_perf_stats('HEAD^')
+
+    # Might have multiple metrics being measured for a single test.
+    test = [t for t in tests if t.test == name]
+
+    if tests == [] or test == []:
+        # There are no prior metrics for this test.
+        if isinstance(metric, str):
+            if metric == 'all':
+                for field in testing_metrics():
+                    opts.stats_range_fields[field] = None
+            else:
+                opts.stats_range_fields[metric] = None
+        if isinstance(metric, list):
+            for field in metric:
+                opts.stats_range_fields[field] = None
+
+        return
 
-    opts.is_stats_test = True
     if is_compiler_stats_test:
         opts.is_compiler_stats_test = True
 
@@ -368,11 +379,24 @@ def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False)
     if config.compiler_debugged and is_compiler_stats_test:
         opts.skip = 1
 
-    for metric in metrics:
-        baselineByWay = lambda way, target_commit: Perf.baseline_metric( \
-                              target_commit, name, config.test_env, metric, way)
+    # get the average value of the given metric from test
+    def get_avg_val(metric_2):
+        metric_2_metrics = [float(t.value) for t in test if t.metric == metric_2]
+        return sum(metric_2_metrics) / len(metric_2_metrics)
+
+    # 'all' is a shorthand to test for bytes allocated, peak megabytes allocated, and max bytes used.
+    if isinstance(metric, str):
+        if metric == 'all':
+            for field in testing_metrics():
+                opts.stats_range_fields[field] = (get_avg_val(field), deviation)
+                return
+        else:
+            opts.stats_range_fields[metric] = (get_avg_val(metric), deviation)
+            return
 
-        opts.stats_range_fields[metric] = (baselineByWay, deviation)
+    if isinstance(metric, list):
+        for field in metric:
+            opts.stats_range_fields[field] = (get_avg_val(field), deviation)
 
 # -----
 
@@ -1140,11 +1164,10 @@ def metric_dict(name, way, metric, value):
 # name: name of the test.
 # way: the way.
 # stats_file: the path of the stats_file containing the stats for the test.
-# range_fields: see TestOptions.stats_range_fields
+# range_fields
 # Returns a pass/fail object. Passes if the stats are withing the expected value ranges.
 # This prints the results for the user.
 def check_stats(name, way, stats_file, range_fields):
-    head_commit = Perf.commit_hash('HEAD')
     result = passed()
     if range_fields:
         try:
@@ -1154,7 +1177,7 @@ def check_stats(name, way, stats_file, range_fields):
         stats_file_contents = f.read()
         f.close()
 
-        for (metric, baseline_and_dev) in range_fields.items():
+        for (metric, range_val_dev) in range_fields.items():
             field_match = re.search('\("' + metric + '", "([0-9]+)"\)', stats_file_contents)
             if field_match == None:
                 print('Failed to find metric: ', metric)
@@ -1167,15 +1190,14 @@ def check_stats(name, way, stats_file, range_fields):
                 change = None
 
                 # If this is the first time running the benchmark, then pass.
-                baseline = baseline_and_dev[0](way, head_commit)
-                if baseline == None:
+                if range_val_dev == None:
                     metric_result = passed()
                     change = MetricChange.NewMetric
                 else:
-                    tolerance_dev = baseline_and_dev[1]
+                    (expected_val, tolerance_dev) = range_val_dev
                     (change, metric_result) = Perf.check_stats_change(
                         perf_stat,
-                        baseline,
+                        expected_val,
                         tolerance_dev,
                         config.allowed_perf_changes,
                         config.verbose >= 4)
@@ -1308,13 +1330,8 @@ def simple_run(name, way, prog, extra_run_opts):
 
     my_rts_flags = rts_flags(way)
 
-    # Collect stats if necessary:
-    # isStatsTest and not isCompilerStatsTest():
-    #   assume we are running a ghc compiled program. Collect stats.
-    # isStatsTest and way == 'ghci':
-    #   assume we are running a program via ghci. Collect stats
     stats_file = name + '.stats'
-    if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci'):
+    if isStatsTest() and not isCompilerStatsTest():
         stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
     else:
         stats_args = ''
diff --git a/testsuite/driver/testutil.py b/testsuite/driver/testutil.py
index cc4a4eea0d..6e0c2684d7 100644
--- a/testsuite/driver/testutil.py
+++ b/testsuite/driver/testutil.py
@@ -16,7 +16,7 @@ def strip_quotes(s):
     return s.strip('\'"')
 
 def str_fail(s):
-    return '\033[1m\033[31m' + s + '\033[0m'
+    return '\033[1m\033[43m\033[31m' + s + '\033[0m'
 
 def str_pass(s):
     return '\033[1m\033[32m' + s + '\033[0m'
author	Ben Gamari <ben@smart-cactus.org>	2019-01-31 19:47:53 -0500
committer	Ben Gamari <ben@smart-cactus.org>	2019-01-31 19:47:53 -0500
commit	ebe2d344899e2165182d3c41b0796ec022561790 (patch)
tree	0771fe65262a7bbb7a465f1b9e9bb50b91649f29
parent	21462a3a4edede710582b8038695667d2b9d0159 (diff)
download	haskell-ebe2d344899e2165182d3c41b0796ec022561790.tar.gz