From 8199901c3b434366edb8b87d5b56c65ca624bcca Mon Sep 17 00:00:00 2001
From: Julian Berman <Julian@GrayVines.com>
Date: Sun, 22 Sep 2013 18:37:45 -0400
Subject: Initial stab at best_match.

---
 docs/errors.rst                     |  31 ++++++++
 jsonschema/exceptions.py            |  33 +++++++-
 jsonschema/tests/test_exceptions.py | 152 +++++++++++++++++++++++++++++++++++-
 3 files changed, 213 insertions(+), 3 deletions(-)

diff --git a/docs/errors.rst b/docs/errors.rst
index 9f63c25..c3cf096 100644
--- a/docs/errors.rst
+++ b/docs/errors.rst
@@ -301,3 +301,34 @@ To summarize, each tree contains child trees that can be accessed by indexing
 the tree to get the corresponding child tree for a given index into the
 instance. Each tree and child has a :attr:`~ErrorTree.errors` attribute, a
 dict, that maps the failed validator to the corresponding validation error.
+
+
+best_match
+----------
+
+The :func:`best_match` function is a simple but useful heuristic that tries to
+guess the most relevant error among a given collection of errors.
+
+.. autofunction:: best_match
+
+    Try to find an error that appears to be the best match among given errors.
+
+    In general, errors that are higher up in the instance (i.e. for which
+    :attr:`ValidationError.path` is shorter) are considered better matches,
+    since they indicate "more" is wrong with the instance.
+
+    If the resulting match is either :validator:`oneOf` or :validator:`anyOf`,
+    the *opposite* assumption is made -- i.e. the deepest error is picked,
+    since these validators only need to match once, and any other errors may
+    not be relevant.
+
+    :argument iterable errors: the errors to select from. Do not provide a
+        mixture of errors from different validation attempts (i.e. from
+        different instances or schemas), since it won't produce sensible
+        output.
+    :returns: the best matching error, or ``None`` if the iterable was empty
+
+    .. note::
+
+        This function is a heuristic. Its return value may change for a given
+        set of inputs from version to version if better heuristics are added.
diff --git a/jsonschema/exceptions.py b/jsonschema/exceptions.py
index fe592e2..5c52f41 100644
--- a/jsonschema/exceptions.py
+++ b/jsonschema/exceptions.py
@@ -1,4 +1,5 @@
 import collections
+import itertools
 import pprint
 import textwrap
 
@@ -24,6 +25,11 @@ class _Error(Exception):
         self.instance = instance
         self.schema = schema
 
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return NotImplemented
+        return self._contents() == other._contents()
+
     def __lt__(self, other):
         if not isinstance(other, self.__class__):
             # On Py2 Python will "helpfully" make this succeed. So be more
@@ -36,7 +42,14 @@ class _Error(Exception):
                 )
                 raise TypeError(message)
             return NotImplemented
-        return self.path < other.path
+
+        is_deeper = len(self.path) > len(other.path)
+        is_weak_matcher = self.validator in ("anyOf", "oneOf")
+        other_is_weak_matcher = other.validator in ("anyOf", "oneOf")
+        return is_deeper or is_weak_matcher > other_is_weak_matcher
+
+    def __ne__(self, other):
+        return not self == other
 
     def __repr__(self):
         return "<%s: %r>" % (self.__class__.__name__, self.message)
@@ -93,6 +106,14 @@ class _Error(Exception):
             if getattr(self, k) is _unset:
                 setattr(self, k, v)
 
+    def _contents(self):
+        return dict(
+            (attr, getattr(self, attr)) for attr in (
+                "message", "cause", "context", "path", "schema_path",
+                "validator", "validator_value", "instance", "schema"
+            )
+        )
+
 
 class ValidationError(_Error):
     pass
@@ -146,3 +167,13 @@ class FormatError(Exception):
 
     if PY3:
         __str__ = __unicode__
+
+
+def best_match(errors):
+    first = next(iter(errors), None)
+    if first is None:
+        return
+    best = max(itertools.chain([first], errors))
+    while best.context:
+        best = min(best.context)
+    return best
diff --git a/jsonschema/tests/test_exceptions.py b/jsonschema/tests/test_exceptions.py
index b7ecaae..92976b3 100644
--- a/jsonschema/tests/test_exceptions.py
+++ b/jsonschema/tests/test_exceptions.py
@@ -17,7 +17,7 @@ class TestValidationErrorSorting(unittest.TestCase):
         errors = sorted(validator.iter_errors({"foo" : {"bar" : []}}))
         self.assertEqual(
             [list(error.path) for error in errors],
-            [["foo"], ["foo", "bar"]],
+            [["foo", "bar"], ["foo"]],
         )
 
     def test_global_errors_are_even_better_matches(self):
@@ -30,7 +30,25 @@ class TestValidationErrorSorting(unittest.TestCase):
         errors = sorted(validator.iter_errors({"foo" : {"bar" : []}}))
         self.assertEqual(
             [list(error.path) for error in errors],
-            [[], ["foo"]],
+            [["foo"], []],
+        )
+
+    def test_oneOf_and_anyOf_are_weak_matches(self):
+        """
+        A property you *must* match is probably better than one you have to
+        match a part of.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "minProperties" : 2,
+                "oneOf" : [{"type" : "string"}, {"type" : "number"}],
+            }
+        )
+        errors = sorted(validator.iter_errors({}))
+        self.assertEqual(
+            [error.validator for error in errors], ["oneOf", "minProperties"],
         )
 
     def test_cannot_sort_errors_of_mixed_types(self):
@@ -38,3 +56,133 @@ class TestValidationErrorSorting(unittest.TestCase):
             v = exceptions.ValidationError("Oh", instance=3)
             s = exceptions.SchemaError("No!", instance=3)
             v < s
+
+
+class TestBestMatch(unittest.TestCase):
+    def test_for_errors_without_context_it_returns_the_max(self):
+        """
+        The ``max`` will be the error which is most "shallow" in the instance.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "minProperties" : 2,
+                        "properties" : {"bar" : {"type" : "object"}},
+                    },
+                },
+            }
+        )
+        errors = sorted(validator.iter_errors({"foo" : {"bar" : []}}))
+        self.assertIs(exceptions.best_match(errors), errors[-1])
+
+    def test_context_for_anyOf(self):
+        """
+        For the anyOf validator, we use the min, to assume the least.
+
+        Other errors are not necessarily relevant, since only one needs to
+        match.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "anyOf" : [
+                            {"type" : "string"},
+                            {"properties" : {"bar" : {"type" : "array"}}},
+                        ],
+                    },
+                },
+            },
+        )
+        errors = validator.iter_errors({"foo" : {"bar" : 12}})
+        best = exceptions.best_match(errors)
+        self.assertEqual(best.validator_value, "array")
+
+    def test_context_for_oneOf(self):
+        """
+        For the oneOf validator, we use the min, to assume the least.
+
+        Other errors are not necessarily relevant, since only one needs to
+        match.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "oneOf" : [
+                            {"type" : "string"},
+                            {"properties" : {"bar" : {"type" : "array"}}},
+                        ],
+                    },
+                },
+            },
+        )
+        errors = validator.iter_errors({"foo" : {"bar" : 12}})
+        best = exceptions.best_match(errors)
+        self.assertEqual(best.validator_value, "array")
+
+    def test_context_for_allOf(self):
+        """
+        allOf just yields all the errors globally, so each should be considered
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "allOf" : [
+                            {"type" : "string"},
+                            {"properties" : {"bar" : {"type" : "array"}}},
+                        ],
+                    },
+                },
+            },
+        )
+        errors = validator.iter_errors({"foo" : {"bar" : 12}})
+        best = exceptions.best_match(errors)
+        self.assertEqual(best.validator_value, "string")
+
+    def test_nested_context_for_oneOf(self):
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "oneOf" : [
+                            {"type" : "string"},
+                            {
+                                "oneOf" : [
+                                    {"type" : "string"},
+                                    {
+                                        "properties" : {
+                                            "bar" : {"type" : "array"}
+                                        },
+                                    },
+                                ],
+                            },
+                        ],
+                    },
+                },
+            },
+        )
+        errors = validator.iter_errors({"foo" : {"bar" : 12}})
+        best = exceptions.best_match(errors)
+        self.assertEqual(best.validator_value, "array")
+
+    def test_one_error(self):
+        validator = Draft4Validator({"minProperties" : 2})
+        error, = validator.iter_errors({})
+        self.assertEqual(
+            exceptions.best_match(validator.iter_errors({})), error,
+        )
+
+    def test_no_errors(self):
+        validator = Draft4Validator({})
+        self.assertIsNone(exceptions.best_match(validator.iter_errors({})))
-- 
cgit v1.2.1