Merge branch 'best-match'

* best-match: And docs for the arguments. And add by_relevance docs. Remove __eq__, since it causes hashability issues on Py3 that I don't want to deal with at the moment. Update best_match docs. Different strategy that's a lot more robust. Use ._contents in create_from Initial stab at best_match. Sort errors based on their paths.
author: Julian Berman <Julian@GrayVines.com> 2013-10-28 08:12:03 -0400
committer: Julian Berman <Julian@GrayVines.com> 2013-10-28 08:12:03 -0400
commit: bab6e745c2cafa77c9b585875d75e5634a810168 (patch)
tree: 22879304455b674e4cb638ce738cebe1ade538aa
parent: a73d1efe56c922661afc6c451e1ce0284ba23260 (diff)
parent: 4f171aa18393682f2a0a3a6a178717eda021dd2c (diff)
download: jsonschema-bab6e745c2cafa77c9b585875d75e5634a810168.tar.gz
3 files changed, 336 insertions, 21 deletions
diff --git a/docs/errors.rst b/docs/errors.rst
index 9f63c25..7fbd2f0 100644
--- a/docs/errors.rst
+++ b/docs/errors.rst
@@ -2,7 +2,7 @@
 Handling Validation Errors
 ==========================
 
-.. currentmodule:: jsonschema
+.. currentmodule:: jsonschema.exceptions
 
 When an invalid instance is encountered, a :exc:`ValidationError` will be
 raised or returned, depending on which method or function is used.
@@ -194,7 +194,7 @@ If you want to programmatically be able to query which properties or validators
 failed when validating a given instance, you probably will want to do so using
 :class:`ErrorTree` objects.
 
-.. autoclass:: ErrorTree
+.. autoclass:: jsonschema.validators.ErrorTree
     :members:
     :special-members:
     :exclude-members: __dict__,__weakref__
@@ -301,3 +301,87 @@ To summarize, each tree contains child trees that can be accessed by indexing
 the tree to get the corresponding child tree for a given index into the
 instance. Each tree and child has a :attr:`~ErrorTree.errors` attribute, a
 dict, that maps the failed validator to the corresponding validation error.
+
+
+best_match and by_relevance
+---------------------------
+
+The :func:`best_match` function is a simple but useful function for attempting
+to guess the most relevant error in a given bunch.
+
+.. autofunction:: best_match
+
+    Try to find an error that appears to be the best match among given errors.
+
+    In general, errors that are higher up in the instance (i.e. for which
+    :attr:`ValidationError.path` is shorter) are considered better matches,
+    since they indicate "more" is wrong with the instance.
+
+.. doctest::
+
+        >>> from jsonschema import Draft4Validator
+        >>> from jsonschema.exceptions import best_match
+
+        >>> schema = {
+        ...     "type": "array",
+        ...     "minItems": 3,
+        ... }
+        >>> print(best_match(Draft4Validator(schema).iter_errors(11)).message)
+        11 is not of type 'array'
+
+    If the resulting match is either :validator:`oneOf` or :validator:`anyOf`,
+    the *opposite* assumption is made -- i.e. the deepest error is picked,
+    since these validators only need to match once, and any other errors may
+    not be relevant.
+
+    :argument iterable errors: the errors to select from. Do not provide a
+        mixture of errors from different validation attempts (i.e. from
+        different instances or schemas), since it won't produce sensible
+        output.
+    :argument callable key: the key to use when sorting errors. See
+        :func:`by_relevance` for more details (the default is to sort with the
+        defaults of that function).
+    :returns: the best matching error, or ``None`` if the iterable was empty
+
+    .. note::
+
+        This function is a heuristic. Its return value may change for a given
+        set of inputs from version to version if better heuristics are added.
+
+
+.. autofunction:: by_relevance
+
+    Create a key function that can be used to sort errors by relevance.
+
+    If you want to sort a bunch of errors entirely, you can use this function
+    to do so. Using the return value of this function as a key to e.g.
+    :func:`sorted` or :func:`max` will cause more relevant errors to be
+    considered greater than less relevant ones.
+
+.. doctest::
+
+    >>> schema = {
+    ...     "properties": {
+    ...         "name": {"type": "string"},
+    ...         "phones": {
+    ...             "properties": {
+    ...                 "home": {"type": "string"}
+    ...             },
+    ...         },
+    ...     },
+    ... }
+    >>> instance = {"name": 123, "phones": {"home": [123]}}
+    >>> errors = Draft4Validator(schema).iter_errors(instance)
+    >>> [
+    ...     e.path[-1]
+    ...     for e in sorted(errors, key=exceptions.by_relevance())
+    ... ]
+    ['home', 'name']
+
+    :argument set weak: a collection of validators to consider to be "weak". If
+        there are two errors at the same level of the instance and one is in
+        the set of weak validators, the other error will take priority. By
+        default, :validator:`anyOf` and :validator:`oneOf` are considered weak
+        validators and will be superseded by other same-level validation
+        errors.
+    :argument set strong: a collection of validators to consider to be "strong".
diff --git a/jsonschema/exceptions.py b/jsonschema/exceptions.py
index e94907d..26fafbe 100644
--- a/jsonschema/exceptions.py
+++ b/jsonschema/exceptions.py
@@ -1,4 +1,5 @@
 import collections
+import itertools
 import pprint
 import textwrap
 
@@ -6,6 +7,9 @@ from jsonschema import _utils
 from jsonschema.compat import PY3, iteritems
 
 
+WEAK_MATCHES = frozenset(["anyOf", "oneOf"])
+STRONG_MATCHES = frozenset()
+
 _unset = _utils.Unset()
 
 
@@ -24,25 +28,6 @@ class _Error(Exception):
         self.instance = instance
         self.schema = schema
 
-    @classmethod
-    def create_from(cls, other):
-        return cls(
-            message=other.message,
-            cause=other.cause,
-            context=other.context,
-            path=other.path,
-            schema_path=other.schema_path,
-            validator=other.validator,
-            validator_value=other.validator_value,
-            instance=other.instance,
-            schema=other.schema,
-        )
-
-    def _set(self, **kwargs):
-        for k, v in iteritems(kwargs):
-            if getattr(self, k) is _unset:
-                setattr(self, k, v)
-
     def __repr__(self):
         return "<%s: %r>" % (self.__class__.__name__, self.message)
 
@@ -79,6 +64,23 @@ class _Error(Exception):
     if PY3:
         __str__ = __unicode__
 
+    @classmethod
+    def create_from(cls, other):
+        return cls(**other._contents())
+
+    def _set(self, **kwargs):
+        for k, v in iteritems(kwargs):
+            if getattr(self, k) is _unset:
+                setattr(self, k, v)
+
+    def _contents(self):
+        return dict(
+            (attr, getattr(self, attr)) for attr in (
+                "message", "cause", "context", "path", "schema_path",
+                "validator", "validator_value", "instance", "schema"
+            )
+        )
+
 
 class ValidationError(_Error):
     pass
@@ -132,3 +134,22 @@ class FormatError(Exception):
 
     if PY3:
         __str__ = __unicode__
+
+
+def by_relevance(weak=WEAK_MATCHES, strong=STRONG_MATCHES):
+    def relevance(error):
+        validator = error.validator
+        return -len(error.path), validator not in weak, validator in strong
+    return relevance
+
+
+def best_match(errors, key=by_relevance()):
+    errors = iter(errors)
+    best = next(errors, None)
+    if best is None:
+        return
+    best = max(itertools.chain([best], errors), key=key)
+
+    while best.context:
+        best = min(best.context, key=key)
+    return best
diff --git a/jsonschema/tests/test_exceptions.py b/jsonschema/tests/test_exceptions.py
new file mode 100644
index 0000000..2014a64
--- /dev/null
+++ b/jsonschema/tests/test_exceptions.py
@@ -0,0 +1,210 @@
+from jsonschema import Draft4Validator, exceptions
+from jsonschema.tests.compat import unittest
+
+
+class TestBestMatch(unittest.TestCase):
+    def best_match(self, errors):
+        errors = list(errors)
+        best = exceptions.best_match(errors)
+        reversed_best = exceptions.best_match(reversed(errors))
+        self.assertEqual(
+            best,
+            reversed_best,
+            msg="Didn't return a consistent best match!\n"
+                "Got: {0}\n\nThen: {1}".format(best, reversed_best),
+        )
+        return best
+
+    def test_shallower_errors_are_better_matches(self):
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "minProperties" : 2,
+                        "properties" : {"bar" : {"type" : "object"}},
+                    }
+                }
+            }
+        )
+        best = self.best_match(validator.iter_errors({"foo" : {"bar" : []}}))
+        self.assertEqual(best.validator, "minProperties")
+
+    def test_oneOf_and_anyOf_are_weak_matches(self):
+        """
+        A property you *must* match is probably better than one you have to
+        match a part of.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "minProperties" : 2,
+                "anyOf" : [{"type" : "string"}, {"type" : "number"}],
+                "oneOf" : [{"type" : "string"}, {"type" : "number"}],
+            }
+        )
+        best = self.best_match(validator.iter_errors({}))
+        self.assertEqual(best.validator, "minProperties")
+
+    def test_if_the_most_relevant_error_is_anyOf_it_is_traversed(self):
+        """
+        If the most relevant error is an anyOf, then we traverse its context
+        and select the otherwise *least* relevant error, since in this case
+        that means the most specific, deep, error inside the instance.
+
+        I.e. since only one of the schemas must match, we look for the most
+        relevant one.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "anyOf" : [
+                            {"type" : "string"},
+                            {"properties" : {"bar" : {"type" : "array"}}},
+                        ],
+                    },
+                },
+            },
+        )
+        best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+        self.assertEqual(best.validator_value, "array")
+
+    def test_if_the_most_relevant_error_is_oneOf_it_is_traversed(self):
+        """
+        If the most relevant error is an oneOf, then we traverse its context
+        and select the otherwise *least* relevant error, since in this case
+        that means the most specific, deep, error inside the instance.
+
+        I.e. since only one of the schemas must match, we look for the most
+        relevant one.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "oneOf" : [
+                            {"type" : "string"},
+                            {"properties" : {"bar" : {"type" : "array"}}},
+                        ],
+                    },
+                },
+            },
+        )
+        best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+        self.assertEqual(best.validator_value, "array")
+
+    def test_if_the_most_relevant_error_is_allOf_it_is_traversed(self):
+        """
+        Now, if the error is allOf, we traverse but select the *most* relevant
+        error from the context, because all schemas here must match anyways.
+
+        """
+
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "allOf" : [
+                            {"type" : "string"},
+                            {"properties" : {"bar" : {"type" : "array"}}},
+                        ],
+                    },
+                },
+            },
+        )
+        best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+        self.assertEqual(best.validator_value, "string")
+
+    def test_nested_context_for_oneOf(self):
+        validator = Draft4Validator(
+            {
+                "properties" : {
+                    "foo" : {
+                        "oneOf" : [
+                            {"type" : "string"},
+                            {
+                                "oneOf" : [
+                                    {"type" : "string"},
+                                    {
+                                        "properties" : {
+                                            "bar" : {"type" : "array"}
+                                        },
+                                    },
+                                ],
+                            },
+                        ],
+                    },
+                },
+            },
+        )
+        best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+        self.assertEqual(best.validator_value, "array")
+
+    def test_one_error(self):
+        validator = Draft4Validator({"minProperties" : 2})
+        error, = validator.iter_errors({})
+        self.assertEqual(
+            exceptions.best_match(validator.iter_errors({})).validator,
+            "minProperties",
+        )
+
+    def test_no_errors(self):
+        validator = Draft4Validator({})
+        self.assertIsNone(exceptions.best_match(validator.iter_errors({})))
+
+
+class TestByRelevance(unittest.TestCase):
+    def test_short_paths_are_better_matches(self):
+        shallow = exceptions.ValidationError("Oh no!", path=["baz"])
+        deep = exceptions.ValidationError("Oh yes!", path=["foo", "bar"])
+        match = max([shallow, deep], key=exceptions.by_relevance())
+        self.assertIs(match, shallow)
+
+        match = max([deep, shallow], key=exceptions.by_relevance())
+        self.assertIs(match, shallow)
+
+    def test_global_errors_are_even_better_matches(self):
+        shallow = exceptions.ValidationError("Oh no!", path=[])
+        deep = exceptions.ValidationError("Oh yes!", path=["foo"])
+
+        errors = sorted([shallow, deep], key=exceptions.by_relevance())
+        self.assertEqual(
+            [list(error.path) for error in errors],
+            [["foo"], []],
+        )
+
+        errors = sorted([deep, shallow], key=exceptions.by_relevance())
+        self.assertEqual(
+            [list(error.path) for error in errors],
+            [["foo"], []],
+        )
+
+    def test_weak_validators_are_lower_priority(self):
+        weak = exceptions.ValidationError("Oh no!", path=[], validator="a")
+        normal = exceptions.ValidationError("Oh yes!", path=[], validator="b")
+
+        best_match = exceptions.by_relevance(weak="a")
+
+        match = max([weak, normal], key=best_match)
+        self.assertIs(match, normal)
+
+        match = max([normal, weak], key=best_match)
+        self.assertIs(match, normal)
+
+    def test_strong_validators_are_higher_priority(self):
+        weak = exceptions.ValidationError("Oh no!", path=[], validator="a")
+        normal = exceptions.ValidationError("Oh yes!", path=[], validator="b")
+        strong = exceptions.ValidationError("Oh fine!", path=[], validator="c")
+
+        best_match = exceptions.by_relevance(weak="a", strong="c")
+
+        match = max([weak, normal, strong], key=best_match)
+        self.assertIs(match, strong)
+
+        match = max([strong, normal, weak], key=best_match)
+        self.assertIs(match, strong)
author	Julian Berman <Julian@GrayVines.com>	2013-10-28 08:12:03 -0400
committer	Julian Berman <Julian@GrayVines.com>	2013-10-28 08:12:03 -0400
commit	bab6e745c2cafa77c9b585875d75e5634a810168 (patch)
tree	22879304455b674e4cb638ce738cebe1ade538aa
parent	a73d1efe56c922661afc6c451e1ce0284ba23260 (diff)
parent	4f171aa18393682f2a0a3a6a178717eda021dd2c (diff)
download	jsonschema-bab6e745c2cafa77c9b585875d75e5634a810168.tar.gz