summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Julian Berman <Julian@GrayVines.com> 2013-10-28 08:12:03 -0400
committer: Julian Berman <Julian@GrayVines.com> 2013-10-28 08:12:03 -0400
commit: bab6e745c2cafa77c9b585875d75e5634a810168 (patch)
tree: 22879304455b674e4cb638ce738cebe1ade538aa
parent: a73d1efe56c922661afc6c451e1ce0284ba23260 (diff)
parent: 4f171aa18393682f2a0a3a6a178717eda021dd2c (diff)
download: jsonschema-bab6e745c2cafa77c9b585875d75e5634a810168.tar.gz
Merge branch 'best-match'
* best-match: And docs for the arguments. And add by_relevance docs. Remove __eq__, since it causes hashability issues on Py3 that I don't want to deal with at the moment. Update best_match docs. Different strategy that's a lot more robust. Use ._contents in create_from Initial stab at best_match. Sort errors based on their paths.
-rw-r--r-- docs/errors.rst 88
-rw-r--r-- jsonschema/exceptions.py 59
-rw-r--r-- jsonschema/tests/test_exceptions.py 210
3 files changed, 336 insertions, 21 deletions
diff --git a/docs/errors.rst b/docs/errors.rst
index 9f63c25..7fbd2f0 100644
--- a/docs/errors.rst
+++ b/docs/errors.rst
@@ -2,7 +2,7 @@
Handling Validation Errors
==========================
-.. currentmodule:: jsonschema
+.. currentmodule:: jsonschema.exceptions
When an invalid instance is encountered, a :exc:`ValidationError` will be
raised or returned, depending on which method or function is used.
@@ -194,7 +194,7 @@ If you want to programmatically be able to query which properties or validators
failed when validating a given instance, you probably will want to do so using
:class:`ErrorTree` objects.
-.. autoclass:: ErrorTree
+.. autoclass:: jsonschema.validators.ErrorTree
:members:
:special-members:
:exclude-members: __dict__,__weakref__
@@ -301,3 +301,87 @@ To summarize, each tree contains child trees that can be accessed by indexing
the tree to get the corresponding child tree for a given index into the
instance. Each tree and child has a :attr:`~ErrorTree.errors` attribute, a
dict, that maps the failed validator to the corresponding validation error.
+
+
+best_match and by_relevance
+---------------------------
+
+The :func:`best_match` function is a simple but useful function for attempting
+to guess the most relevant error in a given bunch.
+
+.. autofunction:: best_match
+
+ Try to find an error that appears to be the best match among given errors.
+
+ In general, errors that are higher up in the instance (i.e. for which
+ :attr:`ValidationError.path` is shorter) are considered better matches,
+ since they indicate "more" is wrong with the instance.
+
+.. doctest::
+
+ >>> from jsonschema import Draft4Validator
+ >>> from jsonschema.exceptions import best_match
+
+ >>> schema = {
+ ... "type": "array",
+ ... "minItems": 3,
+ ... }
+ >>> print(best_match(Draft4Validator(schema).iter_errors(11)).message)
+ 11 is not of type 'array'
+
+ If the resulting match is either :validator:`oneOf` or :validator:`anyOf`,
+ the *opposite* assumption is made -- i.e. the deepest error is picked,
+ since these validators only need to match once, and any other errors may
+ not be relevant.
+
+ :argument iterable errors: the errors to select from. Do not provide a
+ mixture of errors from different validation attempts (i.e. from
+ different instances or schemas), since it won't produce sensible
+ output.
+ :argument callable key: the key to use when sorting errors. See
+ :func:`by_relevance` for more details (the default is to sort with the
+ defaults of that function).
+ :returns: the best matching error, or ``None`` if the iterable was empty
+
+ .. note::
+
+ This function is a heuristic. Its return value may change for a given
+ set of inputs from version to version if better heuristics are added.
+
+
+.. autofunction:: by_relevance
+
+ Create a key function that can be used to sort errors by relevance.
+
+ If you want to sort a bunch of errors entirely, you can use this function
+ to do so. Using the return value of this function as a key to e.g.
+ :func:`sorted` or :func:`max` will cause more relevant errors to be
+ considered greater than less relevant ones.
+
+.. doctest::
+
+ >>> schema = {
+ ... "properties": {
+ ... "name": {"type": "string"},
+ ... "phones": {
+ ... "properties": {
+ ... "home": {"type": "string"}
+ ... },
+ ... },
+ ... },
+ ... }
+ >>> instance = {"name": 123, "phones": {"home": [123]}}
+ >>> errors = Draft4Validator(schema).iter_errors(instance)
+ >>> [
+ ... e.path[-1]
+ ... for e in sorted(errors, key=exceptions.by_relevance())
+ ... ]
+ ['home', 'name']
+
+ :argument set weak: a collection of validators to consider to be "weak". If
+ there are two errors at the same level of the instance and one is in
+ the set of weak validators, the other error will take priority. By
+ default, :validator:`anyOf` and :validator:`oneOf` are considered weak
+ validators and will be superseded by other same-level validation
+ errors.
+ :argument set strong: a collection of validators to consider to be "strong".
diff --git a/jsonschema/exceptions.py b/jsonschema/exceptions.py
index e94907d..26fafbe 100644
--- a/jsonschema/exceptions.py
+++ b/jsonschema/exceptions.py
@@ -1,4 +1,5 @@
import collections
+import itertools
import pprint
import textwrap
@@ -6,6 +7,9 @@ from jsonschema import _utils
from jsonschema.compat import PY3, iteritems
+WEAK_MATCHES = frozenset(["anyOf", "oneOf"])
+STRONG_MATCHES = frozenset()
+
_unset = _utils.Unset()
@@ -24,25 +28,6 @@ class _Error(Exception):
self.instance = instance
self.schema = schema
- @classmethod
- def create_from(cls, other):
- return cls(
- message=other.message,
- cause=other.cause,
- context=other.context,
- path=other.path,
- schema_path=other.schema_path,
- validator=other.validator,
- validator_value=other.validator_value,
- instance=other.instance,
- schema=other.schema,
- )
-
- def _set(self, **kwargs):
- for k, v in iteritems(kwargs):
- if getattr(self, k) is _unset:
- setattr(self, k, v)
-
def __repr__(self):
return "<%s: %r>" % (self.__class__.__name__, self.message)
@@ -79,6 +64,23 @@ class _Error(Exception):
if PY3:
__str__ = __unicode__
+ @classmethod
+ def create_from(cls, other):
+ return cls(**other._contents())
+
+ def _set(self, **kwargs):
+ for k, v in iteritems(kwargs):
+ if getattr(self, k) is _unset:
+ setattr(self, k, v)
+
+ def _contents(self):
+ return dict(
+ (attr, getattr(self, attr)) for attr in (
+ "message", "cause", "context", "path", "schema_path",
+ "validator", "validator_value", "instance", "schema"
+ )
+ )
+
class ValidationError(_Error):
pass
@@ -132,3 +134,22 @@ class FormatError(Exception):
if PY3:
__str__ = __unicode__
+
+
+def by_relevance(weak=WEAK_MATCHES, strong=STRONG_MATCHES):
+ def relevance(error):
+ validator = error.validator
+ return -len(error.path), validator not in weak, validator in strong
+ return relevance
+
+
+def best_match(errors, key=by_relevance()):
+ errors = iter(errors)
+ best = next(errors, None)
+ if best is None:
+ return
+ best = max(itertools.chain([best], errors), key=key)
+
+ while best.context:
+ best = min(best.context, key=key)
+ return best
diff --git a/jsonschema/tests/test_exceptions.py b/jsonschema/tests/test_exceptions.py
new file mode 100644
index 0000000..2014a64
--- /dev/null
+++ b/jsonschema/tests/test_exceptions.py
@@ -0,0 +1,210 @@
+from jsonschema import Draft4Validator, exceptions
+from jsonschema.tests.compat import unittest
+
+
+class TestBestMatch(unittest.TestCase):
+ def best_match(self, errors):
+ errors = list(errors)
+ best = exceptions.best_match(errors)
+ reversed_best = exceptions.best_match(reversed(errors))
+ self.assertEqual(
+ best,
+ reversed_best,
+ msg="Didn't return a consistent best match!\n"
+ "Got: {0}\n\nThen: {1}".format(best, reversed_best),
+ )
+ return best
+
+ def test_shallower_errors_are_better_matches(self):
+ validator = Draft4Validator(
+ {
+ "properties" : {
+ "foo" : {
+ "minProperties" : 2,
+ "properties" : {"bar" : {"type" : "object"}},
+ }
+ }
+ }
+ )
+ best = self.best_match(validator.iter_errors({"foo" : {"bar" : []}}))
+ self.assertEqual(best.validator, "minProperties")
+
+ def test_oneOf_and_anyOf_are_weak_matches(self):
+ """
+ A property you *must* match is probably better than one you have to
+ match a part of.
+
+ """
+
+ validator = Draft4Validator(
+ {
+ "minProperties" : 2,
+ "anyOf" : [{"type" : "string"}, {"type" : "number"}],
+ "oneOf" : [{"type" : "string"}, {"type" : "number"}],
+ }
+ )
+ best = self.best_match(validator.iter_errors({}))
+ self.assertEqual(best.validator, "minProperties")
+
+ def test_if_the_most_relevant_error_is_anyOf_it_is_traversed(self):
+ """
+ If the most relevant error is an anyOf, then we traverse its context
+ and select the otherwise *least* relevant error, since in this case
+ that means the most specific, deep, error inside the instance.
+
+ I.e. since only one of the schemas must match, we look for the most
+ relevant one.
+
+ """
+
+ validator = Draft4Validator(
+ {
+ "properties" : {
+ "foo" : {
+ "anyOf" : [
+ {"type" : "string"},
+ {"properties" : {"bar" : {"type" : "array"}}},
+ ],
+ },
+ },
+ },
+ )
+ best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+ self.assertEqual(best.validator_value, "array")
+
+ def test_if_the_most_relevant_error_is_oneOf_it_is_traversed(self):
+ """
+ If the most relevant error is an oneOf, then we traverse its context
+ and select the otherwise *least* relevant error, since in this case
+ that means the most specific, deep, error inside the instance.
+
+ I.e. since only one of the schemas must match, we look for the most
+ relevant one.
+
+ """
+
+ validator = Draft4Validator(
+ {
+ "properties" : {
+ "foo" : {
+ "oneOf" : [
+ {"type" : "string"},
+ {"properties" : {"bar" : {"type" : "array"}}},
+ ],
+ },
+ },
+ },
+ )
+ best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+ self.assertEqual(best.validator_value, "array")
+
+ def test_if_the_most_relevant_error_is_allOf_it_is_traversed(self):
+ """
+ Now, if the error is allOf, we traverse but select the *most* relevant
+ error from the context, because all schemas here must match anyways.
+
+ """
+
+ validator = Draft4Validator(
+ {
+ "properties" : {
+ "foo" : {
+ "allOf" : [
+ {"type" : "string"},
+ {"properties" : {"bar" : {"type" : "array"}}},
+ ],
+ },
+ },
+ },
+ )
+ best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+ self.assertEqual(best.validator_value, "string")
+
+ def test_nested_context_for_oneOf(self):
+ validator = Draft4Validator(
+ {
+ "properties" : {
+ "foo" : {
+ "oneOf" : [
+ {"type" : "string"},
+ {
+ "oneOf" : [
+ {"type" : "string"},
+ {
+ "properties" : {
+ "bar" : {"type" : "array"}
+ },
+ },
+ ],
+ },
+ ],
+ },
+ },
+ },
+ )
+ best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}}))
+ self.assertEqual(best.validator_value, "array")
+
+ def test_one_error(self):
+ validator = Draft4Validator({"minProperties" : 2})
+ error, = validator.iter_errors({})
+ self.assertEqual(
+ exceptions.best_match(validator.iter_errors({})).validator,
+ "minProperties",
+ )
+
+ def test_no_errors(self):
+ validator = Draft4Validator({})
+ self.assertIsNone(exceptions.best_match(validator.iter_errors({})))
+
+
+class TestByRelevance(unittest.TestCase):
+ def test_short_paths_are_better_matches(self):
+ shallow = exceptions.ValidationError("Oh no!", path=["baz"])
+ deep = exceptions.ValidationError("Oh yes!", path=["foo", "bar"])
+ match = max([shallow, deep], key=exceptions.by_relevance())
+ self.assertIs(match, shallow)
+
+ match = max([deep, shallow], key=exceptions.by_relevance())
+ self.assertIs(match, shallow)
+
+ def test_global_errors_are_even_better_matches(self):
+ shallow = exceptions.ValidationError("Oh no!", path=[])
+ deep = exceptions.ValidationError("Oh yes!", path=["foo"])
+
+ errors = sorted([shallow, deep], key=exceptions.by_relevance())
+ self.assertEqual(
+ [list(error.path) for error in errors],
+ [["foo"], []],
+ )
+
+ errors = sorted([deep, shallow], key=exceptions.by_relevance())
+ self.assertEqual(
+ [list(error.path) for error in errors],
+ [["foo"], []],
+ )
+
+ def test_weak_validators_are_lower_priority(self):
+ weak = exceptions.ValidationError("Oh no!", path=[], validator="a")
+ normal = exceptions.ValidationError("Oh yes!", path=[], validator="b")
+
+ best_match = exceptions.by_relevance(weak="a")
+
+ match = max([weak, normal], key=best_match)
+ self.assertIs(match, normal)
+
+ match = max([normal, weak], key=best_match)
+ self.assertIs(match, normal)
+
+ def test_strong_validators_are_higher_priority(self):
+ weak = exceptions.ValidationError("Oh no!", path=[], validator="a")
+ normal = exceptions.ValidationError("Oh yes!", path=[], validator="b")
+ strong = exceptions.ValidationError("Oh fine!", path=[], validator="c")
+
+ best_match = exceptions.by_relevance(weak="a", strong="c")
+
+ match = max([weak, normal, strong], key=best_match)
+ self.assertIs(match, strong)
+
+ match = max([strong, normal, weak], key=best_match)
+ self.assertIs(match, strong)