diff options
author | Julian Berman <Julian@GrayVines.com> | 2013-10-28 08:12:03 -0400 |
---|---|---|
committer | Julian Berman <Julian@GrayVines.com> | 2013-10-28 08:12:03 -0400 |
commit | bab6e745c2cafa77c9b585875d75e5634a810168 (patch) | |
tree | 22879304455b674e4cb638ce738cebe1ade538aa | |
parent | a73d1efe56c922661afc6c451e1ce0284ba23260 (diff) | |
parent | 4f171aa18393682f2a0a3a6a178717eda021dd2c (diff) | |
download | jsonschema-bab6e745c2cafa77c9b585875d75e5634a810168.tar.gz |
Merge branch 'best-match'
* best-match:
And docs for the arguments.
And add by_relevance docs.
Remove __eq__, since it causes hashability issues on Py3 that I don't want to deal with at the moment.
Update best_match docs.
Different strategy that's a lot more robust.
Use ._contents in create_from
Initial stab at best_match.
Sort errors based on their paths.
-rw-r--r-- | docs/errors.rst | 88 | ||||
-rw-r--r-- | jsonschema/exceptions.py | 59 | ||||
-rw-r--r-- | jsonschema/tests/test_exceptions.py | 210 |
3 files changed, 336 insertions, 21 deletions
diff --git a/docs/errors.rst b/docs/errors.rst index 9f63c25..7fbd2f0 100644 --- a/docs/errors.rst +++ b/docs/errors.rst @@ -2,7 +2,7 @@ Handling Validation Errors ========================== -.. currentmodule:: jsonschema +.. currentmodule:: jsonschema.exceptions When an invalid instance is encountered, a :exc:`ValidationError` will be raised or returned, depending on which method or function is used. @@ -194,7 +194,7 @@ If you want to programmatically be able to query which properties or validators failed when validating a given instance, you probably will want to do so using :class:`ErrorTree` objects. -.. autoclass:: ErrorTree +.. autoclass:: jsonschema.validators.ErrorTree :members: :special-members: :exclude-members: __dict__,__weakref__ @@ -301,3 +301,87 @@ To summarize, each tree contains child trees that can be accessed by indexing the tree to get the corresponding child tree for a given index into the instance. Each tree and child has a :attr:`~ErrorTree.errors` attribute, a dict, that maps the failed validator to the corresponding validation error. + + +best_match and by_relevance +--------------------------- + +The :func:`best_match` function is a simple but useful function for attempting +to guess the most relevant error in a given bunch. + +.. autofunction:: best_match + + Try to find an error that appears to be the best match among given errors. + + In general, errors that are higher up in the instance (i.e. for which + :attr:`ValidationError.path` is shorter) are considered better matches, + since they indicate "more" is wrong with the instance. + +.. doctest:: + + >>> from jsonschema import Draft4Validator + >>> from jsonschema.exceptions import best_match + + >>> schema = { + ... "type": "array", + ... "minItems": 3, + ... } + >>> print(best_match(Draft4Validator(schema).iter_errors(11)).message) + 11 is not of type 'array' + + If the resulting match is either :validator:`oneOf` or :validator:`anyOf`, + the *opposite* assumption is made -- i.e. 
the deepest error is picked, + since these validators only need to match once, and any other errors may + not be relevant. + + :argument iterable errors: the errors to select from. Do not provide a + mixture of errors from different validation attempts (i.e. from + different instances or schemas), since it won't produce sensical + output. + :argument callable key: the key to use when sorting errors. See + :func:`by_relevance` for more details (the default is to sort with the + defaults of that function). + :returns: the best matching error, or ``None`` if the iterable was empty + + .. note:: + + This function is a heuristic. Its return value may change for a given + set of inputs from version to version if better heuristics are added. + + +.. autofunction:: by_relevance + + Create a key function that can be used to sort errors by relevance. + + If you want to sort a bunch of errors entirely, you can use this function + to do so. Using the return value of this function as a key to e.g. + :func:`sorted` or :func:`max` will cause more relevant errors to be + considered greater than less relevant ones. + +.. doctest:: + + >>> schema = { + ... "properties": { + ... "name": {"type": "string"}, + ... "phones": { + ... "properties": { + ... "home": {"type": "string"} + ... }, + ... }, + ... }, + ... } + >>> instance = {"name": 123, "phones": {"home": [123]}} + >>> errors = Draft4Validator(schema).iter_errors(instance) + >>> [ + ... e.path[-1] + ... for e in sorted(errors, key=exceptions.by_relevance()) + ... ] + ['home', 'name'] + + :argument set weak: a collection of validators to consider to be "weak". If + there are two errors at the same level of the instance and one is in + the set of weak validators, the other error will take priority. By + default, :validator:`anyOf` and :validator:`oneOf` are considered weak + validators and will be superseded by other same-level validation + errors. + :argument set strong: a collection of validators to consider to be "strong". 
diff --git a/jsonschema/exceptions.py b/jsonschema/exceptions.py index e94907d..26fafbe 100644 --- a/jsonschema/exceptions.py +++ b/jsonschema/exceptions.py @@ -1,4 +1,5 @@ import collections +import itertools import pprint import textwrap @@ -6,6 +7,9 @@ from jsonschema import _utils from jsonschema.compat import PY3, iteritems +WEAK_MATCHES = frozenset(["anyOf", "oneOf"]) +STRONG_MATCHES = frozenset() + _unset = _utils.Unset() @@ -24,25 +28,6 @@ class _Error(Exception): self.instance = instance self.schema = schema - @classmethod - def create_from(cls, other): - return cls( - message=other.message, - cause=other.cause, - context=other.context, - path=other.path, - schema_path=other.schema_path, - validator=other.validator, - validator_value=other.validator_value, - instance=other.instance, - schema=other.schema, - ) - - def _set(self, **kwargs): - for k, v in iteritems(kwargs): - if getattr(self, k) is _unset: - setattr(self, k, v) - def __repr__(self): return "<%s: %r>" % (self.__class__.__name__, self.message) @@ -79,6 +64,23 @@ class _Error(Exception): if PY3: __str__ = __unicode__ + @classmethod + def create_from(cls, other): + return cls(**other._contents()) + + def _set(self, **kwargs): + for k, v in iteritems(kwargs): + if getattr(self, k) is _unset: + setattr(self, k, v) + + def _contents(self): + return dict( + (attr, getattr(self, attr)) for attr in ( + "message", "cause", "context", "path", "schema_path", + "validator", "validator_value", "instance", "schema" + ) + ) + class ValidationError(_Error): pass @@ -132,3 +134,22 @@ class FormatError(Exception): if PY3: __str__ = __unicode__ + + +def by_relevance(weak=WEAK_MATCHES, strong=STRONG_MATCHES): + def relevance(error): + validator = error.validator + return -len(error.path), validator not in weak, validator in strong + return relevance + + +def best_match(errors, key=by_relevance()): + errors = iter(errors) + best = next(errors, None) + if best is None: + return + best = max(itertools.chain([best], 
errors), key=key) + + while best.context: + best = min(best.context, key=key) + return best diff --git a/jsonschema/tests/test_exceptions.py b/jsonschema/tests/test_exceptions.py new file mode 100644 index 0000000..2014a64 --- /dev/null +++ b/jsonschema/tests/test_exceptions.py @@ -0,0 +1,210 @@ +from jsonschema import Draft4Validator, exceptions +from jsonschema.tests.compat import unittest + + +class TestBestMatch(unittest.TestCase): + def best_match(self, errors): + errors = list(errors) + best = exceptions.best_match(errors) + reversed_best = exceptions.best_match(reversed(errors)) + self.assertEqual( + best, + reversed_best, + msg="Didn't return a consistent best match!\n" + "Got: {0}\n\nThen: {1}".format(best, reversed_best), + ) + return best + + def test_shallower_errors_are_better_matches(self): + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "minProperties" : 2, + "properties" : {"bar" : {"type" : "object"}}, + } + } + } + ) + best = self.best_match(validator.iter_errors({"foo" : {"bar" : []}})) + self.assertEqual(best.validator, "minProperties") + + def test_oneOf_and_anyOf_are_weak_matches(self): + """ + A property you *must* match is probably better than one you have to + match a part of. + + """ + + validator = Draft4Validator( + { + "minProperties" : 2, + "anyOf" : [{"type" : "string"}, {"type" : "number"}], + "oneOf" : [{"type" : "string"}, {"type" : "number"}], + } + ) + best = self.best_match(validator.iter_errors({})) + self.assertEqual(best.validator, "minProperties") + + def test_if_the_most_relevant_error_is_anyOf_it_is_traversed(self): + """ + If the most relevant error is an anyOf, then we traverse its context + and select the otherwise *least* relevant error, since in this case + that means the most specific, deep, error inside the instance. + + I.e. since only one of the schemas must match, we look for the most + relevant one. 
+ + """ + + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "anyOf" : [ + {"type" : "string"}, + {"properties" : {"bar" : {"type" : "array"}}}, + ], + }, + }, + }, + ) + best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}})) + self.assertEqual(best.validator_value, "array") + + def test_if_the_most_relevant_error_is_oneOf_it_is_traversed(self): + """ + If the most relevant error is an oneOf, then we traverse its context + and select the otherwise *least* relevant error, since in this case + that means the most specific, deep, error inside the instance. + + I.e. since only one of the schemas must match, we look for the most + relevant one. + + """ + + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "oneOf" : [ + {"type" : "string"}, + {"properties" : {"bar" : {"type" : "array"}}}, + ], + }, + }, + }, + ) + best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}})) + self.assertEqual(best.validator_value, "array") + + def test_if_the_most_relevant_error_is_allOf_it_is_traversed(self): + """ + Now, if the error is allOf, we traverse but select the *most* relevant + error from the context, because all schemas here must match anyways. 
+ + """ + + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "allOf" : [ + {"type" : "string"}, + {"properties" : {"bar" : {"type" : "array"}}}, + ], + }, + }, + }, + ) + best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}})) + self.assertEqual(best.validator_value, "string") + + def test_nested_context_for_oneOf(self): + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "oneOf" : [ + {"type" : "string"}, + { + "oneOf" : [ + {"type" : "string"}, + { + "properties" : { + "bar" : {"type" : "array"} + }, + }, + ], + }, + ], + }, + }, + }, + ) + best = self.best_match(validator.iter_errors({"foo" : {"bar" : 12}})) + self.assertEqual(best.validator_value, "array") + + def test_one_error(self): + validator = Draft4Validator({"minProperties" : 2}) + error, = validator.iter_errors({}) + self.assertEqual( + exceptions.best_match(validator.iter_errors({})).validator, + "minProperties", + ) + + def test_no_errors(self): + validator = Draft4Validator({}) + self.assertIsNone(exceptions.best_match(validator.iter_errors({}))) + + +class TestByRelevance(unittest.TestCase): + def test_short_paths_are_better_matches(self): + shallow = exceptions.ValidationError("Oh no!", path=["baz"]) + deep = exceptions.ValidationError("Oh yes!", path=["foo", "bar"]) + match = max([shallow, deep], key=exceptions.by_relevance()) + self.assertIs(match, shallow) + + match = max([deep, shallow], key=exceptions.by_relevance()) + self.assertIs(match, shallow) + + def test_global_errors_are_even_better_matches(self): + shallow = exceptions.ValidationError("Oh no!", path=[]) + deep = exceptions.ValidationError("Oh yes!", path=["foo"]) + + errors = sorted([shallow, deep], key=exceptions.by_relevance()) + self.assertEqual( + [list(error.path) for error in errors], + [["foo"], []], + ) + + errors = sorted([deep, shallow], key=exceptions.by_relevance()) + self.assertEqual( + [list(error.path) for error in errors], + [["foo"], []], + ) + + def 
test_weak_validators_are_lower_priority(self): + weak = exceptions.ValidationError("Oh no!", path=[], validator="a") + normal = exceptions.ValidationError("Oh yes!", path=[], validator="b") + + best_match = exceptions.by_relevance(weak="a") + + match = max([weak, normal], key=best_match) + self.assertIs(match, normal) + + match = max([normal, weak], key=best_match) + self.assertIs(match, normal) + + def test_strong_validators_are_higher_priority(self): + weak = exceptions.ValidationError("Oh no!", path=[], validator="a") + normal = exceptions.ValidationError("Oh yes!", path=[], validator="b") + strong = exceptions.ValidationError("Oh fine!", path=[], validator="c") + + best_match = exceptions.by_relevance(weak="a", strong="c") + + match = max([weak, normal, strong], key=best_match) + self.assertIs(match, strong) + + match = max([strong, normal, weak], key=best_match) + self.assertIs(match, strong) |