From 8199901c3b434366edb8b87d5b56c65ca624bcca Mon Sep 17 00:00:00 2001 From: Julian Berman Date: Sun, 22 Sep 2013 18:37:45 -0400 Subject: Initial stab at best_match. --- docs/errors.rst | 31 ++++++++ jsonschema/exceptions.py | 33 +++++++- jsonschema/tests/test_exceptions.py | 152 +++++++++++++++++++++++++++++++++++- 3 files changed, 213 insertions(+), 3 deletions(-) diff --git a/docs/errors.rst b/docs/errors.rst index 9f63c25..c3cf096 100644 --- a/docs/errors.rst +++ b/docs/errors.rst @@ -301,3 +301,34 @@ To summarize, each tree contains child trees that can be accessed by indexing the tree to get the corresponding child tree for a given index into the instance. Each tree and child has a :attr:`~ErrorTree.errors` attribute, a dict, that maps the failed validator to the corresponding validation error. + + +best_match +---------- + +The :func:`best_match` function is a simple but useful function for attempting +to guess the most relevant error in a given bunch. + +.. autofunction:: best_match + + Try to find an error that appears to be the best match among given errors. + + In general, errors that are higher up in the instance (i.e. for which + :attr:`ValidationError.path` is shorter) are considered better matches, + since they indicate "more" is wrong with the instance. + + If the resulting match is either :validator:`oneOf` or :validator:`anyOf`, + the *opposite* assumption is made -- i.e. the deepest error is picked, + since these validators only need to match once, and any other errors may + not be relevant. + + :argument iterable errors: the errors to select from. Do not provide a + mixture of errors from different validation attempts (i.e. from + different instances or schemas), since it won't produce sensible + output. + :returns: the best matching error, or ``None`` if the iterable was empty + + .. note:: + + This function is a heuristic. Its return value may change for a given + set of inputs from version to version if better heuristics are added. 
diff --git a/jsonschema/exceptions.py b/jsonschema/exceptions.py index fe592e2..5c52f41 100644 --- a/jsonschema/exceptions.py +++ b/jsonschema/exceptions.py @@ -1,4 +1,5 @@ import collections +import itertools import pprint import textwrap @@ -24,6 +25,11 @@ class _Error(Exception): self.instance = instance self.schema = schema + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self._contents() == other._contents() + def __lt__(self, other): if not isinstance(other, self.__class__): # On Py2 Python will "helpfully" make this succeed. So be more @@ -36,7 +42,14 @@ class _Error(Exception): ) raise TypeError(message) return NotImplemented - return self.path < other.path + + is_deeper = len(self.path) > len(other.path) + is_weak_matcher = self.validator in ("anyOf", "oneOf") + other_is_weak_matcher = other.validator in ("anyOf", "oneOf") + return is_deeper or is_weak_matcher > other_is_weak_matcher + + def __ne__(self, other): + return not self == other def __repr__(self): return "<%s: %r>" % (self.__class__.__name__, self.message) @@ -93,6 +106,14 @@ class _Error(Exception): if getattr(self, k) is _unset: setattr(self, k, v) + def _contents(self): + return dict( + (attr, getattr(self, attr)) for attr in ( + "message", "cause", "context", "path", "schema_path", + "validator", "validator_value", "instance", "schema" + ) + ) + class ValidationError(_Error): pass @@ -146,3 +167,13 @@ class FormatError(Exception): if PY3: __str__ = __unicode__ + + +def best_match(errors): + first = next(iter(errors), None) + if first is None: + return + best = max(itertools.chain([first], errors)) + while best.context: + best = min(best.context) + return best diff --git a/jsonschema/tests/test_exceptions.py b/jsonschema/tests/test_exceptions.py index b7ecaae..92976b3 100644 --- a/jsonschema/tests/test_exceptions.py +++ b/jsonschema/tests/test_exceptions.py @@ -17,7 +17,7 @@ class TestValidationErrorSorting(unittest.TestCase): errors = 
sorted(validator.iter_errors({"foo" : {"bar" : []}})) self.assertEqual( [list(error.path) for error in errors], - [["foo"], ["foo", "bar"]], + [["foo", "bar"], ["foo"]], ) def test_global_errors_are_even_better_matches(self): @@ -30,7 +30,25 @@ class TestValidationErrorSorting(unittest.TestCase): errors = sorted(validator.iter_errors({"foo" : {"bar" : []}})) self.assertEqual( [list(error.path) for error in errors], - [[], ["foo"]], + [["foo"], []], + ) + + def test_oneOf_and_anyOf_are_weak_matches(self): + """ + A property you *must* match is probably better than one you have to + match a part of. + + """ + + validator = Draft4Validator( + { + "minProperties" : 2, + "oneOf" : [{"type" : "string"}, {"type" : "number"}], + } + ) + errors = sorted(validator.iter_errors({})) + self.assertEqual( + [error.validator for error in errors], ["oneOf", "minProperties"], ) def test_cannot_sort_errors_of_mixed_types(self): @@ -38,3 +56,133 @@ class TestValidationErrorSorting(unittest.TestCase): v = exceptions.ValidationError("Oh", instance=3) s = exceptions.SchemaError("No!", instance=3) v < s + + +class TestBestMatch(unittest.TestCase): + def test_for_errors_without_context_it_returns_the_max(self): + """ + The ``max`` will be the error which is most "shallow" in the instance. + + """ + + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "minProperties" : 2, + "properties" : {"bar" : {"type" : "object"}}, + }, + }, + } + ) + errors = sorted(validator.iter_errors({"foo" : {"bar" : []}})) + self.assertIs(exceptions.best_match(errors), errors[-1]) + + def test_context_for_anyOf(self): + """ + For the anyOf validator, we use the min, to assume the least. + + Other errors are not necessarily relevant, since only one needs to + match. 
+ + """ + + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "anyOf" : [ + {"type" : "string"}, + {"properties" : {"bar" : {"type" : "array"}}}, + ], + }, + }, + }, + ) + errors = validator.iter_errors({"foo" : {"bar" : 12}}) + best = exceptions.best_match(errors) + self.assertEqual(best.validator_value, "array") + + def test_context_for_oneOf(self): + """ + For the oneOf validator, we use the min, to assume the least. + + Other errors are not necessarily relevant, since only one needs to + match. + + """ + + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "oneOf" : [ + {"type" : "string"}, + {"properties" : {"bar" : {"type" : "array"}}}, + ], + }, + }, + }, + ) + errors = validator.iter_errors({"foo" : {"bar" : 12}}) + best = exceptions.best_match(errors) + self.assertEqual(best.validator_value, "array") + + def test_context_for_allOf(self): + """ + allOf just yields all the errors globally, so each should be considered + + """ + + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "allOf" : [ + {"type" : "string"}, + {"properties" : {"bar" : {"type" : "array"}}}, + ], + }, + }, + }, + ) + errors = validator.iter_errors({"foo" : {"bar" : 12}}) + best = exceptions.best_match(errors) + self.assertEqual(best.validator_value, "string") + + def test_nested_context_for_oneOf(self): + validator = Draft4Validator( + { + "properties" : { + "foo" : { + "oneOf" : [ + {"type" : "string"}, + { + "oneOf" : [ + {"type" : "string"}, + { + "properties" : { + "bar" : {"type" : "array"} + }, + }, + ], + }, + ], + }, + }, + }, + ) + errors = validator.iter_errors({"foo" : {"bar" : 12}}) + best = exceptions.best_match(errors) + self.assertEqual(best.validator_value, "array") + + def test_one_error(self): + validator = Draft4Validator({"minProperties" : 2}) + error, = validator.iter_errors({}) + self.assertEqual( + exceptions.best_match(validator.iter_errors({})), error, + ) + + def test_no_errors(self): + validator = 
Draft4Validator({}) + self.assertIsNone(exceptions.best_match(validator.iter_errors({}))) -- cgit v1.2.1