summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth M Morton <seth.m.morton@gmail.com>2018-09-02 13:00:12 -0700
committerSeth M Morton <seth.m.morton@gmail.com>2018-09-02 13:01:21 -0700
commitd28979bc0fa16764a8bfd13aa72f94dfb584f77c (patch)
treecb9a83793e66bfd4194af9e7895ebbf7be155efa
parent1b0cd880ca7ebe40a49c182b277ef4cc8de1ec6b (diff)
downloadnatsort-d28979bc0fa16764a8bfd13aa72f94dfb584f77c.tar.gz
Refactor test_parse_string_function.py.
Much unneeded duplication has been removed. Most of the work in this file had been going into validating the regular expressions - this will be added elsewhere in a future commit. slow_splitters.py has been removed. Docs have been updated to remove the reference to slow_splitters.py.
-rw-r--r--docs/source/howitworks.rst22
-rw-r--r--test_natsort/slow_splitters.py429
-rw-r--r--test_natsort/test_parse_string_function.py280
3 files changed, 66 insertions, 665 deletions
diff --git a/docs/source/howitworks.rst b/docs/source/howitworks.rst
index 59a9257..2415a91 100644
--- a/docs/source/howitworks.rst
+++ b/docs/source/howitworks.rst
@@ -147,28 +147,6 @@ Starting with :mod:`natsort` version 4.0.0 the default number definition was
changed to an *unsigned integer* which satisfies the "least astonishment" principle, and
I have not heard a complaint since.
-.. admonition:: Wouldn't itertools.groupby work as well as regex to split strings?
-
- You *could* do it using something like :func:`itertools.groupby`, but it is not clearer
- nor more concise, *I promise*.
-
- .. code-block:: python
-
- >>> import itertools
- >>> import operator
- >>> list(map(''.join, map(operator.itemgetter(1), itertools.groupby('2 ft 11 in', str.isdigit))))
- ['2', ' ft ', '11', ' in']
-
- OK, but let's assume for a moment that you *really* like itertools and think the above
- is fine. We still have lost a lot of flexibility here because of the :meth:`str.isdigit`
- call which makes this method non-optimal; with a regular expression one can change
- the pattern string and split on much more complicated patterns, but with
- :func:`itertools.groupby` it becomes *much* more complicated to change it up;
- I implemented this strategy `as part of my testing`_ and it is anything but clear an concise.
-
- Not to mention it's *way* slower than regex. Just the simple example above (unsigned integers)
- is 50% slower than regex...
-
Coercing Strings Containing Numbers Into Numbers
++++++++++++++++++++++++++++++++++++++++++++++++
diff --git a/test_natsort/slow_splitters.py b/test_natsort/slow_splitters.py
deleted file mode 100644
index d9154e4..0000000
--- a/test_natsort/slow_splitters.py
+++ /dev/null
@@ -1,429 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Alternate versions of the splitting functions for testing."""
-from __future__ import unicode_literals
-
-import collections
-import functools
-import itertools
-import unicodedata
-
-from natsort.compat.py23 import PY_VERSION, py23_zip
-from natsort.unicode_numbers import decimals
-
-if PY_VERSION >= 3.0:
- long = int
-
-triple_none = None, None, None
-_sentinel = object()
-SplitElement = collections.namedtuple("SplitElement", ["isnum", "val", "isuni"])
-
-
-def int_splitter(iterable, signed, sep):
- """Alternate (slow) method to split a string into numbers."""
- iterable = unicodedata.normalize("NFD", iterable)
- split_by_decimal = itertools.groupby(iterable, lambda a: a.isdigit())
- split_by_decimal = refine_split_grouping(split_by_decimal)
- split = int_splitter_iter(split_by_decimal, signed)
- split = sep_inserter(split, sep)
- return tuple(add_leading_space_if_first_is_num(split, sep))
-
-
-def float_splitter(iterable, signed, exp, sep):
- """Alternate (slow) method to split a string into numbers."""
-
- def number_tester(x):
- return x.isdecimal() or unicodedata.numeric(x, None) is not None
-
- iterable = unicodedata.normalize("NFD", iterable)
- split_by_decimal = itertools.groupby(iterable, number_tester)
- split_by_decimal = peekable(refine_split_grouping(split_by_decimal))
- split = float_splitter_iter(split_by_decimal, signed, exp)
- split = sep_inserter(split, sep)
- return tuple(add_leading_space_if_first_is_num(split, sep))
-
-
-def refine_split_grouping(iterable):
- """Combines lists into strings, and separates unicode numbers from ASCII"""
- for isnum, values in iterable:
- values = list(values)
- # Further refine numbers into unicode and ASCII numeric characters.
- if isnum:
- num_grouped = group_unicode_and_ascii_numbers(values)
- for isuni, num_values in num_grouped:
- # If unicode, return one character at a time.
- if isuni:
- for u in num_values:
- yield SplitElement(True, u, True)
- # If ASCII, combine into a single multicharacter number.
- else:
- val = "".join(num_values)
- yield SplitElement(True, val, False)
-
- else:
- # If non-numeric, combine into a single string.
- val = "".join(values)
- yield SplitElement(False, val, False)
-
-
-def group_unicode_and_ascii_numbers(
- iterable, ascii_digits=frozenset(decimals + "0123456789")
-):
- """
- Use groupby to group ASCII and unicode numeric characters.
- Assumes all input is already all numeric characters.
- """
- return itertools.groupby(iterable, lambda a: a not in ascii_digits)
-
-
-def int_splitter_iter(iterable, signed):
- """Split the input into integers and strings."""
- for isnum, val, isuni in iterable:
- if isuni:
- yield unicodedata.digit(val)
- elif isnum:
- yield int(val)
- elif signed:
- for x in try_to_read_signed_integer(iterable, val):
- yield int("".join(x)) if isinstance(x, list) else x
- else:
- yield val
-
-
-def float_splitter_iter(iterable, signed, exp):
- """Split the input into integers and other."""
- weird_check = (
- "-inf",
- "-infinity",
- "+inf",
- "+infinity",
- "inf",
- "infinity",
- "nan",
- "-nan",
- "+nan",
- )
- try_to_read_float_correctly = [
- try_to_read_float,
- try_to_read_float_with_exp,
- functools.partial(try_to_read_signed_float_template, key=try_to_read_float),
- functools.partial(
- try_to_read_signed_float_template, key=try_to_read_float_with_exp
- ),
- ][
- signed * 2 + exp * 1
- ] # Choose the appropriate converter function.
- for isnum, val, isuni in iterable:
- if isuni:
- yield unicodedata.numeric(val)
- else:
- for x in try_to_read_float_correctly(iterable, isnum, val):
- if isinstance(x, list):
- yield float("".join(x))
- elif x.lower().strip(" \t\n\r\f\v") in weird_check:
- yield float(x)
- else:
- yield x
-
-
-def try_to_read_signed_integer(iterable, val):
- """
- If the given string ends with +/-, attempt to return a signed int.
- Otherwise, return the string as-is.
- """
- if val.endswith(("+", "-")):
- next_element = next(iterable, None)
-
- # Last element, return as-is.
- if next_element is None:
- yield val
- return
-
- # We know the next value in the sequence must be "isnum == True".
- # We just need to handle unicode or not.
- _, next_val, next_isuni = next_element
-
- # If unicode, don't apply sign and just return the val as-is
- # and convert the unicode character.
- if next_isuni:
- yield val
- yield unicodedata.digit(next_val)
-
- # If the val is *only* the sign, return only the number.
- elif val in ("-", "+"):
- yield [val, next_val]
-
- # Otherwise, remove the sign from the val and apply it to the number,
- # returning both.
- else:
- yield val[:-1]
- yield [val[-1], next_val]
-
- else:
- yield val
-
-
-def try_to_read_float(iterable, isnum, val):
- """
- Try to read a string that matches num.num and return as a float.
- Otherwise return the input as found.
- """
- # Extract what is coming next.
- next_isnum, next_val, next_isuni = iterable.peek(triple_none)
-
- # If a non-number was given, we can only accept a decimal point.
- if not isnum:
-
- # If the next value is None or not a non-uni number, return as-is.
- if next_val is None or not next_isnum or next_isuni:
- yield val
-
- # If this the decimal point, add it to the number and return.
- elif val == ".":
- next(iterable) # To progress the iterator.
- yield [val, next_val]
-
- # If the val ends with the decimal point, split the decimal point
- # off the end of the string then place it to the front of the
- # iterable so that we can use it later.
- elif val.endswith("."):
- iterable.push(SplitElement(False, val[-1], False))
- yield val[:-1]
-
- # Otherwise, just return the val and move on.
- else:
- yield val
-
- # If a number, read the number then try to get the post-decimal part.
- else:
-
- # If the next element is not '.', return now.
- if next_val != ".":
- # If the next val starts with a '.', let's add that.
- if next_val is not None and next_val.startswith("."):
- next(iterable) # To progress the iterator.
- iterable.push(SplitElement(False, next_val[1:], False))
- yield [val, next_val[0]]
- else:
- yield [val]
-
- # Recursively parse the decimal and after. If the returned
- # value is a list, add the list to the current number.
- # If not, just return the number with the decimal.
- else:
- # If the first value returned from the try_to_read_float
- # is a list, add it to the float component list.
- next(iterable) # To progress the iterator.
- ret = next(try_to_read_float(iterable, next_isnum, next_val))
- if isinstance(ret, list):
- yield [val] + ret
- else:
- yield [val, next_val]
-
-
-def try_to_read_float_with_exp(iterable, isnum, val):
- """
- Try to read a string that matches num.numE[+-]num and return as a float.
- Otherwise return the input as found.
- """
- exp_ident = ("e", "E", "e-", "E-", "e+", "E+")
-
- # Start by reading the floating point part.
- float_ret = next(try_to_read_float(iterable, isnum, val))
-
- # Extract what is coming next.
- next_isnum, next_val, next_isuni = iterable.peek(triple_none)
-
- # If the float part is not a list, or the next value
- # is not in the exponential identifier list, return it as-is.
- if not isinstance(float_ret, list) or next_val not in exp_ident:
- yield float_ret
-
- # We know the next_val is an exponential identifier. See if the value
- # after that is a non-unicode number. If so, return all as a float.
- # If not, put the exponential identifier back on the front of the
- # list and return the float_ret as-is.
- else:
- exp = SplitElement(next_isnum, next_val, next_isuni)
- next(iterable) # To progress the iterator.
- next_isnum, next_val, next_isuni = iterable.peek(triple_none)
- if next_isnum and not next_isuni:
- next(iterable) # To progress the iterator.
- yield float_ret + [exp.val, next_val]
- else:
- iterable.push(exp)
- yield float_ret
-
-
-def try_to_read_signed_float_template(iterable, isnum, val, key):
- """
- Try to read a string that matches [+-]num.numE[+-]num and return as a
- float. Otherwise return the input as found.
- """
- # Extract what is coming next.
- next_isnum, next_val, next_isuni = iterable.peek(triple_none)
-
- # If it looks like there is a sign here and the next value is a
- # non-unicode number, try to parse that with the sign.
- if val.endswith(("+", "-")) and next_isnum and not next_isuni:
-
- # If this value is a sign, return the combo.
- if val in ("+", "-"):
- next(iterable) # To progress the iterator.
- yield [val] + next(key(iterable, next_isnum, next_val))
-
- # If the val ends with the sign split the sign off the end of
- # the string then place it to the front of the iterable so that
- # we can use it later.
- else:
- iterable.push(SplitElement(False, val[-1], False))
- yield val[:-1]
-
- # If it looks like there is a sign here and the next value is a
- # decimal, try to parse as a decimal.
- elif val.endswith(("+.", "-.")) and next_isnum and not next_isuni:
-
- # Push back a zero before the decimal then parse.
- print(val, iterable.peek())
-
- # If this value is a sign, return the combo
- if val[:-1] in ("+", "-"):
- yield [val[:-1]] + next(key(iterable, False, val[-1]))
-
- # If the val ends with the sign split the decimal the end of
- # the string then place it to the front of the iterable so that
- # we can use it later.
- else:
- iterable.push(SplitElement(False, val[-2:], False))
- yield val[:-2]
-
- # If no sign, pass directly to the key function.
- else:
- yield next(key(iterable, isnum, val))
-
-
-def add_leading_space_if_first_is_num(iterable, sep):
- """Check if the first element is a number, and prepend with space if so."""
- z, peek = itertools.tee(iterable)
- if type(next(peek, None)) in (int, long, float):
- z = itertools.chain([sep], z)
- del peek
- return z
-
-
-def sep_inserter(iterable, sep, types=frozenset((int, long, float))):
- """Simulates the py3_safe function."""
- pairs = pairwise(iterable)
-
- # Prime loop by handling first pair specially.
- try:
- first, second = next(pairs)
- except StopIteration:
- return
- if second is None: # Only one element
- yield first
- elif type(first) in types and type(second) in types:
- yield first
- yield sep
- yield second
- else:
- yield first
- yield second
-
- # Handle all remaining pairs in loop.
- for first, second in pairs:
- if type(first) in types and type(second) in types:
- yield sep
- yield second
-
-
-def pairwise(iterable):
- "s -> (s0,s1), (s1,s2), (s2,s3), ..."
- split1, split2 = itertools.tee(iterable)
- a, b = itertools.tee(split1)
- test1, test2 = itertools.tee(split2)
- next(b, None)
- if next(test1, None) is None:
- ret = py23_zip(a, b) # Returns empty list
- elif next(test2, None) is not None and next(test2, None) is None:
- ret = py23_zip(a, [None]) # Return at least one value
- else:
- ret = py23_zip(a, b)
- del test1, test2, split2
- return ret
-
-
-class peekable(object):
- """Wrapper for an iterator to allow 1-item lookahead
- Call ``peek()`` on the result to get the value that will next pop out of
- ``next()``, without advancing the iterator:
- >>> p = peekable(xrange(2))
- >>> p.peek()
- 0
- >>> p.next()
- 0
- >>> p.peek()
- 1
- >>> p.next()
- 1
- Pass ``peek()`` a default value, and it will be returned in the case where
- the iterator is exhausted:
- >>> p = peekable([])
- >>> p.peek('hi')
- 'hi'
- If no default is provided, ``peek()`` raises ``StopIteration`` when there
- are no items left.
- To test whether there are more items in the iterator, examine the
- peekable's truth value. If it is truthy, there are more items.
- >>> assert peekable(xrange(1))
- >>> assert not peekable([])
- """
-
- # Lowercase to blend in with itertools. The fact that it's a class is an
- # implementation detail.
-
- def __init__(self, iterable):
- self._it = iter(iterable)
-
- def __iter__(self):
- return self
-
- def __nonzero__(self):
- try:
- self.peek()
- except StopIteration:
- return False
- return True
-
- __bool__ = __nonzero__
-
- def peek(self, default=_sentinel):
- """Return the item that will be next returned from ``next()``.
- Return ``default`` if there are no items left. If ``default`` is not
- provided, raise ``StopIteration``.
- """
- if not hasattr(self, "_peek"):
- try:
- self._peek = next(self._it)
- except StopIteration:
- if default is _sentinel:
- raise
- return default
- return self._peek
-
- def next(self):
- ret = self.peek()
- try:
- del self._peek
- except AttributeError:
- pass
- return ret
-
- __next__ = next
-
- def push(self, value):
- """Put an element at the front of the iterable."""
- if hasattr(self, "_peek"):
- self._it = itertools.chain([value, self._peek], self._it)
- del self._peek
- else:
- self._it = itertools.chain([value], self._it)
diff --git a/test_natsort/test_parse_string_function.py b/test_natsort/test_parse_string_function.py
index b0b114e..46ed201 100644
--- a/test_natsort/test_parse_string_function.py
+++ b/test_natsort/test_parse_string_function.py
@@ -2,237 +2,89 @@
"""These test the utils.py functions."""
from __future__ import unicode_literals
-from hypothesis import example, given
+import unicodedata
+
+import pytest
+from hypothesis import given
from hypothesis.strategies import floats, integers, lists, text
-from natsort.compat.fastnumbers import fast_float, fast_int
-from natsort.compat.py23 import PY_VERSION, py23_str
+from natsort.compat.fastnumbers import fast_float
+from natsort.compat.py23 import py23_str
from natsort.ns_enum import ns, ns_DUMB
-from natsort.utils import NumericalRegularExpressions as nre
-from natsort.utils import parse_path_factory, parse_string_factory
-from pytest import raises
-
-from slow_splitters import float_splitter, int_splitter
-
-if PY_VERSION >= 3:
- long = int
-
-
-def whitespace_check(x):
- """Simplifies testing"""
- try:
- if x.isspace():
- return x in " \t\n\r\f\v"
- else:
- return True
- except (AttributeError, TypeError):
- return True
-
-
-def no_op(x):
- """A function that does nothing."""
- return x
-
-
-def tuple2(x, dummy):
- """Make the input a tuple."""
- return tuple(x)
-
-
-# Each test has an "example" version for demonstrative purposes,
-# and a test that uses the hypothesis module.
-
-
-def test_parse_string_factory_raises_TypeError_if_given_a_number_example():
- with raises(TypeError):
- assert parse_string_factory(
- 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
- )(50.0)
-
-
-@given(floats())
-def test_parse_string_factory_raises_TypeError_if_given_a_number(x):
- with raises(TypeError):
- assert parse_string_factory(
- 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
- )(x)
-
-
-def test_parse_string_factory_only_parses_digits_with_nosign_int_example():
- assert parse_string_factory(0, "", nre.int_nosign().split, no_op, fast_int, tuple2)(
- "a5+5.034e-1"
- ) == ("a", 5, "+", 5, ".", 34, "e-", 1)
-
-
-@given(
- lists(
- elements=floats() | text().filter(whitespace_check) | integers(),
- min_size=1,
- max_size=10,
- )
-)
-@example(
- [
- 10000000000000000000000000000000000000000000000000000000000000000000000000,
- 100000000000000000000000000000000000000000000000000000000000000000000000000,
- 100000000000000000000000000000000000000000000000000000000000000000000000000,
- ]
-)
-def test_parse_string_factory_only_parses_digits_with_nosign_int(x):
- s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert parse_string_factory(0, "", nre.int_nosign().split, no_op, fast_int, tuple2)(
- s
- ) == int_splitter(s, False, "")
-
+from natsort.utils import NumericalRegularExpressions as NumRegex
+from natsort.utils import parse_string_factory
-def test_parse_string_factory_parses_digit_with_sign_with_signed_int_example():
- assert parse_string_factory(0, "", nre.int_sign().split, no_op, fast_int, tuple2)(
- "a5+5.034e-1"
- ) == ("a", 5, "", 5, ".", 34, "e", -1)
+class CustomTuple(tuple):
+ """Used to ensure what is given during testing is what is returned."""
-@given(
- lists(
- elements=floats() | text().filter(whitespace_check) | integers(),
- min_size=1,
- max_size=10,
- )
-)
-def test_parse_string_factory_parses_digit_with_sign_with_signed_int(x):
- s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert parse_string_factory(0, "", nre.int_sign().split, no_op, fast_int, tuple2)(
- s
- ) == int_splitter(s, True, "")
-
+ original = None
-def test_parse_string_factory_only_parses_float_with_nosign_noexp_float_example():
- assert parse_string_factory(
- 0, "", nre.float_nosign_noexp().split, no_op, fast_float, tuple2
- )("a5+5.034e-1") == ("a", 5.0, "+", 5.034, "e-", 1.0)
+def input_transform(x):
+ """Make uppercase."""
+ return x.upper()
-@given(
- lists(
- elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(),
- min_size=1,
- max_size=10,
- )
-)
-def test_parse_string_factory_only_parses_float_with_nosign_noexp_float(x):
- s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert parse_string_factory(
- 0, "", nre.float_nosign_noexp().split, no_op, fast_float, tuple2
- )(s) == float_splitter(s, False, False, "")
+def final_transform(x, original):
+ """Make the input a CustomTuple."""
+ t = CustomTuple(x)
+ t.original = original
+ return t
-def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float_example():
- assert parse_string_factory(
- 0, "", nre.float_nosign_exp().split, no_op, fast_float, tuple2
- )("a5+5.034e-1") == ("a", 5.0, "+", 0.5034)
-
-@given(
- lists(
- elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(),
- min_size=1,
- max_size=10,
+@pytest.fixture
+def parse_string_func(request):
+ """A parse_string_factory result with sample arguments."""
+ sep = ""
+ return parse_string_factory(
+ request.param, # algorithm
+ sep,
+ NumRegex.int_nosign().split,
+ input_transform,
+ fast_float,
+ final_transform,
)
-)
-def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float(x):
- s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert parse_string_factory(
- 0, "", nre.float_nosign_exp().split, no_op, fast_float, tuple2
- )(s) == float_splitter(s, False, True, "")
-def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float_example():
- assert parse_string_factory(
- 0, "", nre.float_sign_noexp().split, no_op, fast_float, tuple2
- )("a5+5.034e-1") == ("a", 5.0, "", 5.034, "e", -1.0)
+@pytest.mark.parametrize("parse_string_func", [ns.DEFAULT], indirect=True)
+@given(x=floats() | integers())
+def test_parse_string_factory_raises_type_error_if_given_number(x, parse_string_func):
+ with pytest.raises(TypeError):
+ assert parse_string_func(x)
-@given(
- lists(
- elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(),
- min_size=1,
- max_size=10,
- )
+# noinspection PyCallingNonCallable
+@pytest.mark.parametrize(
+ "parse_string_func, orig_func",
+ [
+ (ns.DEFAULT, lambda x: x.upper()),
+ (ns.LOCALE, lambda x: x.upper()),
+ (ns.LOCALE | ns_DUMB, lambda x: x), # This changes the "original" handling.
+ ],
+ indirect=["parse_string_func"],
)
-def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float(x):
- s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert parse_string_factory(
- 0, "", nre.float_sign_noexp().split, no_op, fast_float, tuple2
- )(s) == float_splitter(s, True, False, "")
-
-
-def test_parse_string_factory_parses_float_with_sign_exp_float_example():
- assert parse_string_factory(
- 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
- )("a5+5.034e-1") == ("a", 5.0, "", 0.5034)
- assert parse_string_factory(
- 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
- )("6a5+5.034e-1") == ("", 6.0, "a", 5.0, "", 0.5034)
-
-
@given(
- lists(
- elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(),
- min_size=1,
- max_size=10,
+ x=lists(
+ elements=floats(allow_nan=False) | text() | integers(), min_size=1, max_size=10
)
)
-def test_parse_string_factory_parses_float_with_sign_exp_float(x):
- s = "".join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert parse_string_factory(
- 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
- )(s) == float_splitter(s, True, True, "")
-
-
-def test_parse_string_factory_selects_pre_function_value_if_not_dumb():
- def tuple2(x, orig):
- """Make the input a tuple."""
- return (orig[0], tuple(x))
-
- assert parse_string_factory(
- 0, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2
- )("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1))
- assert parse_string_factory(
- ns_DUMB, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2
- )("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1))
- assert parse_string_factory(
- ns.LOCALE, "", nre.int_nosign().split, py23_str.upper, fast_float, tuple2
- )("a5+5.034e-1") == ("A", ("A", 5, "+", 5, ".", 34, "E-", 1))
- assert parse_string_factory(
- ns.LOCALE | ns_DUMB,
- "",
- nre.int_nosign().split,
- py23_str.upper,
- fast_float,
- tuple2,
- )("a5+5.034e-1") == ("a", ("A", 5, "+", 5, ".", 34, "E-", 1))
-
-
-def test_parse_path_function_parses_string_as_path_then_as_string():
- splt = parse_string_factory(
- 0, "", nre.float_sign_exp().split, no_op, fast_float, tuple2
- )
- assert parse_path_factory(splt)("/p/Folder (10)/file34.5nm (2).tar.gz") == (
- ("/",),
- ("p",),
- ("Folder (", 10.0, ")"),
- ("file", 34.5, "nm (", 2.0, ")"),
- (".tar",),
- (".gz",),
- )
- assert parse_path_factory(splt)("../Folder (10)/file (2).tar.gz") == (
- ("..",),
- ("Folder (", 10.0, ")"),
- ("file (", 2.0, ")"),
- (".tar",),
- (".gz",),
- )
- assert parse_path_factory(splt)("Folder (10)/file.f34.5nm (2).tar.gz") == (
- ("Folder (", 10.0, ")"),
- ("file.f", 34.5, "nm (", 2.0, ")"),
- (".tar",),
- (".gz",),
- )
+@pytest.mark.usefixtures("with_locale_en_us")
+def test_parse_string_factory_invariance(x, parse_string_func, orig_func):
+ # parse_string_factory is the high-level combination of several dedicated
+ # functions involved in splitting and manipulating a string. The details of
+ # what those functions do are not relevant to testing parse_string_factory.
+ # What is relevant is that the form of the output matches the invariant
+ # that even elements are strings and odd are numerical. That each component
+ # function is doing what it should is tested elsewhere.
+ value = "".join(map(str, x)) # Convert the input to a single string.
+ result = parse_string_func(value)
+ result_types = list(map(type, result))
+ expected_types = [py23_str if i % 2 == 0 else float for i in range(len(result))]
+ assert result_types == expected_types
+
+ # The result is in our CustomTuple.
+ assert isinstance(result, CustomTuple)
+
+ # Original should have gone through the "input_transform"
+ # which is uppercase in these tests.
+ assert result.original == orig_func(unicodedata.normalize("NFD", value))