summaryrefslogtreecommitdiff
path: root/tests/test_regex.py
blob: d3fe6172149054f8ac9babb2fd9b748681f33d1f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding: utf-8 -*-
"""These test the splitting regular expressions."""
from __future__ import unicode_literals

import pytest
from natsort.utils import NumericalRegularExpressions as NumRegex


# Regex aliases (so lines stay a reasonable length).
# Naming: i = int, f = float, u = no sign, s = signed, e = exponent allowed.
i_u = NumRegex.int_nosign()
i_s = NumRegex.int_sign()
f_u = NumRegex.float_nosign_noexp()
f_s = NumRegex.float_sign_noexp()
f_ue = NumRegex.float_nosign_exp()
f_se = NumRegex.float_sign_exp()

# Human-readable name of each regex, used to build the pytest ids.
# Keyed by the alias objects themselves so that later lookups made with those
# same objects never depend on two separately constructed regexes hashing and
# comparing equal.
regex_names = {
    i_u: "int_nosign",
    i_s: "int_sign",
    f_u: "float_nosign_noexp",
    f_s: "float_sign_noexp",
    f_ue: "float_nosign_exp",
    f_se: "float_sign_exp",
}

# Test table: maps each input string to the list that splitting it with a
# given regular expression must produce (checked with ``regex.split``).
# Outer keys are the input strings; inner keys are the regex aliases defined
# above (i = int, f = float, u = no sign, s = signed, e = exponent allowed).
# An empty string shows up in an expected list wherever a number sits at the
# very start or end of the input, or where two numbers are directly adjacent.
regex_tests = {
    # A bare number: the sign, decimal point and exponent are only absorbed
    # into the number by the patterns that support them.
    "-123.45e+67": {
        i_u: ["-", "123", ".", "45", "e+", "67", ""],
        i_s: ["", "-123", ".", "45", "e", "+67", ""],
        f_u: ["-", "123.45", "e+", "67", ""],
        f_s: ["", "-123.45", "e", "+67", ""],
        f_ue: ["-", "123.45e+67", ""],
        f_se: ["", "-123.45e+67", ""],
    },
    # The same number surrounded by text, so no empty strings at the ends.
    "a-123.45e+67b": {
        i_u: ["a-", "123", ".", "45", "e+", "67", "b"],
        i_s: ["a", "-123", ".", "45", "e", "+67", "b"],
        f_u: ["a-", "123.45", "e+", "67", "b"],
        f_s: ["a", "-123.45", "e", "+67", "b"],
        f_ue: ["a-", "123.45e+67", "b"],
        f_se: ["a", "-123.45e+67", "b"],
    },
    # No number at all: the input comes back as a single element.
    "hello": {
        i_u: ["hello"],
        i_s: ["hello"],
        f_u: ["hello"],
        f_s: ["hello"],
        f_ue: ["hello"],
        f_se: ["hello"],
    },
    # Several dots in a row: the float patterns take "12.34" and then ".56"
    # as two adjacent numbers; "-" joins "7" only for the signed patterns.
    "abc12.34.56-7def": {
        i_u: ["abc", "12", ".", "34", ".", "56", "-", "7", "def"],
        i_s: ["abc", "12", ".", "34", ".", "56", "", "-7", "def"],
        f_u: ["abc", "12.34", "", ".56", "-", "7", "def"],
        f_s: ["abc", "12.34", "", ".56", "", "-7", "def"],
        f_ue: ["abc", "12.34", "", ".56", "-", "7", "def"],
        f_se: ["abc", "12.34", "", ".56", "", "-7", "def"],
    },
    # Letters and digits alternating: only the exponent-aware patterns join
    # "4e5" into one number; the following "e6" stays split.
    "a1b2c3d4e5e6": {
        i_u: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""],
        i_s: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""],
        f_u: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""],
        f_s: ["a", "1", "b", "2", "c", "3", "d", "4", "e", "5", "e", "6", ""],
        f_ue: ["a", "1", "b", "2", "c", "3", "d", "4e5", "e", "6", ""],
        f_se: ["a", "1", "b", "2", "c", "3", "d", "4e5", "e", "6", ""],
    },
    # Non-ASCII decimal digits are split out exactly like ASCII "11".
    "eleven۱۱eleven11eleven১১": {  # All of these are the decimal 11
        i_u: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""],
        i_s: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""],
        f_u: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""],
        f_s: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""],
        f_ue: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""],
        f_se: ["eleven", "۱۱", "eleven", "11", "eleven", "১১", ""],
    },
    # Other numeric characters: the int patterns split out "12", "①" and "②"
    # but leave "ⅠⅡ⅓" as text, while the float patterns split out every
    # character individually.
    "12①②ⅠⅡ⅓": {  # Two decimals, Two digits, Two numerals, fraction
        i_u: ["", "12", "", "①", "", "②", "ⅠⅡ⅓"],
        i_s: ["", "12", "", "①", "", "②", "ⅠⅡ⅓"],
        f_u: ["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""],
        f_s: ["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""],
        f_ue: ["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""],
        f_se: ["", "12", "", "①", "", "②", "", "Ⅰ", "", "Ⅱ", "", "⅓", ""],
    }
}


# Flatten the nested table into one (input, expected split, regex) triple per
# test case, in the table's own iteration order.
regex_params = [
    (text, pieces, pattern)
    for text, by_pattern in regex_tests.items()
    for pattern, pieces in by_pattern.items()
]
# One pytest id per triple, of the form "<input>-<regex name>".
labels = [
    "{}-{}".format(text, regex_names[pattern])
    for text, _, pattern in regex_params
]


@pytest.mark.parametrize("x, expected, regex", regex_params, ids=labels)
def test_regex_splits_correctly(x, expected, regex):
    """Splitting ``x`` with ``regex`` must yield exactly ``expected``."""
    # noinspection PyUnresolvedReferences
    pieces = regex.split(x)
    assert pieces == expected