diff options
| author | scoder <stefan_ml@behnel.de> | 2011-11-09 00:14:56 -0800 |
|---|---|---|
| committer | scoder <stefan_ml@behnel.de> | 2011-11-09 00:14:56 -0800 |
| commit | f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3 (patch) | |
| tree | 3af0afd7d935c9ea9cd3e0422dad072bda7ab863 | |
| parent | 5fc9eac1edea42ec46422c6c7b017ee11c4a9531 (diff) | |
| parent | c3b80b7fe228e171f98aa2471461b1500ae752be (diff) | |
| download | python-lxml-f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3.tar.gz | |
Merge pull request #19 from SimonSapin/master
Ignore optional whitespace after combinators when parsing CSS selectors.
| -rw-r--r-- | CHANGES.txt | 4 | ||||
| -rw-r--r-- | src/lxml/cssselect.py | 3 | ||||
| -rw-r--r-- | src/lxml/tests/test_css.txt | 10 | ||||
| -rw-r--r-- | src/lxml/tests/test_css_select.txt | 2 |
4 files changed, 19 insertions, 0 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 89b6c307..ea70e141 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -26,6 +26,10 @@ Bugs fixed translated expression. Note that this breaks the usage of some of the generated XPath expressions as XSLT location paths that previously worked in 2.3.1. +* Fixed parsing of some selectors in cssselect. Whitespace after combinators + ">", "+" and "~" is now correctly ignored. Previously it was parsed as + a descendant combinator. For example, "div> .foo" was parsed the same as + "div>* .foo" instead of "div>.foo". Other changes -------------- diff --git a/src/lxml/cssselect.py b/src/lxml/cssselect.py index 8fd7faf3..a319c07b 100644 --- a/src/lxml/cssselect.py +++ b/src/lxml/cssselect.py @@ -693,6 +693,9 @@ def parse_selector(stream): elif peek in ('+', '>', '~'): # A combinator combinator = stream.next() + # Ignore optional whitespace after a combinator + while stream.peek() == ' ': + stream.next() else: combinator = ' ' consumed = len(stream.used) diff --git a/src/lxml/tests/test_css.txt b/src/lxml/tests/test_css.txt index a2168c20..3eb0e482 100644 --- a/src/lxml/tests/test_css.txt +++ b/src/lxml/tests/test_css.txt @@ -16,12 +16,22 @@ A quick test of tokenizing: Then of parsing: + >>> def parse_many(one, *others): + ... result = repr(parse(one)) + ... print(result) + ... for other in others: + ... other_result = repr(parse(other)) + ... if other_result != result: + ... print('Selector %r parses as\n%s' % (other, other_result)) >>> parse('td.foo, .bar') Or([Class[Element[td].foo], CombinedSelector[Element[*] <followed> Class[Element[*].bar]]]) >>> parse('div, td.foo, div.bar span') Or([Element[div], Class[Element[td].foo], CombinedSelector[Class[Element[div].bar] <followed> Element[span]]]) >>> parse('div > p') CombinedSelector[Element[div] > Element[p]] + >>> parse_many('div>.foo', 'div> .foo', 'div >.foo', 'div > .foo', + ... 
'div \n> \t \t .foo', 'div\r>\n\n\n.foo') + CombinedSelector[Element[div] > Class[Element[*].foo]] >>> parse('td:first') Pseudo[Element[td]:first] >>> parse('a[name]') diff --git a/src/lxml/tests/test_css_select.txt b/src/lxml/tests/test_css_select.txt index 72cbc7db..8722e19b 100644 --- a/src/lxml/tests/test_css_select.txt +++ b/src/lxml/tests/test_css_select.txt @@ -140,6 +140,8 @@ Now, the tests: li-div >>> pcss('div > div') empty + >>> pcss('div>.c', 'div > .c') + first-ol >>> pcss('div + div') foobar-div >>> pcss('a ~ a') |
