summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author scoder <stefan_ml@behnel.de> 2011-11-09 00:14:56 -0800
committer scoder <stefan_ml@behnel.de> 2011-11-09 00:14:56 -0800
commit f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3 (patch)
tree 3af0afd7d935c9ea9cd3e0422dad072bda7ab863
parent 5fc9eac1edea42ec46422c6c7b017ee11c4a9531 (diff)
parent c3b80b7fe228e171f98aa2471461b1500ae752be (diff)
download python-lxml-f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3.tar.gz
Merge pull request #19 from SimonSapin/master
Ignore optional whitespace after combinators when parsing CSS selectors.
-rw-r--r-- CHANGES.txt 4
-rw-r--r-- src/lxml/cssselect.py 3
-rw-r--r-- src/lxml/tests/test_css.txt 10
-rw-r--r-- src/lxml/tests/test_css_select.txt 2
4 files changed, 19 insertions, 0 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 89b6c307..ea70e141 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -26,6 +26,10 @@ Bugs fixed
translated expression. Note that this breaks the usage of some of
the generated XPath expressions as XSLT location paths that
previously worked in 2.3.1.
+* Fixed parsing of some selectors in cssselect. Whitespace after combinators
+ ">", "+" and "~" is now correctly ignored. Previously it was parsed as
+ a descendant combinator. For example, "div> .foo" was parsed the same as
+ "div>* .foo" instead of "div>.foo".
Other changes
--------------
diff --git a/src/lxml/cssselect.py b/src/lxml/cssselect.py
index 8fd7faf3..a319c07b 100644
--- a/src/lxml/cssselect.py
+++ b/src/lxml/cssselect.py
@@ -693,6 +693,9 @@ def parse_selector(stream):
elif peek in ('+', '>', '~'):
# A combinator
combinator = stream.next()
+ # Ignore optional whitespace after a combinator
+ while stream.peek() == ' ':
+ stream.next()
else:
combinator = ' '
consumed = len(stream.used)
diff --git a/src/lxml/tests/test_css.txt b/src/lxml/tests/test_css.txt
index a2168c20..3eb0e482 100644
--- a/src/lxml/tests/test_css.txt
+++ b/src/lxml/tests/test_css.txt
@@ -16,12 +16,22 @@ A quick test of tokenizing:
Then of parsing:
+ >>> def parse_many(one, *others):
+ ... result = repr(parse(one))
+ ... print(result)
+ ... for other in others:
+ ... other_result = repr(parse(other))
+ ... if other_result != result:
+ ... print('Selector %r parses as\n%s' % (other, other_result))
>>> parse('td.foo, .bar')
Or([Class[Element[td].foo], CombinedSelector[Element[*] <followed> Class[Element[*].bar]]])
>>> parse('div, td.foo, div.bar span')
Or([Element[div], Class[Element[td].foo], CombinedSelector[Class[Element[div].bar] <followed> Element[span]]])
>>> parse('div > p')
CombinedSelector[Element[div] > Element[p]]
+ >>> parse_many('div>.foo', 'div> .foo', 'div >.foo', 'div > .foo',
+ ... 'div \n> \t \t .foo', 'div\r>\n\n\n.foo')
+ CombinedSelector[Element[div] > Class[Element[*].foo]]
>>> parse('td:first')
Pseudo[Element[td]:first]
>>> parse('a[name]')
diff --git a/src/lxml/tests/test_css_select.txt b/src/lxml/tests/test_css_select.txt
index 72cbc7db..8722e19b 100644
--- a/src/lxml/tests/test_css_select.txt
+++ b/src/lxml/tests/test_css_select.txt
@@ -140,6 +140,8 @@ Now, the tests:
li-div
>>> pcss('div > div')
empty
+ >>> pcss('div>.c', 'div > .c')
+ first-ol
>>> pcss('div + div')
foobar-div
>>> pcss('a ~ a')