Merge pull request #19 from SimonSapin/master

Ignore optional whitespace after combinators when parsing CSS selectors.
author: scoder <stefan_ml@behnel.de> 2011-11-09 00:14:56 -0800
committer: scoder <stefan_ml@behnel.de> 2011-11-09 00:14:56 -0800
commit: f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3 (patch)
tree: 3af0afd7d935c9ea9cd3e0422dad072bda7ab863
parent: 5fc9eac1edea42ec46422c6c7b017ee11c4a9531 (diff)
parent: c3b80b7fe228e171f98aa2471461b1500ae752be (diff)
download: python-lxml-f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3.tar.gz
4 files changed, 19 insertions, 0 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 89b6c307..ea70e141 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -26,6 +26,10 @@ Bugs fixed
   translated expression.  Note that this breaks the usage of some of
   the generated XPath expressions as XSLT location paths that
   previously worked in 2.3.1.
+* Fixed parsing of some selectors in cssselect. Whitespace after combinators
+  ">", "+" and "~" is now correctly ignored. Previously it was parsed as
+  a descendant combinator. For example, "div> .foo" was parsed the same as
+  "div>* .foo" instead of "div>.foo".
 
 Other changes
 --------------
diff --git a/src/lxml/cssselect.py b/src/lxml/cssselect.py
index 8fd7faf3..a319c07b 100644
--- a/src/lxml/cssselect.py
+++ b/src/lxml/cssselect.py
@@ -693,6 +693,9 @@ def parse_selector(stream):
         elif peek in ('+', '>', '~'):
             # A combinator
             combinator = stream.next()
+            # Ignore optional whitespace after a combinator
+            while stream.peek() == ' ':
+                stream.next()
         else:
             combinator = ' '
         consumed = len(stream.used)
diff --git a/src/lxml/tests/test_css.txt b/src/lxml/tests/test_css.txt
index a2168c20..3eb0e482 100644
--- a/src/lxml/tests/test_css.txt
+++ b/src/lxml/tests/test_css.txt
@@ -16,12 +16,22 @@ A quick test of tokenizing:
 
 Then of parsing:
 
+    >>> def parse_many(one, *others):
+    ...     result = repr(parse(one))
+    ...     print(result)
+    ...     for other in others:
+    ...         other_result = repr(parse(other))
+    ...         if other_result != result:
+    ...             print('Selector %r parses as\n%s' % (other, other_result))
     >>> parse('td.foo, .bar')
     Or([Class[Element[td].foo], CombinedSelector[Element[*] <followed> Class[Element[*].bar]]])
     >>> parse('div, td.foo, div.bar span')
     Or([Element[div], Class[Element[td].foo], CombinedSelector[Class[Element[div].bar] <followed> Element[span]]])
     >>> parse('div > p')
     CombinedSelector[Element[div] > Element[p]]
+    >>> parse_many('div>.foo', 'div> .foo', 'div >.foo', 'div > .foo',
+    ...            'div \n>  \t \t .foo', 'div\r>\n\n\n.foo')
+    CombinedSelector[Element[div] > Class[Element[*].foo]]
     >>> parse('td:first')
     Pseudo[Element[td]:first]
     >>> parse('a[name]')
diff --git a/src/lxml/tests/test_css_select.txt b/src/lxml/tests/test_css_select.txt
index 72cbc7db..8722e19b 100644
--- a/src/lxml/tests/test_css_select.txt
+++ b/src/lxml/tests/test_css_select.txt
@@ -140,6 +140,8 @@ Now, the tests:
     li-div
     >>> pcss('div > div')
     empty
+    >>> pcss('div>.c', 'div > .c')
+    first-ol
     >>> pcss('div + div')
     foobar-div
     >>> pcss('a ~ a')
author	scoder <stefan_ml@behnel.de>	2011-11-09 00:14:56 -0800
committer	scoder <stefan_ml@behnel.de>	2011-11-09 00:14:56 -0800
commit	f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3 (patch)
tree	3af0afd7d935c9ea9cd3e0422dad072bda7ab863
parent	5fc9eac1edea42ec46422c6c7b017ee11c4a9531 (diff)
parent	c3b80b7fe228e171f98aa2471461b1500ae752be (diff)
download	python-lxml-f331ff39cfa2d752dfc33ba8f6fc45c360d8bac3.tar.gz