This is a test of CSS selectors. We set up a document that we'll use for
all our selections, and a function to make querying simpler:
>>> from lxml.cssselect import CSSSelector
>>> from lxml.etree import HTML
>>> # Fixture document queried by every selector test in this file.  Results
>>> # are reported by element id (e.g. outer-div, first-ol, first-li through
>>> # seventh-li, paragraph, foobar-div), so those ids must exist in the markup.
>>> # NOTE(review): no tags or attributes are visible in this literal as it
>>> # stands -- confirm the fixture markup has not been lost.
>>> doc = HTML('''
...
...
...
...
link
...
link
...
... - content
... -
...
...
...
...
...
...
...
... -
...
...
... hi there
... guy
...
...
...
...
... ''')
>>> order = {}
>>> for count, el in enumerate(doc.getiterator()):
... order[el] = count
>>> def select_ids(selector):
... items = CSSSelector(selector)(doc)
... if not items:
... return 'empty'
... items = CSSSelector(selector)(doc)
... items.sort(key=lambda el: order[el])
... return ', '.join([el.get('id', 'nil') for el in items])
>>> def pcss(main, *selectors):
... result = select_ids(main)
... for selector in selectors:
... sel_result = select_ids(selector)
... if sel_result != result:
... print('Selector %r returns %s' % (selector, sel_result))
... print(result)
Now, the tests:
>>> # Universal and element-name selectors.
>>> pcss('*') # doctest: +ELLIPSIS
nil, nil, nil, outer-div, ... foobar-span
>>> pcss('div')
outer-div, li-div, foobar-div
>>> # Attribute selectors: presence, exact value, substring (*=), prefix (^=),
>>> # suffix ($=), whitespace-separated word (~=) and dash-separated prefix (|=).
>>> pcss('a[name]')
name-anchor
>>> pcss('a[rel]')
tag-anchor, nofollow-anchor
>>> pcss('a[rel="tag"]')
tag-anchor
>>> pcss('a[href*="localhost"]')
tag-anchor
>>> pcss('a[href^="http"]')
tag-anchor, nofollow-anchor
>>> pcss('a[href^="http:"]')
tag-anchor
>>> pcss('a[href$="org"]')
nofollow-anchor
>>> pcss('div[foobar~="bc"]', 'div[foobar~="cde"]')
foobar-div
>>> pcss('div[foobar~="cd"]')
empty
>>> pcss('*[lang|="en"]', '*[lang|="en-US"]')
second-li
>>> pcss('*[lang|="e"]')
empty
>>> # Positional pseudo-classes: :nth-child() and :nth-last-child().
>>> pcss('li:nth-child(3)')
third-li
>>> pcss('li:nth-child(10)')
empty
>>> pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)')
second-li, fourth-li, sixth-li
>>> pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)')
first-li, third-li, fifth-li, seventh-li
>>> pcss('li:nth-child(2n+4)')
fourth-li, sixth-li
>>> # FIXME: I'm not 100% sure this is right:
>>> pcss('li:nth-child(3n+1)')
first-li, fourth-li, seventh-li
>>> # FIXME: I'm not sure if nth-last-child(0) or nth-last-child(1)
>>> # should be equivalent to nth-last-child()
>>> pcss('li:nth-last-child()', 'li:nth-last-child(0)')
seventh-li
>>> pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)')
second-li, fourth-li, sixth-li
>>> pcss('li:nth-last-child(2n+2)')
second-li, fourth-li
>>> # Positional pseudo-classes applied to the ol elements.
>>> pcss('ol:first-of-type')
first-ol
>>> pcss('ol:nth-child(1)')
empty
>>> pcss('ol:nth-of-type(2)')
second-ol
>>> # FIXME: like above, (1) or (2)?
>>> pcss('ol:nth-last-of-type(1)')
first-ol
>>> # :only-child and :only-of-type.
>>> pcss('span:only-child')
foobar-span
>>> pcss('li div:only-child')
li-div
>>> pcss('div *:only-child')
foobar-span
>>> # :only-of-type on the universal selector is expected to raise.
>>> pcss('p *:only-of-type')
Traceback (most recent call last):
...
NotImplementedError: *:only-of-type is not implemented
>>> pcss('p:only-of-type')
paragraph
>>> # :empty and :contains().
>>> pcss('a:empty')
name-anchor
>>> pcss('li:empty')
third-li, fourth-li, fifth-li, sixth-li, seventh-li
>>> pcss('*:contains("link")')
nil, nil, outer-div, tag-anchor, nofollow-anchor
>>> pcss('*:contains("E")')
nil, nil, outer-div, first-ol, first-li, paragraph, p-em
>>> # Class and id selectors.
>>> pcss('.a', '.b', '*.a', 'ol.a')
first-ol
>>> pcss('.c', '*.c')
first-ol, third-li, fourth-li
>>> pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c')
third-li, fourth-li
>>> pcss('#first-li', 'li#first-li', '*#first-li')
first-li
>>> # Need some tests of :not()
>>> # Combinators: descendant (space), child (>), adjacent sibling (+) and
>>> # general sibling (~).
>>> pcss('li div', 'li > div', 'div div')
li-div
>>> pcss('div > div')
empty
>>> pcss('div + div')
foobar-div
>>> pcss('a ~ a')
tag-anchor, nofollow-anchor
>>> pcss('a[rel="tag"] ~ a')
nofollow-anchor
>>> pcss('ol#first-ol li:last-child', 'ol#first-ol *:last-child')
seventh-li