This is a test of CSS selectors. We set up a document we'll use for all our selections, and a function to make querying simpler: >>> from lxml.cssselect import CSSSelector >>> from lxml.etree import HTML >>> doc = HTML(''' ... ...
... ... ... link ...
    ...
  1. content
  2. ...
  3. ...
    ...
    ...
  4. ...
  5. ...
  6. ...
  7. ...
  8. ...
  9. ...
...

... hi there ... guy

...
    ...
...
...
... ''') >>> order = {} >>> for count, el in enumerate(doc.getiterator()): ... order[el] = count >>> def select_ids(selector): ... items = CSSSelector(selector)(doc) ... if not items: ... return 'empty' ... items = CSSSelector(selector)(doc) ... items.sort(key=lambda el: order[el]) ... return ', '.join([el.get('id', 'nil') for el in items]) >>> def pcss(main, *selectors): ... result = select_ids(main) ... for selector in selectors: ... sel_result = select_ids(selector) ... if sel_result != result: ... print('Selector %r returns %s' % (selector, sel_result)) ... print(result) Now, the tests: >>> pcss('*') # doctest: +ELLIPSIS nil, nil, nil, outer-div, ... foobar-span >>> pcss('div') outer-div, li-div, foobar-div >>> pcss('a[name]') name-anchor >>> pcss('a[rel]') tag-anchor, nofollow-anchor >>> pcss('a[rel="tag"]') tag-anchor >>> pcss('a[href*="localhost"]') tag-anchor >>> pcss('a[href^="http"]') tag-anchor, nofollow-anchor >>> pcss('a[href^="http:"]') tag-anchor >>> pcss('a[href$="org"]') nofollow-anchor >>> pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') foobar-div >>> pcss('div[foobar~="cd"]') empty >>> pcss('*[lang|="en"]', '*[lang|="en-US"]') second-li >>> pcss('*[lang|="e"]') empty >>> pcss('li:nth-child(3)') third-li >>> pcss('li:nth-child(10)') empty >>> pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)') second-li, fourth-li, sixth-li >>> pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') first-li, third-li, fifth-li, seventh-li >>> pcss('li:nth-child(2n+4)') fourth-li, sixth-li >>> # FIXME: I'm not 100% sure this is right: >>> pcss('li:nth-child(3n+1)') first-li, fourth-li, seventh-li >>> # FIXME: I'm not sure if nth-last-child(0) or nth-last-child(1) >>> # should be equivalent to nth-last-child() >>> pcss('li:nth-last-child()', 'li:nth-last-child(0)') seventh-li >>> pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)') second-li, fourth-li, sixth-li >>> pcss('li:nth-last-child(2n+2)') second-li, fourth-li >>> pcss('ol:first-of-type') 
first-ol >>> pcss('ol:nth-child(1)') empty >>> pcss('ol:nth-of-type(2)') second-ol >>> # FIXME: like above, (1) or (2)? >>> pcss('ol:nth-last-of-type(1)') first-ol >>> pcss('span:only-child') foobar-span >>> pcss('li div:only-child') li-div >>> pcss('div *:only-child') foobar-span >>> pcss('p *:only-of-type') Traceback (most recent call last): ... NotImplementedError: *:only-of-type is not implemented >>> pcss('p:only-of-type') paragraph >>> pcss('a:empty') name-anchor >>> pcss('li:empty') third-li, fourth-li, fifth-li, sixth-li, seventh-li >>> pcss('*:contains("link")') nil, nil, outer-div, tag-anchor, nofollow-anchor >>> pcss('*:contains("E")') nil, nil, outer-div, first-ol, first-li, paragraph, p-em >>> pcss('.a', '.b', '*.a', 'ol.a') first-ol >>> pcss('.c', '*.c') first-ol, third-li, fourth-li >>> pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') third-li, fourth-li >>> pcss('#first-li', 'li#first-li', '*#first-li') first-li >>> # Need some tests of :not() >>> pcss('li div', 'li > div', 'div div') li-div >>> pcss('div > div') empty >>> pcss('div + div') foobar-div >>> pcss('a ~ a') tag-anchor, nofollow-anchor >>> pcss('a[rel="tag"] ~ a') nofollow-anchor >>> pcss('ol#first-ol li:last-child', 'ol#first-ol *:last-child') seventh-li