diff options
| -rw-r--r-- | src/lxml/cssselect.py | 9 | ||||
| -rw-r--r-- | src/lxml/doctestcompare.py | 24 | ||||
| -rw-r--r-- | src/lxml/html/__init__.py | 5 | ||||
| -rw-r--r-- | src/lxml/html/diff.py | 9 | ||||
| -rw-r--r-- | src/lxml/html/formfill.py | 7 | ||||
| -rw-r--r-- | src/lxml/html/tests/test_clean.txt | 10 | ||||
| -rw-r--r-- | src/lxml/html/tests/test_diff.txt | 4 | ||||
| -rw-r--r-- | src/lxml/html/tests/test_formfill.txt | 16 | ||||
| -rw-r--r-- | src/lxml/html/tests/test_forms.txt | 18 | ||||
| -rw-r--r-- | src/lxml/html/tests/test_rewritelinks.txt | 30 | ||||
| -rw-r--r-- | src/lxml/html/tests/test_xhtml.txt | 10 | ||||
| -rw-r--r-- | src/lxml/tests/test_css.txt | 4 | ||||
| -rw-r--r-- | src/lxml/tests/test_errors.py | 6 | ||||
| -rw-r--r-- | src/lxml/tests/test_htmlparser.py | 6 |
14 files changed, 99 insertions, 59 deletions
diff --git a/src/lxml/cssselect.py b/src/lxml/cssselect.py index 642244dd..5fe70033 100644 --- a/src/lxml/cssselect.py +++ b/src/lxml/cssselect.py @@ -901,6 +901,11 @@ class TokenStream(object): self.source = source self.peeked = None self._peeking = False + try: + self.next_token = self.tokens.next + except AttributeError: + # Python 3 + self.next_token = self.tokens.__next__ def next(self): if self._peeking: @@ -909,7 +914,7 @@ class TokenStream(object): return self.peeked else: try: - next = self.tokens.next() + next = self.next_token() self.used.append(next) return next except StopIteration: @@ -921,7 +926,7 @@ class TokenStream(object): def peek(self): if not self._peeking: try: - self.peeked = self.tokens.next() + self.peeked = self.next_token() except StopIteration: return None self._peeking = True diff --git a/src/lxml/doctestcompare.py b/src/lxml/doctestcompare.py index 921a63f0..f988e43f 100644 --- a/src/lxml/doctestcompare.py +++ b/src/lxml/doctestcompare.py @@ -398,12 +398,18 @@ def temp_install(html=False, del_module=None): # __record_outcome to be run, which signals the end of the __run # method, at which point we restore the previous check_output # implementation. - check_func = frame.f_locals['check'].im_func + try: + check_func = frame.f_locals['check'].im_func + checker_check_func = checker.check_output.im_func + except AttributeError: + # Python 3 + check_func = frame.f_locals['check'].__func__ + checker_check_func = checker.check_output.__func__ # Because we can't patch up func_globals, this is the only global # in check_output that we care about: doctest.etree = etree _RestoreChecker(dt_self, old_checker, checker, - check_func, checker.check_output.im_func, + check_func, checker_check_func, del_module) class _RestoreChecker(object): @@ -419,11 +425,17 @@ class _RestoreChecker(object): self.install_clone() self.install_dt_self() def install_clone(self): - self.func_code = self.check_func.func_code - self.func_globals = self.check_func.func_globals - self.check_func.func_code = self.clone_func.func_code + try: + self.func_code = self.check_func.func_code + self.func_globals = self.check_func.func_globals + self.check_func.func_code = self.clone_func.func_code + except AttributeError: + # Python 3 + self.func_code = self.check_func.__code__ + self.func_globals = self.check_func.__globals__ + self.check_func.__code__ = self.clone_func.__code__ def uninstall_clone(self): - self.check_func.func_code = self.func_code + self.check_func.__code__ = self.func_code def install_dt_self(self): self.prev_func = self.dt_self._DocTestRunner__record_outcome self.dt_self._DocTestRunner__record_outcome = self diff --git a/src/lxml/html/__init__.py b/src/lxml/html/__init__.py index a2e8f965..65ff4c0d 100644 --- a/src/lxml/html/__init__.py +++ b/src/lxml/html/__init__.py @@ -19,6 +19,11 @@ try: except NameError: # Python 2.3 from sets import Set as set +try: + basestring = __builtins__["basestring"] +except (KeyError, NameError): + # Python 3 + basestring = str __all__ = [ 'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring', diff --git a/src/lxml/html/diff.py b/src/lxml/html/diff.py index e0b333db..3b3ad8e7 100644 --- a/src/lxml/html/diff.py +++ b/src/lxml/html/diff.py @@ -11,6 +11,11 @@ try: except NameError: # Python 3 _unicode = str +try: + basestring = __builtins__["basestring"] +except (KeyError, NameError): + # Python 3 + basestring = str ############################################################ ## Annotation @@ -748,9 +753,9 @@ def serialize_html_fragment(el, skip_outer=False): html = etree.tostring(el, method="html", encoding="UTF-8") if skip_outer: # Get rid of the extra starting tag: - html = html[html.find('>')+1:] + html = html[html.find('>'.encode("ASCII"))+1:] # Get rid of the extra end tag: - html = html[:html.rfind('<')] + html = html[:html.rfind('<'.encode("ASCII"))] return html.strip() else: return html diff --git a/src/lxml/html/formfill.py b/src/lxml/html/formfill.py index ed4ccd7a..325df00a 100644 --- a/src/lxml/html/formfill.py +++ b/src/lxml/html/formfill.py @@ -2,6 +2,11 @@ from lxml.etree import XPath, ElementBase from lxml.html import fromstring, tostring, XHTML_NAMESPACE from lxml.html import _forms_xpath, _options_xpath, _nons from lxml.html import defs +try: + basestring = __builtins__["basestring"] +except (KeyError, NameError): + # Python 3 + basestring = str __all__ = ['FormNotFound', 'fill_form', 'fill_form_html', 'insert_errors', 'insert_errors_html', @@ -235,7 +240,7 @@ def insert_errors( error_creator=default_error_creator, ): el = _find_form(el, form_id=form_id, form_index=form_index) - for name, error in errors.iteritems(): + for name, error in errors.items(): if error is None: continue for error_el, message in _find_elements_for_name(el, name, error): diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt index 255c5a1f..aa2e900f 100644 --- a/src/lxml/html/tests/test_clean.txt +++ b/src/lxml/html/tests/test_clean.txt @@ -27,7 +27,7 @@ ... </body> ... </html>''' ->>> print doc +>>> print(doc) <html> <head> <script type="text/javascript" src="evil-site"></script> @@ -53,7 +53,7 @@ </body> </html> ->>> print tostring(fromstring(doc)) +>>> print(tostring(fromstring(doc))) <html> <head> <script type="text/javascript" src="evil-site"></script> @@ -79,7 +79,7 @@ </body> </html> ->>> print Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc) +>>> print(Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc)) <html> <head> <style>/* deleted */</style> @@ -96,8 +96,8 @@ </body> </html> ->>> print Cleaner(style=True, links=True, add_nofollow=True, -... page_structure=False, safe_attrs_only=False).clean_html(doc) +>>> print(Cleaner(style=True, links=True, add_nofollow=True, +... page_structure=False, safe_attrs_only=False).clean_html(doc)) <html> <head> </head> diff --git a/src/lxml/html/tests/test_diff.txt b/src/lxml/html/tests/test_diff.txt index 784fb307..0665b54b 100644 --- a/src/lxml/html/tests/test_diff.txt +++ b/src/lxml/html/tests/test_diff.txt @@ -6,7 +6,7 @@ First, a handy function for normalizing whitespace and doing word wrapping:: >>> def pwrapped(text): ... text = re.sub(r'[ \n\t\r]+', ' ', text) ... text = textwrap.fill(text) - ... print text + ... print(text) >>> def pdiff(text1, text2): ... pwrapped(htmldiff(text1, text2)) @@ -200,7 +200,7 @@ Some utility functions:: >>> from lxml.html.diff import fixup_ins_del_tags, split_unbalanced >>> def pfixup(text): - ... print fixup_ins_del_tags(text).strip() + ... print(fixup_ins_del_tags(text).strip()) >>> pfixup('<ins><p>some text <b>and more text</b> and more</p></ins>') <p><ins>some text <b>and more text</b> and more</ins></p> >>> pfixup('<p><ins>Hi!</ins> you</p>') diff --git a/src/lxml/html/tests/test_formfill.txt b/src/lxml/html/tests/test_formfill.txt index f06c4d65..84033537 100644 --- a/src/lxml/html/tests/test_formfill.txt +++ b/src/lxml/html/tests/test_formfill.txt @@ -5,17 +5,17 @@ Some basic imports: The simplest kind of filling is just filling an input with a value: - >>> print fill_form_html(''' - ... <form><input type="text" name="foo"></form>''', dict(foo='bar')) + >>> print(fill_form_html(''' + ... <form><input type="text" name="foo"></form>''', dict(foo='bar'))) <form><input type="text" name="foo" value="bar"></form> You can also fill multiple inputs, like: - >>> print fill_form_html(''' + >>> print(fill_form_html(''' ... <form> ... <input type="text" name="foo"> ... <input type="text" name="foo"> - ... </form>''', dict(foo=['bar1', 'bar2'])) + ... </form>''', dict(foo=['bar1', 'bar2']))) <form> <input type="text" name="foo" value="bar1"> <input type="text" name="foo" value="bar2"> @@ -24,7 +24,7 @@ You can also fill multiple inputs, like: Checkboxes can work either as boolean true/false, or be selected based on their inclusion in a set of values:: - >>> print fill_form_html(''' + >>> print(fill_form_html(''' ... <form> ... Would you like to be spammed? ... <input type="checkbox" name="spam_me"> <br> @@ -36,7 +36,7 @@ on their inclusion in a set of values:: ... Other spam: ... <input type="checkbox" name="type" value="other"><br> ... <input type="submit" value="Spam!"> - ... </form>''', dict(spam_me=True, type=['viagra', 'other'])) + ... </form>''', dict(spam_me=True, type=['viagra', 'other']))) <form> Would you like to be spammed? <input type="checkbox" name="spam_me" checked> <br> @@ -62,7 +62,7 @@ function you can provide (or the default function). Example:: >>> from lxml.html.formfill import insert_errors_html - >>> print insert_errors_html(''' + >>> print(insert_errors_html(''' ... <form> ... <fieldset id="fieldset"> ... <input name="v1"><br> @@ -80,7 +80,7 @@ Example:: ... 'v4': "err4", ... None: 'general error', ... '#fieldset': 'area error', - ... }) + ... })) <form> <div class="error-message error-block">general error</div> <fieldset id="fieldset" class="error"> diff --git a/src/lxml/html/tests/test_forms.txt b/src/lxml/html/tests/test_forms.txt index 116f5af5..758c0db1 100644 --- a/src/lxml/html/tests/test_forms.txt +++ b/src/lxml/html/tests/test_forms.txt @@ -47,7 +47,7 @@ False >>> hidden.value 'hidden_value' >>> hidden.value = 'new value' ->>> print tostring(hidden) +>>> print(tostring(hidden)) <input type="hidden" name="hidden_field" value="new value"> >>> checkbox = f.inputs['single_checkbox'] >>> checkbox.checkable @@ -56,12 +56,12 @@ True 'checkbox' >>> checkbox.checked False ->>> print checkbox.value +>>> print(checkbox.value) None >>> checkbox.checked = True >>> checkbox.value 'on' ->>> print tostring(checkbox) +>>> print(tostring(checkbox)) <input type="checkbox" name="single_checkbox" checked> >>> checkbox2 = f.inputs['single_checkbox2'] >>> checkbox2.checked = True @@ -73,7 +73,7 @@ None >>> group.value.add('1') >>> group.value # doctest:+NOPARSE_MARKUP <CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'> ->>> print tostring(group[0]) +>>> print(tostring(group[0])) <input type="checkbox" name="check_group" value="1" checked> >>> group.value.add('doesnotexist') Traceback (most recent call last): @@ -90,15 +90,15 @@ KeyError: "No checkbox with value 'doesnotexist'" >>> radios.value = 'value1' >>> radios.value 'value1' ->>> print tostring(radios[0]) +>>> print(tostring(radios[0])) <input type="radio" name="radios" value="value1" id="value1" checked> >>> radios.value = None ->>> print tostring(radios[0]) +>>> print(tostring(radios[0])) <input type="radio" name="radios" value="value1" id="value1"> >>> radios.value_options ['value1', 'value2', 'value3'] >>> select = f.inputs['select1'] ->>> print select.value +>>> print(select.value) None >>> select.value = "" >>> select.value @@ -121,13 +121,13 @@ ValueError: There is no option with the value 'asdf' >>> select.value_options ['1', '2', '3'] >>> import urllib ->>> print urllib.urlencode(f.form_values()) +>>> print(urllib.urlencode(f.form_values())) hidden_field=new+value&text_field=text_value&single_checkbox=on&single_checkbox2=good&check_group=1&check_group=2&check_group=3&textarea_field=some+text&select1=&select2=1&select2=2&select2=3 >>> fields = f.fields >>> fields # doctest:+NOPARSE_MARKUP <FieldsDict for form 0> >>> for name, value in fields.items(): -... print '%s: %r' % (name, value) +... print('%s: %r' % (name, value)) textarea_field: 'some text' radios: None submit2: 'submit' diff --git a/src/lxml/html/tests/test_rewritelinks.txt b/src/lxml/html/tests/test_rewritelinks.txt index ad146552..674fcb2e 100644 --- a/src/lxml/html/tests/test_rewritelinks.txt +++ b/src/lxml/html/tests/test_rewritelinks.txt @@ -16,30 +16,30 @@ Some basics:: >>> from lxml.html import usedoctest, tostring >>> from lxml.html import rewrite_links - >>> print rewrite_links( - ... '<a href="http://old/blah/blah.html">link</a>', relocate_href) + >>> print(rewrite_links( + ... '<a href="http://old/blah/blah.html">link</a>', relocate_href)) <a href="https://new/blah/blah.html">link</a> - >>> print rewrite_links( - ... '<script src="http://old/foo.js"></script>', relocate_href) + >>> print(rewrite_links( + ... '<script src="http://old/foo.js"></script>', relocate_href)) <script src="https://new/foo.js"></script> - >>> print rewrite_links( - ... '<link href="foo.css">', relocate_href) + >>> print(rewrite_links( + ... '<link href="foo.css">', relocate_href)) <link href="https://new/base/foo.css"> - >>> print rewrite_links('''\ + >>> print(rewrite_links('''\ ... <base href="http://blah/stuff/index.html"> ... <link href="foo.css"> ... <a href="http://old/bar.html">x</a>\ - ... ''', relocate_href) + ... ''', relocate_href)) <link href="http://blah/stuff/foo.css"> <a href="https://new/bar.html">x</a> Links in CSS are also handled:: - >>> print rewrite_links(''' + >>> print(rewrite_links(''' ... <style> ... body {background-image: url(http://old/image.gif)}; ... @import "http://old/other-style.css"; - ... </style>''', relocate_href) + ... </style>''', relocate_href)) <html><head><style> body {background-image: url(https://new/image.gif)}; @import "https://new/other-style.css"; @@ -47,20 +47,20 @@ Links in CSS are also handled:: Those links in style attributes are also rewritten:: - >>> print rewrite_links(''' + >>> print(rewrite_links(''' ... <div style="background-image: url(http://old/image.gif)">text</div> - ... ''', relocate_href) + ... ''', relocate_href)) <div style="background-image: url(https://new/image.gif)">text</div> The ``<base href>`` tag is also respected (but also removed):: - >>> print rewrite_links(''' + >>> print(rewrite_links(''' ... <html><head> ... <base href="http://old/"> ... </head> ... <body> ... <a href="foo.html">link</a> - ... </body></html>''', relocate_href) + ... </body></html>''', relocate_href)) <html> <head></head> <body> @@ -82,7 +82,7 @@ link)``, which is awkward to test here, so we'll make a printer:: ... extra = '@%s' % pos ... else: ... extra = '' - ... print '%s %s="%s"%s' % (element.tag, attrib, link, extra) + ... print('%s %s="%s"%s' % (element.tag, attrib, link, extra)) >>> print_iter(iterlinks(''' ... <html> ... <head> diff --git a/src/lxml/html/tests/test_xhtml.txt b/src/lxml/html/tests/test_xhtml.txt index e13b7f4f..fd89585b 100644 --- a/src/lxml/html/tests/test_xhtml.txt +++ b/src/lxml/html/tests/test_xhtml.txt @@ -6,11 +6,11 @@ lxml.html has two parsers, one for HTML, one for XHTML: >>> html = "<html><body><p>Hi!</p></body></html>" >>> root = document_fromstring(html, parser=HTMLParser()) - >>> print root.tag + >>> print(root.tag) html >>> root = document_fromstring(html, parser=XHTMLParser()) - >>> print root.tag + >>> print(root.tag) html There are two functions for converting between HTML and XHTML: @@ -18,13 +18,13 @@ There are two functions for converting between HTML and XHTML: >>> from lxml.html import xhtml_to_html, html_to_xhtml >>> doc = document_fromstring(html, parser=HTMLParser()) - >>> print tostring(doc) + >>> print(tostring(doc)) <html><body><p>Hi!</p></body></html> >>> html_to_xhtml(doc) - >>> print tostring(doc) + >>> print(tostring(doc)) <html:html xmlns:html="http://www.w3.org/1999/xhtml"><html:body><html:p>Hi!</html:p></html:body></html:html> >>> xhtml_to_html(doc) - >>> print tostring(doc) + >>> print(tostring(doc)) <html xmlns:html="http://www.w3.org/1999/xhtml"><body><p>Hi!</p></body></html> diff --git a/src/lxml/tests/test_css.txt b/src/lxml/tests/test_css.txt index 9da84ed6..fbc8e7b0 100644 --- a/src/lxml/tests/test_css.txt +++ b/src/lxml/tests/test_css.txt @@ -3,7 +3,7 @@ A quick test of tokenizing: >>> from lxml.cssselect import tokenize, parse >>> def ptok(s): ... for item in tokenize(s): - ... print repr(item) + ... print(repr(item)) >>> ptok('E > f[a~="y\\"x"]') Symbol(u'E', 0) Token(u'>', 2) @@ -48,7 +48,7 @@ Then of parsing: Now of translation: >>> def xpath(css): - ... print parse(css).xpath() + ... print(parse(css).xpath()) >>> xpath('*') * >>> xpath('E') diff --git a/src/lxml/tests/test_errors.py b/src/lxml/tests/test_errors.py index 014f5ddd..cff9e2a6 100644 --- a/src/lxml/tests/test_errors.py +++ b/src/lxml/tests/test_errors.py @@ -6,9 +6,13 @@ import unittest, doctest # It is likely that if there are errors, instead of failing the code # will simply crash. -import sys, gc +import sys, gc, os.path from lxml import etree +this_dir = os.path.dirname(__file__) +if this_dir not in sys.path: + sys.path.insert(0, this_dir) # needed for Py3 + from common_imports import HelperTestCase class ErrorTestCase(HelperTestCase): diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py index 932bb52f..ccb1c7f2 100644 --- a/src/lxml/tests/test_htmlparser.py +++ b/src/lxml/tests/test_htmlparser.py @@ -5,7 +5,11 @@ HTML parser test cases for etree """ import unittest -import tempfile, os +import tempfile, os, os.path, sys + +this_dir = os.path.dirname(__file__) +if this_dir not in sys.path: + sys.path.insert(0, this_dir) # needed for Py3 from common_imports import etree, StringIO, BytesIO, fileInTestDir, _bytes, _str from common_imports import SillyFileLike, HelperTestCase |
