summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/lxml/cssselect.py 9
-rw-r--r-- src/lxml/doctestcompare.py 24
-rw-r--r-- src/lxml/html/__init__.py 5
-rw-r--r-- src/lxml/html/diff.py 9
-rw-r--r-- src/lxml/html/formfill.py 7
-rw-r--r-- src/lxml/html/tests/test_clean.txt 10
-rw-r--r-- src/lxml/html/tests/test_diff.txt 4
-rw-r--r-- src/lxml/html/tests/test_formfill.txt 16
-rw-r--r-- src/lxml/html/tests/test_forms.txt 18
-rw-r--r-- src/lxml/html/tests/test_rewritelinks.txt 30
-rw-r--r-- src/lxml/html/tests/test_xhtml.txt 10
-rw-r--r-- src/lxml/tests/test_css.txt 4
-rw-r--r-- src/lxml/tests/test_errors.py 6
-rw-r--r-- src/lxml/tests/test_htmlparser.py 6
14 files changed, 99 insertions, 59 deletions
diff --git a/src/lxml/cssselect.py b/src/lxml/cssselect.py
index 642244dd..5fe70033 100644
--- a/src/lxml/cssselect.py
+++ b/src/lxml/cssselect.py
@@ -901,6 +901,11 @@ class TokenStream(object):
self.source = source
self.peeked = None
self._peeking = False
+ try:
+ self.next_token = self.tokens.next
+ except AttributeError:
+ # Python 3
+ self.next_token = self.tokens.__next__
def next(self):
if self._peeking:
@@ -909,7 +914,7 @@ class TokenStream(object):
return self.peeked
else:
try:
- next = self.tokens.next()
+ next = self.next_token()
self.used.append(next)
return next
except StopIteration:
@@ -921,7 +926,7 @@ class TokenStream(object):
def peek(self):
if not self._peeking:
try:
- self.peeked = self.tokens.next()
+ self.peeked = self.next_token()
except StopIteration:
return None
self._peeking = True
diff --git a/src/lxml/doctestcompare.py b/src/lxml/doctestcompare.py
index 921a63f0..f988e43f 100644
--- a/src/lxml/doctestcompare.py
+++ b/src/lxml/doctestcompare.py
@@ -398,12 +398,18 @@ def temp_install(html=False, del_module=None):
# __record_outcome to be run, which signals the end of the __run
# method, at which point we restore the previous check_output
# implementation.
- check_func = frame.f_locals['check'].im_func
+ try:
+ check_func = frame.f_locals['check'].im_func
+ checker_check_func = checker.check_output.im_func
+ except AttributeError:
+ # Python 3
+ check_func = frame.f_locals['check'].__func__
+ checker_check_func = checker.check_output.__func__
# Because we can't patch up func_globals, this is the only global
# in check_output that we care about:
doctest.etree = etree
_RestoreChecker(dt_self, old_checker, checker,
- check_func, checker.check_output.im_func,
+ check_func, checker_check_func,
del_module)
class _RestoreChecker(object):
@@ -419,11 +425,17 @@ class _RestoreChecker(object):
self.install_clone()
self.install_dt_self()
def install_clone(self):
- self.func_code = self.check_func.func_code
- self.func_globals = self.check_func.func_globals
- self.check_func.func_code = self.clone_func.func_code
+ try:
+ self.func_code = self.check_func.func_code
+ self.func_globals = self.check_func.func_globals
+ self.check_func.func_code = self.clone_func.func_code
+ except AttributeError:
+ # Python 3
+ self.func_code = self.check_func.__code__
+ self.func_globals = self.check_func.__globals__
+ self.check_func.__code__ = self.clone_func.__code__
def uninstall_clone(self):
- self.check_func.func_code = self.func_code
+ self.check_func.__code__ = self.func_code
def install_dt_self(self):
self.prev_func = self.dt_self._DocTestRunner__record_outcome
self.dt_self._DocTestRunner__record_outcome = self
diff --git a/src/lxml/html/__init__.py b/src/lxml/html/__init__.py
index a2e8f965..65ff4c0d 100644
--- a/src/lxml/html/__init__.py
+++ b/src/lxml/html/__init__.py
@@ -19,6 +19,11 @@ try:
except NameError:
# Python 2.3
from sets import Set as set
+try:
+ basestring = __builtins__["basestring"]
+except (KeyError, NameError):
+ # Python 3
+ basestring = str
__all__ = [
'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring',
diff --git a/src/lxml/html/diff.py b/src/lxml/html/diff.py
index e0b333db..3b3ad8e7 100644
--- a/src/lxml/html/diff.py
+++ b/src/lxml/html/diff.py
@@ -11,6 +11,11 @@ try:
except NameError:
# Python 3
_unicode = str
+try:
+ basestring = __builtins__["basestring"]
+except (KeyError, NameError):
+ # Python 3
+ basestring = str
############################################################
## Annotation
@@ -748,9 +753,9 @@ def serialize_html_fragment(el, skip_outer=False):
html = etree.tostring(el, method="html", encoding="UTF-8")
if skip_outer:
# Get rid of the extra starting tag:
- html = html[html.find('>')+1:]
+ html = html[html.find('>'.encode("ASCII"))+1:]
# Get rid of the extra end tag:
- html = html[:html.rfind('<')]
+ html = html[:html.rfind('<'.encode("ASCII"))]
return html.strip()
else:
return html
diff --git a/src/lxml/html/formfill.py b/src/lxml/html/formfill.py
index ed4ccd7a..325df00a 100644
--- a/src/lxml/html/formfill.py
+++ b/src/lxml/html/formfill.py
@@ -2,6 +2,11 @@ from lxml.etree import XPath, ElementBase
from lxml.html import fromstring, tostring, XHTML_NAMESPACE
from lxml.html import _forms_xpath, _options_xpath, _nons
from lxml.html import defs
+try:
+ basestring = __builtins__["basestring"]
+except (KeyError, NameError):
+ # Python 3
+ basestring = str
__all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
'insert_errors', 'insert_errors_html',
@@ -235,7 +240,7 @@ def insert_errors(
error_creator=default_error_creator,
):
el = _find_form(el, form_id=form_id, form_index=form_index)
- for name, error in errors.iteritems():
+ for name, error in errors.items():
if error is None:
continue
for error_el, message in _find_elements_for_name(el, name, error):
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index 255c5a1f..aa2e900f 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -27,7 +27,7 @@
... </body>
... </html>'''
->>> print doc
+>>> print(doc)
<html>
<head>
<script type="text/javascript" src="evil-site"></script>
@@ -53,7 +53,7 @@
</body>
</html>
->>> print tostring(fromstring(doc))
+>>> print(tostring(fromstring(doc)))
<html>
<head>
<script type="text/javascript" src="evil-site"></script>
@@ -79,7 +79,7 @@
</body>
</html>
->>> print Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc)
+>>> print(Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc))
<html>
<head>
<style>/* deleted */</style>
@@ -96,8 +96,8 @@
</body>
</html>
->>> print Cleaner(style=True, links=True, add_nofollow=True,
-... page_structure=False, safe_attrs_only=False).clean_html(doc)
+>>> print(Cleaner(style=True, links=True, add_nofollow=True,
+... page_structure=False, safe_attrs_only=False).clean_html(doc))
<html>
<head>
</head>
diff --git a/src/lxml/html/tests/test_diff.txt b/src/lxml/html/tests/test_diff.txt
index 784fb307..0665b54b 100644
--- a/src/lxml/html/tests/test_diff.txt
+++ b/src/lxml/html/tests/test_diff.txt
@@ -6,7 +6,7 @@ First, a handy function for normalizing whitespace and doing word wrapping::
>>> def pwrapped(text):
... text = re.sub(r'[ \n\t\r]+', ' ', text)
... text = textwrap.fill(text)
- ... print text
+ ... print(text)
>>> def pdiff(text1, text2):
... pwrapped(htmldiff(text1, text2))
@@ -200,7 +200,7 @@ Some utility functions::
>>> from lxml.html.diff import fixup_ins_del_tags, split_unbalanced
>>> def pfixup(text):
- ... print fixup_ins_del_tags(text).strip()
+ ... print(fixup_ins_del_tags(text).strip())
>>> pfixup('<ins><p>some text <b>and more text</b> and more</p></ins>')
<p><ins>some text <b>and more text</b> and more</ins></p>
>>> pfixup('<p><ins>Hi!</ins> you</p>')
diff --git a/src/lxml/html/tests/test_formfill.txt b/src/lxml/html/tests/test_formfill.txt
index f06c4d65..84033537 100644
--- a/src/lxml/html/tests/test_formfill.txt
+++ b/src/lxml/html/tests/test_formfill.txt
@@ -5,17 +5,17 @@ Some basic imports:
The simplest kind of filling is just filling an input with a value:
- >>> print fill_form_html('''
- ... <form><input type="text" name="foo"></form>''', dict(foo='bar'))
+ >>> print(fill_form_html('''
+ ... <form><input type="text" name="foo"></form>''', dict(foo='bar')))
<form><input type="text" name="foo" value="bar"></form>
You can also fill multiple inputs, like:
- >>> print fill_form_html('''
+ >>> print(fill_form_html('''
... <form>
... <input type="text" name="foo">
... <input type="text" name="foo">
- ... </form>''', dict(foo=['bar1', 'bar2']))
+ ... </form>''', dict(foo=['bar1', 'bar2'])))
<form>
<input type="text" name="foo" value="bar1">
<input type="text" name="foo" value="bar2">
@@ -24,7 +24,7 @@ You can also fill multiple inputs, like:
Checkboxes can work either as boolean true/false, or be selected based
on their inclusion in a set of values::
- >>> print fill_form_html('''
+ >>> print(fill_form_html('''
... <form>
... Would you like to be spammed?
... <input type="checkbox" name="spam_me"> <br>
@@ -36,7 +36,7 @@ on their inclusion in a set of values::
... Other spam:
... <input type="checkbox" name="type" value="other"><br>
... <input type="submit" value="Spam!">
- ... </form>''', dict(spam_me=True, type=['viagra', 'other']))
+ ... </form>''', dict(spam_me=True, type=['viagra', 'other'])))
<form>
Would you like to be spammed?
<input type="checkbox" name="spam_me" checked> <br>
@@ -62,7 +62,7 @@ function you can provide (or the default function).
Example::
>>> from lxml.html.formfill import insert_errors_html
- >>> print insert_errors_html('''
+ >>> print(insert_errors_html('''
... <form>
... <fieldset id="fieldset">
... <input name="v1"><br>
@@ -80,7 +80,7 @@ Example::
... 'v4': "err4",
... None: 'general error',
... '#fieldset': 'area error',
- ... })
+ ... }))
<form>
<div class="error-message error-block">general error</div>
<fieldset id="fieldset" class="error">
diff --git a/src/lxml/html/tests/test_forms.txt b/src/lxml/html/tests/test_forms.txt
index 116f5af5..758c0db1 100644
--- a/src/lxml/html/tests/test_forms.txt
+++ b/src/lxml/html/tests/test_forms.txt
@@ -47,7 +47,7 @@ False
>>> hidden.value
'hidden_value'
>>> hidden.value = 'new value'
->>> print tostring(hidden)
+>>> print(tostring(hidden))
<input type="hidden" name="hidden_field" value="new value">
>>> checkbox = f.inputs['single_checkbox']
>>> checkbox.checkable
@@ -56,12 +56,12 @@ True
'checkbox'
>>> checkbox.checked
False
->>> print checkbox.value
+>>> print(checkbox.value)
None
>>> checkbox.checked = True
>>> checkbox.value
'on'
->>> print tostring(checkbox)
+>>> print(tostring(checkbox))
<input type="checkbox" name="single_checkbox" checked>
>>> checkbox2 = f.inputs['single_checkbox2']
>>> checkbox2.checked = True
@@ -73,7 +73,7 @@ None
>>> group.value.add('1')
>>> group.value # doctest:+NOPARSE_MARKUP
<CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'>
->>> print tostring(group[0])
+>>> print(tostring(group[0]))
<input type="checkbox" name="check_group" value="1" checked>
>>> group.value.add('doesnotexist')
Traceback (most recent call last):
@@ -90,15 +90,15 @@ KeyError: "No checkbox with value 'doesnotexist'"
>>> radios.value = 'value1'
>>> radios.value
'value1'
->>> print tostring(radios[0])
+>>> print(tostring(radios[0]))
<input type="radio" name="radios" value="value1" id="value1" checked>
>>> radios.value = None
->>> print tostring(radios[0])
+>>> print(tostring(radios[0]))
<input type="radio" name="radios" value="value1" id="value1">
>>> radios.value_options
['value1', 'value2', 'value3']
>>> select = f.inputs['select1']
->>> print select.value
+>>> print(select.value)
None
>>> select.value = ""
>>> select.value
@@ -121,13 +121,13 @@ ValueError: There is no option with the value 'asdf'
>>> select.value_options
['1', '2', '3']
>>> import urllib
->>> print urllib.urlencode(f.form_values())
+>>> print(urllib.urlencode(f.form_values()))
hidden_field=new+value&text_field=text_value&single_checkbox=on&single_checkbox2=good&check_group=1&check_group=2&check_group=3&textarea_field=some+text&select1=&select2=1&select2=2&select2=3
>>> fields = f.fields
>>> fields # doctest:+NOPARSE_MARKUP
<FieldsDict for form 0>
>>> for name, value in fields.items():
-... print '%s: %r' % (name, value)
+... print('%s: %r' % (name, value))
textarea_field: 'some text'
radios: None
submit2: 'submit'
diff --git a/src/lxml/html/tests/test_rewritelinks.txt b/src/lxml/html/tests/test_rewritelinks.txt
index ad146552..674fcb2e 100644
--- a/src/lxml/html/tests/test_rewritelinks.txt
+++ b/src/lxml/html/tests/test_rewritelinks.txt
@@ -16,30 +16,30 @@ Some basics::
>>> from lxml.html import usedoctest, tostring
>>> from lxml.html import rewrite_links
- >>> print rewrite_links(
- ... '<a href="http://old/blah/blah.html">link</a>', relocate_href)
+ >>> print(rewrite_links(
+ ... '<a href="http://old/blah/blah.html">link</a>', relocate_href))
<a href="https://new/blah/blah.html">link</a>
- >>> print rewrite_links(
- ... '<script src="http://old/foo.js"></script>', relocate_href)
+ >>> print(rewrite_links(
+ ... '<script src="http://old/foo.js"></script>', relocate_href))
<script src="https://new/foo.js"></script>
- >>> print rewrite_links(
- ... '<link href="foo.css">', relocate_href)
+ >>> print(rewrite_links(
+ ... '<link href="foo.css">', relocate_href))
<link href="https://new/base/foo.css">
- >>> print rewrite_links('''\
+ >>> print(rewrite_links('''\
... <base href="http://blah/stuff/index.html">
... <link href="foo.css">
... <a href="http://old/bar.html">x</a>\
- ... ''', relocate_href)
+ ... ''', relocate_href))
<link href="http://blah/stuff/foo.css">
<a href="https://new/bar.html">x</a>
Links in CSS are also handled::
- >>> print rewrite_links('''
+ >>> print(rewrite_links('''
... <style>
... body {background-image: url(http://old/image.gif)};
... @import "http://old/other-style.css";
- ... </style>''', relocate_href)
+ ... </style>''', relocate_href))
<html><head><style>
body {background-image: url(https://new/image.gif)};
@import "https://new/other-style.css";
@@ -47,20 +47,20 @@ Links in CSS are also handled::
Those links in style attributes are also rewritten::
- >>> print rewrite_links('''
+ >>> print(rewrite_links('''
... <div style="background-image: url(http://old/image.gif)">text</div>
- ... ''', relocate_href)
+ ... ''', relocate_href))
<div style="background-image: url(https://new/image.gif)">text</div>
The ``<base href>`` tag is also respected (but also removed)::
- >>> print rewrite_links('''
+ >>> print(rewrite_links('''
... <html><head>
... <base href="http://old/">
... </head>
... <body>
... <a href="foo.html">link</a>
- ... </body></html>''', relocate_href)
+ ... </body></html>''', relocate_href))
<html>
<head></head>
<body>
@@ -82,7 +82,7 @@ link)``, which is awkward to test here, so we'll make a printer::
... extra = '@%s' % pos
... else:
... extra = ''
- ... print '%s %s="%s"%s' % (element.tag, attrib, link, extra)
+ ... print('%s %s="%s"%s' % (element.tag, attrib, link, extra))
>>> print_iter(iterlinks('''
... <html>
... <head>
diff --git a/src/lxml/html/tests/test_xhtml.txt b/src/lxml/html/tests/test_xhtml.txt
index e13b7f4f..fd89585b 100644
--- a/src/lxml/html/tests/test_xhtml.txt
+++ b/src/lxml/html/tests/test_xhtml.txt
@@ -6,11 +6,11 @@ lxml.html has two parsers, one for HTML, one for XHTML:
>>> html = "<html><body><p>Hi!</p></body></html>"
>>> root = document_fromstring(html, parser=HTMLParser())
- >>> print root.tag
+ >>> print(root.tag)
html
>>> root = document_fromstring(html, parser=XHTMLParser())
- >>> print root.tag
+ >>> print(root.tag)
html
There are two functions for converting between HTML and XHTML:
@@ -18,13 +18,13 @@ There are two functions for converting between HTML and XHTML:
>>> from lxml.html import xhtml_to_html, html_to_xhtml
>>> doc = document_fromstring(html, parser=HTMLParser())
- >>> print tostring(doc)
+ >>> print(tostring(doc))
<html><body><p>Hi!</p></body></html>
>>> html_to_xhtml(doc)
- >>> print tostring(doc)
+ >>> print(tostring(doc))
<html:html xmlns:html="http://www.w3.org/1999/xhtml"><html:body><html:p>Hi!</html:p></html:body></html:html>
>>> xhtml_to_html(doc)
- >>> print tostring(doc)
+ >>> print(tostring(doc))
<html xmlns:html="http://www.w3.org/1999/xhtml"><body><p>Hi!</p></body></html>
diff --git a/src/lxml/tests/test_css.txt b/src/lxml/tests/test_css.txt
index 9da84ed6..fbc8e7b0 100644
--- a/src/lxml/tests/test_css.txt
+++ b/src/lxml/tests/test_css.txt
@@ -3,7 +3,7 @@ A quick test of tokenizing:
>>> from lxml.cssselect import tokenize, parse
>>> def ptok(s):
... for item in tokenize(s):
- ... print repr(item)
+ ... print(repr(item))
>>> ptok('E > f[a~="y\\"x"]')
Symbol(u'E', 0)
Token(u'>', 2)
@@ -48,7 +48,7 @@ Then of parsing:
Now of translation:
>>> def xpath(css):
- ... print parse(css).xpath()
+ ... print(parse(css).xpath())
>>> xpath('*')
*
>>> xpath('E')
diff --git a/src/lxml/tests/test_errors.py b/src/lxml/tests/test_errors.py
index 014f5ddd..cff9e2a6 100644
--- a/src/lxml/tests/test_errors.py
+++ b/src/lxml/tests/test_errors.py
@@ -6,9 +6,13 @@ import unittest, doctest
# It is likely that if there are errors, instead of failing the code
# will simply crash.
-import sys, gc
+import sys, gc, os.path
from lxml import etree
+this_dir = os.path.dirname(__file__)
+if this_dir not in sys.path:
+ sys.path.insert(0, this_dir) # needed for Py3
+
from common_imports import HelperTestCase
class ErrorTestCase(HelperTestCase):
diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py
index 932bb52f..ccb1c7f2 100644
--- a/src/lxml/tests/test_htmlparser.py
+++ b/src/lxml/tests/test_htmlparser.py
@@ -5,7 +5,11 @@ HTML parser test cases for etree
"""
import unittest
-import tempfile, os
+import tempfile, os, os.path, sys
+
+this_dir = os.path.dirname(__file__)
+if this_dir not in sys.path:
+ sys.path.insert(0, this_dir) # needed for Py3
from common_imports import etree, StringIO, BytesIO, fileInTestDir, _bytes, _str
from common_imports import SillyFileLike, HelperTestCase