When bad HTML is encountered, ignore the page rather than failing with a traceback.
author: Mark Hammond <mhammond@skippinet.com.au> 2003-02-27 06:59:10 +0000
committer: Mark Hammond <mhammond@skippinet.com.au> 2003-02-27 06:59:10 +0000
commit: ce56c377a0f548cdac3ab9c66117df654f934484 (patch)
tree: b5bd56edb43d9b59db7203b80b13c42d22337b3a /Tools/webchecker/webchecker.py
parent: 05595e9d73b2c05fcd9492cf8f5d126282b82053 (diff)
download: cpython-git-ce56c377a0f548cdac3ab9c66117df654f934484.tar.gz
1 files changed, 9 insertions, 1 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e8d0ed746f..e89529e5cf 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -400,7 +400,15 @@ class Checker:
         if local_fragment and self.nonames:
             self.markdone(url_pair)
             return
-        page = self.getpage(url_pair)
+        try:
+            page = self.getpage(url_pair)
+        except sgmllib.SGMLParseError, msg:
+            msg = self.sanitize(msg)
+            self.note(0, "Error parsing %s: %s",
+                          self.format_url(url_pair), msg)
+            # Dont actually mark the URL as bad - it exists, just
+            # we can't parse it!
+            page = None
         if page:
             # Store the page which corresponds to this URL.
             self.name_table[url] = page
author	Mark Hammond <mhammond@skippinet.com.au>	2003-02-27 06:59:10 +0000
committer	Mark Hammond <mhammond@skippinet.com.au>	2003-02-27 06:59:10 +0000
commit	ce56c377a0f548cdac3ab9c66117df654f934484 (patch)
tree	b5bd56edb43d9b59db7203b80b13c42d22337b3a /Tools/webchecker/webchecker.py
parent	05595e9d73b2c05fcd9492cf8f5d126282b82053 (diff)
download	cpython-git-ce56c377a0f548cdac3ab9c66117df654f934484.tar.gz