# -*- coding: utf-8 -*-
"""
    sphinx.linkcheck
    ~~~~~~~~~~~~~~~~

    The CheckExternalLinksBuilder class.

    :copyright: 2008 by Georg Brandl, Thomas Lamb.
    :license: BSD.
"""

import socket
from os import path
from urllib2 import build_opener, HTTPError

from docutils import nodes

from sphinx.builder import Builder
from sphinx.util.console import purple, red, darkgreen

# create an opener that will simulate a browser user-agent
opener = build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]


class CheckExternalLinksBuilder(Builder):
    """
    Checks for broken external links.

    Every ``http:``/``https:`` reference found in the doctrees is opened
    once; the outcome is cached per URI in ``self.good``, ``self.broken``
    or ``self.redirected`` and problems are appended to ``output.txt`` in
    the output directory.
    """
    name = 'linkcheck'

    def init(self):
        """Reset the per-URI result caches and create an empty output.txt."""
        self.good = set()
        self.broken = {}
        self.redirected = {}
        # set a timeout for non-responding servers
        # (note: this is the process-wide default for new sockets)
        socket.setdefaulttimeout(5.0)
        # create output file (truncating any previous content)
        open(path.join(self.outdir, 'output.txt'), 'w').close()

    def get_target_uri(self, docname, typ=None):
        """Return an empty target URI for every document."""
        return ''

    def get_outdated_docs(self):
        """Treat every found document as outdated, so all get checked."""
        return self.env.found_docs

    def prepare_writing(self, docnames):
        """Nothing to prepare for this builder."""
        return

    def write_doc(self, docname, doctree):
        """Check every reference node contained in *doctree*."""
        self.info()
        for node in doctree.traverse(nodes.reference):
            try:
                self.check(node, docname)
            except KeyError:
                # reference without a 'refuri' attribute: nothing to check
                continue

    def _find_lineno(self, node):
        """Return the line number of the nearest ancestor of *node* that
        has one, or None if the root is reached without finding any.
        """
        lineno = None
        while lineno is None:
            node = node.parent
            if node is None:
                # walked past the root; no ancestor carries a line number
                break
            lineno = node.line
        return lineno

    def check(self, node, docname):
        """Check the URI of one reference node and report the result.

        The fragment part of the URI is ignored.  URIs already known to
        work are skipped; known broken or redirected ones are reported
        again from the cache without a new request.  Empty, ``mailto:``
        and ``ftp:`` URIs are ignored, anything else that is not HTTP(S)
        is reported as malformed.

        Raises KeyError if *node* has no ``refuri`` attribute.
        """
        uri = node['refuri']

        # the fragment plays no role in whether the target can be fetched
        if '#' in uri:
            uri = uri.split('#')[0]

        if uri in self.good:
            return

        lineno = self._find_lineno(node)

        if uri.startswith(('http:', 'https:')):
            self.info(uri, nonl=1)

            # reuse an earlier result for this URI when there is one
            if uri in self.broken:
                (r, s) = self.broken[uri]
            elif uri in self.redirected:
                (r, s) = self.redirected[uri]
            else:
                (r, s) = self.resolve(uri)

            # r is the status code documented in resolve()
            if r == 0:
                self.info(' - ' + darkgreen('working'))
                self.good.add(uri)
            elif r == 2:
                self.info(' - ' + red('broken: ') + s)
                self.write_entry('broken', docname, lineno, uri + ': ' + s)
                self.broken[uri] = (r, s)
                if self.app.quiet:
                    self.warn('%s:%s: broken link: %s' %
                              (docname, lineno, uri))
            else:
                self.info(' - ' + purple('redirected') + ' to ' + s)
                self.write_entry('redirected', docname, lineno,
                                 uri + ' to ' + s)
                self.redirected[uri] = (r, s)
        elif not uri or uri.startswith(('mailto:', 'ftp:')):
            return
        else:
            self.warn(uri + ' - ' + red('malformed!'))
            self.write_entry('malformed', docname, lineno, uri)
            if self.app.quiet:
                self.warn('%s:%s: malformed link: %s' %
                          (docname, lineno, uri))
            self.app.statuscode = 1

        # any broken link recorded so far makes the whole run fail
        if self.broken:
            self.app.statuscode = 1

    def write_entry(self, what, docname, line, uri):
        """Append one ``path:line: [what] uri`` record to output.txt."""
        # 'with' guarantees the file is closed even if the write fails
        with open(path.join(self.outdir, 'output.txt'), 'a') as output:
            output.write("%s:%s: [%s] %s\n" %
                         (self.env.doc2path(docname, None), line, what, uri))

    def resolve(self, uri):
        """Open *uri* and classify the result.

        Returns a ``(status, info)`` tuple:

        * ``(0, 0)`` -- working; the final URL equals *uri* (a trailing
          slash is not significant)
        * ``(1, url)`` -- redirected; *url* is the final URL
        * ``(2, message)`` -- broken; *message* is the error text
        """
        try:
            f = opener.open(uri)
            f.close()
        except Exception as err:
            # HTTPError is an Exception subclass, so this single handler
            # covers HTTP error statuses as well as any other failure
            return (2, str(err))
        if f.url.rstrip('/') == uri.rstrip('/'):
            return (0, 0)
        else:
            return (1, f.url)

    def finish(self):
        """Nothing to finalize for this builder."""
        return