# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php ############################################################################## # # Copyright (c) 2001, 2002 Zope Corporation and Contributors. # All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE. # ############################################################################## ## Originally zExceptions.ExceptionFormatter from Zope; ## Modified by Ian Bicking, Imaginary Landscape, 2005 """ An exception collector that finds traceback information plus supplements """ import sys import traceback import time from six.moves import cStringIO as StringIO import linecache from paste.exceptions import serial_number_generator import warnings DEBUG_EXCEPTION_FORMATTER = True DEBUG_IDENT_PREFIX = 'E-' FALLBACK_ENCODING = 'UTF-8' __all__ = ['collect_exception', 'ExceptionCollector'] class ExceptionCollector(object): """ Produces a data structure that can be used by formatters to display exception reports. Magic variables: If you define one of these variables in your local scope, you can add information to tracebacks that happen in that context. This allows applications to add all sorts of extra information about the context of the error, including URLs, environmental variables, users, hostnames, etc. These are the variables we look for: ``__traceback_supplement__``: You can define this locally or globally (unlike all the other variables, which must be defined locally). ``__traceback_supplement__`` is a tuple of ``(factory, arg1, arg2...)``. When there is an exception, ``factory(arg1, arg2, ...)`` is called, and the resulting object is inspected for supplemental information. ``__traceback_info__``: This information is added to the traceback, usually fairly literally. ``__traceback_hide__``: If set and true, this indicates that the frame should be hidden from abbreviated tracebacks. This way you can hide some of the complexity of the larger framework and let the user focus on their own errors. By setting it to ``'before'``, all frames before this one will be thrown away. By setting it to ``'after'`` then all frames after this will be thrown away until ``'reset'`` is found. In each case the frame where it is set is included, unless you append ``'_and_this'`` to the value (e.g., ``'before_and_this'``). Note that formatters will ignore this entirely if the frame that contains the error wouldn't normally be shown according to these rules. ``__traceback_reporter__``: This should be a reporter object (see the reporter module), or a list/tuple of reporter objects. All reporters found this way will be given the exception, innermost first. ``__traceback_decorator__``: This object (defined in a local or global scope) will get the result of this function (the CollectedException defined below). It may modify this object in place, or return an entirely new object. This gives the object the ability to manipulate the traceback arbitrarily. The actually interpretation of these values is largely up to the reporters and formatters. ``collect_exception(*sys.exc_info())`` will return an object with several attributes: ``frames``: A list of frames ``exception_formatted``: The formatted exception, generally a full traceback ``exception_type``: The type of the exception, like ``ValueError`` ``exception_value``: The string value of the exception, like ``'x not in list'`` ``identification_code``: A hash of the exception data meant to identify the general exception, so that it shares this code with other exceptions that derive from the same problem. The code is a hash of all the module names and function names in the traceback, plus exception_type. This should be shown to users so they can refer to the exception later. (@@: should it include a portion that allows identification of the specific instance of the exception as well?) The list of frames goes innermost first. Each frame has these attributes; some values may be None if they could not be determined. ``modname``: the name of the module ``filename``: the filename of the module ``lineno``: the line of the error ``revision``: the contents of __version__ or __revision__ ``name``: the function name ``supplement``: an object created from ``__traceback_supplement__`` ``supplement_exception``: a simple traceback of any exception ``__traceback_supplement__`` created ``traceback_info``: the str() of any ``__traceback_info__`` variable found in the local scope (@@: should it str()-ify it or not?) ``traceback_hide``: the value of any ``__traceback_hide__`` variable ``traceback_log``: the value of any ``__traceback_log__`` variable ``__traceback_supplement__`` is thrown away, but a fixed set of attributes are captured; each of these attributes is optional. ``object``: the name of the object being visited ``source_url``: the original URL requested ``line``: the line of source being executed (for interpreters, like ZPT) ``column``: the column of source being executed ``expression``: the expression being evaluated (also for interpreters) ``warnings``: a list of (string) warnings to be displayed ``getInfo``: a function/method that takes no arguments, and returns a string describing any extra information ``extraData``: a function/method that takes no arguments, and returns a dictionary. The contents of this dictionary will not be displayed in the context of the traceback, but globally for the exception. Results will be grouped by the keys in the dictionaries (which also serve as titles). The keys can also be tuples of (importance, title); in this case the importance should be ``important`` (shows up at top), ``normal`` (shows up somewhere; unspecified), ``supplemental`` (shows up at bottom), or ``extra`` (shows up hidden or not at all). These are used to create an object with attributes of the same names (``getInfo`` becomes a string attribute, not a method). ``__traceback_supplement__`` implementations should be careful to produce values that are relatively static and unlikely to cause further errors in the reporting system -- any complex introspection should go in ``getInfo()`` and should ultimately return a string. Note that all attributes are optional, and under certain circumstances may be None or may not exist at all -- the collector can only do a best effort, but must avoid creating any exceptions itself. Formatters may want to use ``__traceback_hide__`` as a hint to hide frames that are part of the 'framework' or underlying system. There are a variety of rules about special values for this variables that formatters should be aware of. TODO: More attributes in __traceback_supplement__? Maybe an attribute that gives a list of local variables that should also be collected? Also, attributes that would be explicitly meant for the entire request, not just a single frame. Right now some of the fixed set of attributes (e.g., source_url) are meant for this use, but there's no explicit way for the supplement to indicate new values, e.g., logged-in user, HTTP referrer, environment, etc. Also, the attributes that do exist are Zope/Web oriented. More information on frames? cgitb, for instance, produces extensive information on local variables. There exists the possibility that getting this information may cause side effects, which can make debugging more difficult; but it also provides fodder for post-mortem debugging. However, the collector is not meant to be configurable, but to capture everything it can and let the formatters be configurable. Maybe this would have to be a configuration value, or maybe it could be indicated by another magical variable (which would probably mean 'show all local variables below this frame') """ show_revisions = 0 def __init__(self, limit=None): self.limit = limit def getLimit(self): limit = self.limit if limit is None: limit = getattr(sys, 'tracebacklimit', None) return limit def getRevision(self, globals): if not self.show_revisions: return None revision = globals.get('__revision__', None) if revision is None: # Incorrect but commonly used spelling revision = globals.get('__version__', None) if revision is not None: try: revision = str(revision).strip() except: revision = '???' return revision def collectSupplement(self, supplement, tb): result = {} for name in ('object', 'source_url', 'line', 'column', 'expression', 'warnings'): result[name] = getattr(supplement, name, None) func = getattr(supplement, 'getInfo', None) if func: result['info'] = func() else: result['info'] = None func = getattr(supplement, 'extraData', None) if func: result['extra'] = func() else: result['extra'] = None return SupplementaryData(**result) def collectLine(self, tb, extra_data): f = tb.tb_frame lineno = tb.tb_lineno co = f.f_code filename = co.co_filename name = co.co_name globals = f.f_globals locals = f.f_locals if not hasattr(locals, 'keys'): # Something weird about this frame; it's not a real dict warnings.warn( "Frame %s has an invalid locals(): %r" % ( globals.get('__name__', 'unknown'), locals)) locals = {} data = {} data['modname'] = globals.get('__name__', None) data['filename'] = filename data['lineno'] = lineno data['revision'] = self.getRevision(globals) data['name'] = name data['tbid'] = id(tb) # Output a traceback supplement, if any. if '__traceback_supplement__' in locals: # Use the supplement defined in the function. tbs = locals['__traceback_supplement__'] elif '__traceback_supplement__' in globals: # Use the supplement defined in the module. # This is used by Scripts (Python). tbs = globals['__traceback_supplement__'] else: tbs = None if tbs is not None: factory = tbs[0] args = tbs[1:] try: supp = factory(*args) data['supplement'] = self.collectSupplement(supp, tb) if data['supplement'].extra: for key, value in data['supplement'].extra.items(): extra_data.setdefault(key, []).append(value) except: if DEBUG_EXCEPTION_FORMATTER: out = StringIO() traceback.print_exc(file=out) text = out.getvalue() data['supplement_exception'] = text # else just swallow the exception. try: tbi = locals.get('__traceback_info__', None) if tbi is not None: data['traceback_info'] = str(tbi) except: pass marker = [] for name in ('__traceback_hide__', '__traceback_log__', '__traceback_decorator__'): try: tbh = locals.get(name, globals.get(name, marker)) if tbh is not marker: data[name[2:-2]] = tbh except: pass return data def collectExceptionOnly(self, etype, value): return traceback.format_exception_only(etype, value) def collectException(self, etype, value, tb, limit=None): # The next line provides a way to detect recursion. __exception_formatter__ = 1 frames = [] ident_data = [] traceback_decorators = [] if limit is None: limit = self.getLimit() n = 0 extra_data = {} while tb is not None and (limit is None or n < limit): if tb.tb_frame.f_locals.get('__exception_formatter__'): # Stop recursion. @@: should make a fake ExceptionFrame frames.append('(Recursive formatException() stopped)\n') break data = self.collectLine(tb, extra_data) frame = ExceptionFrame(**data) frames.append(frame) if frame.traceback_decorator is not None: traceback_decorators.append(frame.traceback_decorator) ident_data.append(frame.modname or '?') ident_data.append(frame.name or '?') tb = tb.tb_next n = n + 1 ident_data.append(str(etype)) ident = serial_number_generator.hash_identifier( ' '.join(ident_data), length=5, upper=True, prefix=DEBUG_IDENT_PREFIX) result = CollectedException( frames=frames, exception_formatted=self.collectExceptionOnly(etype, value), exception_type=etype, exception_value=self.safeStr(value), identification_code=ident, date=time.localtime(), extra_data=extra_data) if etype is ImportError: extra_data[('important', 'sys.path')] = [sys.path] for decorator in traceback_decorators: try: new_result = decorator(result) if new_result is not None: result = new_result except: pass return result def safeStr(self, obj): try: return str(obj) except UnicodeEncodeError: try: return unicode(obj).encode(FALLBACK_ENCODING, 'replace') except UnicodeEncodeError: # This is when something is really messed up, but this can # happen when the __str__ of an object has to handle unicode return repr(obj) limit = 200 class Bunch(object): """ A generic container """ def __init__(self, **attrs): for name, value in attrs.items(): setattr(self, name, value) def __repr__(self): name = '<%s ' % self.__class__.__name__ name += ' '.join(['%s=%r' % (name, str(value)[:30]) for name, value in self.__dict__.items() if not name.startswith('_')]) return name + '>' class CollectedException(Bunch): """ This is the result of collection the exception; it contains copies of data of interest. """ # A list of frames (ExceptionFrame instances), innermost last: frames = [] # The result of traceback.format_exception_only; this looks # like a normal traceback you'd see in the interactive interpreter exception_formatted = None # The *string* representation of the type of the exception # (@@: should we give the # actual class? -- we can't keep the # actual exception around, but the class should be safe) # Something like 'ValueError' exception_type = None # The string representation of the exception, from ``str(e)``. exception_value = None # An identifier which should more-or-less classify this particular # exception, including where in the code it happened. identification_code = None # The date, as time.localtime() returns: date = None # A dictionary of supplemental data: extra_data = {} class SupplementaryData(Bunch): """ The result of __traceback_supplement__. We don't keep the supplement object around, for fear of GC problems and whatnot. (@@: Maybe I'm being too superstitious about copying only specific information over) """ # These attributes are copied from the object, or left as None # if the object doesn't have these attributes: object = None source_url = None line = None column = None expression = None warnings = None # This is the *return value* of supplement.getInfo(): info = None class ExceptionFrame(Bunch): """ This represents one frame of the exception. Each frame is a context in the call stack, typically represented by a line number and module name in the traceback. """ # The name of the module; can be None, especially when the code # isn't associated with a module. modname = None # The filename (@@: when no filename, is it None or '?'?) filename = None # Line number lineno = None # The value of __revision__ or __version__ -- but only if # show_revision = True (by defaut it is false). (@@: Why not # collect this?) revision = None # The name of the function with the error (@@: None or '?' when # unknown?) name = None # A SupplementaryData object, if __traceback_supplement__ was found # (and produced no errors) supplement = None # If accessing __traceback_supplement__ causes any error, the # plain-text traceback is stored here supplement_exception = None # The str() of any __traceback_info__ value found traceback_info = None # The value of __traceback_hide__ traceback_hide = False # The value of __traceback_decorator__ traceback_decorator = None # The id() of the traceback scope, can be used to reference the # scope for use elsewhere tbid = None def get_source_line(self, context=0): """ Return the source of the current line of this frame. You probably want to .strip() it as well, as it is likely to have leading whitespace. If context is given, then that many lines on either side will also be returned. E.g., context=1 will give 3 lines. """ if not self.filename or not self.lineno: return None lines = [] for lineno in range(self.lineno-context, self.lineno+context+1): lines.append(linecache.getline(self.filename, lineno)) return ''.join(lines) if hasattr(sys, 'tracebacklimit'): limit = min(limit, sys.tracebacklimit) col = ExceptionCollector() def collect_exception(t, v, tb, limit=None): """ Collection an exception from ``sys.exc_info()``. Use like:: try: blah blah except: exc_data = collect_exception(*sys.exc_info()) """ return col.collectException(t, v, tb, limit=limit)