dumper.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655

"""dumper

Dump Python data structures (including class instances) in a nicely-
nested, easy-to-read form.  Handles recursive data structures properly,
and has sensible options for limiting the extent of the dump both by
simple depth and by some rules for how to handle contained instances."""


__revision__ = "$Id$"


"""Dumping is generally accessed through the 'dump()' function:

    dump (any_python_object)

and is controlled by setting module-level global variables:

    import dumper

    dumper.max_depth = 10           # default is 5
    dumper.dump (really_deep_object)

'dump()' is nearly equivalent to 'print' with backquotes for
non-aggregate values (ie. anything *except* lists, tuples, dictionaries,
and class instances).  That is, strings, integers, floats, and other
numeric types are printed out "as-is", and funkier things like class
objects, function objects, and type objects are also dumped using their
traditional Python string representation.  For example:

    >>> dump ("Hello" + "world")
    'Helloworld'
    >>> class Foo: pass
    >>> dump (Foo)
    <class __main__.Foo at 271050>

'dump()' is slightly more interesting than 'print' for "short" lists,
tuples, and dictionaries.  (Lists and tuples are "short" when they have
no more than 10 elements and all elements are strings or numbers;
dictionaries are short when they have no more than 5 key/value pairs and
all keys and values are strings or numbers.)

For "short" lists and tuples, you get the 'id()' of the object and its
contents on one line:

    >>> dump (['foo', 3])
    <list at 0x242cb8> ['foo', 3]
    >>> dump ((3+4j, 'blah', 2L**50))
    <tuple at 0x20f208> ((3+4j), 'blah', 1125899906842624L)

"Short" dictionaries are similar:

    >>> d = {'foo': 5, 'bar': 3, 'bonk': 4-3j}
    >>> dump (d)
    <dictionary at 0x2737f0> {'foo': 5, 'bonk': (4-3j), 'bar': 3}

'dump()' is considerably more interesting than 'print' for lists,
tuples, and dictionaries that include more complex objects or are longer
than the 10-element/5-pair limit.  A long but simple list:

    >>> f = open ('/usr/dict/words')
    >>> dump (f.readlines())
    <list at 0x243738>
      0: '10th\012'
      1: '1st\012'
      2: '2nd\012'
    ...
      25138: 'zounds\012'
      25139: "z's\012"
      25140: 'zucchini\012'
      25141: 'Zurich\012'
      25142: 'zygote\012'

(Ellipsis added: 'dump()' just dumps the whole thing.)  Nested lists
also get multiline formatting, no matter how short and simple:

    >>> dump (['nested', ['list']])
    <list at 0x2436c0>
      0: 'nested'
      1: <list at 0x243658> ['list']

Note that since the inner list is "short" it is formatted on one line.
Deeply nested lists and tuples are more fun:

    >>> l = ["top", ('tuple', 'depth', 1), 
    ...      "top again", ["level 1", ["level", 2, ('deep', 'tuple')]]]
    >>> dump (l)
    <list at 0x243798>
      0: 'top'
      1: <tuple at 0x228ca8> ('tuple', 'depth', 1)
      2: 'top again'
      3: <list at 0x243888>
        0: 'level 1'
        1: <list at 0x243580>
          0: 'level'
          1: 2
          2: <tuple at 0x229228> ('deep', 'tuple')

Obviously, this is very handy for debugging complicated data structures.
Recursive data structures are not a problem:

    >>> l = [1, 2, 3]
    >>> l.append (l)
    >>> dump (l)
    <list at 0x243a98>
      0: 1
      1: 2
      2: 3
      3: <list at 0x243a98>: already seen

which is bulkier, but somewhat more informative than "[1, 2, 3, [...]]".

Dictionaries with aggregate keys or values also get multiline displays:

    >>> dump ({(1,0): 'keys', (0,1): 'fun'})
    <dictionary at 0x2754b8>
      (0, 1): 'fun'
      (1, 0): 'keys'

Note that when dictionaries are dumped in multiline format, they are
sorted by key.  In single-line format, 'dump()' just uses 'repr()', so
"short" dictionaries come out in hash order.  Also, no matter how
complicated dictionary *keys* are, they come out all on one line before
the colon.  (Using deeply nested dictionary keys requires a special kind
of madness, though, so you probably know what you're doing if you're
into that.)  Dictionary *values* are treated much like list/tuple
elements (one line if short, indented multiline display if not).

'dump()' is *much* more interesting than 'print' for class instances.
Simple example:

    >>> class Foo:
    ...   def __init__ (self):
    ...     self.a = 37; self.b = None; self.c = self
    ... 
    >>> f = Foo ()
    >>> dump (f)
    <Foo instance at 0x243990> 
      a: 37
      b: None
      c: <Foo instance at 0x243990>: already seen

A more interesting example using a contained instance and more recursion:

    >>> g = Foo ()
    >>> g.a = 42; g.b = [3, 5, 6, f]
    >>> dump (g)
    <Foo instance at 0x243b58> 
      a: 42
      b: <list at 0x243750>
        0: 3
        1: 5
        2: 6
        3: <Foo instance at 0x243990> 
          a: 37
          b: None
          c: <Foo instance at 0x243990>: already seen
      c: <Foo instance at 0x243b58>: already seen

Dumping a large instance that contains several other large instance gets
out of control pretty quickly.  'dump()' has a couple of options to help
you get a handle on this; normally, these are set by assigning to module
globals, but there's a nicer OO way of doing it if you like.  For
example, if you don't want 'dump()' to descend more than 3 levels into
your nested data structure:

    >>> import dumper
    >>> dumper.max_depth = 3
    >>> dumper.dump ([0, [1, [2, [3, [4]]]]])
    <list at 0x240ed0>
      0: 0
      1: <list at 0x240f18>
        0: 1
        1: <list at 0x254800>
          0: 2
          1: <list at 0x254818>: suppressed (too deep)

But note that max_depth does not apply to "short" lists (or tuples or
dictionaries):

    >>> dumper.dump ([0, [1, [2, [3, '3b', '3c']]]])
    <list at 0x240d68>
      0: 0
      1: <list at 0x254878>
        0: 1
        1: <list at 0x254890>
          0: 2
          1: <list at 0x2548c0> [3, '3b', '3c']

Since "short" lists (etc.) can't contain other aggregate objects, this
only bends the "max_depth" limit by one level, though, and it doesn't
increase the amount of output (but it does increase the amount of useful
information in the dump).

'max_depth' is a pretty blunt tool, though; as soon as you set it to N,
you'll find a structure of depth N+1 that you want to see all of.  And
anyways, dumps usually get out of control as a result of dumping large
contained class instances: hence, the more useful control is to tell
'dump()' when to dump contained instances.

The default is to dump contained instances when the two classes (that of
the parent and that of the child) are from the same module.  This
applies to classes defined in the main module or an interactive session
as well, hence:

    >>> class Foo: pass
    >>> class Bar: pass
    >>> f = Foo() ; b = Bar ()
    >>> f.b = b
    >>> f.a = 37
    >>> b.a = 42
    >>> dumper.dump (f)
    <Foo instance at 0x254890> 
      a: 37
      b: <Bar instance at 0x2549b0> 
        a: 42

Note that we have dumped f.b, the contained instance of Bar.  We can
control dumping of contained instances using the 'instance_dump' global;
for example, to completely disable dumping contained instances, set it
to 'none':

    >>> dumper.instance_dump = 'none'
    >>> dumper.dump (f)
    <Foo instance at 0x254890> 
      a: 37
      b: <Bar instance at 0x2549b0> : suppressed (contained instance)

This is the most restrictive mode for contained instance dumping.  The
default mode is 'module', meaning that 'dump()' will only dump contained
instances if both classes (parent and child) were defined in the same
module.  If the two classes were defined in different modules, e.g.

    >>> from foo import Foo
    >>> from bar import Bar
    >>> f = Foo () ; f.a = 42       
    >>> b = Bar () ; b.s = "hello"
    >>> f.child = b

then dumping the container ('f') results in something like

    >>> dumper.dump (f)
    <Foo instance at 0x241308> 
      a: 42
      child: <Bar instance at 0x241578> : suppressed (contained instance from different module)

Of course, you can always explicitly dump the contained instance:

    >>> dumper.dump (f.child)
    <Bar instance at 0x241578> 
      s: 'hello'

The next most permissive level is to dump contained instances as long as
their respective classes were defined in the same package.  Continuing
the above example:

    >>> dumper.instance_dump = 'package'
    >>> dumper.dump (f)
    <Foo instance at 0x241308> 
      a: 42
      child: <Bar instance at 0x241578> 
        s: 'hello'

But if the Foo and Bar classes had come from modules in different
packages, then dumping 'f' would look like:
    
    >>> dumper.dump (f)
    <Foo instance at 0x241350> 
      a: 42
      child: <Bar instance at 0x2415d8> : suppressed (contained instance from different package)

Only if you set 'instance_dump' to its most permissive setting, 'all',
will 'dump()' dump contained instances of classes in completely
different packages:

    >>> dumper.instance_dump = 'all'
    >>> dumper.dump (f)
    <Foo instance at 0x241350> 
      a: 42
      child: <Bar instance at 0x2415d8> 
        s: 'hello'
"""


import sys, string
from types import *

DICT_TYPES = {DictionaryType: 1}
try:
    from BTree import BTree
    DICT_TYPES[BTree] = 1
except ImportError:
    pass
try:
    from BTrees import OOBTree, OIBTree, IOBTree
    DICT_TYPES[OOBTree.OOBTree] = 1
    DICT_TYPES[OIBTree.OIBTree] = 1
    DICT_TYPES[IOBTree.IOBTree] = 1
except ImportError:
    pass

# 
# IDEAS on how to restrict how deep we go when dumping:
#   - never follow cyclic links! (handled with 'seen' hash)
#   - have a maximum dumping depth (for any nestable structure:
#     list/tuple, dictionary, instance)
#   - if in an instance: don't dump any other instances
#   - if in an instance: don't dump instances that belong to classes from
#     other packages
#   - if in an instance: don't dump instances that belong to classes from
#     other modules
# ...ok, these are all implemented now -- cool!

# restrictions on dumping
max_depth = 5
instance_dump = 'module'                # or 'all', 'none', 'package':
                                        # controls whether we dump an
                                        # instance that is under another
                                        # instance (however deep)

# arg -- this is necessary because the .__name__ of a type object
# under JPython is a bit ugly (eg. 'org.python.core.PyList' not 'list')
TYPE_NAMES = {
    BuiltinFunctionType: 'builtin',
    BuiltinMethodType: 'builtin',
    ClassType: 'class',
    CodeType: 'code',
    ComplexType: 'complex',
    DictType: 'dictionary',
    DictionaryType: 'dictionary',
    EllipsisType: 'ellipsis',
    FileType: 'file',
    FloatType: 'float',
    FrameType: 'frame',
    FunctionType: 'function',
    InstanceType: 'instance',
    IntType: 'int',
    LambdaType: 'function',
    ListType: 'list',
    LongType: 'long int',
    MethodType: 'instance method',
    ModuleType: 'module',
    NoneType: 'None',
    SliceType: 'slice',
    StringType: 'string',
    TracebackType: 'traceback',
    TupleType: 'tuple',
    TypeType: 'type',
    UnboundMethodType: 'instance method',
    UnicodeType: 'unicode',
    XRangeType: 'xrange',
    }

def get_type_name (type):
    try:
        return TYPE_NAMES[type]
    except KeyError:
        return type.__name__


class Dumper:

    def __init__ (self, max_depth=None, instance_dump=None, output=None):
        self._max_depth = max_depth
        self._instance_dump = instance_dump
        if output is None:
            self.out = sys.stdout
        else:
            self.out = output

    def __getattr__ (self, attr):
        if self.__dict__.has_key ('_' + attr):
            val = self.__dict__['_' + attr]
            if val is None:             # not defined in object;
                # attribute exists in instance (after adding _), but
                # not defined: get it from the module globals
                globals = vars(sys.modules[__name__])
                return globals.get(attr)
            else:
                # _ + attr exists and is defined (non-None)
                return val
        else:
            # _ + attr doesn't exist at all
            raise AttributeError, attr


    def __setattr__ (self, attr, val):
        if self.__dict__.has_key ('_' + attr):
            self.__dict__['_' + attr] = val
        else:
            self.__dict__[attr] = val

    def _writeln (self, line):
        self.out.write(line + "\n")


    def dump (self, val, indent='', summarize=1):
        self.seen = {}
        self.containing_instance = []
        self._dump (val, indent=indent, summarize=summarize)


    def _dump (self, val, depth=0, indent='', summarize=1):

        t = type (val)

        if short_value (val):
            self._writeln("%s%s" % (indent, short_dump (val)))

        else:
            depth = depth + 1

            if depth > self.max_depth:
                #raise SuppressedDump, "too deep"
                self._writeln(indent + "contents suppressed (too deep)")
                return

            if self.seen.get(id(val)):
                self._writeln(indent + "object already seen")
                return

            self.seen[id(val)] = 1

            if DICT_TYPES.has_key(t):
                if summarize:
                    self._writeln("%s%s:" % (indent, object_summary (val)))
                    indent = indent + '  '
                self.dump_dict (val, depth, indent)

            elif t in (ListType, TupleType):
                if summarize:
                    self._writeln("%s%s:" % (indent, object_summary (val)))
                    indent = indent + '  '
                self.dump_sequence (val, depth, indent)

            elif is_instance(val):
                self.dump_instance (val, depth, indent, summarize)

            else:
                raise RuntimeError, "this should not happen"

    # _dump ()


    def dump_dict (self, dict, depth, indent, shallow_attrs=()):
        keys = dict.keys()
        if type(keys) is ListType:
            keys.sort()

        for k in keys:
            val = dict[k]
            if short_value (val) or k in shallow_attrs:
                self._writeln("%s%s: %s" % (indent, k, short_dump (val)))
            else:
                self._writeln("%s%s: %s" % (indent, k, object_summary(val)))
                self._dump(val, depth, indent+'  ', summarize=0)


    def dump_sequence (self, seq, depth, indent):
        for i in range (len (seq)):
            val = seq[i]
            if short_value (val):
                self._writeln("%s%d: %s" % (indent, i, short_dump (val)))
            else:
                self._writeln("%s%d: %s" % (indent, i, object_summary(val)))
                self._dump(val, depth, indent+'  ', summarize=0)


    def dump_instance (self, inst, depth, indent, summarize=1):
        
        if summarize:
            self._writeln(indent + "%s " % object_summary (inst))
            indent = indent + '  '
        instance_dump = self.instance_dump

        # already dumping a containing instance, and have some restrictions
        # on instance-dumping?
        if self.containing_instance and instance_dump != 'all':

            previous_instance = self.containing_instance[-1]
            container_module = previous_instance.__class__.__module__
            container_package = (string.split (container_module, '.'))[0:-1]

            current_module = inst.__class__.__module__
            current_package = (string.split (current_module, '.'))[0:-1]

            #print "dumping instance contained in another instance %s:" % \
            #      previous_instance
            #print "  container module = %s" % container_module
            #print "  container package = %s" % container_package

            # inhibit dumping of all contained instances?
            if instance_dump == 'none':
                #raise SuppressedDump, "contained instance"
                self._writeln(
                    indent + "object contents suppressed (contained instance)")
                return

            # inhibit dumping instances from a different package?
            elif (instance_dump == 'package' and
                  current_package != container_package):
                #raise SuppressedDump, \
                #      "contained instance from different package"
                self._writeln(
                    indent + "object contents suppressed (instance from different package)")
                return

            # inhibit dumping instances from a different module?
            elif (instance_dump == 'module' and
                  current_module != container_module):
                #raise SuppressedDump, \
                #      "contained instance from different module"
                self._writeln(
                    indent + "object contents suppressed (instance from different module)")
                return

        # if in containing instance and have restrictions

        #self._writeln("")
        self.containing_instance.append (inst)
        shallow_attrs = getattr(inst, "_dump_shallow_attrs", [])
        self.dump_dict (vars (inst), depth, indent, shallow_attrs)
        del self.containing_instance[-1]


# end class Dumper


# -- Utility functions -------------------------------------------------

def atomic_type (t):
    return t in (NoneType, StringType, IntType, LongType, FloatType, ComplexType)


def short_value (val):
    #if atomic_type (type (val)):
    #    return 1

    t = type(val)

    if (not DICT_TYPES.has_key(t) and t not in (ListType, TupleType) and 
            not is_instance(val)):
        return 1

    elif t in (ListType, TupleType) and len (val) <= 10:
        for x in val:
            if not atomic_type (type (x)):
                return 0
        return 1

    elif DICT_TYPES.has_key(t) and len (val) <= 5:
        for (k,v) in val.items():
            if not (atomic_type (type (k)) and atomic_type (type (v))):
                return 0
        return 1

    else:
        return 0


def short_dump (val):
    if atomic_type(type(val)) or is_instance(val) or is_class(val):
        return `val`

    else:
        return object_summary (val) + ': ' + `val`
    
        
def object_summary (val):
    t = type (val)

    if is_instance(val):
        if hasattr(val, '__str__'):
            strform = ": " + str(val)
        else:
            strform = ""
        return "<%s at %x%s>" % (val.__class__.__name__, id (val), strform)

    elif is_class(val):
        return "<%s %s at 0x%x>" % (get_type_name(t), val.__name__, id (val))

    else:
        return "<%s at 0x%x>" % (get_type_name(t), id (val))
    

def is_instance (val):
    if type(val) is InstanceType:
        return 1
    # instance of extension class, but not an actual extension class
    elif (hasattr(val, '__class__') and
          hasattr(val, '__dict__') and
          not hasattr(val, '__bases__')):
        return 1
    else:
        return 0

def is_class (val):
    return hasattr(val, '__bases__')


default_dumper = Dumper()

def dump(val, output=None):
    if output is None:
        default_dumper.dump(val)
    else:
        Dumper(output=output).dump(val)

def dumps(val):
    from StringIO import StringIO
    out = StringIO()
    Dumper(output=out).dump(val)
    return out.getvalue()


if __name__ == "__main__":

    l1 = [3, 5, 'hello']
    t1 = ('uh', 'oh')
    l2 = ['foo', t1]
    d1 = {'k1': 'val1',
          'k2': l1,
          'k2': l2}

    dump (l1)
    dump (t1)
    dump (l2)
    dump (d1)

    dumper = Dumper (max_depth=1)
    l = ['foo', ['bar', 'baz', (1, 2, 3)]]
    dumper.dump (l)
    dumper.max_depth = 2
    dumper.dump (l)
    l[1][2] = tuple (range (11))
    dumper.dump (l)
    dumper.max_depth = None
    print dumper.max_depth
    
    class Foo: pass
    class Bar: pass

    f = Foo ()
    b = Bar ()

    f.a1 = 35
    f.a2 = l1
    f.a3 = l2
    f.b = b
    f.a4 = l2
    b.a1 = f
    b.a2 = None
    b.a3 = 'foo'

    dump (f)