1 files changed, 37 insertions, 21 deletions
diff --git a/tests/test_examplefiles.py b/tests/test_examplefiles.py
index 0547ffd3..ac5b4244 100644
--- a/tests/test_examplefiles.py
+++ b/tests/test_examplefiles.py
@@ -20,29 +20,34 @@ from pygments.util import ClassNotFound
 
 STORE_OUTPUT = False
 
+STATS = {}
+
+TESTDIR = os.path.dirname(__file__)
+
+
 # generate methods
 def test_example_files():
-    testdir = os.path.dirname(__file__)
-    outdir = os.path.join(testdir, 'examplefiles', 'output')
+    global STATS
+    STATS = {}
+    outdir = os.path.join(TESTDIR, 'examplefiles', 'output')
     if STORE_OUTPUT and not os.path.isdir(outdir):
         os.makedirs(outdir)
-    for fn in os.listdir(os.path.join(testdir, 'examplefiles')):
+    for fn in os.listdir(os.path.join(TESTDIR, 'examplefiles')):
         if fn.startswith('.') or fn.endswith('#'):
             continue
 
-        absfn = os.path.join(testdir, 'examplefiles', fn)
+        absfn = os.path.join(TESTDIR, 'examplefiles', fn)
         if not os.path.isfile(absfn):
             continue
 
         print(absfn)
-        code = open(absfn, 'rb').read()
+        with open(absfn, 'rb') as f:
+            code = f.read()
         try:
             code = code.decode('utf-8')
         except UnicodeError:
             code = code.decode('latin1')
 
-        outfn = os.path.join(outdir, fn)
-
         lx = None
         if '_' in fn:
             try:
@@ -57,14 +62,25 @@ def test_example_files():
                                      'nor is of the form <lexer>_filename '
                                      'for overriding, thus no lexer found.'
                                      % fn)
-        yield check_lexer, lx, absfn, outfn
+        yield check_lexer, lx, fn
 
-def check_lexer(lx, absfn, outfn):
-    fp = open(absfn, 'rb')
-    try:
+    N = 7
+    stats = list(STATS.items())
+    stats.sort(key=lambda x: x[1][1])
+    print('\nExample files that took longest absolute time:')
+    for fn, t in stats[-N:]:
+        print('%-30s  %6d chars  %8.2f ms  %7.3f ms/char' % ((fn,) + t))
+    print()
+    stats.sort(key=lambda x: x[1][2])
+    print('\nExample files that took longest relative time:')
+    for fn, t in stats[-N:]:
+        print('%-30s  %6d chars  %8.2f ms  %7.3f ms/char' % ((fn,) + t))
+
+
+def check_lexer(lx, fn):
+    absfn = os.path.join(TESTDIR, 'examplefiles', fn)
+    with open(absfn, 'rb') as fp:
         text = fp.read()
-    finally:
-        fp.close()
     text = text.replace(b'\r\n', b'\n')
     text = text.strip(b'\n') + b'\n'
     try:
@@ -75,12 +91,17 @@ def check_lexer(lx, absfn, outfn):
         text = text.decode('latin1')
     ntext = []
     tokens = []
+    import time
+    t1 = time.time()
     for type, val in lx.get_tokens(text):
         ntext.append(val)
         assert type != Error, \
             'lexer %s generated error token for %s: %r at position %d' % \
             (lx, absfn, val, len(u''.join(ntext)))
         tokens.append((type, val))
+    t2 = time.time()
+    STATS[os.path.basename(absfn)] = (len(text),
+                                      1000 * (t2 - t1), 1000 * (t2 - t1) / len(text))
     if u''.join(ntext) != text:
         print('\n'.join(difflib.unified_diff(u''.join(ntext).splitlines(),
                                              text.splitlines())))
@@ -89,19 +110,14 @@ def check_lexer(lx, absfn, outfn):
     # check output against previous run if enabled
     if STORE_OUTPUT:
         # no previous output -- store it
+        outfn = os.path.join(TESTDIR, 'examplefiles', 'output', fn)
         if not os.path.isfile(outfn):
-            fp = open(outfn, 'wb')
-            try:
+            with open(outfn, 'wb') as fp:
                 pickle.dump(tokens, fp)
-            finally:
-                fp.close()
             return
         # otherwise load it and compare
-        fp = open(outfn, 'rb')
-        try:
+        with open(outfn, 'rb') as fp:
             stored_tokens = pickle.load(fp)
-        finally:
-            fp.close()
         if stored_tokens != tokens:
             f1 = pprint.pformat(stored_tokens)
             f2 = pprint.pformat(tokens)