From 155a422986c61c399594b3be77508726c9ff0db8 Mon Sep 17 00:00:00 2001
From: zax <zach.smith@makespace.com>
Date: Wed, 28 Oct 2015 21:35:44 -0400
Subject: (formatting) pep8

---
 .gitignore    |  3 ++-
 pycco/main.py | 82 ++++++++++++++++++++++++++++++++++-------------------------
 2 files changed, 49 insertions(+), 36 deletions(-)
diff --git a/.gitignore b/.gitignore
index 07b20dd..d4f3e3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 /Pycco.egg-info
 build/*
 dist/*
-docs/*
\ No newline at end of file
+docs/*
+/tags
diff --git a/pycco/main.py b/pycco/main.py
index 09584b3..e20430d 100644
--- a/pycco/main.py
+++ b/pycco/main.py
@@ -33,6 +33,7 @@ Or, to install the latest source
 
 # === Main Documentation Generation Functions ===
 
+
 def generate_documentation(source, outdir=None, preserve_paths=True,
                            language=None):
     """
@@ -49,6 +50,7 @@ def generate_documentation(source, outdir=None, preserve_paths=True,
     highlight(source, sections, language, preserve_paths=preserve_paths, outdir=outdir)
     return generate_html(source, sections, preserve_paths=preserve_paths, outdir=outdir)
 
+
 def parse(source, code, language):
     """
     Given a string of source code, parse out each comment and the code that
@@ -76,7 +78,6 @@ def parse(source, code, language):
                 lines.pop(linenum)
                 break
 
-
     def save(docs, code):
         if docs or code:
             sections.append({
@@ -92,7 +93,8 @@ def parse(source, code, language):
 
         # Only go into multiline comments section when one of the delimiters is
         # found to be at the start of a line
-        if all(multi_line_delimiters) and any([line.lstrip().startswith(delim) or line.rstrip().endswith(delim) for delim in multi_line_delimiters]):
+        if all(multi_line_delimiters) and any([line.lstrip().startswith(delim) or
+                                               line.rstrip().endswith(delim) for delim in multi_line_delimiters]):
             if not multi_line:
                 multi_line = True
 
@@ -100,8 +102,8 @@ def parse(source, code, language):
                 multi_line = False
 
             if (multi_line
-               and line.strip().endswith(language.get("multiend"))
-               and len(line.strip()) > len(language.get("multiend"))):
+                    and line.strip().endswith(language.get("multiend"))
+                    and len(line.strip()) > len(language.get("multiend"))):
                 multi_line = False
 
             # Get rid of the delimiters so that they aren't in the final docs
@@ -137,13 +139,13 @@ def parse(source, code, language):
             has_code = True
             code_text += line + '\n'
 
-
     save(docs_text, code_text)
 
     return sections
 
 # === Preprocessing the comments ===
 
+
 def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
     """
     Add cross-references before having the text processed by markdown.  It's
@@ -157,6 +159,7 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
 
     if not outdir:
         raise TypeError("Missing the required 'outdir' keyword argument.")
+
     def sanitize_section_name(name):
         return "-".join(name.lower().strip().split(" "))
 
@@ -178,9 +181,9 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
 
     def replace_section_name(match):
         return '%(lvl)s <span id="%(id)s" href="%(id)s">%(name)s</span>' % {
-            "lvl"  : re.sub('=', '#', match.group(1)),
-            "id"   : sanitize_section_name(match.group(2)),
-            "name" : match.group(2)
+            "lvl": re.sub('=', '#', match.group(1)),
+            "id": sanitize_section_name(match.group(2)),
+            "name": match.group(2)
         }
 
     comment = re.sub('^([=]+)([^=]+)[=]*\s*$', replace_section_name, comment)
@@ -190,6 +193,7 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
 
 # === Highlighting the source code ===
 
+
 def highlight(source, sections, language, preserve_paths=True, outdir=None):
     """
     Highlights a single chunk of code using the **Pygments** module, and runs
@@ -223,6 +227,7 @@ def highlight(source, sections, language, preserve_paths=True, outdir=None):
 
 # === HTML Code generation ===
 
+
 def generate_html(source, sections, preserve_paths=True, outdir=None):
     """
     Once all of the code is finished highlighting, we can generate the HTML file
@@ -245,12 +250,12 @@ def generate_html(source, sections, preserve_paths=True, outdir=None):
         sect["code_html"] = re.sub(r"\{\{", r"__DOUBLE_OPEN_STACHE__", sect["code_html"])
 
     rendered = pycco_template({
-        "title"       : title,
-        "stylesheet"  : csspath,
-        "sections"    : sections,
-        "source"      : source,
-        "path"        : path,
-        "destination" : destination
+        "title": title,
+        "stylesheet": csspath,
+        "sections": sections,
+        "source": source,
+        "path": path,
+        "destination": destination
     })
 
     return re.sub(r"__DOUBLE_OPEN_STACHE__", "{{", rendered).encode("utf-8")
@@ -276,39 +281,39 @@ from pygments import lexers, formatters
 # the name of the Pygments lexer and the symbol that indicates a comment. To
 # add another language to Pycco's repertoire, add it here.
 languages = {
-    ".coffee": { "name": "coffee-script", "symbol": "#",
-        "multistart": '###', "multiend": '###' },
+    ".coffee": {"name": "coffee-script", "symbol": "#",
+                "multistart": '###', "multiend": '###'},
 
-    ".pl":  { "name": "perl", "symbol": "#" },
+    ".pl":  {"name": "perl", "symbol": "#"},
 
-    ".sql": { "name": "sql", "symbol": "--" },
+    ".sql": {"name": "sql", "symbol": "--"},
 
-    ".c":   { "name": "c", "symbol": "//",
-        "multistart": "/*", "multiend": "*/"},
+    ".c":   {"name": "c", "symbol": "//",
+             "multistart": "/*", "multiend": "*/"},
 
-    ".cpp": { "name": "cpp", "symbol": "//"},
+    ".cpp": {"name": "cpp", "symbol": "//"},
 
-    ".js": { "name": "javascript", "symbol": "//",
-        "multistart": "/*", "multiend": "*/"},
+    ".js": {"name": "javascript", "symbol": "//",
+            "multistart": "/*", "multiend": "*/"},
 
-    ".rb": { "name": "ruby", "symbol": "#",
-        "multistart": "=begin", "multiend": "=end"},
+    ".rb": {"name": "ruby", "symbol": "#",
+            "multistart": "=begin", "multiend": "=end"},
 
-    ".py": { "name": "python", "symbol": "#",
-        "multistart": '"""', "multiend": '"""' },
+    ".py": {"name": "python", "symbol": "#",
+            "multistart": '"""', "multiend": '"""' },
 
-    ".scm": { "name": "scheme", "symbol": ";;",
-        "multistart": "#|", "multiend": "|#"},
+    ".scm": {"name": "scheme", "symbol": ";;",
+             "multistart": "#|", "multiend": "|#"},
 
-    ".lua": { "name": "lua", "symbol": "--",
-        "multistart": "--[[", "multiend": "--]]"},
+    ".lua": {"name": "lua", "symbol": "--",
+             "multistart": "--[[", "multiend": "--]]"},
 
-    ".erl": { "name": "erlang", "symbol": "%%" },
+    ".erl": {"name": "erlang", "symbol": "%%"},
 
-    ".tcl":  { "name": "tcl", "symbol": "#" },
+    ".tcl":  {"name": "tcl", "symbol": "#"},
 
-    ".hs": { "name": "haskell", "symbol": "--",
-        "multistart": "{-", "multiend": "-}"},
+    ".hs": {"name": "haskell", "symbol": "--",
+            "multistart": "{-", "multiend": "-}"},
 }
 
 # Build out the appropriate matchers and delimiters for each language.
@@ -327,6 +332,7 @@ for ext, l in languages.items():
     # Get the Pygments Lexer for this language.
     l["lexer"] = lexers.get_lexer_by_name(l["name"])
 
+
 def get_language(source, code, language=None):
     """Get the current language we're documenting, based on the extension."""
 
@@ -348,6 +354,7 @@ def get_language(source, code, language=None):
         else:
             raise ValueError("Can't figure out the language!")
 
+
 def destination(filepath, preserve_paths=True, outdir=None):
     """
     Compute the destination HTML path for an input source file path. If the
@@ -365,6 +372,7 @@ def destination(filepath, preserve_paths=True, outdir=None):
         name = path.join(dirname, name)
     return path.join(outdir, "%s.html" % name)
 
+
 def shift(list, default):
     """
     Shift items off the front of the `list` until it is empty, then return
@@ -376,12 +384,14 @@ def shift(list, default):
     except IndexError:
         return default
 
+
 def ensure_directory(directory):
     """Ensure that the destination directory exists."""
 
     if not os.path.isdir(directory):
         os.makedirs(directory)
 
+
 def template(source):
     return lambda context: pystache.render(source, context)
 
@@ -397,6 +407,7 @@ highlight_start = "<div class=\"highlight\"><pre>"
 # The end of each Pygments highlight block.
 highlight_end = "</pre></div>"
 
+
 def process(sources, preserve_paths=True, outdir=None, language=None):
     """For each source file passed as argument, generate the documentation."""
 
@@ -451,6 +462,7 @@ def monitor(sources, opts):
 
     class RegenerateHandler(watchdog.events.FileSystemEventHandler):
         """A handler for recompiling files which triggered watchdog events"""
+
         def on_modified(self, event):
             """Regenerate documentation for a file which triggered an event"""
             # Re-generate documentation from a source file if it was listed on
-- 
cgit v1.2.1


From 1fc58dd74503e3346314a51c9427de1a1228414c Mon Sep 17 00:00:00 2001
From: zax <zach.smith@makespace.com>
Date: Wed, 28 Oct 2015 21:36:05 -0400
Subject: Requirements, cleanup and tests

---
 .gitignore                |  6 +++
 pycco/main.py             | 94 +++++++++++++++++++++++++----------------------
 pycco/tests/__init__.py   |  0
 pycco/tests/test_pycco.py | 29 +++++++++++++++
 requirements.txt          |  2 +
 5 files changed, 87 insertions(+), 44 deletions(-)
 create mode 100644 pycco/tests/__init__.py
 create mode 100644 pycco/tests/test_pycco.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
index d4f3e3a..5306a68 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,9 @@ build/*
 dist/*
 docs/*
 /tags
+
+.cache
+.hypothesis
+.ropeproject
+
+.DS_Store
diff --git a/pycco/main.py b/pycco/main.py
index e20430d..6b8abac 100644
--- a/pycco/main.py
+++ b/pycco/main.py
@@ -46,12 +46,12 @@ def generate_documentation(source, outdir=None, preserve_paths=True,
         raise TypeError("Missing the required 'outdir' keyword argument.")
     code = open(source, "r").read()
     language = get_language(source, code, language=language)
-    sections = parse(source, code, language)
-    highlight(source, sections, language, preserve_paths=preserve_paths, outdir=outdir)
+    sections = parse(code, language)
+    highlight(sections, language, preserve_paths=preserve_paths, outdir=outdir)
     return generate_html(source, sections, preserve_paths=preserve_paths, outdir=outdir)
 
 
-def parse(source, code, language):
+def parse(code, language):
     """
     Given a string of source code, parse out each comment and the code that
     follows it, and create an individual **section** for it.
@@ -78,7 +78,7 @@ def parse(source, code, language):
                 lines.pop(linenum)
                 break
 
-    def save(docs, code):
+    def save(docs, code, sections):
         if docs or code:
             sections.append({
                 "docs_text": docs,
@@ -87,66 +87,64 @@ def parse(source, code, language):
 
     # Setup the variables to get ready to check for multiline comments
     multi_line = False
-    multi_line_delimiters = [language.get("multistart"), language.get("multiend")]
+    multistart, multiend = [language.get("multistart"), language.get("multiend")]
+    comment_matcher = language['comment_matcher']
 
     for line in lines:
-
         # Only go into multiline comments section when one of the delimiters is
         # found to be at the start of a line
-        if all(multi_line_delimiters) and any([line.lstrip().startswith(delim) or
-                                               line.rstrip().endswith(delim) for delim in multi_line_delimiters]):
-            if not multi_line:
-                multi_line = True
-
-            else:
-                multi_line = False
+        if multistart and multiend and \
+           any(line.lstrip().startswith(delim) or line.rstrip().endswith(delim)
+                for delim in (multistart, multiend)):
+            multi_line = not multi_line
 
             if (multi_line
-                    and line.strip().endswith(language.get("multiend"))
-                    and len(line.strip()) > len(language.get("multiend"))):
+                    and line.strip().endswith(multiend)
+                    and len(line.strip()) > len(multiend)):
                 multi_line = False
 
             # Get rid of the delimiters so that they aren't in the final docs
-            line = line.replace(language["multistart"], '')
-            line = line.replace(language["multiend"], '')
+            line = line.replace(multistart, '')
+            line = line.replace(multiend, '')
             docs_text += line.strip() + '\n'
             indent_level = re.match("\s*", line).group(0)
 
             if has_code and docs_text.strip():
-                save(docs_text, code_text[:-1])
+                save(docs_text, code_text[:-1], sections)
                 code_text = code_text.split('\n')[-1]
                 has_code = docs_text = ''
 
         elif multi_line:
             # Remove leading spaces
-            if re.match(r' {%d}' % len(indent_level), line):
+            if re.match(r' {:d}'.format(len(indent_level), line)):
                 docs_text += line[len(indent_level):] + '\n'
             else:
                 docs_text += line + '\n'
 
-        elif re.match(language["comment_matcher"], line):
+        elif re.match(comment_matcher, line):
             if has_code:
-                save(docs_text, code_text)
+                save(docs_text, code_text, sections)
                 has_code = docs_text = code_text = ''
-            docs_text += re.sub(language["comment_matcher"], "", line) + "\n"
+            docs_text += re.sub(comment_matcher, "", line) + "\n"
 
         else:
-            if code_text and any([line.lstrip().startswith(x) for x in ['class ', 'def ', '@']]):
+            if code_text and any(line.lstrip().startswith(x)
+                                 for x in ['class ', 'def ', '@']):
                 if not code_text.lstrip().startswith("@"):
-                    save(docs_text, code_text)
+                    save(docs_text, code_text, sections)
                     code_text = has_code = docs_text = ''
 
             has_code = True
             code_text += line + '\n'
 
-    save(docs_text, code_text)
+    save(docs_text, code_text, sections)
 
     return sections
 
 # === Preprocessing the comments ===
 
 
-def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
+def preprocess(comment, preserve_paths=True, outdir=None):
     """
     Add cross-references before having the text processed by markdown.  It's
     possible to reference another file, like this : `[[main.py]]` which renders
@@ -167,24 +165,27 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
         # Check if the match contains an anchor
         if '#' in match.group(1):
             name, anchor = match.group(1).split('#')
-            return " [%s](%s#%s)" % (name,
-                                     path.basename(destination(name,
-                                                               preserve_paths=preserve_paths,
-                                                               outdir=outdir)),
-                                     anchor)
+            return " [{}]({}#{})".format(name,
+                                         path.basename(destination(name,
+                                                                   preserve_paths=preserve_paths,
+                                                                   outdir=outdir)),
+                                         anchor)
 
         else:
-            return " [%s](%s)" % (match.group(1),
-                                  path.basename(destination(match.group(1),
-                                                            preserve_paths=preserve_paths,
-                                                            outdir=outdir)))
+            return " [{}]({})".format(match.group(1),
+                                      path.basename(destination(match.group(1),
+                                                                preserve_paths=preserve_paths,
+                                                                outdir=outdir)))
 
     def replace_section_name(match):
-        return '%(lvl)s <span id="%(id)s" href="%(id)s">%(name)s</span>' % {
-            "lvl": re.sub('=', '#', match.group(1)),
-            "id": sanitize_section_name(match.group(2)),
-            "name": match.group(2)
-        }
+        """
+        Replace equals-sign-formatted section names with anchor links.
+        """
+        return '{lvl} <span id="{id}" href="{id}">{name}</span>'.format(
+            lvl=re.sub('=', '#', match.group(1)),
+            id=sanitize_section_name(match.group(2)),
+            name=match.group(2)
+        )
 
     comment = re.sub('^([=]+)([^=]+)[=]*\s*$', replace_section_name, comment)
     comment = re.sub('[^`]\[\[(.+?)\]\]', replace_crossref, comment)
@@ -194,7 +195,7 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
 # === Highlighting the source code ===
 
 
-def highlight(source, sections, language, preserve_paths=True, outdir=None):
+def highlight(sections, language, preserve_paths=True, outdir=None):
     """
     Highlights a single chunk of code using the **Pygments** module, and runs
     the text of its corresponding comment through **Markdown**.
@@ -220,7 +221,6 @@ def highlight(source, sections, language, preserve_paths=True, outdir=None):
         except UnicodeError:
             docs_text = unicode(section["docs_text"].decode('utf-8'))
         section["docs_html"] = markdown(preprocess(docs_text,
-                                                   i,
                                                    preserve_paths=preserve_paths,
                                                    outdir=outdir))
         section["num"] = i
@@ -370,7 +370,13 @@ def destination(filepath, preserve_paths=True, outdir=None):
         name = filename
     if preserve_paths:
         name = path.join(dirname, name)
-    return path.join(outdir, "%s.html" % name)
+    dest = path.join(outdir, u"{}.html".format(name))
+    # If `join` is passed an absolute path, it will ignore any earlier path
+    # elements. We will force outdir to the beginning of the path to avoid
+    # writing outside our destination.
+    if not dest.startswith(outdir):
+        dest = outdir + os.sep + dest
+    return dest
 
 
 def shift(list, default):
@@ -438,7 +444,7 @@ def process(sources, preserve_paths=True, outdir=None, language=None):
                 f.write(generate_documentation(s, preserve_paths=preserve_paths, outdir=outdir,
                                                language=language))
 
-            print "pycco = %s -> %s" % (s, dest)
+            print "pycco = {} -> {}".format(s, dest)
 
             if sources:
                 next_file()
diff --git a/pycco/tests/__init__.py b/pycco/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pycco/tests/test_pycco.py b/pycco/tests/test_pycco.py
new file mode 100644
index 0000000..43abe36
--- /dev/null
+++ b/pycco/tests/test_pycco.py
@@ -0,0 +1,29 @@
+from hypothesis import given
+from hypothesis.strategies import lists, text, booleans, integers
+import pycco.main as p
+import copy
+
+
+@given(lists(text()), text())
+def test_shift(fragments, default):
+    if fragments == []:
+        assert p.shift(fragments, default) == default
+    else:
+        fragments2 = copy.copy(fragments)
+        head = p.shift(fragments, default)
+        assert [head] + fragments == fragments2
+
+
+@given(text(), booleans(), text(min_size=1))
+def test_destination(filepath, preserve_paths, outdir):
+    dest = p.destination(filepath, preserve_paths=preserve_paths, outdir=outdir)
+    assert dest.startswith(outdir)
+    assert dest.endswith(".html")
+
+
+@given(integers(min_value=0, max_value=12), text())
+def test_parse(n, source):
+    languages = p.languages
+    l = languages[languages.keys()[n]]
+    parsed = p.parse(source, l)
+    assert [{"code_text", "docs_text"} == set(s.keys()) for s in parsed]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b2e7043
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+pystache==0.5.4
+markdown==2.6.3
-- 
cgit v1.2.1


From 29e059bccc2206f927d1a3ca348ed04ae2b1a17e Mon Sep 17 00:00:00 2001
From: zax <zach.smith@makespace.com>
Date: Sun, 1 Nov 2015 12:46:54 -0500
Subject: Basic CI with Travis and Coveralls.

---
 .gitignore                |   1 +
 .travis.yml               |  10 +++++
 README                    |  25 -----------
 README.md                 |  30 +++++++++++++
 pycco/main.py             |  36 +++++++++++-----
 pycco/tests/__init__.py   |   0
 pycco/tests/test_pycco.py |  29 -------------
 requirements.test.txt     |   3 ++
 requirements.txt          |   1 +
 tests/__init__.py         |   0
 tests/test_pycco.py       | 107 ++++++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 177 insertions(+), 65 deletions(-)
 create mode 100644 .travis.yml
 delete mode 100644 README
 create mode 100644 README.md
 delete mode 100644 pycco/tests/__init__.py
 delete mode 100644 pycco/tests/test_pycco.py
 create mode 100644 requirements.test.txt
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_pycco.py

diff --git a/.gitignore b/.gitignore
index 5306a68..e1c9655 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.coverage
 *.pyc
 /Pycco.egg-info
 build/*
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..bfbe563
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,10 @@
+language: python
+python:
+    - '2.7'
+install:
+    - 'pip install -r requirements.txt'
+    - 'pip install -r requirements.test.txt'
+script:
+    - 'py.test --cov=pycco tests/'
+after_success:
+    - coveralls
diff --git a/README b/README
deleted file mode 100644
index 5343728..0000000
--- a/README
+++ /dev/null
@@ -1,25 +0,0 @@
-888888b.
-888   Y88b
-888    888
-888   d88P  888  888   .d8888b  .d8888b  .d88b.
-8888888P"   888  888  d88P"    d88P"    d88""88b
-888         888  888  888      888      888  888
-888         Y88b 888  Y88b.    Y88b.    Y88..88P
-888          "Y88888   "Y8888P  "Y8888P  "Y88P"
-                 888
-            Y8b d88P
-             "Y88P"
-
-Pycco is a Python port of Docco: the original quick-and-dirty, hundred-line-
-long, literate-programming-style documentation generator. For more information,
-see:
-
-http://fitzgen.github.com/pycco/
-
-Others:
-
-CoffeeScript (Original) - http://jashkenas.github.com/docco/
-
-Ruby - http://rtomayko.github.com/rocco/
-
-Sh - http://rtomayko.github.com/shocco/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9cad492
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+```
+888888b.
+888   Y88b
+888    888
+888   d88P  888  888   .d8888b  .d8888b  .d88b.
+8888888P"   888  888  d88P"    d88P"    d88""88b
+888         888  888  888      888      888  888
+888         Y88b 888  Y88b.    Y88b.    Y88..88P
+888          "Y88888   "Y8888P  "Y8888P  "Y88P"
+                 888
+            Y8b d88P
+             "Y88P"
+```
+
+Pycco is a Python port of Docco: the original quick-and-dirty, hundred-line-
+long, literate-programming-style documentation generator. For more information,
+see:
+
+http://fitzgen.github.com/pycco/
+
+Others:
+
+CoffeeScript (Original) - http://jashkenas.github.com/docco/
+
+Ruby - http://rtomayko.github.com/rocco/
+
+Sh - http://rtomayko.github.com/shocco/
+
+[![Build Status](https://travis-ci.org/subsetpark/pycco.svg?branch=hypothesis)](https://travis-ci.org/subsetpark/pycco)
+[![Coverage Status](https://coveralls.io/repos/subsetpark/pycco/badge.svg?branch=hypothesis&service=github)](https://coveralls.io/github/subsetpark/pycco?branch=hypothesis)
diff --git a/pycco/main.py b/pycco/main.py
index 6b8abac..20e5a6b 100644
--- a/pycco/main.py
+++ b/pycco/main.py
@@ -116,7 +116,7 @@ def parse(code, language):
 
         elif multi_line:
             # Remove leading spaces
-            if re.match(r' {:d}'.format(len(indent_level), line)):
+            if re.match(r' {:d}'.format(len(indent_level)), line):
                 docs_text += line[len(indent_level):] + '\n'
             else:
                 docs_text += line + '\n'
@@ -300,7 +300,7 @@ languages = {
             "multistart": "=begin", "multiend": "=end"},
 
     ".py": {"name": "python", "symbol": "#",
-            "multistart": '"""', "multiend": '"""' },
+            "multistart": '"""', "multiend": '"""'},
 
     ".scm": {"name": "scheme", "symbol": ";;",
              "multistart": "#|", "multiend": "|#"},
@@ -343,15 +343,21 @@ def get_language(source, code, language=None):
         else:
             raise ValueError("Unknown forced language: " + language)
 
-    m = re.match(r'.*(\..+)', os.path.basename(source))
+    m = re.match(r'.*(\..+)', os.path.basename(source)) if source else None
     if m and m.group(1) in languages:
         return languages[m.group(1)]
     else:
-        lang = lexers.guess_lexer(code).name.lower()
-        for l in languages.values():
-            if l["name"] == lang:
-                return l
-        else:
+        try:
+            lang = lexers.guess_lexer(code).name.lower()
+            for l in languages.values():
+                if l["name"] == lang:
+                    return l
+            else:
+                raise ValueError()
+        except ValueError:
+                # If pygments can't find any lexers, it will raise its own
+                # subclass of ValueError. We will catch it and raise ours
+                # for consistency.
             raise ValueError("Can't figure out the language!")
 
 
@@ -392,11 +398,19 @@ def shift(list, default):
 
 
 def ensure_directory(directory):
-    """Ensure that the destination directory exists."""
-
+    """
+    Sanitize directory string and ensure that the destination directory exists.
+    """
+    # Sanitization regexp copied from
+    # http://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
+    control_chars = ''.join(map(unichr, range(0, 32) + range(127, 160)))
+    control_char_re = re.compile(u'[{}]'.format(re.escape(control_chars)))
+    directory = control_char_re.sub('', directory)
     if not os.path.isdir(directory):
         os.makedirs(directory)
 
+    return directory
+
 
 def template(source):
     return lambda context: pystache.render(source, context)
@@ -426,7 +440,7 @@ def process(sources, preserve_paths=True, outdir=None, language=None):
 
     # Proceed to generating the documentation.
     if sources:
-        ensure_directory(outdir)
+        outdir = ensure_directory(outdir)
         css = open(path.join(outdir, "pycco.css"), "w")
         css.write(pycco_styles)
         css.close()
diff --git a/pycco/tests/__init__.py b/pycco/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/pycco/tests/test_pycco.py b/pycco/tests/test_pycco.py
deleted file mode 100644
index 43abe36..0000000
--- a/pycco/tests/test_pycco.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from hypothesis import given
-from hypothesis.strategies import lists, text, booleans, integers
-import pycco.main as p
-import copy
-
-
-@given(lists(text()), text())
-def test_shift(fragments, default):
-    if fragments == []:
-        assert p.shift(fragments, default) == default
-    else:
-        fragments2 = copy.copy(fragments)
-        head = p.shift(fragments, default)
-        assert [head] + fragments == fragments2
-
-
-@given(text(), booleans(), text(min_size=1))
-def test_destination(filepath, preserve_paths, outdir):
-    dest = p.destination(filepath, preserve_paths=preserve_paths, outdir=outdir)
-    assert dest.startswith(outdir)
-    assert dest.endswith(".html")
-
-
-@given(integers(min_value=0, max_value=12), text())
-def test_parse(n, source):
-    languages = p.languages
-    l = languages[languages.keys()[n]]
-    parsed = p.parse(source, l)
-    assert [{"code_text", "docs_text"} == set(s.keys()) for s in parsed]
diff --git a/requirements.test.txt b/requirements.test.txt
new file mode 100644
index 0000000..8439fc2
--- /dev/null
+++ b/requirements.test.txt
@@ -0,0 +1,3 @@
+hypothesis==1.14.0
+pytest-cov==2.2.0
+coveralls==1.1
diff --git a/requirements.txt b/requirements.txt
index b2e7043..38964da 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 pystache==0.5.4
+Pygments==2.0.2
 markdown==2.6.3
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_pycco.py b/tests/test_pycco.py
new file mode 100644
index 0000000..04cb57e
--- /dev/null
+++ b/tests/test_pycco.py
@@ -0,0 +1,107 @@
+import copy
+import tempfile
+import pytest
+import os
+import re
+from hypothesis import given, example, assume
+from hypothesis.strategies import lists, text, booleans, choices, none
+
+import pycco.main as p
+
+PYTHON = p.languages['.py']
+PYCCO_SOURCE = 'pycco/main.py'
+FOO_FUNCTION = """def foo():\n    return True"""
+
+
+@given(lists(text()), text())
+def test_shift(fragments, default):
+    if fragments == []:
+        assert p.shift(fragments, default) == default
+    else:
+        fragments2 = copy.copy(fragments)
+        head = p.shift(fragments, default)
+        assert [head] + fragments == fragments2
+
+
+@given(text(), booleans(), text(min_size=1))
+@example("/foo", True, "0")
+def test_destination(filepath, preserve_paths, outdir):
+    dest = p.destination(filepath, preserve_paths=preserve_paths, outdir=outdir)
+    assert dest.startswith(outdir)
+    assert dest.endswith(".html")
+
+
+@given(choices(), text())
+def test_parse(choice, source):
+    l = choice(p.languages.values())
+    parsed = p.parse(source, l)
+    assert [{"code_text", "docs_text"} == set(s.keys()) for s in parsed]
+
+
+def test_skip_coding_directive():
+    source = "# -*- coding: utf-8 -*-\n" + FOO_FUNCTION
+    parsed = p.parse(source, PYTHON)
+    for section in parsed:
+        assert "coding" not in section['code_text']
+
+
+def test_multi_line_leading_spaces():
+    source = "# This is a\n# comment that\n# is indented\n"
+    source += FOO_FUNCTION
+    parsed = p.parse(source, PYTHON)
+    # The resulting comment has leading spaces stripped out.
+    assert parsed[0]["docs_text"] == "This is a\ncomment that\nis indented\n"
+
+
+@given(text(), text())
+def test_get_language_specify_language(source, code):
+    assert p.get_language(source, code, language="python") == p.languages['.py']
+
+    with pytest.raises(ValueError):
+        p.get_language(source, code, language="non-existent")
+
+
+@given(text() | none())
+def test_get_language_bad_source(source):
+    code = "#!/usr/bin/python\n"
+    code += FOO_FUNCTION
+    assert p.get_language(source, code) == PYTHON
+    with pytest.raises(ValueError) as e:
+        assert p.get_language(source, "badlang")
+
+    assert e.value.message == "Can't figure out the language!"
+
+
+@given(text() | none())
+def test_get_language_bad_code(code):
+    source = "test.py"
+    assert p.get_language(source, code) == PYTHON
+
+
+@given(text(max_size=64))
+def test_ensure_directory(dir_name):
+    tempdir = os.path.join(tempfile.gettempdir(), dir_name)
+
+    # Copy and paste sanitization from function, but only for housekeeping. We
+    # pass in the unsanitized string to the function.
+    control_chars = ''.join(map(unichr, range(0, 32) + range(127, 160)))
+    control_char_re = re.compile(u'[{}]'.format(re.escape(control_chars)))
+    safe_name = control_char_re.sub('', tempdir)
+
+    if not os.path.isdir(safe_name):
+        assume(os.access(safe_name, os.W_OK))
+        p.ensure_directory(tempdir)
+        assert os.path.isdir(safe_name)
+
+# The following functions get good test coverage, but effort should be put into
+# decomposing the functions they test and actually testing their output.
+
+
+def test_generate_documentation():
+    p.generate_documentation(PYCCO_SOURCE, outdir=tempfile.gettempdir())
+
+
+@given(booleans(), choices())
+def test_process(preserve_paths, choice):
+    lang_name = choice([l["name"] for l in p.languages.values()])
+    p.process([PYCCO_SOURCE], preserve_paths=preserve_paths, outdir=tempfile.gettempdir(), language=lang_name)
-- 
cgit v1.2.1


From 32010829e4c1f2ca43eef7e5ba197daa8cfaad1e Mon Sep 17 00:00:00 2001
From: zax <zach.smith@makespace.com>
Date: Sun, 1 Nov 2015 16:29:49 -0500
Subject: Factor out file handling in generate_documentation

---
 pycco/main.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/pycco/main.py b/pycco/main.py
index 20e5a6b..df2b2bc 100644
--- a/pycco/main.py
+++ b/pycco/main.py
@@ -45,10 +45,17 @@ def generate_documentation(source, outdir=None, preserve_paths=True,
     if not outdir:
         raise TypeError("Missing the required 'outdir' keyword argument.")
     code = open(source, "r").read()
-    language = get_language(source, code, language=language)
+    return _generate_documentation(source, code, outdir, preserve_paths, language)
+
+
+def _generate_documentation(file_path, code, outdir, preserve_paths, language):
+    """
+    Helper function to allow documentation generation without file handling.
+    """
+    language = get_language(file_path, code, language=language)
     sections = parse(code, language)
     highlight(sections, language, preserve_paths=preserve_paths, outdir=outdir)
-    return generate_html(source, sections, preserve_paths=preserve_paths, outdir=outdir)
+    return generate_html(file_path, sections, preserve_paths=preserve_paths, outdir=outdir)
 
 
 def parse(code, language):
@@ -56,7 +63,6 @@ def parse(code, language):
     Given a string of source code, parse out each comment and the code that
     follows it, and create an individual **section** for it.
     Sections take the form:
-
         { "docs_text": ...,
           "docs_html": ...,
           "code_text": ...,
@@ -78,7 +84,7 @@ def parse(code, language):
                 lines.pop(linenum)
                 break
 
-    def save(docs, code, sections):
+    def save(docs, code):
         if docs or code:
             sections.append({
                 "docs_text": docs,
@@ -87,14 +93,14 @@ def parse(code, language):
 
     # Setup the variables to get ready to check for multiline comments
     multi_line = False
-    multistart, multiend = [language.get("multistart"), language.get("multiend")]
+    multistart, multiend = language.get("multistart"), language.get("multiend")
     comment_matcher = language['comment_matcher']
 
     for line in lines:
         # Only go into multiline comments section when one of the delimiters is
         # found to be at the start of a line
         if multistart and multiend and \
-           any(line.lstrip().startswith(delim) or line.rstrip().endswith(delim)
+            any(line.lstrip().startswith(delim) or line.rstrip().endswith(delim)
                 for delim in (multistart, multiend)):
             multi_line = not multi_line
 
@@ -110,20 +116,20 @@ def parse(code, language):
             indent_level = re.match("\s*", line).group(0)
 
             if has_code and docs_text.strip():
-                save(docs_text, code_text[:-1], sections)
+                save(docs_text, code_text[:-1])
                 code_text = code_text.split('\n')[-1]
                 has_code = docs_text = ''
 
         elif multi_line:
             # Remove leading spaces
-            if re.match(r' {:d}'.format(len(indent_level)), line):
+            if re.match(r' {{{:d}}}'.format(len(indent_level)), line):
                 docs_text += line[len(indent_level):] + '\n'
             else:
                 docs_text += line + '\n'
 
         elif re.match(comment_matcher, line):
             if has_code:
-                save(docs_text, code_text, sections)
+                save(docs_text, code_text)
                 has_code = docs_text = code_text = ''
             docs_text += re.sub(comment_matcher, "", line) + "\n"
 
@@ -131,13 +137,13 @@ def parse(code, language):
             if code_text and any(line.lstrip().startswith(x)
                                  for x in ['class ', 'def ', '@']):
                 if not code_text.lstrip().startswith("@"):
-                    save(docs_text, code_text, sections)
+                    save(docs_text, code_text)
                     code_text = has_code = docs_text = ''
 
             has_code = True
             code_text += line + '\n'
 
-    save(docs_text, code_text, sections)
+    save(docs_text, code_text)
 
     return sections
 
-- 
cgit v1.2.1


From 4f82bd2d8fb796c746680c1d1e8d5f99a1cd18fd Mon Sep 17 00:00:00 2001
From: zax <zach.smith@makespace.com>
Date: Sun, 1 Nov 2015 16:31:47 -0500
Subject: Basic Python 3 support

---
 .travis.yml         |  2 ++
 pycco/compat.py     |  4 ++++
 pycco/main.py       | 44 +++++++++++++++++++++++++++-----------------
 tests/test_pycco.py | 29 ++++++++++++++++++-----------
 4 files changed, 51 insertions(+), 28 deletions(-)
 create mode 100644 pycco/compat.py

diff --git a/.travis.yml b/.travis.yml
index bfbe563..62e7c6b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,12 @@
 language: python
 python:
     - '2.7'
+    - '3.5'
 install:
     - 'pip install -r requirements.txt'
     - 'pip install -r requirements.test.txt'
 script:
     - 'py.test --cov=pycco tests/'
+    - 'python -m pycco.main pycco/main.py'
 after_success:
     - coveralls
diff --git a/pycco/compat.py b/pycco/compat.py
new file mode 100644
index 0000000..6660531
--- /dev/null
+++ b/pycco/compat.py
@@ -0,0 +1,4 @@
+try:
+    pycco_unichr = unichr
+except NameError:
+    pycco_unichr = chr
diff --git a/pycco/main.py b/pycco/main.py
index df2b2bc..cde05d7 100644
--- a/pycco/main.py
+++ b/pycco/main.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import print_function
 
 """
 "**Pycco**" is a Python port of [Docco](http://jashkenas.github.com/docco/):
@@ -35,7 +36,7 @@ Or, to install the latest source
 
 
 def generate_documentation(source, outdir=None, preserve_paths=True,
-                           language=None):
+                           language=None, encoding="utf8"):
     """
     Generate the documentation for a source file by reading it in, splitting it
     up into comment/code sections, highlighting them for the appropriate
@@ -44,7 +45,7 @@ def generate_documentation(source, outdir=None, preserve_paths=True,
 
     if not outdir:
         raise TypeError("Missing the required 'outdir' keyword argument.")
-    code = open(source, "r").read()
+    code = open(source, "rb").read().decode(encoding)
     return _generate_documentation(source, code, outdir, preserve_paths, language)
 
 
@@ -226,6 +227,8 @@ def highlight(sections, language, preserve_paths=True, outdir=None):
             docs_text = unicode(section["docs_text"])
         except UnicodeError:
             docs_text = unicode(section["docs_text"].decode('utf-8'))
+        except NameError:
+            docs_text = section['docs_text']
         section["docs_html"] = markdown(preprocess(docs_text,
                                                    preserve_paths=preserve_paths,
                                                    outdir=outdir))
@@ -361,9 +364,9 @@ def get_language(source, code, language=None):
             else:
                 raise ValueError()
         except ValueError:
-                # If pygments can't find any lexers, it will raise its own
-                # subclass of ValueError. We will catch it and raise ours
-                # for consistency.
+            # If pygments can't find any lexers, it will raise its own
+            # subclass of ValueError. We will catch it and raise ours
+            # for consistency.
             raise ValueError("Can't figure out the language!")
 
 
@@ -403,15 +406,20 @@ def shift(list, default):
         return default
 
 
+def remove_control_chars(s):
+    # Sanitization regexp copied from
+    # http://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
+    from pycco.compat import pycco_unichr
+    control_chars = ''.join(map(pycco_unichr, list(range(0, 32)) + list(range(127, 160))))
+    control_char_re = re.compile(u'[{}]'.format(re.escape(control_chars)))
+    return control_char_re.sub('', s)
+
+
 def ensure_directory(directory):
     """
     Sanitize directory string and ensure that the destination directory exists.
     """
-    # Sanitization regexp copied from
-    # http://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
-    control_chars = ''.join(map(unichr, range(0, 32) + range(127, 160)))
-    control_char_re = re.compile(u'[{}]'.format(re.escape(control_chars)))
-    directory = control_char_re.sub('', directory)
+    directory = remove_control_chars(directory)
     if not os.path.isdir(directory):
         os.makedirs(directory)
 
@@ -434,7 +442,7 @@ highlight_start = "<div class=\"highlight\"><pre>"
 highlight_end = "</pre></div>"
 
 
-def process(sources, preserve_paths=True, outdir=None, language=None):
+def process(sources, preserve_paths=True, outdir=None, language=None, encoding="utf8"):
     """For each source file passed as argument, generate the documentation."""
 
     if not outdir:
@@ -447,8 +455,8 @@ def process(sources, preserve_paths=True, outdir=None, language=None):
     # Proceed to generating the documentation.
     if sources:
         outdir = ensure_directory(outdir)
-        css = open(path.join(outdir, "pycco.css"), "w")
-        css.write(pycco_styles)
+        css = open(path.join(outdir, "pycco.css"), "wb")
+        css.write(pycco_styles.encode(encoding))
         css.close()
 
         def next_file():
@@ -460,11 +468,13 @@ def process(sources, preserve_paths=True, outdir=None, language=None):
             except OSError:
                 pass
 
-            with open(dest, "w") as f:
-                f.write(generate_documentation(s, preserve_paths=preserve_paths, outdir=outdir,
-                                               language=language))
+            with open(dest, "wb") as f:
+                f.write(generate_documentation(s, preserve_paths=preserve_paths,
+                                               outdir=outdir,
+                                               language=language,
+                                               encoding=encoding))
 
-            print "pycco = {} -> {}".format(s, dest)
+            print("pycco = {} -> {}".format(s, dest))
 
             if sources:
                 next_file()
diff --git a/tests/test_pycco.py b/tests/test_pycco.py
index 04cb57e..22503c2 100644
--- a/tests/test_pycco.py
+++ b/tests/test_pycco.py
@@ -1,18 +1,24 @@
 import copy
+import os
 import tempfile
+import time
+
 import pytest
-import os
-import re
 from hypothesis import given, example, assume
 from hypothesis.strategies import lists, text, booleans, choices, none
 
 import pycco.main as p
 
+
 PYTHON = p.languages['.py']
 PYCCO_SOURCE = 'pycco/main.py'
 FOO_FUNCTION = """def foo():\n    return True"""
 
 
+def get_language(choice):
+    return choice(list(p.languages.values()))
+
+
 @given(lists(text()), text())
 def test_shift(fragments, default):
     if fragments == []:
@@ -33,7 +39,7 @@ def test_destination(filepath, preserve_paths, outdir):
 
 @given(choices(), text())
 def test_parse(choice, source):
-    l = choice(p.languages.values())
+    l = get_language(choice)
     parsed = p.parse(source, l)
     assert [{"code_text", "docs_text"} == set(s.keys()) for s in parsed]
 
@@ -69,7 +75,11 @@ def test_get_language_bad_source(source):
     with pytest.raises(ValueError) as e:
         assert p.get_language(source, "badlang")
 
-    assert e.value.message == "Can't figure out the language!"
+    msg = "Can't figure out the language!"
+    try:
+        assert e.value.message == msg
+    except AttributeError:
+        assert e.value.args[0] == msg
 
 
 @given(text() | none())
@@ -80,16 +90,13 @@ def test_get_language_bad_code(code):
 
 @given(text(max_size=64))
 def test_ensure_directory(dir_name):
-    tempdir = os.path.join(tempfile.gettempdir(), dir_name)
+    tempdir = os.path.join(tempfile.gettempdir(), str(int(time.time())), dir_name)
 
-    # Copy and paste sanitization from function, but only for housekeeping. We
+    # Use sanitization from function, but only for housekeeping. We
     # pass in the unsanitized string to the function.
-    control_chars = ''.join(map(unichr, range(0, 32) + range(127, 160)))
-    control_char_re = re.compile(u'[{}]'.format(re.escape(control_chars)))
-    safe_name = control_char_re.sub('', tempdir)
+    safe_name = p.remove_control_chars(dir_name)
 
-    if not os.path.isdir(safe_name):
-        assume(os.access(safe_name, os.W_OK))
+    if not os.path.isdir(safe_name) and os.access(safe_name, os.W_OK):
         p.ensure_directory(tempdir)
         assert os.path.isdir(safe_name)
 
-- 
cgit v1.2.1


From ff586cd08ad4e226ea5d65eda8683afb5a5c9373 Mon Sep 17 00:00:00 2001
From: Zach Smith <zach.smith@makespace.com>
Date: Sat, 7 Nov 2015 19:39:42 -0500
Subject: Unwrap comprehension in test

---
 tests/test_pycco.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_pycco.py b/tests/test_pycco.py
index 22503c2..5a38dd5 100644
--- a/tests/test_pycco.py
+++ b/tests/test_pycco.py
@@ -41,7 +41,8 @@ def test_destination(filepath, preserve_paths, outdir):
 def test_parse(choice, source):
     l = get_language(choice)
     parsed = p.parse(source, l)
-    assert [{"code_text", "docs_text"} == set(s.keys()) for s in parsed]
+    for s in parsed:
+        assert {"code_text", "docs_text"} == set(s.keys())
 
 
 def test_skip_coding_directive():
-- 
cgit v1.2.1