-rw-r--r--  AUTHORS | 5
-rw-r--r--  CHANGES | 3
-rw-r--r--  COPYING | 25
-rw-r--r--  MANIFEST.in | 3
-rw-r--r--  Makefile | 18
-rw-r--r--  README | 45
-rw-r--r--  TODO | 1
-rwxr-xr-x  bin/sqlformat | 103
-rw-r--r--  docs/Makefile | 88
-rw-r--r--  docs/source/api.rst | 12
-rw-r--r--  docs/source/changes.rst | 7
-rw-r--r--  docs/source/cmdline.rst | 8
-rw-r--r--  docs/source/conf.py | 197
-rw-r--r--  docs/source/index.rst | 26
-rw-r--r--  docs/source/intro.rst | 89
-rw-r--r--  docs/tango/static/bgfooter.png | bin 0 -> 434 bytes
-rw-r--r--  docs/tango/static/bgtop.png | bin 0 -> 430 bytes
-rw-r--r--  docs/tango/static/default.css_t | 90
-rw-r--r--  docs/tango/theme.conf | 28
-rw-r--r--  extras/appengine/Makefile | 51
-rw-r--r--  extras/appengine/README | 22
-rw-r--r--  extras/appengine/__init__.py | 0
-rw-r--r--  extras/appengine/app.yaml | 27
-rw-r--r--  extras/appengine/examples/customers.sql | 1
-rw-r--r--  extras/appengine/examples/multiple_inserts.sql | 1
-rw-r--r--  extras/appengine/examples/pg_view.sql | 1
-rw-r--r--  extras/appengine/examples/subquery.sql | 1
-rw-r--r--  extras/appengine/examples/subquery2.sql | 1
-rw-r--r--  extras/appengine/index.yaml | 0
-rw-r--r--  extras/appengine/main.py | 131
-rwxr-xr-x  extras/appengine/make_release.sh | 49
-rw-r--r--  extras/appengine/settings.py | 37
-rw-r--r--  extras/appengine/sqlformat/__init__.py | 0
-rw-r--r--  extras/appengine/sqlformat/urls.py | 11
-rw-r--r--  extras/appengine/sqlformat/views.py | 204
-rw-r--r--  extras/appengine/static/bg_options.png | bin 0 -> 202 bytes
-rw-r--r--  extras/appengine/static/bgfieldset.png | bin 0 -> 227 bytes
-rw-r--r--  extras/appengine/static/bgfooter.png | bin 0 -> 434 bytes
-rw-r--r--  extras/appengine/static/bgtop.png | bin 0 -> 430 bytes
-rw-r--r--  extras/appengine/static/blank.gif | bin 0 -> 64 bytes
-rw-r--r--  extras/appengine/static/canvas.html | 114
-rw-r--r--  extras/appengine/static/hotkeys.js | 1
-rw-r--r--  extras/appengine/static/img_loading.gif | bin 0 -> 1348 bytes
-rw-r--r--  extras/appengine/static/jquery.textarearesizer.compressed.js | 1
-rw-r--r--  extras/appengine/static/loading.gif | bin 0 -> 4331 bytes
-rw-r--r--  extras/appengine/static/lynx_screenshot.png | bin 0 -> 66017 bytes
-rw-r--r--  extras/appengine/static/pygments.css | 59
-rw-r--r--  extras/appengine/static/resize-grip.png | bin 0 -> 167 bytes
-rw-r--r--  extras/appengine/static/robots.txt | 8
-rw-r--r--  extras/appengine/static/rpc_relay.html | 1
-rw-r--r--  extras/appengine/static/script.js | 103
-rw-r--r--  extras/appengine/static/sqlformat_client_example.py | 17
-rw-r--r--  extras/appengine/static/styles.css | 245
-rw-r--r--  extras/appengine/templates/about.html | 44
-rw-r--r--  extras/appengine/templates/api.html | 50
-rw-r--r--  extras/appengine/templates/index.html | 107
-rw-r--r--  extras/appengine/templates/master.html | 103
-rw-r--r--  extras/appengine/templates/python-client-example.html | 17
-rw-r--r--  extras/appengine/templates/source.html | 56
-rw-r--r--  extras/sqlformat.png | bin 0 -> 5359 bytes
-rw-r--r--  extras/sqlformat.svg | 115
-rwxr-xr-x  setup.py | 28
-rw-r--r--  sqlparse/__init__.py | 65
-rw-r--r--  sqlparse/dialects.py | 88
-rw-r--r--  sqlparse/engine/__init__.py | 81
-rw-r--r--  sqlparse/engine/_grouping.py | 499
-rw-r--r--  sqlparse/engine/filter.py | 98
-rw-r--r--  sqlparse/engine/grouping.py | 537
-rw-r--r--  sqlparse/filters.py | 432
-rw-r--r--  sqlparse/formatter.py | 163
-rw-r--r--  sqlparse/keywords.py | 589
-rw-r--r--  sqlparse/lexer.py | 310
-rw-r--r--  sqlparse/tokens.py | 131
-rw-r--r--  tests/__init__.py | 0
-rw-r--r--  tests/files/begintag.sql | 4
-rw-r--r--  tests/files/dashcomment.sql | 5
-rw-r--r--  tests/files/function.sql | 13
-rw-r--r--  tests/files/function_psql.sql | 72
-rw-r--r--  tests/files/function_psql2.sql | 7
-rw-r--r--  tests/files/function_psql3.sql | 8
-rwxr-xr-x  tests/run_tests.py | 31
-rw-r--r--  tests/test_format.py | 146
-rw-r--r--  tests/test_grouping.py | 86
-rw-r--r--  tests/test_parse.py | 39
-rw-r--r--  tests/test_split.py | 88
-rw-r--r--  tests/test_tokenize.py | 21
-rw-r--r--  tests/utils.py | 38
87 files changed, 5905 insertions(+), 0 deletions(-)
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..4aec012
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,5 @@
+python-sqlparse is written and maintained by Andi Albrecht <albrecht.andi@gmail.com>.
+
+This module contains code (namely the lexer and filter mechanism) from
+the pygments project that was written by Georg Brandl.
+
diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..4dfa2e5
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,3 @@
+Release 0.1.0 (In Development)
+------------------------------
+ * Initial. \ No newline at end of file
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..0334d9f
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,25 @@
+Copyright (c) 2008, Andi Albrecht <albrecht.andi@gmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the authors nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..60ab652
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include COPYING
+include test.py
+recursive-include docs *.rst \ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..868043f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+# Makefile to simplify some common development tasks.
+# Run 'make help' for a list of commands.
+
+PYTHON=`which python`
+
+default: help
+
+help:
+ @echo "Available commands:"
+ @sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' <Makefile | sort
+
+test:
+ $(PYTHON) tests/run_tests.py
+
+clean:
+ $(PYTHON) setup.py clean
+ find . -name '*.pyc' -delete
+ find . -name '*~' -delete \ No newline at end of file
diff --git a/README b/README
new file mode 100644
index 0000000..10c8c1a
--- /dev/null
+++ b/README
@@ -0,0 +1,45 @@
+python-sqlparse - Parse SQL statements
+======================================
+
+sqlparse currently supports only one feature: splitting a series
+of SQL statements into single statements.
+
+Parts of the code are based on pygments written by Georg Brandl and others.
+pygments-Homepage: http://pygments.org/
+
+Run Tests
+---------
+
+ python tests/run_tests.py
+
+
+Create Documentation
+--------------------
+
+You need `Sphinx`_ installed on your system to build the documentation.
+
+ cd docs/
+ make html
+
+
+Install
+-------
+
+Run
+
+ python setup.py install
+
+with root privileges to install python-sqlparse on your system.
+
+
+Links
+-----
+
+Source code: http://github.com/andialbrecht/python-sqlparse
+
+
+python-sqlparse is licensed under the BSD license:
+http://www.opensource.org/licenses/bsd-license.php
+
+
+.. _Sphinx: http://sphinx.pocoo.org/
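[Editor's note: the splitting feature mentioned at the top of this README maps to
sqlparse.split; a minimal sketch based on the usage shown in docs/source/intro.rst
further down in this diff (Python 2, matching the codebase):]

    >>> import sqlparse
    >>> sqlparse.split('select * from foo; select * from bar;')
    [u'select * from foo; ', u'select * from bar;']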
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..77f09f5
--- /dev/null
+++ b/TODO
@@ -0,0 +1 @@
+- Add GQL dialect. \ No newline at end of file
diff --git a/bin/sqlformat b/bin/sqlformat
new file mode 100755
index 0000000..8d219a5
--- /dev/null
+++ b/bin/sqlformat
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+import optparse
+import os
+import sys
+
+import sqlparse
+
+
+_CASE_CHOICES = ['upper', 'lower', 'capitalize']
+
+
+parser = optparse.OptionParser(usage='%prog [OPTIONS] FILE, ...',
+ version='%%prog %s' % sqlparse.__version__)
+parser.set_description(('Format FILE according to OPTIONS. Use "-" as FILE '
+ 'to read from stdin.'))
+parser.add_option('-v', '--verbose', dest='verbose', action='store_true')
+parser.add_option('-o', '--outfile', dest='outfile', metavar='FILE',
+ help='write output to FILE (defaults to stdout)')
+group = parser.add_option_group('Formatting Options')
+group.add_option('-k', '--keywords', metavar='CHOICE',
+ dest='keyword_case', choices=_CASE_CHOICES,
+ help=('change case of keywords, CHOICE is one of %s'
+ % ', '.join('"%s"' % x for x in _CASE_CHOICES)))
+group.add_option('-i', '--identifiers', metavar='CHOICE',
+ dest='identifier_case', choices=_CASE_CHOICES,
+ help=('change case of identifiers, CHOICE is one of %s'
+ % ', '.join('"%s"' % x for x in _CASE_CHOICES)))
+group.add_option('-l', '--language', metavar='LANG',
+ dest='output_format', choices=['python', 'php'],
+ help=('output a snippet in programming language LANG, '
+ 'choices are "python", "php"'))
+group.add_option('--strip-comments', dest='strip_comments',
+ action='store_true', default=False,
+ help='remove comments')
+group.add_option('-r', '--reindent', dest='reindent',
+ action='store_true', default=False,
+ help='reindent statements')
+group.add_option('--indent_width', dest='indent_width', default=2,
+ help='indentation width (defaults to 2 spaces)')
+
+_FORMATTING_GROUP = group
+
+
+def _error(msg, exit_=None):
+ """Print msg and optionally exit with return code exit_."""
+ print >>sys.stderr, '[ERROR] %s' % msg
+ if exit_ is not None:
+ sys.exit(exit_)
+
+
+def _build_formatter_opts(options):
+ """Convert command line options to dictionary."""
+ d = {}
+ for option in _FORMATTING_GROUP.option_list:
+ d[option.dest] = getattr(options, option.dest)
+ return d
+
+
+def main():
+ options, args = parser.parse_args()
+ if options.verbose:
+ print >>sys.stderr, 'Verbose mode'
+
+ if len(args) != 1:
+ _error('No input data.')
+ parser.print_usage()
+ sys.exit(1)
+
+ if '-' in args: # read from stdin
+ data = sys.stdin.read()
+ else:
+ try:
+ data = open(args[0]).read()
+ except IOError, err:
+ _error('Failed to read %s: %s' % (args[0], err), exit_=1)
+
+ if options.outfile:
+ try:
+ stream = open(options.outfile, 'w')
+ except IOError, err:
+ _error('Failed to open %s: %s' % (options.outfile, err), exit_=1)
+ else:
+ stream = sys.stdout
+
+ formatter_opts = _build_formatter_opts(options)
+ try:
+ formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
+ except sqlparse.SQLParseError, err:
+ _error('Invalid options: %s' % err, exit_=1)
+
+ stream.write(sqlparse.format(data, **formatter_opts).encode('utf-8',
+ 'replace'))
+ stream.flush()
+
+
+if __name__ == '__main__':
+ main()
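[Editor's note: the script above is a thin wrapper around the library — it collects
the formatting option group into a dict, validates it with
sqlparse.formatter.validate_options, and hands the result to sqlparse.format (on the
command line, e.g. `sqlformat -r -k upper query.sql`, where query.sql is a
hypothetical input file). A minimal programmatic sketch of the same pipeline,
assuming the modules shown elsewhere in this diff:]

    import sqlparse
    import sqlparse.formatter

    opts = {'reindent': True, 'keyword_case': 'upper', 'indent_width': 2}
    # validate_options raises sqlparse.SQLParseError on bad values
    opts = sqlparse.formatter.validate_options(opts)
    sql = 'select * from foo where id in (select id from bar);'
    print sqlparse.format(sql, **opts)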
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..cb23107
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,88 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ -rm -rf build/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html
+ @echo
+ @echo "Build finished. The HTML pages are in build/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) build/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in build/dirhtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in build/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) build/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in build/qthelp, like this:"
+ @echo "# qcollectiongenerator build/qthelp/python-sqlparse.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile build/qthelp/python-sqlparse.qhc"
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in build/latex."
+ @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+ "run these through (pdf)latex."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes
+ @echo
+ @echo "The overview file is in build/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in build/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) build/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in build/doctest/output.txt."
diff --git a/docs/source/api.rst b/docs/source/api.rst
new file mode 100644
index 0000000..3bce389
--- /dev/null
+++ b/docs/source/api.rst
@@ -0,0 +1,12 @@
+:mod:`sqlparse` -- Parse SQL statements
+=======================================
+
+The :mod:`sqlparse` module provides the following functions on module-level.
+
+.. autofunction:: sqlparse.split
+
+.. autofunction:: sqlparse.format
+
+.. autofunction:: sqlparse.parse
+
+
diff --git a/docs/source/changes.rst b/docs/source/changes.rst
new file mode 100644
index 0000000..5811c57
--- /dev/null
+++ b/docs/source/changes.rst
@@ -0,0 +1,7 @@
+.. _changes:
+
+Changes in python-sqlparse
+==========================
+
+.. include:: ../../CHANGES
+
diff --git a/docs/source/cmdline.rst b/docs/source/cmdline.rst
new file mode 100644
index 0000000..93a35a7
--- /dev/null
+++ b/docs/source/cmdline.rst
@@ -0,0 +1,8 @@
+``sqlformat`` -- Command Line Script
+===========================================
+
+The :mod:`sqlparse` module is shipped with the script
+:program:`sqlformat` which provides a command line interface to the formatting
+functions.
+
+.. todo:: Describe options and example usage.
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..b84245a
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+#
+# python-sqlparse documentation build configuration file, created by
+# sphinx-quickstart on Thu Feb 26 08:19:28 2009.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.append(os.path.abspath('.'))
+sys.path.append(os.path.abspath('../../'))
+
+# -- General configuration -----------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage',
+ 'sphinx.ext.autosummary']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'python-sqlparse'
+copyright = u'2009, Andi Albrecht'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.1.0'
+# The full version, including alpha/beta/rc tags.
+release = '0.1.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of documents that shouldn't be included in the build.
+#unused_docs = []
+
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. Major themes that come with
+# Sphinx are currently 'default' and 'sphinxdoc'.
+html_theme = 'sphinxdoc'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = [os.path.abspath('../')]
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_use_modindex = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'python-sqlparsedoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('index', 'python-sqlparse.tex', ur'python-sqlparse Documentation',
+ ur'Andi Albrecht', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_use_modindex = True
+todo_include_todos = True
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..40e99e0
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,26 @@
+.. python-sqlparse documentation master file, created by
+ sphinx-quickstart on Thu Feb 26 08:19:28 2009.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome to python-sqlparse's documentation!
+===========================================
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+ intro
+ api
+ cmdline
+ changes
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/source/intro.rst b/docs/source/intro.rst
new file mode 100644
index 0000000..3d7a888
--- /dev/null
+++ b/docs/source/intro.rst
@@ -0,0 +1,89 @@
+Introduction
+============
+
+:mod:`sqlparse` is a non-validating SQL parser for Python.
+
+It provides support for parsing, splitting and formatting SQL statements.
+
+:mod:`sqlparse` is released under the terms of the
+`New BSD license <http://www.opensource.org/licenses/bsd-license.php>`_.
+
+Visit http://sqlformat.appspot.com to try its formatting features.
+
+
+Download & Installation
+-----------------------
+
+To download and install :mod:`sqlparse` on your system run the following
+commands:
+
+.. code-block:: bash
+
+ $ git clone git://github.com/andialbrecht/python-sqlparse.git
+ $ cd python-sqlparse/
+ $ sudo python setup.py install
+
+A tarball of the current sources is available under the following URL:
+http://github.com/andialbrecht/python-sqlparse/tarball/master
+
+
+Example Usage
+-------------
+
+Here are some usage examples of this module.
+
+Splitting statements::
+
+ >>> import sqlparse
+ >>> sql = 'select * from foo; select * from bar;'
+ >>> sqlparse.split(sql)
+ <<< [u'select * from foo; ', u'select * from bar;']
+
+Formatting statements::
+
+ >>> sql = 'select * from foo where id in (select id from bar);'
+ >>> print sqlparse.format(sql, reindent=True, keyword_case='upper')
+ SELECT *
+ FROM foo
+ WHERE id IN
+ (SELECT id
+ FROM bar);
+
+Now, let's have a deeper look at the internals::
+
+ >>> sql = 'select * from "someschema"."mytable" where id = 1'
+ >>> parsed = sqlparse.parse(sql)
+ >>> parsed
+ <<< (<Statement 'select...' at 0x9ad08ec>,)
+ >>> stmt = parsed[0]
+ >>> stmt.to_unicode() # converting it back to unicode
+ <<< u'select * from "someschema"."mytable" where id = 1'
+ >>> # This is what the internal representation looks like:
+ >>> stmt.tokens
+ <<<
+ (<DML 'select' at 0x9b63c34>,
+ <Whitespace ' ' at 0x9b63e8c>,
+ <Operator '*' at 0x9b63e64>,
+ <Whitespace ' ' at 0x9b63c5c>,
+ <Keyword 'from' at 0x9b63c84>,
+ <Whitespace ' ' at 0x9b63cd4>,
+ <Identifier '"somes...' at 0x9b5c62c>,
+ <Whitespace ' ' at 0x9b63f04>,
+ <Where 'where ...' at 0x9b5caac>)
+ >>>
+
+
+.. todo:: Describe general concepts:
+   Why non-validating? Processing stages (tokens, groups, ...), filters.
+
+
+Development & Contributing
+--------------------------
+
+Please file bug reports and feature requests on the project site at
+http://code.google.com/p/python-sqlparse/issues/entry or if you have
+code to contribute upload it to http://codereview.appspot.com and
+add albrecht.andi@googlemail.com as reviewer.
+
+For more information about the review tool and how to use it, visit
+its project page: http://code.google.com/p/rietveld.
diff --git a/docs/tango/static/bgfooter.png b/docs/tango/static/bgfooter.png
new file mode 100644
index 0000000..9ce5bdd
--- /dev/null
+++ b/docs/tango/static/bgfooter.png
Binary files differ
diff --git a/docs/tango/static/bgtop.png b/docs/tango/static/bgtop.png
new file mode 100644
index 0000000..a0d4709
--- /dev/null
+++ b/docs/tango/static/bgtop.png
Binary files differ
diff --git a/docs/tango/static/default.css_t b/docs/tango/static/default.css_t
new file mode 100644
index 0000000..c86759a
--- /dev/null
+++ b/docs/tango/static/default.css_t
@@ -0,0 +1,90 @@
+/**
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+@import url("basic.css");
+
+body {
+ color: #000000;
+ font-family: "Free Sans", Arial, Verdana, sans;
+ font-size: 11pt;
+ width: 70em;
+ margin: 0;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+
+div.related {
+ background: #eeeeec; /* #d3d7cf; */
+ color: white;
+ padding-bottom: .6em;
+ padding-top: .6em;
+}
+
+div.related a {
+ color: #2e3436;
+ text-decoration: none;
+}
+
+div.document {
+ background: #eeeeec;
+}
+
+div.body {
+ background: white;
+ padding: .5em;
+ -moz-border-radius: 0px 10px 10px 10px;
+ border-right: 1px solid #eeeeec;
+}
+
+div.document h1, h2, h3, h4 {
+ color: #204a87;
+ font-weight: normal;
+ letter-spacing: .05em;
+}
+
+div.document p, dl, div.highlight {
+ margin-left: 25px;
+}
+
+div.document dd p, dd {
+ margin-left: .5em;
+}
+
+div.document a {
+ color: #f57900;
+}
+
+div.sphinxsidebarwrapper {
+ background: url(bgfooter.png) top left repeat-x #eeeeec;
+ padding-right: .5em;
+}
+
+div.sphinxsidebar ul {
+ margin: 0;
+ padding: 0;
+}
+
+div.sphinxsidebar a {
+ color: #888a85;
+ text-decoration: none;
+}
+
+div.footer {
+ font-size: .8em;
+ color: #888a85;
+ text-align: right;
+ padding: 10px;
+}
+
+div.footer a {
+ color: #888a85;
+}
+
+div.highlight pre {
+ background-color: #eeeeec;
+ border: 1px solid #babdb6;
+ padding: 7px;
+} \ No newline at end of file
diff --git a/docs/tango/theme.conf b/docs/tango/theme.conf
new file mode 100644
index 0000000..812330f
--- /dev/null
+++ b/docs/tango/theme.conf
@@ -0,0 +1,28 @@
+[theme]
+inherit = basic
+stylesheet = default.css
+pygments_style = sphinx
+
+[options]
+rightsidebar = false
+stickysidebar = false
+
+footerbgcolor = #11303d
+footertextcolor = #ffffff
+sidebarbgcolor = #1c4e63
+sidebartextcolor = #ffffff
+sidebarlinkcolor = #98dbcc
+relbarbgcolor = #133f52
+relbartextcolor = #ffffff
+relbarlinkcolor = #ffffff
+bgcolor = #ffffff
+textcolor = #000000
+headbgcolor = #f2f2f2
+headtextcolor = #20435c
+headlinkcolor = #c60f0f
+linkcolor = #355f7c
+codebgcolor = #eeffcc
+codetextcolor = #333333
+
+bodyfont = sans-serif
+headfont = 'Trebuchet MS', sans-serif
diff --git a/extras/appengine/Makefile b/extras/appengine/Makefile
new file mode 100644
index 0000000..28380cf
--- /dev/null
+++ b/extras/appengine/Makefile
@@ -0,0 +1,51 @@
+# Makefile to simplify some common AppEngine actions.
+# Use 'make help' for a list of commands.
+
+PYTHON=`which python2.5`
+DEV_APPSERVER=$(PYTHON) `which dev_appserver.py`
+APPCFG=$(PYTHON) `which appcfg.py`
+PORT=8080
+
+
+default: help
+
+help:
+ @echo "Available commands:"
+ @sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' <Makefile | sort
+
+serve:
+ $(DEV_APPSERVER) --port=$(PORT) .
+
+serve_remote:
+ $(DEV_APPSERVER) --port=$(PORT) --address 0.0.0.0 .
+
+serve_email:
+ $(DEV_APPSERVER) --port=$(PORT) --enable_sendmail .
+
+serve_remote_email:
+ $(DEV_APPSERVER) --port=$(PORT) --enable_sendmail --address 0.0.0.0 .
+
+release: make_release.sh django/.svn
+ sh make_release.sh
+
+update: release
+ $(APPCFG) update release
+
+upload: update
+
+update_indexes:
+ $(APPCFG) update_indexes .
+
+vacuum_indexes:
+ $(APPCFG) vacuum_indexes .
+
+all: django pygments sqlparse
+
+django:
+ svn co http://code.djangoproject.com/svn/django/trunk/django
+
+pygments:
+ ln -s `python -c "import pygments,os; print os.path.dirname(pygments.__file__)"` .
+
+sqlparse:
+ ln -s ../../sqlparse .
diff --git a/extras/appengine/README b/extras/appengine/README
new file mode 100644
index 0000000..4762faa
--- /dev/null
+++ b/extras/appengine/README
@@ -0,0 +1,22 @@
+gae-sqlformat - An SQL formatting tool running on App Engine
+=============================================================
+
+
+To set up this application run
+
+ make all
+
+This command fetches Django from Subversion and symlinks Pygments
+and sqlparse. Note: You'll need Pygments installed somewhere in your
+PYTHONPATH.
+
+For a manual setup have a look at the Makefile ;-)
+
+
+To run the development server run
+
+ make serve
+
+
+Homepage: http://sqlformat.appspot.com
+
diff --git a/extras/appengine/__init__.py b/extras/appengine/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extras/appengine/__init__.py
diff --git a/extras/appengine/app.yaml b/extras/appengine/app.yaml
new file mode 100644
index 0000000..6aa08b2
--- /dev/null
+++ b/extras/appengine/app.yaml
@@ -0,0 +1,27 @@
+application: sqlformat
+version: 2
+runtime: python
+api_version: 1
+
+default_expiration: 7d # This is good for images, which never change
+
+handlers:
+
+- url: /(robots.txt|favicon.ico)
+ static_files: static/\1
+ upload: static/(robots.txt|favicon.ico)
+
+- url: /google7a062e78b56854c0.html
+ static_files: static/robots.txt
+ upload: static/robots.txt
+
+- url: /static/(script.js|styles.css|upload.py)
+ static_files: static/\1
+ upload: static/(script.js|styles.css|upload.py)
+ expiration: 1h # Shorter expiration, these change often
+
+- url: /static
+ static_dir: static
+
+- url: .*
+ script: main.py
diff --git a/extras/appengine/examples/customers.sql b/extras/appengine/examples/customers.sql
new file mode 100644
index 0000000..8b73850
--- /dev/null
+++ b/extras/appengine/examples/customers.sql
@@ -0,0 +1 @@
+USE mydatabase;SELECT orders.customer, orders.day_of_order, orders.product, orders.quantity as number_ordered, inventory.quantity as number_instock, inventory.price FROM orders JOIN inventory ON orders.product = inventory.product; \ No newline at end of file
diff --git a/extras/appengine/examples/multiple_inserts.sql b/extras/appengine/examples/multiple_inserts.sql
new file mode 100644
index 0000000..cf49d5d
--- /dev/null
+++ b/extras/appengine/examples/multiple_inserts.sql
@@ -0,0 +1 @@
+insert into customer (id, name) values (1, 'John');insert into customer (id, name) values (2, 'Jack');insert into customer (id, name) values (3, 'Jane');insert into customer (id, name) values (4, 'Jim');insert into customer (id, name) values (5, 'Jerry');insert into customer (id, name) values (1, 'Joe'); \ No newline at end of file
diff --git a/extras/appengine/examples/pg_view.sql b/extras/appengine/examples/pg_view.sql
new file mode 100644
index 0000000..edf9f06
--- /dev/null
+++ b/extras/appengine/examples/pg_view.sql
@@ -0,0 +1 @@
+SELECT DISTINCT (current_database())::information_schema.sql_identifier AS view_catalog, (nv.nspname)::information_schema.sql_identifier AS view_schema, (v.relname)::information_schema.sql_identifier AS view_name, (current_database())::information_schema.sql_identifier AS table_catalog, (nt.nspname)::information_schema.sql_identifier AS table_schema, (t.relname)::information_schema.sql_identifier AS table_name FROM pg_namespace nv, pg_class v, pg_depend dv, pg_depend dt, pg_class t, pg_namespace nt WHERE ((((((((((((((nv.oid = v.relnamespace) AND (v.relkind = 'v'::"char")) AND (v.oid = dv.refobjid)) AND (dv.refclassid = ('pg_class'::regclass)::oid)) AND (dv.classid = ('pg_rewrite'::regclass)::oid)) AND (dv.deptype = 'i'::"char")) AND (dv.objid = dt.objid)) AND (dv.refobjid <> dt.refobjid)) AND (dt.classid = ('pg_rewrite'::regclass)::oid)) AND (dt.refclassid = ('pg_class'::regclass)::oid)) AND (dt.refobjid = t.oid)) AND (t.relnamespace = nt.oid)) AND (t.relkind = ANY (ARRAY['r'::"char", 'v'::"char"]))) AND pg_has_role(t.relowner, 'USAGE'::text)) ORDER BY (current_database())::information_schema.sql_identifier, (nv.nspname)::information_schema.sql_identifier, (v.relname)::information_schema.sql_identifier, (current_database())::information_schema.sql_identifier, (nt.nspname)::information_schema.sql_identifier, (t.relname)::information_schema.sql_identifier;
diff --git a/extras/appengine/examples/subquery.sql b/extras/appengine/examples/subquery.sql
new file mode 100644
index 0000000..dd4bbc1
--- /dev/null
+++ b/extras/appengine/examples/subquery.sql
@@ -0,0 +1 @@
+select sum(a1.Sales) from Store_Information a1 where a1.Store_name in (select store_name from Geography a2 where a2.store_name = a1.store_name); \ No newline at end of file
diff --git a/extras/appengine/examples/subquery2.sql b/extras/appengine/examples/subquery2.sql
new file mode 100644
index 0000000..6c00a87
--- /dev/null
+++ b/extras/appengine/examples/subquery2.sql
@@ -0,0 +1 @@
+select user_id, count(*) as how_many from bboard where not exists (select 1 from bboard_authorized_maintainers bam where bam.user_id = bboard.user_id) and posting_time + 60 > sysdate group by user_id order by how_many desc; \ No newline at end of file
diff --git a/extras/appengine/index.yaml b/extras/appengine/index.yaml
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extras/appengine/index.yaml
diff --git a/extras/appengine/main.py b/extras/appengine/main.py
new file mode 100644
index 0000000..d0a8418
--- /dev/null
+++ b/extras/appengine/main.py
@@ -0,0 +1,131 @@
+# Copyright 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Main program for Rietveld.
+
+This is also a template for running a Django app under Google App
+Engine, especially when using a newer version of Django than provided
+in the App Engine standard library.
+
+The site-specific code is all in other files: urls.py, models.py,
+views.py, settings.py.
+"""
+
+# Standard Python imports.
+import os
+import sys
+import logging
+
+# Log a message each time this module get loaded.
+logging.info('Loading %s, app version = %s',
+ __name__, os.getenv('CURRENT_VERSION_ID'))
+
+# Delete the preloaded copy of Django.
+for key in [key for key in sys.modules if key.startswith('django')]:
+ del sys.modules[key]
+
+os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
+
+# Force sys.path to have our own directory first, so we can import from it.
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+# Import Django from a zipfile.
+sys.path.insert(0, os.path.abspath('django.zip'))
+
+# Fail early if we can't import Django. Log identifying information.
+import django
+logging.info('django.__file__ = %r, django.VERSION = %r',
+ django.__file__, django.VERSION)
+assert django.VERSION[0] >= 1, "This Django version is too old"
+
+# AppEngine imports.
+from google.appengine.ext.webapp import util
+
+
+# Helper to enter the debugger. This passes in __stdin__ and
+# __stdout__, because stdin and stdout are connected to the request
+# and response streams. You must import this from __main__ to use it.
+# (I tried to make it universally available via __builtin__, but that
+# doesn't seem to work for some reason.)
+def BREAKPOINT():
+ import pdb
+ p = pdb.Pdb(None, sys.__stdin__, sys.__stdout__)
+ p.set_trace()
+
+
+# Custom Django configuration.
+from django.conf import settings
+settings._target = None
+
+# Import various parts of Django.
+import django.core.handlers.wsgi
+import django.core.signals
+import django.db
+import django.dispatch.dispatcher
+import django.forms
+
+# Work-around to avoid warning about django.newforms in djangoforms.
+django.newforms = django.forms
+
+
+def log_exception(*args, **kwds):
+ """Django signal handler to log an exception."""
+ cls, err = sys.exc_info()[:2]
+ logging.exception('Exception in request: %s: %s', cls.__name__, err)
+
+
+# Log all exceptions detected by Django.
+django.core.signals.got_request_exception.connect(log_exception)
+
+# Unregister Django's default rollback event handler.
+django.core.signals.got_request_exception.disconnect(
+ django.db._rollback_on_exception)
+
+
+def real_main():
+ """Main program."""
+ # Create a Django application for WSGI.
+ application = django.core.handlers.wsgi.WSGIHandler()
+ # Run the WSGI CGI handler with that application.
+ util.run_wsgi_app(application)
+
+
+def profile_main():
+ """Main program for profiling."""
+ import cProfile
+ import pstats
+ import StringIO
+
+ prof = cProfile.Profile()
+ prof = prof.runctx('real_main()', globals(), locals())
+ stream = StringIO.StringIO()
+ stats = pstats.Stats(prof, stream=stream)
+ # stats.strip_dirs() # Don't; too many modules are named __init__.py.
+ stats.sort_stats('time') # 'time', 'cumulative' or 'calls'
+ stats.print_stats() # Optional arg: how many to print
+ # The rest is optional.
+ # stats.print_callees()
+ # stats.print_callers()
+ print '\n<hr>'
+ print '<h1>Profile</h1>'
+ print '<pre>'
+ print stream.getvalue()[:1000000]
+ print '</pre>'
+
+# Set this to profile_main to enable profiling.
+main = real_main
+
+
+if __name__ == '__main__':
+ main()
diff --git a/extras/appengine/make_release.sh b/extras/appengine/make_release.sh
new file mode 100755
index 0000000..adb8a5c
--- /dev/null
+++ b/extras/appengine/make_release.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# Script to create a "release" subdirectory. This is a subdirectory
+# containing a bunch of symlinks, from which the app can be updated.
+# The main reason for this is to import Django from a zipfile, which
+# saves dramatically in upload time: statting and computing the SHA1
+# for 1000s of files is slow. Even if most of those files don't
+# actually need to be uploaded, they still add to the work done for
+# each update.
+
+ZIPFILE=django.zip
+RELEASE=release
+FILES="app.yaml index.yaml __init__.py main.py settings.py"
+DIRS="static templates sqlparse pygments sqlformat examples"
+
+# Remove old $ZIPFILE file.
+rm -rf $ZIPFILE
+
+# Create new $ZIPFILE file.
+# We prune:
+# - .svn subdirectories for obvious reasons.
+# - contrib/gis/ and related files because it's huge and unneeded.
+# - *.po and *.mo files because they are bulky and unneeded.
+# - *.pyc and *.pyo because they aren't used by App Engine anyway.
+zip -q $ZIPFILE `find django/ \
+ -name .svn -prune -o \
+ -name gis -prune -o \
+ -name admin -prune -o \
+ -name localflavor -prune -o \
+ -name mysql -prune -o \
+ -name mysql_old -prune -o \
+ -name oracle -prune -o \
+ -name postgresql -prune -o \
+ -name postgresql_psycopg2 -prune -o \
+ -name sqlite3 -prune -o \
+ -name test -prune -o \
+ -type f ! -name \*.py[co] ! -name \*.[pm]o -print`
+
+# Remove old $RELEASE directory.
+rm -rf $RELEASE
+
+# Create new $RELEASE directory.
+mkdir $RELEASE
+
+# Create symbolic links.
+for x in $FILES $DIRS $ZIPFILE
+do
+ ln -s ../$x $RELEASE/$x
+done
diff --git a/extras/appengine/settings.py b/extras/appengine/settings.py
new file mode 100644
index 0000000..fad6bb5
--- /dev/null
+++ b/extras/appengine/settings.py
@@ -0,0 +1,37 @@
+# Copyright 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Minimal Django settings."""
+
+import os
+
+APPEND_SLASH = False
+DEBUG = os.environ['SERVER_SOFTWARE'].startswith('Dev')
+INSTALLED_APPS = (
+ 'sqlformat',
+)
+MIDDLEWARE_CLASSES = (
+ 'django.middleware.common.CommonMiddleware',
+ 'django.middleware.http.ConditionalGetMiddleware',
+# 'codereview.middleware.AddUserToRequestMiddleware',
+)
+ROOT_URLCONF = 'sqlformat.urls'
+TEMPLATE_CONTEXT_PROCESSORS = ()
+TEMPLATE_DEBUG = DEBUG
+TEMPLATE_DIRS = (
+ os.path.join(os.path.dirname(__file__), 'templates'),
+ )
+TEMPLATE_LOADERS = (
+ 'django.template.loaders.filesystem.load_template_source',
+ )
diff --git a/extras/appengine/sqlformat/__init__.py b/extras/appengine/sqlformat/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extras/appengine/sqlformat/__init__.py
diff --git a/extras/appengine/sqlformat/urls.py b/extras/appengine/sqlformat/urls.py
new file mode 100644
index 0000000..c83290e
--- /dev/null
+++ b/extras/appengine/sqlformat/urls.py
@@ -0,0 +1,11 @@
+from django.conf.urls.defaults import *
+
+urlpatterns = patterns(
+ 'sqlformat.views',
+ (r'^$', 'index'),
+ (r'^source/$', 'source'),
+ (r'^about/$', 'about'),
+ (r'^api/$', 'api'),
+ (r'^format/$', 'format'),
+ (r'^load_example', 'load_example'),
+)
diff --git a/extras/appengine/sqlformat/views.py b/extras/appengine/sqlformat/views.py
new file mode 100644
index 0000000..d135c44
--- /dev/null
+++ b/extras/appengine/sqlformat/views.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import md5
+import os
+import time
+
+from django import forms
+from django.http import HttpResponse
+from django.shortcuts import render_to_response
+from django.utils import simplejson as json
+
+from google.appengine.api import users
+
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import SqlLexer, PythonLexer, PhpLexer
+
+import sqlparse
+
+
+INITIAL_SQL = "select * from foo join bar on val1 = val2 where id = 123;"
+EXAMPLES_DIR = os.path.join(os.path.dirname(__file__), '../examples')
+
+def _get_user_image(user):
+ if user is None:
+ return None
+ digest = md5.new(user.email().lower()).hexdigest()
+ if os.environ['SERVER_SOFTWARE'].startswith('Dev'):
+ host = 'localhost%3A8080'
+ else:
+ host = 'sqlformat.appspot.com'
+ default = 'http%3A%2F%2F'+host+'%2Fstatic%2Fblank.gif'
+ return 'http://gravatar.com/avatar/%s?s=32&d=%s' % (digest, default)
+
+def _get_examples():
+ fnames = os.listdir(EXAMPLES_DIR)
+ fnames.sort()
+ return fnames
+
+
+class FormOptions(forms.Form):
+ data = forms.CharField(widget=forms.Textarea({'class': 'resizable'}),
+ initial=INITIAL_SQL, required=False)
+ datafile = forms.FileField(required=False)
+ highlight = forms.BooleanField(initial=True, required=False,
+ widget=forms.CheckboxInput(),
+ label='Enable syntax highlighting')
+ remove_comments = forms.BooleanField(initial=False, required=False,
+ widget=forms.CheckboxInput(),
+ label='Remove comments')
+ keyword_case = forms.CharField(
+ widget=forms.Select(choices=(('', 'Unchanged'),
+ ('lower', 'Lower case'),
+ ('upper', 'Upper case'),
+ ('capitalize', 'Capitalize'))),
+ required=False, initial='upper', label='Keywords')
+ identifier_case = forms.CharField(
+ widget=forms.Select(choices=(('', 'Unchanged'),
+ ('lower', 'Lower case'),
+ ('upper', 'Upper case'),
+ ('capitalize', 'Capitalize'))),
+ required=False, initial='', label='Identifiers')
+ n_indents = forms.IntegerField(min_value=0, max_value=30,
+ initial=2, required=False,
+ label='spaces',
+ widget=forms.TextInput({'size': 2,
+ 'maxlength': 2}))
+# right_margin = forms.IntegerField(min_value=10, max_value=500,
+# initial=60, required=False,
+# label='characters',
+# widget=forms.TextInput({'size': 3,
+# 'maxlength': 3}))
+ output_format = forms.CharField(
+ widget=forms.Select(choices=(('sql', 'SQL'),
+ ('python', 'Python'),
+ ('php', 'PHP'),
+ )),
+ required=False, initial='sql', label='Language')
+
+ def clean(self):
+ super(FormOptions, self).clean()
+ data = self.cleaned_data.get('data')
+ logging.info(self.files)
+ if 'datafile' in self.files:
+ self._datafile = self.files['datafile'].read()
+ else:
+ self._datafile = None
+ if not data and not self._datafile:
+ raise forms.ValidationError('Whoops, I need a file or text!')
+ elif data and self._datafile:
+ raise forms.ValidationError('Whoops, I need a file OR text!')
+ return self.cleaned_data
+
+ def get_data(self):
+ data = self.cleaned_data.get('data')
+ if self._datafile:
+ return self._datafile
+ else:
+ return data
+
+
+def format_sql(form, format='html'):
+ data = form.cleaned_data
+ popts = {}
+ sql = form.get_data()
+ if data.get('remove_comments'):
+ popts['strip_comments'] = True
+ if data.get('keyword_case'):
+ popts['keyword_case'] = data.get('keyword_case')
+ if data.get('identifier_case'):
+ popts['identifier_case'] = data.get('identifier_case')
+ if data.get('n_indents', None) is not None:
+ popts['reindent'] = True
+ popts['indent_width'] = data.get('n_indents')
+ if data.get('right_margin', None) is not None:
+ popts['right_margin'] = data.get('right_margin')
+ if data.get('output_format', None) is not None:
+ popts['output_format'] = data.get('output_format')
+ sql = sqlparse.format(sql, **popts)
+ if format in ('html', 'json'):
+ if data.get('highlight', False):
+ if popts['output_format'] == 'python':
+ lexer = PythonLexer()
+ elif popts['output_format'] == 'php':
+ lexer = PhpLexer()
+ else:
+ lexer = SqlLexer()
+ sql = highlight(sql, lexer, HtmlFormatter())
+ else:
+ sql = ('<textarea class="resizable" '
+ 'style="height: 350px; margin-top: 1em;">%s</textarea>'
+ % sql)
+ return sql
+
+
+def index(request):
+ output = None
+ data = {}
+ proc_time = None
+ if request.method == 'POST':
+ logging.debug(request.POST)
+ form = FormOptions(request.POST, request.FILES)
+ if form.is_valid():
+ start = time.time()
+ output = format_sql(form,
+ format=request.POST.get('format', 'html'))
+ proc_time = time.time()-start
+ else:
+ form = FormOptions()
+ if request.POST.get('format', None) == 'json':
+ logging.warning(form.errors)
+ data['errors'] = str(form.errors)
+ data['output'] = output
+ logging.info('%r', proc_time)
+ data['proc_time'] = '%.3f' % (proc_time or 0.0)
+ data = json.dumps(data)
+ return HttpResponse(data, content_type='text/x-json')
+ elif request.POST.get('format', None) == 'text':
+ if not form.is_valid():
+ data = str(form.errors) # XXX convert to plain text
+ else:
+ data = output
+ return HttpResponse(data, content_type='text/plain')
+ return render_to_response('index.html',
+ {'form': form, 'output': output,
+ 'proc_time': proc_time and '%.3f' % proc_time or None,
+ 'user': users.get_current_user(),
+ 'login_url': users.create_login_url('/'),
+ 'logout_url': users.create_logout_url('/'),
+ 'userimg': _get_user_image(users.get_current_user()),
+ 'examples': _get_examples()})
+
+
+def format(request):
+ if request.method == 'POST':
+ form = FormOptions(request.POST)
+ if form.is_valid():
+ response = format_sql(form, format='text')
+ else:
+ response = 'ERROR: %s' % str(form.errors)
+ else:
+ response = 'POST request required'
+ return HttpResponse(response, content_type='text/plain')
+
+def source(request):
+ return render_to_response('source.html')
+
+def about(request):
+ return render_to_response('about.html')
+
+def api(request):
+ return render_to_response('api.html')
+
+def load_example(request):
+ fname = request.POST.get('fname')
+ if fname is None:
+ answer = 'Oops, I\'ve got no filename...'
+ elif fname not in _get_examples():
+ answer = 'Hmm, I think you don\'t want to do that.'
+ else:
+ answer = open(os.path.join(EXAMPLES_DIR, fname)).read()
+ data = json.dumps({'answer': answer})
+ return HttpResponse(data, content_type='text/x-json')
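[Editor's note: the format view above takes a plain POST and returns text/plain, so
the formatter can be scripted against the running app. A minimal client sketch
(Python 2; the field names are taken from FormOptions above, and the host is the
App Engine app named in the README):]

    import urllib

    params = urllib.urlencode({
        'data': 'select * from foo where id in (select id from bar);',
        'keyword_case': 'upper',
        'n_indents': 2,
        'output_format': 'sql',
    })
    # POST to the /format/ URL mapped in sqlformat/urls.py
    print urllib.urlopen('http://sqlformat.appspot.com/format/', params).read()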
diff --git a/extras/appengine/static/bg_options.png b/extras/appengine/static/bg_options.png
new file mode 100644
index 0000000..bc1a6ed
--- /dev/null
+++ b/extras/appengine/static/bg_options.png
Binary files differ
diff --git a/extras/appengine/static/bgfieldset.png b/extras/appengine/static/bgfieldset.png
new file mode 100644
index 0000000..4d55f4a
--- /dev/null
+++ b/extras/appengine/static/bgfieldset.png
Binary files differ
diff --git a/extras/appengine/static/bgfooter.png b/extras/appengine/static/bgfooter.png
new file mode 100644
index 0000000..9ce5bdd
--- /dev/null
+++ b/extras/appengine/static/bgfooter.png
Binary files differ
diff --git a/extras/appengine/static/bgtop.png b/extras/appengine/static/bgtop.png
new file mode 100644
index 0000000..a0d4709
--- /dev/null
+++ b/extras/appengine/static/bgtop.png
Binary files differ
diff --git a/extras/appengine/static/blank.gif b/extras/appengine/static/blank.gif
new file mode 100644
index 0000000..3be2119
--- /dev/null
+++ b/extras/appengine/static/blank.gif
Binary files differ
diff --git a/extras/appengine/static/canvas.html b/extras/appengine/static/canvas.html
new file mode 100644
index 0000000..ab642d0
--- /dev/null
+++ b/extras/appengine/static/canvas.html
@@ -0,0 +1,114 @@
+<html>
+
+<head>
+<style type="text/css">
+ /*
+ These styles are customizable.
+ Provide .canvas-gadget (the div that holds the canvas mode gadget)
+ at least 500px width so that the gadget has sufficient screen real estate
+*/
+body {
+ margin: 0;
+ font-family:arial, sans-serif;
+ text-align:center;
+}
+.container {
+ width:652px;
+ margin:0 auto;
+ text-align:left;
+}
+.fc-sign-in-header {
+ text-align:left;
+ font-size: 13px;
+ padding:3px 10px;
+ border-bottom:1px solid #000000;
+}
+.signin {
+ text-align:left;
+ float:right;
+ font-size: 13px;
+ height: 32px;
+}
+.go-back {
+ text-align:left;
+ margin:5px auto 15px auto;
+}
+.go-back a, .go-back a:visited {
+ font-weight:bold;
+}
+.canvas-gadget {
+ text-align:left;
+ width:650px; /* ALLOW AT LEAST 500px WIDTH*/
+ margin:10px auto 10px auto;
+ border:1px solid #cccccc;
+}
+.site-header {
+ margin-top: 10px;
+}
+.section-title {
+ font-size: 2em;
+}
+.clear {
+ clear:both;
+ font-size:1px;
+ height:1px;
+ line-height:0;
+ margin:0;
+ padding:0;
+}
+</style>
+<script type="text/javascript" src="http://www.google.com/friendconnect/script/friendconnect.js"></script>
+</head>
+<body>
+<div class="container">
+ <div class="fc-sign-in-header">
+ <!--REQUIRED SO VISITORS CAN SIGN IN-->
+ <div class="signin" id="gadget-signin"></div>
+ <script type="text/javascript">
+ var skin = {};
+ skin['BORDER_COLOR'] = '#cccccc';
+ skin['ENDCAP_BG_COLOR'] = '#e0ecff';
+ skin['ENDCAP_TEXT_COLOR'] = '#333333';
+ skin['ENDCAP_LINK_COLOR'] = '#0000cc';
+ skin['ALTERNATE_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_LINK_COLOR'] = '#0000cc';
+ skin['CONTENT_TEXT_COLOR'] = '#333333';
+ skin['CONTENT_SECONDARY_LINK_COLOR'] = '#7777cc';
+ skin['CONTENT_SECONDARY_TEXT_COLOR'] = '#666666';
+ skin['CONTENT_HEADLINE_COLOR'] = '#333333';
+ skin['ALIGNMENT'] = 'right';
+ google.friendconnect.container.renderCanvasSignInGadget({'id': 'gadget-signin'}, skin);
+ </script>
+ <!--END REQUIRED-->
+ <div class="clear"></div>
+ </div>
+
+ <div class="site-header"><span class="section-title">Site Name</span></div>
+ <div class="go-back">
+ <!--REQUIRED SO VISITORS CAN RETURN TO REFERRING PAGE-->
+ <a href="javascript:google.friendconnect.container.goBackToSite();">
+ &lsaquo;&lsaquo; Return home</a>
+ <!--END REQUIRED-->
+ </div>
+ <!-- REQUIRED - THIS IS WHERE THE GADGET IS PRESENTED. ALLOW AT LEAST 500px WIDTH -->
+ <div id="gadget-canvas" class="canvas-gadget"></div>
+ <script type="text/javascript">
+ var skin = {};
+ skin['BORDER_COLOR'] = '#cccccc';
+ skin['ENDCAP_BG_COLOR'] = '#e0ecff';
+ skin['ENDCAP_TEXT_COLOR'] = '#333333';
+ skin['ENDCAP_LINK_COLOR'] = '#0000cc';
+ skin['ALTERNATE_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_LINK_COLOR'] = '#0000cc';
+ skin['CONTENT_TEXT_COLOR'] = '#333333';
+ skin['CONTENT_SECONDARY_LINK_COLOR'] = '#7777cc';
+ skin['CONTENT_SECONDARY_TEXT_COLOR'] = '#666666';
+ skin['CONTENT_HEADLINE_COLOR'] = '#333333';
+ google.friendconnect.container.renderUrlCanvasGadget({'id': 'gadget-canvas'}, skin);
+ </script>
+ <!--END REQUIRED-->
+</div>
+</body>
+</html> \ No newline at end of file
diff --git a/extras/appengine/static/hotkeys.js b/extras/appengine/static/hotkeys.js
new file mode 100644
index 0000000..0e62a92
--- /dev/null
+++ b/extras/appengine/static/hotkeys.js
@@ -0,0 +1 @@
+(function(B){B.fn.__bind__=B.fn.bind;B.fn.__unbind__=B.fn.unbind;B.fn.__find__=B.fn.find;var A={version:"0.7.8",override:/keydown|keypress|keyup/g,triggersMap:{},specialKeys:{27:"esc",9:"tab",32:"space",13:"return",8:"backspace",145:"scroll",20:"capslock",144:"numlock",19:"pause",45:"insert",36:"home",46:"del",35:"end",33:"pageup",34:"pagedown",37:"left",38:"up",39:"right",40:"down",112:"f1",113:"f2",114:"f3",115:"f4",116:"f5",117:"f6",118:"f7",119:"f8",120:"f9",121:"f10",122:"f11",123:"f12"},shiftNums:{"`":"~","1":"!","2":"@","3":"#","4":"$","5":"%","6":"^","7":"&","8":"*","9":"(","0":")","-":"_","=":"+",";":":","'":'"',",":"<",".":">","/":"?","\\":"|"},newTrigger:function(E,D,F){var C={};C[E]={};C[E][D]={cb:F,disableInInput:false};return C}};if(B.browser.mozilla){A.specialKeys=B.extend(A.specialKeys,{96:"0",97:"1",98:"2",99:"3",100:"4",101:"5",102:"6",103:"7",104:"8",105:"9"})}B.fn.find=function(C){this.query=C;return B.fn.__find__.apply(this,arguments)};B.fn.unbind=function(H,E,G){if(B.isFunction(E)){G=E;E=null}if(E&&typeof E==="string"){var F=((this.prevObject&&this.prevObject.query)||(this[0].id&&this[0].id)||this[0]).toString();var D=H.split(" ");for(var C=0;C<D.length;C++){delete A.triggersMap[F][D[C]][E]}}return this.__unbind__(H,G)};B.fn.bind=function(J,F,K){var H=J.match(A.override);if(B.isFunction(F)||!H){return this.__bind__(J,F,K)}else{var N=null,I=B.trim(J.replace(A.override,""));if(I){N=this.__bind__(I,F,K)}if(typeof F==="string"){F={combi:F}}if(F.combi){for(var M=0;M<H.length;M++){var D=H[M];var G=F.combi.toLowerCase(),E=A.newTrigger(D,G,K),L=((this.prevObject&&this.prevObject.query)||(this[0].id&&this[0].id)||this[0]).toString();E[D][G].disableInInput=F.disableInInput;if(!A.triggersMap[L]){A.triggersMap[L]=E}else{if(!A.triggersMap[L][D]){A.triggersMap[L][D]=E[D]}}var C=A.triggersMap[L][D][G];if(!C){A.triggersMap[L][D][G]=[E[D][G]]}else{if(C.constructor!==Array){A.triggersMap[L][D][G]=[C]}else{A.triggersMap[L][D][G][C.length]=E[D][G]}}this.each(function(){var O=B(this);if(O.attr("hkId")&&O.attr("hkId")!==L){L=O.attr("hkId")+";"+L}O.attr("hkId",L)});N=this.__bind__(H.join(" "),F,A.handler)}}return N}};A.findElement=function(C){if(!B(C).attr("hkId")){if(B.browser.opera||B.browser.safari){while(!B(C).attr("hkId")&&C.parentNode){C=C.parentNode}}}return C};A.handler=function(E){var O=A.findElement(E.currentTarget),I=B(O),D=I.attr("hkId");if(D){D=D.split(";");var G=E.which,Q=E.type,P=A.specialKeys[G],N=!P&&String.fromCharCode(G).toLowerCase(),H=E.shiftKey,C=E.ctrlKey,M=E.altKey||E.originalEvent.altKey,F=null;for(var R=0;R<D.length;R++){if(A.triggersMap[D[R]][Q]){F=A.triggersMap[D[R]][Q];break}}if(F){var J;if(!H&&!C&&!M){J=F[P]||(N&&F[N])}else{var L="";if(M){L+="alt+"}if(C){L+="ctrl+"}if(H){L+="shift+"}J=F[L+P];if(!J){if(N){J=F[L+N]||F[L+A.shiftNums[N]]||(L==="shift+"&&F[A.shiftNums[N]])}}}if(J){var S=false;for(var R=0;R<J.length;R++){if(J[R].disableInInput){var K=B(E.target);if(I.is("input")||I.is("textarea")||K.is("input")||K.is("textarea")){return true}}S=S||J[R].cb.apply(this,[E])}return S}}}};window.hotkeys=A;return B})(jQuery); \ No newline at end of file
diff --git a/extras/appengine/static/img_loading.gif b/extras/appengine/static/img_loading.gif
new file mode 100644
index 0000000..6465823
--- /dev/null
+++ b/extras/appengine/static/img_loading.gif
Binary files differ
diff --git a/extras/appengine/static/jquery.textarearesizer.compressed.js b/extras/appengine/static/jquery.textarearesizer.compressed.js
new file mode 100644
index 0000000..5464ae6
--- /dev/null
+++ b/extras/appengine/static/jquery.textarearesizer.compressed.js
@@ -0,0 +1 @@
+(function($){var textarea,staticOffset;var iLastMousePos=0;var iMin=32;var grip;$.fn.TextAreaResizer=function(){return this.each(function(){textarea=$(this).addClass('processed'),staticOffset=null;$(this).wrap('<div class="resizable-textarea"><span></span></div>').parent().append($('<div class="grippie"></div>').bind("mousedown",{el:this},startDrag));var grippie=$('div.grippie',$(this).parent())[0];grippie.style.marginRight=(grippie.offsetWidth-$(this)[0].offsetWidth)+'px'})};function startDrag(e){textarea=$(e.data.el);textarea.blur();iLastMousePos=mousePosition(e).y;staticOffset=textarea.height()-iLastMousePos;textarea.css('opacity',0.25);$(document).mousemove(performDrag).mouseup(endDrag);return false}function performDrag(e){var iThisMousePos=mousePosition(e).y;var iMousePos=staticOffset+iThisMousePos;if(iLastMousePos>=(iThisMousePos)){iMousePos-=5}iLastMousePos=iThisMousePos;iMousePos=Math.max(iMin,iMousePos);textarea.height(iMousePos+'px');if(iMousePos<iMin){endDrag(e)}return false}function endDrag(e){$(document).unbind('mousemove',performDrag).unbind('mouseup',endDrag);textarea.css('opacity',1);textarea.focus();textarea=null;staticOffset=null;iLastMousePos=0}function mousePosition(e){return{x:e.clientX+document.documentElement.scrollLeft,y:e.clientY+document.documentElement.scrollTop}}})(jQuery); \ No newline at end of file
diff --git a/extras/appengine/static/loading.gif b/extras/appengine/static/loading.gif
new file mode 100644
index 0000000..a879bed
--- /dev/null
+++ b/extras/appengine/static/loading.gif
Binary files differ
diff --git a/extras/appengine/static/lynx_screenshot.png b/extras/appengine/static/lynx_screenshot.png
new file mode 100644
index 0000000..d1592ac
--- /dev/null
+++ b/extras/appengine/static/lynx_screenshot.png
Binary files differ
diff --git a/extras/appengine/static/pygments.css b/extras/appengine/static/pygments.css
new file mode 100644
index 0000000..da02807
--- /dev/null
+++ b/extras/appengine/static/pygments.css
@@ -0,0 +1,59 @@
+.c { color: #408080; font-style: italic } /* Comment */
+.err { border: 1px solid #FF0000 } /* Error */
+.k { color: #008000; font-weight: bold } /* Keyword */
+.o { color: #666666 } /* Operator */
+.cm { color: #408080; font-style: italic } /* Comment.Multiline */
+.cp { color: #BC7A00 } /* Comment.Preproc */
+.c1 { color: #408080; font-style: italic } /* Comment.Single */
+.cs { color: #408080; font-style: italic } /* Comment.Special */
+.gd { color: #A00000 } /* Generic.Deleted */
+.ge { font-style: italic } /* Generic.Emph */
+.gr { color: #FF0000 } /* Generic.Error */
+.gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.gi { color: #00A000 } /* Generic.Inserted */
+.go { color: #808080 } /* Generic.Output */
+.gp { color: #000080; font-weight: bold } /* Generic.Prompt */
+.gs { font-weight: bold } /* Generic.Strong */
+.gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.gt { color: #0040D0 } /* Generic.Traceback */
+.kc { color: #008000; font-weight: bold } /* Keyword.Constant */
+.kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
+.kp { color: #008000 } /* Keyword.Pseudo */
+.kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
+.kt { color: #B00040 } /* Keyword.Type */
+.m { color: #666666 } /* Literal.Number */
+.s { color: #BA2121 } /* Literal.String */
+.na { color: #7D9029 } /* Name.Attribute */
+.nb { color: #008000 } /* Name.Builtin */
+.nc { color: #0000FF; font-weight: bold } /* Name.Class */
+.no { color: #880000 } /* Name.Constant */
+.nd { color: #AA22FF } /* Name.Decorator */
+.ni { color: #999999; font-weight: bold } /* Name.Entity */
+.ne { color: #D2413A; font-weight: bold } /* Name.Exception */
+.nf { color: #0000FF } /* Name.Function */
+.nl { color: #A0A000 } /* Name.Label */
+.nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
+.nt { color: #008000; font-weight: bold } /* Name.Tag */
+.nv { color: #19177C } /* Name.Variable */
+.ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
+.w { color: #bbbbbb } /* Text.Whitespace */
+.mf { color: #666666 } /* Literal.Number.Float */
+.mh { color: #666666 } /* Literal.Number.Hex */
+.mi { color: #666666 } /* Literal.Number.Integer */
+.mo { color: #666666 } /* Literal.Number.Oct */
+.sb { color: #BA2121 } /* Literal.String.Backtick */
+.sc { color: #BA2121 } /* Literal.String.Char */
+.sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
+.s2 { color: #BA2121 } /* Literal.String.Double */
+.se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
+.sh { color: #BA2121 } /* Literal.String.Heredoc */
+.si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
+.sx { color: #008000 } /* Literal.String.Other */
+.sr { color: #BB6688 } /* Literal.String.Regex */
+.s1 { color: #BA2121 } /* Literal.String.Single */
+.ss { color: #19177C } /* Literal.String.Symbol */
+.bp { color: #008000 } /* Name.Builtin.Pseudo */
+.vc { color: #19177C } /* Name.Variable.Class */
+.vg { color: #19177C } /* Name.Variable.Global */
+.vi { color: #19177C } /* Name.Variable.Instance */
+.il { color: #666666 } /* Literal.Number.Integer.Long */
diff --git a/extras/appengine/static/resize-grip.png b/extras/appengine/static/resize-grip.png
new file mode 100644
index 0000000..cae2a4e
--- /dev/null
+++ b/extras/appengine/static/resize-grip.png
Binary files differ
diff --git a/extras/appengine/static/robots.txt b/extras/appengine/static/robots.txt
new file mode 100644
index 0000000..c033917
--- /dev/null
+++ b/extras/appengine/static/robots.txt
@@ -0,0 +1,8 @@
+# Directions for web crawlers.
+# See http://www.robotstxt.org/wc/norobots.html.
+
+User-agent: HTTrack
+User-agent: puf
+User-agent: MSIECrawler
+User-agent: Nutch
+Disallow: /
diff --git a/extras/appengine/static/rpc_relay.html b/extras/appengine/static/rpc_relay.html
new file mode 100644
index 0000000..c602043
--- /dev/null
+++ b/extras/appengine/static/rpc_relay.html
@@ -0,0 +1 @@
+<html><head><script type="text/javascript" src="http://www.google.com/friendconnect/script/rpc_relay.js"></script></head></html> \ No newline at end of file
diff --git a/extras/appengine/static/script.js b/extras/appengine/static/script.js
new file mode 100644
index 0000000..71bbabb
--- /dev/null
+++ b/extras/appengine/static/script.js
@@ -0,0 +1,103 @@
+var initialized = false;
+
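+/* Collect the form options, POST them to the application and put the
+   formatted result (or an error message) into #response. */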
+function update_output() {
+  var data = {};
+  data.data = $('#id_data').val();
+  data.format = 'json';
+  if ( $('#id_remove_comments').attr('checked') ) {
+    data.remove_comments = 1;
+  }
+  if ( $('#id_highlight').attr('checked') ) { data.highlight = 1; }
+ data.keyword_case = $('#id_keyword_case').val();
+ data.identifier_case = $('#id_identifier_case').val();
+ data.n_indents = $('#id_n_indents').val();
+ data.right_margin = $('#id_right_margin').val();
+ data.output_format = $('#id_output_format').val();
+  var form = document.getElementById('form_options');
+ $(form.elements).attr('disabled', 'disabled');
+ $('#response').addClass('loading');
+  $.post('/', data,
+    function(data) {
+      var proc_time = '';
+      if ( data.output ) {
+        $('#response').html(data.output);
+        proc_time = 'Processed in '+data.proc_time+' seconds.';
+      } else {
+        $('#response').html('An error occurred: '+data.errors);
+      }
+ $('#proc_time').html(proc_time);
+ $(form.elements).each( function(idx) {
+        var obj = $(this);
+ if ( ! obj.is('.keep-disabled') ) {
+ obj.removeAttr('disabled');
+ }
+ });
+ $('#response').removeClass('loading');
+ }, 'json');
+ return false;
+}
+
+function toggle_fieldset(event) {
+  var id = $(this).attr('id');
+ $('#'+id+'_content').slideDown();
+ $('legend').each(function(idx) {
+    var obj = $('#'+this.id+'_content');
+ if ( this.id != id ) {
+ obj.slideUp();
+ }
+ });
+}
+
+
+function textarea_grab_focus(evt) {
+ evt.stopPropagation();
+ evt.preventDefault();
+ $('#id_data').focus();
+ return false;
+}
+
+
+function show_help() {
+ $('#help').toggle();
+ return false;
+}
+
+
+function hide_help() {
+ $('#help').hide();
+ return false;
+}
+
+function load_example() {
+  var fname = $('#sel_example').val();
+  var data = {fname: fname};
+ $.post('/load_example', data,
+ function(data) {
+ $('#id_data').val(data.answer);
+ }, 'json');
+}
+
+
+function init() {
+ if (initialized) { return }
+ //$('legend').bind('click', toggle_fieldset);
+ // $('legend').each(function(idx) {
+ // obj = $('#'+this.id+'_content');
+ // if ( this.id != 'general' ) {
+ // obj.hide();
+ // }
+ // });
+ $(document).bind('keydown', {combi:'Ctrl+f'},
+ update_output);
+ $('#btn_format').val('Format SQL [Ctrl+F]');
+ $(document).bind('keydown', {combi: 'h', disableInInput: true},
+ show_help);
+ $(document).bind('keydown', 'Esc', hide_help);
+ $(document).bind('keydown', {combi: 't', disableInInput: true},
+ textarea_grab_focus);
+ initialized = true;
+ /* jQuery textarea resizer plugin usage */
+ $(document).ready(function() {
+ $('textarea.resizable:not(.processed)').TextAreaResizer();
+ });
+} \ No newline at end of file
diff --git a/extras/appengine/static/sqlformat_client_example.py b/extras/appengine/static/sqlformat_client_example.py
new file mode 100644
index 0000000..3b3bf0f
--- /dev/null
+++ b/extras/appengine/static/sqlformat_client_example.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+import urllib
+import urllib2
+
+payload = (
+ ('data', 'select * from foo join bar on val1 = val2 where id = 123;'),
+ ('format', 'text'),
+ ('keyword_case', 'upper'),
+ ('reindent', True),
+ ('n_indents', 2),
+ )
+
+response = urllib2.urlopen('http://sqlformat.appspot.com/format/',
+ urllib.urlencode(payload))
+print response.read()
+
diff --git a/extras/appengine/static/styles.css b/extras/appengine/static/styles.css
new file mode 100644
index 0000000..41a540a
--- /dev/null
+++ b/extras/appengine/static/styles.css
@@ -0,0 +1,245 @@
+body {
+ color: #000000;
+ background: #eeeeec;
+  font-family: "Free Sans", Arial, Verdana, sans-serif;
+ font-size: 10pt;
+ margin: 0;
+ padding: 0;
+}
+
+#header {
+ background: url(/static/bgtop.png) top left repeat-x;
+ border-bottom: 3px solid #2e3436;
+}
+
+#header-inner, #main-inner, #footer-inner {
+ width: 70em;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+
+#header-inner h1 {
+ margin: 0;
+ padding: 0;
+ margin-bottom: .2em;
+ font-weight: normal;
+ float: left;
+ font-size: 2em;
+ letter-spacing: .07em;
+}
+
+#header-inner .q {
+ color: #f57900;
+ padding-right: 3px;
+}
+
+#header-inner .q2 {
+ font-family: Georgia, "Times New Roman", serif;
+}
+
+#header-inner h1 a {
+ text-decoration: none;
+ color: #eeeeec;
+}
+
+#header-inner #slogan {
+ float: left;
+ color: #babdb6;
+ font-size: 1.4em;
+ margin-left: 1em;
+ letter-spacing: .18em;
+ padding-top: .2em;
+}
+
+
+#topmenu {
+ color: #729fcf;
+ clear: left;
+ padding-top: .5em;
+ padding-bottom: .5em;
+ font-size: 1.1em;
+}
+
+#topmenu a {
+ color: #eeeeec;
+ text-decoration: none;
+}
+
+#topmenu a:hover {
+ color: #ce5c00;
+}
+
+
+#main {
+ padding: 10px;
+ background: white;
+ line-height: 1.5em;
+ text-align: justify;
+}
+
+#main form ul {
+ margin: 0;
+ padding: 0;
+ list-style-type: none;
+}
+
+#main p, #main ol, #main .example, #main dl {
+ font-size: 12pt;
+ margin-left: 2em;
+}
+
+#main dt {
+ font-weight: bold;
+}
+
+#main li {
+ margin-bottom: .7em;
+}
+
+#main a {
+ color: #f57900;
+}
+
+#main h1, #main h2, #main h3, #main h4 {
+ color: #204a87;
+ font-weight: normal;
+ letter-spacing: .05em;
+}
+
+#main pre, #main code.pre {
+ font-size: 10pt;
+ line-height: 1em;
+ padding: 4px;
+ background-color: #eeeeec;
+ border: 1px solid #babdb6;
+}
+
+#input {
+ width: 50em;
+ float: right;
+ margin-left: 2em;
+}
+
+#options {
+ width: 18em;
+ float: left;
+ color: #2e3436;
+ margin-top: .75em;
+ text-align: left;
+}
+
+#options fieldset {
+ border: 1px solid #dddddd;
+ margin-bottom: .6em;
+ background: url(/static/bgfieldset.png) bottom left repeat-x;
+ -moz-border-radius: 3px;
+}
+
+
+#options input, #options select {
+ border: 1px solid #dddddd;
+}
+
+#options .help {
+ font-size: .9em;
+ color: #888a85;
+ margin-bottom: .6em;
+}
+
+
+#footer {
+ background: url(/static/bgfooter.png) top left repeat-x;
+ padding: 10px;
+ min-height: 80px;
+ border-top: 4px solid #babdb6;
+}
+
+#footer-inner {
+ width: 70em;
+ margin-left: auto;
+ margin-right: auto;
+ color: #888a85;
+}
+
+#footer-inner a {
+ color: #888a85;
+}
+
+#footer-inner a:hover {
+ color: #555753;
+}
+
+.clearfix {
+ clear: both;
+}
+
+.skip {
+ display: none;
+}
+
+textarea {
+ border: 1px solid #cccccc;
+ border-bottom: none;
+ padding: 4px;
+ font-size: 12pt;
+ width: 100%;
+}
+
+textarea:focus {
+ background-color: #eeeeec;
+}
+
+div.grippie {
+ background: url(/static/resize-grip.png) bottom right no-repeat #eeeeec;
+ border-color: #cccccc;
+ border-style: solid;
+ border-width: 0pt 1px 1px;
+ cursor: se-resize;
+ height: 14px;
+ overflow: hidden;
+}
+
+#help {
+ display: none;
+ position: fixed;
+ right: 10%;
+ left: 10%;
+ top: 0;
+ opacity: 0.85;
+ -moz-opacity: 0.85;
+ -khtml-opacity: 0.85;
+ filter: alpha(opacity=85);
+ -moz-border-radius: 0px 0px 10px 10px;
+
+ background: #2e3436;
+ color: white;
+ font-weight: bold;
+
+ padding: 1em;
+ z-index: 1;
+ overflow-x: hidden;
+ overflow-y: auto;
+}
+
+#help .shortcut {
+ color: #f57900;
+ font-weight: bold;
+ width: 20px;
+ display: inline;
+}
+
+.loading {
+ background: url(/static/loading.gif) top left no-repeat;
+}
+
+.dev {
+ color: #cc0000;
+ font-size: .9em;
+  letter-spacing: 1px;
+}
+
+#proc_time {
+ color: #888a85;
+ font-size: .85em;
+} \ No newline at end of file
diff --git a/extras/appengine/templates/about.html b/extras/appengine/templates/about.html
new file mode 100644
index 0000000..3222b97
--- /dev/null
+++ b/extras/appengine/templates/about.html
@@ -0,0 +1,44 @@
+{% extends "master.html" %}
+
+{% block main %}
+<h1>About this Application</h1>
+<p>
+  This application is an online SQL formatting tool.
+</p>
+<p>
+  Basically it's a playground for a Python module that parses and
+  formats SQL statements. Sometimes it's easier to combine the
+  available options and see the resulting output in a web front-end
+  than on the command line ;-)
+</p>
+<p>
+ To get started, enter a SQL statement in the text box on the top,
+ choose some options and click on &quot;Format SQL&quot; (Ctrl+F)
+ to see the result.
+</p>
+<p>
+ <em>Note:</em> The SQL formatter and parser is in an early stage
+ of development. If you're looking for a mature tool, try one of
+ <a href="http://www.google.com/search?q=online+sql+formatter">these</a>.
+</p>
+<h2>Using it from the Command Line</h2>
+<p>
+ There are three ways to use this SQL formatter from the command line:
+</p>
+<ol>
+ <li>Grab the <a href="/source/">sources</a> and use the module in your
+ Python scripts.</li>
+ <li>
+ Write a little script in your favorite language that sends a POST
+ request to this application.<br/>
+ Read the <a href="/api/">API Documentation</a> for more information.
+ </li>
+ <li>Use
+ <a href="/static/lynx_screenshot.png"
+ alt="Lynx Screenshot" target="_blank"
+ title="Screenshot: sqlformat.appspot.com on Lynx">Lynx
+ </a>
+ </li>
+</ol>
+
+{% endblock %}
diff --git a/extras/appengine/templates/api.html b/extras/appengine/templates/api.html
new file mode 100644
index 0000000..b9aaae7
--- /dev/null
+++ b/extras/appengine/templates/api.html
@@ -0,0 +1,50 @@
+{% extends "master.html" %}
+
+{% block main %}
+<h1>API Documentation</h1>
+
+<p>
+ Using the API for this application is pretty simple. Just send a
+ <code>POST</code> request to
+</p>
+<p>
+ <code>http://sqlformat.appspot.com/format/</code>
+</p>
+
+<h2>Options</h2>
+<p>
+ The <code>POST</code> request accepts various options to control
+ formatting. Only the <em>data</em> option is required. All others
+ are optional.
+</p>
+
+<dl>
+ <dt>data</dt>
+ <dd>The SQL statement to format.</dd>
+ <dt>remove_comments</dt>
+ <dd>Set to 1 to remove comments.</dd>
+ <dt>keyword_case</dt>
+ <dd>How to convert keywords. Allowed values are 'lower', 'upper',
+ 'capitalize'.</dd>
+ <dt>identifier_case</dt>
+ <dd>How to convert identifiers. Allowed values are 'lower', 'upper',
+ 'capitalize'.</dd>
+ <dt>n_indents</dt>
+  <dd>An integer indicating the indentation depth.</dd>
+ <dt>right_margin</dt>
+ <dd>An integer indicating the maximum line length.</dd>
+ <dt>output_format</dt>
+  <dd>Translate the statement into another programming language.
+      Allowed values are 'python' and 'php'.</dd>
+</dl>
+
+<h2>Example</h2>
+<p>
+  Here's an example in Python:
+</p>
+{% include "python-client-example.html" %}
+<p>
+ <a href="/static/sqlformat_client_example.py">Download sqlformat_example_client.py</a>
+</p>
+
+{% endblock %}
diff --git a/extras/appengine/templates/index.html b/extras/appengine/templates/index.html
new file mode 100644
index 0000000..57e7ed3
--- /dev/null
+++ b/extras/appengine/templates/index.html
@@ -0,0 +1,107 @@
+{% extends "master.html" %}
+
+{% block main %}
+
+{% if output %}
+ <a href="#output" class="skip">Jump to formatted query</a>
+{% endif %}
+
+<form method="post" action="" id="form_options" enctype="multipart/form-data">
+ <div id="input">
+ {% if form.non_field_errors %}{{form.non_field_errors}}{% endif %}
+ <div>
+ <strong>Type your SQL here:</strong><br />
+ {{form.data}}
+ {% if form.data.errors %}{{form.data.errors}}{% endif %}
+ </div>
+ <div style="margin-top: .5em;">
+ <strong>...or upload a file:</strong>
+ {{form.datafile}}
+ </div>
+ <div id="examples" style="margin-top: .5em;"></div>
+ <div id="actions" style="margin-top: .5em;">
+ <input type="submit" value="Format SQL" id="btn_format" />
+ </div>
+ {% if output %}<a name="output"></a>
+ <div id="response">{{output|safe}}</div>
+ {% else %}
+ <div id="response"></div>
+ {% endif %}
+ <div id="proc_time">
+ {% if proc_time %}Processed in {{proc_time}} seconds.{% endif %}
+ </div>
+ <div style="margin-top: 1em;">
+ <script type="text/javascript">
+ <!--
+ google_ad_client = "pub-8870624642249726";
+ /* 468x60, Erstellt 07.03.09 */
+ google_ad_slot = "9840041509";
+ google_ad_width = 468;
+ google_ad_height = 60;
+ //-->
+ </script>
+ <script type="text/javascript"
+ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
+ </script>
+ </div>
+
+ </div>
+ <div id="options">
+ <h1 class="skip">Options</h1>
+ <fieldset><legend id="general"><strong>General Options</strong></legend>
+ <div id="general_content" class="content">
+ {{form.remove_comments}}
+ <label for="id_remove_comments">{{form.remove_comments.label}}</label>
+ <br />
+ {{form.highlight}}
+ <label for="id_highlight">{{form.highlight.label}}</label>
+ {% if form.highlight.errors %}
+ <ul class="errors">{{form.highlight.errors</ul>
+ {% endif %}
+ </div>
+ </fieldset>
+ <fieldset><legend id="kwcase">
+ <strong>Keywords &amp; Identifiers</strong></legend>
+ <div>
+ {{form.keyword_case.label}}: {{form.keyword_case}}
+ </div>
+ <div>
+ {{form.identifier_case.label}}: {{form.identifier_case}}
+ </div>
+ </fieldset>
+ <fieldset><legend id="indent"><strong>Indentation &amp; Margins</strong>
+ </legend>
+ <div id="indent_content" class="content">
+ <label for="id_n_indents">Indentation: </label>
+ {{form.n_indents}} {{form.n_indents.label}}
+ <div class="help">Empty field means leave indentation unchanged.</div>
+<!--
+ <label for="id_right_margin">Right margin: </label>
+ {{form.right_margin}} {{form.right_margin.label}}
+ <div class="help">Empty field means don't mind right margin.</div>
+-->
+ </div>
+ </fieldset>
+ <fieldset><legend id="output"><strong>Output Format</strong></legend>
+ <label for="id_output_format">Language: </label>
+ {{form.output_format}}
+ </fieldset>
+
+ <div class="dev">This software is in development.</div>
+
+ </div>
+ <div class="clearfix"></div>
+</form>
+
+<script language="javascript">
+var html = '<strong>...or select an example:</strong> ';
+html += '<select onchange="load_example();" id="sel_example">';
+html += '<option value="">-- Choose Example --</option>';
+{% for ex in examples %}
+  html += '<option value="{{ex}}">{{ex}}</option>';
+{% endfor %}
+html += '</select>';
+$('#examples').html(html);
+</script>
+{% endblock %}
+
diff --git a/extras/appengine/templates/master.html b/extras/appengine/templates/master.html
new file mode 100644
index 0000000..4294cf4
--- /dev/null
+++ b/extras/appengine/templates/master.html
@@ -0,0 +1,103 @@
+<html>
+ <head>
+ <title>SQLFormat - Online SQL Formatting Service</title>
+ <meta name="keywords" content="SQL, format, parse, python, beautify" />
+ <link rel="stylesheet" href="/static/pygments.css" />
+ <link rel="stylesheet" href="/static/styles.css" />
+ <script src="http://www.google.com/jsapi"></script>
+ <script>
+ google.load("jquery", "1.2.6");
+ </script>
+ <script src="/static/hotkeys.js"></script>
+ <script type="text/javascript"
+ src="/static/jquery.textarearesizer.compressed.js"></script>
+ <script src="/static/script.js"></script>
+ </head>
+ <body>
+
+ <div id="help">
+ <p>Keyboard Shortcuts</p>
+ <p>
+ <span class="shortcut">H</span> - Show / hide this help window<br/>
+ <span class="shortcut">Ctrl+F</span> - Format SQL and display result<br/>
+ <span class="shortcut">O</span> - Show / hide options<br/>
+ <span class="shortcut">T</span> - Set focus on SQL input<br/>
+ </p>
+ </div>
+
+ <div id="header">
+ <div id="header-inner">
+ {% if user %}<img src="{{userimg}}" border="0" align="right" style="padding-top:.4em;"/>{% endif %}
+ <h1>
+ <a href="/">
+ <span class="q">S<span class="q2">Q</span>L</span>Format
+ </a>
+ </h1>
+ <div id="slogan">Online SQL formatting service</div>
+ <div id="topmenu">
+ <a href="/">Home</a>
+ |
+ <a href="/about/">About</a>
+ |
+ <a href="/source/">Source Code</a>
+ |
+ <a href="/api/">API</a>
+<!--
+ |
+ {% if user %}
+ <a href="{{logout_url}}">Sign out</a>
+ {% else %}
+ <a href="{{login_url}}">Sign in</a>
+ <span style="color: #babdb6;">with your Google account
+ to save preferred settings.</span>
+ {% endif %}
+-->
+ </div>
+ </div>
+ </div>
+
+ <div id="main">
+ <div id="main-inner">
+ {% block main %}MAIN CONTENT GOES HERE{% endblock %}
+ </div>
+ </div>
+
+ <div id="footer">
+ <div id="footer-inner">
+ <div style="float: left; font-size: .85em;">
+ <div>&copy; 2009 Andi Albrecht&nbsp;&nbsp;
+          <code>&lt;albrecht dot andi at gmail&gt;</code>
+ </div>
+ <div>
+ <a href="/">Home</a>
+ |
+ <a href="/about/">About</a>
+ |
+ <a href="/source/">Source Code</a>
+ |
+ <a href="/api/">API</a>
+ |
+ <a href="http://andialbrecht.wordpress.com/">Blog</a>
+ </div>
+ </div>
+ <div style="float: right;">
+ <img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif"
+ alt="Powered by Google App Engine" />
+ </div>
+ <div class="clearfix"></div>
+ </div>
+ </div>
+
+ <script type="text/javascript">
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+ </script>
+ <script type="text/javascript">
+ try {
+ var pageTracker = _gat._getTracker("UA-3535525-2");
+ pageTracker._trackPageview();
+ } catch(err) {}</script>
+ <script>init();</script>
+
+ </body>
+</html>
diff --git a/extras/appengine/templates/python-client-example.html b/extras/appengine/templates/python-client-example.html
new file mode 100644
index 0000000..68bf820
--- /dev/null
+++ b/extras/appengine/templates/python-client-example.html
@@ -0,0 +1,17 @@
+<div class="highlight example"><pre><span class="c">#!/usr/bin/env python</span>
+
+<span class="k">import</span> <span class="nn">urllib</span>
+<span class="k">import</span> <span class="nn">urllib2</span>
+
+<span class="n">payload</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="p">(</span><span class="s">&#39;data&#39;</span><span class="p">,</span> <span class="s">&#39;select * from foo join bar on val1 = val2 where id = 123;&#39;</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;format&#39;</span><span class="p">,</span> <span class="s">&#39;text&#39;</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;keyword_case&#39;</span><span class="p">,</span> <span class="s">&#39;upper&#39;</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;reindent&#39;</span><span class="p">,</span> <span class="bp">True</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;n_indents&#39;</span><span class="p">,</span> <span class="mf">2</span><span class="p">),</span>
+ <span class="p">)</span>
+
+<span class="n">response</span> <span class="o">=</span> <span class="n">urllib2</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="s">&#39;http://sqlformat.appspot.com/format/&#39;</span><span class="p">,</span>
+ <span class="n">urllib</span><span class="o">.</span><span class="n">urlencode</span><span class="p">(</span><span class="n">payload</span><span class="p">))</span>
+<span class="k">print</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+</pre></div>
diff --git a/extras/appengine/templates/source.html b/extras/appengine/templates/source.html
new file mode 100644
index 0000000..4988f2f
--- /dev/null
+++ b/extras/appengine/templates/source.html
@@ -0,0 +1,56 @@
+{% extends "master.html" %}
+
+{% block main %}
+<div id="response">
+ <h1>Source Code</h1>
+
+ <h2>Python Module</h2>
+  <p>
+    The sources for the SQL parser and formatter module are hosted
+    in a public Git repository. To clone the repository run:
+  </p>
+  <p>
+ <code class="pre">git clone git://github.com/andialbrecht/python-sqlparse.git</code>
+ </p>
+ <p>
+ <a href="http://python-sqlparse.googlecode.com">Visit the project page</a>
+ |
+ <a href="http://gitorious.org/projects/python-sqlparse/repos/mainline/trees/master">Browse the sources online</a>
+ </p>
+ <p>
+ Some relevant parts of the Python module contain code from the
+ <a href="http://pygments.org/">pygments</a> syntax highlighter.
+ The underlying Python module uses a non-validating SQL parser.
+ This approach makes it possible to parse even syntactically incorrect
+ SQL statements.
+ </p>
+
+ <p>
+ Currently the parser module is used by
+ <a href="http://crunchyfrog.googlecode.com/">CrunchyFrog</a> - a
+    database front-end for GNOME.
+ </p>
+
+ <p>
+ The <code>sqlparse</code> module is released under the terms of the
+ <a href="http://www.opensource.org/licenses/bsd-license.php">New BSD License</a>.
+ </p>
+
+ <h2>App Engine Application</h2>
+ <p>
+    The source code for this App Engine application is available in the
+    <code>extras/appengine</code> directory of the Python module's sources
+    (but it's really nothing special ;-).
+ </p>
+
+ <h2>Contributing</h2>
+ <p>
+ Please file bug reports and feature requests on the project site at
+ <a href="http://code.google.com/p/python-sqlparse/issues/entry">http://code.google.com/p/python-sqlparse/issues/entry</a>
+ or if you have code to contribute upload it to
+ <a href="http://codereview.appspot.com">http://codereview.appspot.com</a>
+ and add albrecht.andi@googlemail.com as reviewer.
+ </p>
+
+</div>
+{% endblock %}
diff --git a/extras/sqlformat.png b/extras/sqlformat.png
new file mode 100644
index 0000000..4189bc4
--- /dev/null
+++ b/extras/sqlformat.png
Binary files differ
diff --git a/extras/sqlformat.svg b/extras/sqlformat.svg
new file mode 100644
index 0000000..59e1183
--- /dev/null
+++ b/extras/sqlformat.svg
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:xlink="http://www.w3.org/1999/xlink"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="120"
+ height="60"
+ id="svg2"
+ sodipodi:version="0.32"
+ inkscape:version="0.46"
+ version="1.0"
+ sodipodi:docname="sqlformat.svg"
+ inkscape:output_extension="org.inkscape.output.svg.inkscape">
+ <defs
+ id="defs4">
+ <linearGradient
+ id="linearGradient3163">
+ <stop
+ id="stop3165"
+ offset="0"
+ style="stop-color:#888a85;stop-opacity:1;" />
+ <stop
+ id="stop3167"
+ offset="1"
+ style="stop-color:#565854;stop-opacity:1;" />
+ </linearGradient>
+ <inkscape:perspective
+ sodipodi:type="inkscape:persp3d"
+ inkscape:vp_x="0 : 526.18109 : 1"
+ inkscape:vp_y="0 : 1000 : 0"
+ inkscape:vp_z="744.09448 : 526.18109 : 1"
+ inkscape:persp3d-origin="372.04724 : 350.78739 : 1"
+ id="perspective10" />
+ <linearGradient
+ inkscape:collect="always"
+ xlink:href="#linearGradient3163"
+ id="linearGradient3161"
+ x1="57.357143"
+ y1="1.6964277"
+ x2="57.464291"
+ y2="58.125"
+ gradientUnits="userSpaceOnUse" />
+ </defs>
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="5.6"
+ inkscape:cx="58.539628"
+ inkscape:cy="24.488765"
+ inkscape:document-units="px"
+ inkscape:current-layer="layer1"
+ showgrid="false"
+ inkscape:window-width="1436"
+ inkscape:window-height="720"
+ inkscape:window-x="209"
+ inkscape:window-y="171" />
+ <metadata
+ id="metadata7">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Ebene 1"
+ inkscape:groupmode="layer"
+ id="layer1">
+ <rect
+ style="fill:url(#linearGradient3161);fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.5103448;fill-opacity:1"
+ id="rect2383"
+ width="118.21429"
+ height="58.035713"
+ x="1.0714285"
+ y="1.0714284"
+ ry="3.5714285" />
+ <text
+ xml:space="preserve"
+ style="font-size:41.68727112px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#f57900;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:FreeSans;-inkscape-font-specification:FreeSans"
+ x="3.8212376"
+ y="38.731842"
+ id="text3169"
+ transform="scale(1.3303696,0.7516708)"><tspan
+ sodipodi:role="line"
+ id="tspan3171"
+ x="3.8212376"
+ y="38.731842">S<tspan
+ style="font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Georgia;-inkscape-font-specification:Georgia Italic"
+ id="tspan3173">Q</tspan>L</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:29.59413719px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#babdb6;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:FreeSans;-inkscape-font-specification:FreeSans"
+ x="5.0176215"
+ y="60.359047"
+ id="text3177"
+ transform="scale(1.1410205,0.8764084)"><tspan
+ sodipodi:role="line"
+ id="tspan3179"
+ x="5.0176215"
+ y="60.359047">Format</tspan></text>
+ </g>
+</svg>
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..6ed5178
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This setup script is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+from distutils.core import setup
+
+setup(
+ name='sqlparse',
+ version='0.1.0',
+    packages=['sqlparse', 'sqlparse.engine'],
+ description='Provides simple SQL parsing',
+ author='Andi Albrecht',
+ author_email='albrecht.andi@gmail.com',
+ #long_description=release.long_description,
+ license='BSD',
+ url='http://python-sqlparse.googlecode.com/',
+ classifiers = [
+ 'Development Status :: 3 - Alpha',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: BSD License',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python',
+ 'Topic :: Database',
+ 'Topic :: Software Development'
+ ],
+ scripts=['bin/sqlformat'],
+)
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
new file mode 100644
index 0000000..01b3bd8
--- /dev/null
+++ b/sqlparse/__init__.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""Parse SQL statements."""
+
+__version__ = '0.1.0'
+
+import logging
+import os
+
+
+if 'SQLPARSE_DEBUG' in os.environ:
+ logging.basicConfig(level=logging.DEBUG)
+
+
+class SQLParseError(Exception):
+ """Base class for exceptions in this module."""
+
+
+# Setup namespace
+from sqlparse import engine
+from sqlparse import filters
+from sqlparse import formatter
+
+
+def parse(sql):
+ """Parse sql and return a list of statements.
+
+ *sql* is a single string containting one or more SQL statements.
+
+ The returned :class:`~sqlparse.parser.Statement` are fully analyzed.
+
+ Returns a list of :class:`~sqlparse.parser.Statement` instances.
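+
+    A minimal usage sketch::
+
+        import sqlparse
+        stmts = sqlparse.parse('select * from foo; select 1;')
+        # 'stmts' holds two Statement instances, one per statement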
+ """
+ stack = engine.FilterStack()
+ stack.full_analyze()
+ return tuple(stack.run(sql))
+
+
+def format(sql, **options):
+ """Format *sql* according to *options*.
+
+    The statements are parsed as with :meth:`parse`, formatted
+    according to *options* and returned as a single string.
+
+    Available options are documented in the :mod:`~sqlparse.formatter`
+    module.
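+
+    A short sketch (the exact output depends on the installed filters)::
+
+        import sqlparse
+        sqlparse.format('select * from foo', keyword_case='upper')
+        # roughly u'SELECT * FROM foo'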
+ """
+ stack = engine.FilterStack()
+ options = formatter.validate_options(options)
+ stack = formatter.build_filter_stack(stack, options)
+ stack.postprocess.append(filters.SerializerUnicode())
+ return ''.join(stack.run(sql))
+
+
+def split(sql):
+ """Split *sql* into separate statements.
+
+ Returns a list of strings.
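+
+    Example::
+
+        sqlparse.split('select * from foo; select 1;')
+        # -> two strings, one per statement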
+ """
+ stack = engine.FilterStack()
+ stack.split_statements = True
+ return [unicode(stmt) for stmt in stack.run(sql)]
+
diff --git a/sqlparse/dialects.py b/sqlparse/dialects.py
new file mode 100644
index 0000000..cabe503
--- /dev/null
+++ b/sqlparse/dialects.py
@@ -0,0 +1,88 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""This module contains classes that represent SQL dialects."""
+
+from sqlparse.tokens import *
+
+
+class Dialect(object):
+ """Base class for SQL dialect implementations."""
+
+ def handle_token(self, tokentype, text):
+ """Handle a token.
+
+ Arguments:
+ tokentype: A token type.
+ text: Text representation of the token.
+
+ Returns:
+ A tuple of three items: tokentype, text, splitlevel.
+            splitlevel is either -1, 0 or 1 and describes a change of the nesting level.
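+
+        For example, DECLARE opens a block in the default dialect, so
+        ``handle_token(Keyword, 'DECLARE')`` returns
+        ``(Keyword, 'DECLARE', 1)``; END is returned with splitlevel -1.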
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """Reset Dialect state."""
+ pass
+
+
+class DefaultDialect(Dialect):
+
+ def __init__(self):
+ self._in_declare = False
+ self._stmt_type = None
+
+ def get_statement_type(self):
+ return self._stmt_type
+
+ def set_statement_type(self, type_):
+ self._stmt_type = type_
+
+ def handle_token(self, tokentype, text):
+ if not tokentype == Keyword:
+ return tokentype, text, 0
+ unified = text.upper()
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return tokentype, text, 1
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return tokentype, text, 0
+ return tokentype, text, 0
+ if unified == 'END':
+ return tokentype, text, -1
+ # TODO: Use a constant here
+ if unified in ('IF', 'FOR') and self._stmt_type == 6:
+ return tokentype, text, 1
+ return tokentype, text, 0
+
+ def reset(self):
+ self._in_declare = False
+
+
+class PSQLDialect(DefaultDialect):
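+    """Dialect for PostgreSQL.
+
+    Tracks dollar-quoted blocks (e.g. ``$$ ... $$``) so that everything
+    between the markers is treated as one nested block.
+    """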
+
+ def __init__(self):
+ super(PSQLDialect, self).__init__()
+ self._in_dbldollar = False
+
+ def handle_token(self, tokentype, text):
+ if (tokentype == Name.Builtin
+ and text.startswith('$') and text.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return tokentype, text, -1
+ else:
+ self._in_dbldollar = True
+ return tokentype, text, 1
+ elif self._in_dbldollar:
+ return tokentype, text, 0
+ else:
+ return super(PSQLDialect, self).handle_token(tokentype, text)
+
+ def reset(self):
+ self._dollar_started = False
+ self._in_dbldollar = False
diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py
new file mode 100644
index 0000000..5cac528
--- /dev/null
+++ b/sqlparse/engine/__init__.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""filter"""
+
+import logging
+import re
+
+from sqlparse import lexer, SQLParseError
+from sqlparse.engine import grouping
+from sqlparse.engine.filter import StatementFilter
+
+# XXX remove this when cleanup is complete
+Filter = object
+
+
+class FilterStack(object):
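+    """Runs a token stream through a stack of filters.
+
+    Filters are applied in three phases: ``preprocess`` works on the
+    raw token stream, ``stmtprocess`` on each split statement, and
+    ``postprocess`` on the flattened statements before output.
+    """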
+
+ def __init__(self):
+ self.preprocess = []
+ self.stmtprocess = []
+ self.postprocess = []
+ self.split_statements = False
+ self._grouping = False
+
+ def _flatten(self, stream):
+ for token in stream:
+ if token.is_group():
+ for t in self._flatten(token.tokens):
+ yield t
+ else:
+ yield token
+
+ def enable_grouping(self):
+ self._grouping = True
+
+ def full_analyze(self):
+ self.enable_grouping()
+
+ def run(self, sql):
+ stream = lexer.tokenize(sql)
+ # Process token stream
+ if self.preprocess:
+ for filter_ in self.preprocess:
+ stream = filter_.process(self, stream)
+
+ if (self.stmtprocess or self.postprocess or self.split_statements
+ or self._grouping):
+ splitter = StatementFilter()
+ stream = splitter.process(self, stream)
+
+ if self._grouping:
+ def _group(stream):
+ for stmt in stream:
+ grouping.group(stmt)
+ yield stmt
+ stream = _group(stream)
+
+ if self.stmtprocess:
+ def _run(stream):
+ ret = []
+ for stmt in stream:
+ for filter_ in self.stmtprocess:
+ filter_.process(self, stmt)
+ ret.append(stmt)
+ return ret
+ stream = _run(stream)
+
+ if self.postprocess:
+ def _run(stream):
+ for stmt in stream:
+ stmt.tokens = list(self._flatten(stmt.tokens))
+ for filter_ in self.postprocess:
+ stmt = filter_.process(self, stmt)
+ yield stmt
+ stream = _run(stream)
+
+ return stream
+
diff --git a/sqlparse/engine/_grouping.py b/sqlparse/engine/_grouping.py
new file mode 100644
index 0000000..512c590
--- /dev/null
+++ b/sqlparse/engine/_grouping.py
@@ -0,0 +1,499 @@
+# -*- coding: utf-8 -*-
+
+import re
+import types
+
+from sqlparse.engine.filter import TokenFilter
+from sqlparse import tokens as T
+
+class _Base(object):
+
+    __slots__ = ()  # listing method names here would conflict with the definitions below
+
+ def __unicode__(self):
+        return 'Unknown _Base object'
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def __repr__(self):
+ raw = unicode(self)
+ if len(raw) > 7:
+ short = raw[:6]+u'...'
+ else:
+ short = raw
+        short = re.sub(r'\s+', ' ', short)
+ return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+ short, id(self))
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def to_unicode(self):
+ return unicode(self)
+
+ def to_str(self):
+ return str(self)
+
+
+class Token(_Base):
+
+ __slots__ = ('value', 'ttype')
+
+ def __init__(self, ttype, value):
+ self.value = value
+ self.ttype = ttype
+
+ def __unicode__(self):
+ return self.value
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def match(self, ttype, values):
+ if self.ttype is not ttype:
+ return False
+ if isinstance(values, basestring):
+ values = [values]
+ if self.ttype is T.Keyword:
+ return self.value.upper() in [v.upper() for v in values]
+ else:
+ return self.value in values
+
+ def is_group(self):
+ return False
+
+ def is_whitespace(self):
+ return self.ttype and self.ttype is T.Whitespace
+
+
+class _Group(Token):
+
+    __slots__ = ('_tokens',)  # 'tokens' is a property; the data lives in _tokens
+
+ def __init__(self, tokens=None):
+ super(_Group, self).__init__(None, None)
+ if tokens is None:
+ tokens = []
+ self._tokens = tokens
+
+ def _set_tokens(self, tokens):
+ self._tokens = tokens
+ def _get_tokens(self):
+ if type(self._tokens) is not types.TupleType:
+ self._tokens = tuple(self._tokens)
+ return self._tokens
+ tokens = property(fget=_get_tokens, fset=_set_tokens)
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def _pprint_tree(self, depth=0):
+ """Pretty-print the object tree."""
+ indent = ' '*(depth*2)
+ for token in self.tokens:
+ print '%s%r' % (indent, token)
+ if token.is_group():
+ token._pprint_tree(depth+1)
+
+ def __unicode__(self):
+ return u''.join(unicode(t) for t in self.tokens)
+
+ @property
+ def subgroups(self):
+ #return [x for x in self.tokens if isinstance(x, _Group)]
+ for item in self.tokens:
+ if item.is_group():
+ yield item
+
+ def is_group(self):
+ return True
+
+
+class Statement(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Parenthesis(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Where(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class CommentMulti(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Identifier(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class TypeCast(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+ @property
+ def casted_object(self):
+ return self.tokens[0]
+
+ @property
+ def casted_type(self):
+ return self.tokens[-1]
+
+
+class Alias(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+ @property
+ def aliased_object(self):
+ return self.tokens[0]
+
+ @property
+ def alias(self):
+ return self.tokens[-1]
+
+
+
+
+# - Filter
+
+class StatementFilter(TokenFilter):
+
+ def __init__(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _reset(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _change_splitlevel(self, ttype, value):
+ # PostgreSQL
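+        # Dollar-quoted blocks ($$ ... $$) may contain semicolons that
+        # must not split the statement, so they change the split level.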
+ if (ttype == T.Name.Builtin
+ and value.startswith('$') and value.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return -1
+ else:
+ self._in_dbldollar = True
+ return 1
+ elif self._in_dbldollar:
+ return 0
+
+ # ANSI
+ if ttype is not T.Keyword:
+ return 0
+
+ unified = value.upper()
+
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return 0
+ return 0
+
+ if unified == 'END':
+ return -1
+
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+
+ if unified in ('IF', 'FOR') and self._is_create:
+ return 1
+
+ # Default
+ return 0
+
+ def process(self, stack, stream):
+ splitlevel = 0
+ stmt = None
+ consume_ws = False
+ stmt_tokens = []
+ for ttype, value in stream:
+ # Before appending the token
+ if (consume_ws and ttype is not T.Whitespace
+ and ttype is not T.Comment.Single):
+ consume_ws = False
+ stmt.tokens = stmt_tokens
+ yield stmt
+ self._reset()
+ stmt = None
+ splitlevel = 0
+ if stmt is None:
+ stmt = Statement()
+ stmt_tokens = []
+ splitlevel += self._change_splitlevel(ttype, value)
+ # Append the token
+ stmt_tokens.append(Token(ttype, value))
+ # After appending the token
+ if (not splitlevel and ttype is T.Punctuation
+ and value == ';'):
+ consume_ws = True
+ if stmt is not None:
+ stmt.tokens = stmt_tokens
+ yield stmt
+
+
+class GroupFilter(object):
+
+ def process(self, stream):
+ pass
+
+
+class GroupParenthesis(GroupFilter):
+ """Group parenthesis groups."""
+
+ def _finish_group(self, group):
+ start = group[0]
+ end = group[-1]
+ tokens = list(self._process(group[1:-1]))
+ return [start]+tokens+[end]
+
+ def _process(self, stream):
+ group = None
+ depth = 0
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.match(T.Punctuation, '('):
+ if depth == 0:
+ group = []
+ depth += 1
+ if group is not None:
+ group.append(token)
+ if token.match(T.Punctuation, ')'):
+ depth -= 1
+ if depth == 0:
+ yield Parenthesis(self._finish_group(group))
+ group = None
+ continue
+ if group is None:
+ yield token
+
+ def process(self, group):
+ if not isinstance(group, Parenthesis):
+ group.tokens = self._process(group.tokens)
+
+
+class GroupWhere(GroupFilter):
+
+ def _process(self, stream):
+ group = None
+ depth = 0
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.match(T.Keyword, 'WHERE'):
+ if depth == 0:
+ group = []
+ depth += 1
+ # Process conditions here? E.g. "A =|!=|in|is|... B"...
+ elif (token.ttype is T.Keyword
+ and token.value.upper() in ('ORDER', 'GROUP',
+ 'LIMIT', 'UNION')):
+ depth -= 1
+ if depth == 0:
+ yield Where(group)
+ group = None
+ if depth < 0:
+ depth = 0
+ if group is not None:
+ group.append(token)
+ else:
+ yield token
+ if group is not None:
+ yield Where(group)
+
+ def process(self, group):
+ if not isinstance(group, Where):
+ group.tokens = self._process(group.tokens)
+
+
+class GroupMultiComments(GroupFilter):
+ """Groups Comment.Multiline and adds trailing whitespace up to first lb."""
+
+ def _process(self, stream):
+ new_tokens = []
+ grp = None
+ consume_ws = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.ttype is T.Comment.Multiline:
+ if grp is None:
+ grp = []
+ consume_ws = True
+ grp.append(token)
+ elif consume_ws and token.ttype is not T.Whitespace:
+ yield CommentMulti(grp)
+ grp = None
+ consume_ws = False
+ yield token
+ elif consume_ws:
+ lines = token.value.splitlines(True)
+ grp.append(Token(T.Whitespace, lines[0]))
+ if lines[0].endswith('\n'):
+ yield CommentMulti(grp)
+ grp = None
+ consume_ws = False
+ if lines[1:]:
+ yield Token(T.Whitespace, ''.join(lines[1:]))
+ else:
+ yield token
+
+ def process(self, group):
+ if not isinstance(group, CommentMulti):
+ group.tokens = self._process(group.tokens)
+
+
+## class GroupIdentifier(GroupFilter):
+
+## def _process(self, stream):
+## buff = []
+## expect_dot = False
+## for token in stream:
+## if token.is_group():
+## token.tokens = self._process(token.tokens)
+## if (token.ttype is T.String.Symbol or token.ttype is T.Name
+## and not expect_dot):
+## buff.append(token)
+## expect_dot = True
+## elif expect_dot and token.match(T.Punctuation, '.'):
+## buff.append(token)
+## expect_dot = False
+## else:
+## if expect_dot == False:
+## # something's wrong, it ends with a dot...
+## while buff:
+## yield buff.pop(0)
+## expect_dot = False
+## elif buff:
+## idt = Identifier()
+## idt.tokens = buff
+## yield idt
+## buff = []
+## yield token
+## if buff and expect_dot:
+## idt = Identifier()
+## idt.tokens = buff
+## yield idt
+## buff = []
+## while buff:
+## yield buff.pop(0)
+
+## def process(self, group):
+## if not isinstance(group, Identifier):
+## group.tokens = self._process(group.tokens)
+
+
+class AddTypeCastFilter(GroupFilter):
+
+ def _process(self, stream):
+ buff = []
+ expect_colon = False
+ has_colons = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if ((isinstance(token, Parenthesis)
+ or isinstance(token, Identifier))
+ and not expect_colon):
+ buff.append(token)
+ expect_colon = True
+ elif expect_colon and token.match(T.Punctuation, ':'):
+ buff.append(token)
+ has_colons = True
+ elif (expect_colon
+ and (token.ttype in T.Name
+ or isinstance(token, Identifier))
+ ):
+ if not has_colons:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ else:
+ buff.append(token)
+ grp = TypeCast()
+ grp.tokens = buff
+ buff = []
+ yield grp
+                    expect_colon = has_colons = False
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ while buff:
+ yield buff.pop(0)
+
+ def process(self, group):
+ if not isinstance(group, TypeCast):
+ group.tokens = self._process(group.tokens)
+
+
+class AddAliasFilter(GroupFilter):
+
+ def _process(self, stream):
+ buff = []
+ search_alias = False
+ lazy = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if search_alias and (isinstance(token, Identifier)
+ or token.ttype in (T.Name,
+ T.String.Symbol)
+ or (lazy and not token.is_whitespace())):
+ buff.append(token)
+ search_alias = lazy = False
+ grp = Alias()
+ grp.tokens = buff
+ buff = []
+ yield grp
+ elif (isinstance(token, (Identifier, TypeCast))
+ or token.ttype in (T.Name, T.String.Symbol)):
+ buff.append(token)
+ search_alias = True
+ elif search_alias and (token.is_whitespace()
+ or token.match(T.Keyword, 'as')):
+ buff.append(token)
+ if token.match(T.Keyword, 'as'):
+ lazy = True
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ search_alias = False
+ while buff:
+ yield buff.pop(0)
+
+ def process(self, group):
+ if not isinstance(group, Alias):
+ group.tokens = self._process(group.tokens)
+
+
+GROUP_FILTER = (GroupParenthesis(),
+                GroupMultiComments(),
+                GroupWhere(),
+                # GroupIdentifier is commented out above; instantiating
+                # it here would raise a NameError on import.
+                AddTypeCastFilter(),
+                AddAliasFilter(),
+                )
+
+def group_tokens(group):
+ def _materialize(g):
+ if type(g.tokens) is not types.TupleType:
+ g.tokens = tuple(g.tokens)
+ for sg in g.subgroups:
+ _materialize(sg)
+ for groupfilter in GROUP_FILTER:
+ groupfilter.process(group)
+# _materialize(group)
+# group.tokens = tuple(group.tokens)
+# for subgroup in group.subgroups:
+# group_tokens(subgroup)
diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py
new file mode 100644
index 0000000..146690c
--- /dev/null
+++ b/sqlparse/engine/filter.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+from sqlparse import tokens as T
+from sqlparse.engine.grouping import Statement, Token
+
+
+class TokenFilter(object):
+
+ def __init__(self, **options):
+ self.options = options
+
+ def process(self, stack, stream):
+ """Process token stream."""
+ raise NotImplementedError
+
+
+class StatementFilter(TokenFilter):
+
+ def __init__(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _reset(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _change_splitlevel(self, ttype, value):
+ # PostgreSQL
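+        # Dollar-quoted blocks ($$ ... $$) may contain semicolons that
+        # must not split the statement, so they change the split level.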
+ if (ttype == T.Name.Builtin
+ and value.startswith('$') and value.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return -1
+ else:
+ self._in_dbldollar = True
+ return 1
+ elif self._in_dbldollar:
+ return 0
+
+ # ANSI
+ if ttype is not T.Keyword:
+ return 0
+
+ unified = value.upper()
+
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return 0
+ return 0
+
+ if unified == 'END':
+            # Should this respect a preceding BEGIN?
+ # In CASE ... WHEN ... END this results in a split level -1.
+ return -1
+
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+
+ if unified in ('IF', 'FOR') and self._is_create:
+ return 1
+
+ # Default
+ return 0
+
+ def process(self, stack, stream):
+ splitlevel = 0
+ stmt = None
+ consume_ws = False
+ stmt_tokens = []
+ for ttype, value in stream:
+ # Before appending the token
+ if (consume_ws and ttype is not T.Whitespace
+ and ttype is not T.Comment.Single):
+ consume_ws = False
+ stmt.tokens = stmt_tokens
+ yield stmt
+ self._reset()
+ stmt = None
+ splitlevel = 0
+ if stmt is None:
+ stmt = Statement()
+ stmt_tokens = []
+ splitlevel += self._change_splitlevel(ttype, value)
+ # Append the token
+ stmt_tokens.append(Token(ttype, value))
+ # After appending the token
+ if (splitlevel <= 0 and ttype is T.Punctuation
+ and value == ';'):
+ consume_ws = True
+ if stmt is not None:
+ stmt.tokens = stmt_tokens
+ yield stmt
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
new file mode 100644
index 0000000..433f539
--- /dev/null
+++ b/sqlparse/engine/grouping.py
@@ -0,0 +1,537 @@
+# -*- coding: utf-8 -*-
+
+import itertools
+import re
+import types
+
+from sqlparse import tokens as T
+
+
+class Token(object):
+
+ __slots__ = ('value', 'ttype')
+
+ def __init__(self, ttype, value):
+ self.value = value
+ self.ttype = ttype
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def __repr__(self):
+ short = self._get_repr_value()
+ return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+ short, id(self))
+
+ def __unicode__(self):
+ return self.value
+
+ def to_unicode(self):
+ return unicode(self)
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def _get_repr_value(self):
+ raw = unicode(self)
+ if len(raw) > 7:
+ short = raw[:6]+u'...'
+ else:
+ short = raw
+        return re.sub(r'\s+', ' ', short)
+
+ def match(self, ttype, values, regex=False):
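+        """Return True if the token matches *ttype* and *values*.
+
+        *values* may be None (matches on type only), a string or a list
+        of strings; if *regex* is set the values are used as patterns.
+        Keyword values are compared case-insensitively.
+        """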
+ if self.ttype is not ttype:
+ return False
+ if values is None:
+ return self.ttype is ttype
+ if isinstance(values, basestring):
+ values = [values]
+ if regex:
+ if self.ttype is T.Keyword:
+ values = [re.compile(v, re.IGNORECASE) for v in values]
+ else:
+ values = [re.compile(v) for v in values]
+ for pattern in values:
+ if pattern.search(self.value):
+ return True
+ return False
+ else:
+ if self.ttype is T.Keyword:
+ return self.value.upper() in [v.upper() for v in values]
+ else:
+ return self.value in values
+
+ def is_group(self):
+ return False
+
+ def is_whitespace(self):
+ return self.ttype and self.ttype in T.Whitespace
+
+
+class TokenList(Token):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def __init__(self, tokens=None):
+ if tokens is None:
+ tokens = []
+ self.tokens = tokens
+ Token.__init__(self, None, None)
+
+ def __unicode__(self):
+ return ''.join(unicode(x) for x in self.flatten())
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def _pprint_tree(self, max_depth=None, depth=0):
+ """Pretty-print the object tree."""
+ indent = ' '*(depth*2)
+ for token in self.tokens:
+            pre = ' | '
+ print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(),
+ token._get_repr_value())
+ if (token.is_group() and max_depth is not None
+ and depth < max_depth):
+ token._pprint_tree(max_depth, depth+1)
+
+ def flatten(self):
+ for token in self.tokens:
+ if isinstance(token, TokenList):
+ for item in token.flatten():
+ yield item
+ else:
+ yield token
+
+ def is_group(self):
+ return True
+
+ def get_sublists(self):
+ return [x for x in self.tokens if isinstance(x, TokenList)]
+
+ def token_first(self, ignore_whitespace=True):
+ for token in self.tokens:
+ if ignore_whitespace and token.is_whitespace():
+ continue
+ return token
+ return None
+
+ def token_next_by_instance(self, idx, clss):
+        if not isinstance(clss, (types.ListType, types.TupleType)):
+            clss = (clss,)
+        clss = tuple(clss)
+ for token in self.tokens[idx:]:
+ if isinstance(token, clss):
+ return token
+ return None
+
+ def token_next_by_type(self, idx, ttypes):
+ if not isinstance(ttypes, (types.TupleType, types.ListType)):
+ ttypes = [ttypes]
+ for token in self.tokens[idx:]:
+ if token.ttype in ttypes:
+ return token
+ return None
+
+ def token_next_match(self, idx, ttype, value, regex=False):
+ if type(idx) != types.IntType:
+ idx = self.token_index(idx)
+ for token in self.tokens[idx:]:
+ if token.match(ttype, value, regex):
+ return token
+ return None
+
+ def token_not_matching(self, idx, funcs):
+ for token in self.tokens[idx:]:
+ passed = False
+ for func in funcs:
+ if func(token):
+ passed = True
+ break
+ if not passed:
+ return token
+ return None
+
+ def token_prev(self, idx, skip_ws=True):
+ while idx != 0:
+ idx -= 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_next(self, idx, skip_ws=True):
+ while idx < len(self.tokens)-1:
+ idx += 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_index(self, token):
+ """Return list index of token."""
+ return self.tokens.index(token)
+
+ def tokens_between(self, start, end, exclude_end=False):
+ """Return all tokens between (and including) start and end."""
+ if exclude_end:
+ offset = 0
+ else:
+ offset = 1
+ return self.tokens[self.token_index(start):self.token_index(end)+offset]
+
+ def group_tokens(self, grp_cls, tokens):
+ """Replace tokens by instance of grp_cls."""
+ idx = self.token_index(tokens[0])
+ for t in tokens:
+ self.tokens.remove(t)
+ grp = grp_cls(tokens)
+ self.tokens.insert(idx, grp)
+ return grp
+
+ def insert_before(self, where, token):
+ self.tokens.insert(self.token_index(where), token)
+
+
+class Statement(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_type(self):
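+        """Return the statement type (e.g. 'SELECT' or 'CREATE'), or
+        'UNKNOWN' if the first keyword is neither DML nor DDL."""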
+        first_token = self.token_first()
+        if first_token is None:
+            return 'UNKNOWN'
+        if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ return first_token.value.upper()
+ else:
+ return 'UNKNOWN'
+
+
+class Identifier(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def has_alias(self):
+ return self.get_alias() is not None
+
+ def get_alias(self):
+ kw = self.token_next_match(0, T.Keyword, 'AS')
+ if kw is not None:
+ alias = self.token_next(self.token_index(kw))
+ if alias is None:
+ return None
+ else:
+ next_ = self.token_next(0)
+ if next_ is None or not isinstance(next_, Identifier):
+ return None
+ alias = next_
+ if isinstance(alias, Identifier):
+ return alias.get_name()
+ else:
+ return alias.to_unicode()
+
+ def get_name(self):
+ alias = self.get_alias()
+ if alias is not None:
+ return alias
+ return self.get_real_name()
+
+    def get_real_name(self):
+        token = self.token_next_by_type(0, T.Name)
+        return token.value if token is not None else None
+
+ def get_typecast(self):
+ marker = self.token_next_match(0, T.Punctuation, '::')
+ if marker is None:
+ return None
+ next_ = self.token_next(self.token_index(marker), False)
+ if next_ is None:
+ return None
+ return next_.to_unicode()
+
+
+class IdentifierList(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_identifiers(self):
+ return [x for x in self.tokens if isinstance(x, Identifier)]
+
+
+class Parenthesis(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Assignment(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class If(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class For(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class Comparison(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+class Comment(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class Where(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Case(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_cases(self):
+ """Returns a list of 2-tuples (condition, value).
+
+ If an ELSE exists condition is None.
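+
+        Note that the WHEN/THEN/ELSE keyword tokens themselves end up in
+        the returned lists. Roughly (tokens shown by their SQL text)::
+
+            CASE WHEN x > 0 THEN 'pos' ELSE 'neg' END
+            -> [(['WHEN', 'x > 0'], ['THEN', "'pos'"]),
+                (None, ['ELSE', "'neg'"])]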
+ """
+ ret = []
+ in_condition = in_value = False
+ for token in self.tokens:
+ if token.match(T.Keyword, 'WHEN'):
+ ret.append(([], []))
+ in_condition = True
+ in_value = False
+ elif token.match(T.Keyword, 'ELSE'):
+ ret.append((None, []))
+ in_condition = False
+ in_value = True
+ elif token.match(T.Keyword, 'THEN'):
+ in_condition = False
+ in_value = True
+ elif token.match(T.Keyword, 'END'):
+ in_condition = False
+ in_value = False
+ if in_condition:
+ ret[-1][0].append(token)
+ elif in_value:
+ ret[-1][1].append(token)
+ return ret
+
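+# Helper for grouping "<left> <marker> <right>" constructs (e.g. "x AS y",
+# "x := y"): the marker and right-hand tokens are attached to the token on
+# the left, which is wrapped in *cls* if it isn't an instance already.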
+def _group_left_right(tlist, ttype, value, cls,
+ check_right=lambda t: True,
+ include_semicolon=False):
+# [_group_left_right(sgroup, ttype, value, cls, check_right,
+# include_semicolon) for sgroup in tlist.get_sublists()
+# if not isinstance(sgroup, cls)]
+ idx = 0
+ token = tlist.token_next_match(idx, ttype, value)
+ while token:
+ right = tlist.token_next(tlist.token_index(token))
+ left = tlist.token_prev(tlist.token_index(token))
+ if (right is None or not check_right(right)
+ or left is None):
+ token = tlist.token_next_match(tlist.token_index(token)+1,
+ ttype, value)
+ else:
+ if include_semicolon:
+ right = tlist.token_next_match(tlist.token_index(right),
+ T.Punctuation, ';')
+ tokens = tlist.tokens_between(left, right)[1:]
+ if not isinstance(left, cls):
+ new = cls([left])
+ new_idx = tlist.token_index(left)
+ tlist.tokens.remove(left)
+ tlist.tokens.insert(new_idx, new)
+ left = new
+ left.tokens.extend(tokens)
+ for t in tokens:
+ tlist.tokens.remove(t)
+ token = tlist.token_next_match(tlist.token_index(left)+1,
+ ttype, value)
+
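+# Helper for grouping everything from a start token (e.g. "(") up to its
+# matching end token (e.g. ")") into *cls*. A depth counter makes sure
+# nested start/end pairs don't close the group too early.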
+def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
+ cls, include_semicolon=False, recurse=False):
+ def _find_matching(i, tl, stt, sva, ett, eva):
+ depth = 1
+ for t in tl.tokens[i:]:
+ if t.match(stt, sva):
+ depth += 1
+ elif t.match(ett, eva):
+ depth -= 1
+ if depth == 1:
+ return t
+ return None
+ [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
+ cls, include_semicolon) for sgroup in tlist.get_sublists()
+ if recurse]
+ if isinstance(tlist, cls):
+ idx = 1
+ else:
+ idx = 0
+ token = tlist.token_next_match(idx, start_ttype, start_value)
+ while token:
+ tidx = tlist.token_index(token)
+ end = _find_matching(tidx, tlist, start_ttype, start_value,
+ end_ttype, end_value)
+ if end is None:
+ idx = tidx+1
+ else:
+ if include_semicolon:
+ next_ = tlist.token_next(tlist.token_index(end))
+ if next_ and next_.match(T.Punctuation, ';'):
+ end = next_
+ group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
+ _group_matching(group, start_ttype, start_value,
+ end_ttype, end_value, cls, include_semicolon)
+ idx = tlist.token_index(group)+1
+ token = tlist.token_next_match(idx, start_ttype, start_value)
+
+def group_if(tlist):
+ _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True)
+
+def group_for(tlist):
+ _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True)
+
+def group_as(tlist):
+ _group_left_right(tlist, T.Keyword, 'AS', Identifier)
+
+def group_assignment(tlist):
+ _group_left_right(tlist, T.Assignment, ':=', Assignment,
+ include_semicolon=True)
+
+def group_comparison(tlist):
+    _group_left_right(tlist, T.Operator, None, Comparison)
+
+
+def group_case(tlist):
+ _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case, True)
+
+
+def group_identifier(tlist):
+ def _consume_cycle(tl, i):
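+        # Alternately expect a dot and another name or quoted symbol
+        # (e.g. "schema"."table"), starting right after the initial name;
+        # the first token breaking the pattern ends the identifier.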
+ x = itertools.cycle((lambda y: y.match(T.Punctuation, '.'),
+ lambda y: y.ttype in (T.String.Symbol, T.Name)))
+ for t in tl.tokens[i:]:
+ if x.next()(t):
+ yield t
+ else:
+ raise StopIteration
+
+ # bottom up approach: group subgroups first
+ [group_identifier(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Identifier)]
+
+ # real processing
+ idx = 0
+ token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+ while token:
+ identifier_tokens = [token]+list(
+ _consume_cycle(tlist,
+ tlist.token_index(token)+1))
+ group = tlist.group_tokens(Identifier, identifier_tokens)
+ idx = tlist.token_index(group)+1
+ token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+
+
+def group_identifier_list(tlist):
+ [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, IdentifierList)]
+ idx = 0
+ token = tlist.token_next_by_instance(idx, Identifier)
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_not_matching(tidx+1,
+ [lambda t: isinstance(t, Identifier),
+ lambda t: t.is_whitespace(),
+ lambda t: t.match(T.Punctuation,
+ ',')
+ ])
+ if end is None:
+ idx = tidx + 1
+ else:
+ grp_tokens = tlist.tokens_between(token, end, exclude_end=True)
+ while grp_tokens and (grp_tokens[-1].is_whitespace()
+ or grp_tokens[-1].match(T.Punctuation, ',')):
+ grp_tokens.pop()
+ if len(grp_tokens) <= 1:
+ idx = tidx + 1
+ else:
+ group = tlist.group_tokens(IdentifierList, grp_tokens)
+ idx = tlist.token_index(group)
+ token = tlist.token_next_by_instance(idx, Identifier)
+
+
+def group_parenthesis(tlist):
+ _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis)
+
+def group_comments(tlist):
+ [group_comments(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Comment)]
+ idx = 0
+ token = tlist.token_next_by_type(idx, T.Comment)
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_not_matching(tidx+1,
+ [lambda t: t.ttype in T.Comment,
+ lambda t: t.is_whitespace()])
+ if end is None:
+ idx = tidx + 1
+ else:
+ eidx = tlist.token_index(end)
+ grp_tokens = tlist.tokens_between(token,
+ tlist.token_prev(eidx, False))
+ group = tlist.group_tokens(Comment, grp_tokens)
+ idx = tlist.token_index(group)
+ token = tlist.token_next_by_type(idx, T.Comment)
+
+def group_where(tlist):
+ [group_where(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Where)]
+ idx = 0
+ token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+ stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION')
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_next_match(tidx+1, T.Keyword, stopwords)
+ if end is None:
+ end = tlist.tokens[-1]
+ else:
+ end = tlist.tokens[tlist.token_index(end)-1]
+ group = tlist.group_tokens(Where, tlist.tokens_between(token, end))
+ idx = tlist.token_index(group)
+ token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+
+def group_aliased(tlist):
+ [group_aliased(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Identifier)]
+ idx = 0
+ token = tlist.token_next_by_instance(idx, Identifier)
+ while token:
+ next_ = tlist.token_next(tlist.token_index(token))
+ if next_ is not None and isinstance(next_, Identifier):
+ grp = tlist.tokens_between(token, next_)[1:]
+ token.tokens.extend(grp)
+ for t in grp:
+ tlist.tokens.remove(t)
+ idx = tlist.token_index(token)+1
+ token = tlist.token_next_by_instance(idx, Identifier)
+
+
+def group_typecasts(tlist):
+ _group_left_right(tlist, T.Punctuation, '::', Identifier)
+
+
+def group(tlist):
+ for func in [group_parenthesis,
+ group_comments,
+ group_where,
+ group_case,
+ group_identifier,
+ group_typecasts,
+ group_as,
+ group_aliased,
+ group_assignment,
+                 group_comparison,
+ group_identifier_list,
+ group_if,
+ group_for,]:
+ func(tlist)
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
new file mode 100644
index 0000000..695b298
--- /dev/null
+++ b/sqlparse/filters.py
@@ -0,0 +1,432 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from sqlparse.engine import grouping
+from sqlparse import tokens as T
+
+
+class Filter(object):
+
+ def process(self, *args):
+ raise NotImplementedError
+
+
+class TokenFilter(Filter):
+
+ def process(self, stack, stream):
+ raise NotImplementedError
+
+
+# FIXME: Should be removed
+def rstrip(stream):
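+    # Buffers trailing whitespace tokens and drops them once a newline
+    # comes along, i.e. right-strips each line of the token stream.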
+ buff = []
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ # assuming there's only one \n in value
+ before, rest = token.value.split('\n', 1)
+ token.value = '\n%s' % rest
+ buff = []
+ yield token
+ elif token.is_whitespace():
+ buff.append(token)
+ elif token.is_group():
+ token.tokens = list(rstrip(token.tokens))
+ # process group and look if it starts with a nl
+ if token.tokens and token.tokens[0].is_whitespace():
+ before, rest = token.tokens[0].value.split('\n', 1)
+ token.tokens[0].value = '\n%s' % rest
+ buff = []
+ while buff:
+ yield buff.pop(0)
+ yield token
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+
+
+# --------------------------
+# token process
+
+class _CaseFilter(TokenFilter):
+
+ ttype = None
+
+ def __init__(self, case=None):
+ if case is None:
+ case = 'upper'
+ assert case in ['lower', 'upper', 'capitalize']
+ self.convert = getattr(unicode, case)
+
+ def process(self, stack, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype:
+ value = self.convert(value)
+ yield ttype, value
+
+
+class KeywordCaseFilter(_CaseFilter):
+ ttype = T.Keyword
+
+
+class IdentifierCaseFilter(_CaseFilter):
+ ttype = (T.Name, T.String.Symbol)
+
+
+# ----------------------
+# statement process
+
+class StripCommentsFilter(Filter):
+
+ def _process(self, tlist):
+        clss = set([x.__class__ for x in tlist.tokens])
+ while grouping.Comment in clss:
+ token = tlist.token_next_by_instance(0, grouping.Comment)
+ tidx = tlist.token_index(token)
+ prev = tlist.token_prev(tidx, False)
+ next_ = tlist.token_next(tidx, False)
+            # Replace the comment with whitespace if prev and next exist and
+            # are not whitespace themselves. This doesn't apply if prev or
+            # next is a parenthesis.
+ if (prev is not None and next_ is not None
+ and not prev.is_whitespace() and not next_.is_whitespace()
+ and not (prev.match(T.Punctuation, '(')
+ or next_.match(T.Punctuation, ')'))):
+ tlist.tokens[tidx] = grouping.Token(T.Whitespace, ' ')
+ else:
+ tlist.tokens.pop(tidx)
+ clss = set([x.__class__ for x in tlist.tokens])
+
+ def process(self, stack, stmt):
+ [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
+ self._process(stmt)
+
+
+class StripWhitespaceFilter(Filter):
+
+ def _stripws(self, tlist):
+ func_name = '_stripws_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._stripws_default)
+ func(tlist)
+
+ def _stripws_default(self, tlist):
+ last_was_ws = False
+ for token in tlist.tokens:
+ if token.is_whitespace():
+ if last_was_ws:
+ token.value = ''
+ else:
+ token.value = ' '
+ last_was_ws = token.is_whitespace()
+
+ def _stripws_parenthesis(self, tlist):
+ if tlist.tokens[1].is_whitespace():
+ tlist.tokens.pop(1)
+ if tlist.tokens[-2].is_whitespace():
+ tlist.tokens.pop(-2)
+ self._stripws_default(tlist)
+
+ def process(self, stack, stmt):
+ [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
+ self._stripws(stmt)
+ if stmt.tokens[-1].is_whitespace():
+ stmt.tokens.pop(-1)
+
+
+class ReindentFilter(Filter):
+
+ def __init__(self, width=2, char=' ', line_width=None):
+ self.width = width
+ self.char = char
+ self.indent = 0
+ self.offset = 0
+ self.line_width = line_width
+ self._curr_stmt = None
+ self._last_stmt = None
+
+ def _get_offset(self, token):
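+        # Column of *token* on its line, minus the columns already
+        # covered by the current indentation level and offset.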
+ all_ = list(self._curr_stmt.flatten())
+ idx = all_.index(token)
+ raw = ''.join(unicode(x) for x in all_[:idx+1])
+ line = raw.splitlines()[-1]
+ # Now take current offset into account and return relative offset.
+ full_offset = len(line)-(len(self.char*(self.width*self.indent)))
+ return full_offset - self.offset
+
+ def nl(self):
+ # TODO: newline character should be configurable
+ ws = '\n'+(self.char*((self.indent*self.width)+self.offset))
+ return grouping.Token(T.Whitespace, ws)
+
+ def _split_kwds(self, tlist):
+ split_words = ('FROM', 'JOIN$', 'AND', 'OR',
+ 'GROUP', 'ORDER', 'UNION', 'VALUES')
+ idx = 0
+ token = tlist.token_next_match(idx, T.Keyword, split_words,
+ regex=True)
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ offset = 1
+ if prev and prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ offset += 1
+ nl = self.nl()
+ tlist.insert_before(token, nl)
+ token = tlist.token_next_match(tlist.token_index(nl)+offset,
+ T.Keyword, split_words, regex=True)
+
+ def _split_statements(self, tlist):
+ idx = 0
+ token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ if prev and prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ # only break if it's not the first token
+ if prev:
+ nl = self.nl()
+ tlist.insert_before(token, nl)
+ token = tlist.token_next_by_type(tlist.token_index(token)+1,
+ (T.Keyword.DDL, T.Keyword.DML))
+
+ def _process(self, tlist):
+ func_name = '_process_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._process_default)
+ func(tlist)
+
+ def _process_where(self, tlist):
+ token = tlist.token_next_match(0, T.Keyword, 'WHERE')
+ tlist.insert_before(token, self.nl())
+ self.indent += 1
+ self._process_default(tlist)
+ self.indent -= 1
+
+ def _process_parenthesis(self, tlist):
+ first = tlist.token_next(0)
+ indented = False
+ if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ self.indent += 1
+ tlist.tokens.insert(0, self.nl())
+ indented = True
+ num_offset = self._get_offset(tlist.token_next_match(0,
+ T.Punctuation, '('))
+ self.offset += num_offset
+ self._process_default(tlist, stmts=not indented)
+ if indented:
+ self.indent -= 1
+ self.offset -= num_offset
+
+ def _process_identifierlist(self, tlist):
+ identifiers = tlist.get_identifiers()
+ if len(identifiers) > 1:
+ first = list(identifiers[0].flatten())[0]
+ num_offset = self._get_offset(first)-len(first.value)
+ self.offset += num_offset
+ for token in identifiers[1:]:
+ tlist.insert_before(token, self.nl())
+ self.offset -= num_offset
+ self._process_default(tlist)
+
+    def _process_case(self, tlist):
+        cases = tlist.get_cases()
+        is_first = True
+        num_offset = None
+        case = tlist.tokens[0]
+        outer_offset = self._get_offset(case)-len(case.value)
+        self.offset += outer_offset
+        for cond, value in cases:
+ if is_first:
+ is_first = False
+ num_offset = self._get_offset(cond[0])-len(cond[0].value)
+ self.offset += num_offset
+ continue
+ if cond is None:
+ token = value[0]
+ else:
+ token = cond[0]
+ tlist.insert_before(token, self.nl())
+ # Line breaks on group level are done. Now let's add an offset of
+ # 5 (=length of "when", "then", "else") and process subgroups.
+ self.offset += 5
+ self._process_default(tlist)
+ self.offset -= 5
+ if num_offset is not None:
+ self.offset -= num_offset
+ end = tlist.token_next_match(0, T.Keyword, 'END')
+ tlist.insert_before(end, self.nl())
+ self.offset -= outer_offset
+
+ def _process_default(self, tlist, stmts=True, kwds=True):
+ if stmts:
+ self._split_statements(tlist)
+ if kwds:
+ self._split_kwds(tlist)
+ [self._process(sgroup) for sgroup in tlist.get_sublists()]
+
+ def process(self, stack, stmt):
+ if isinstance(stmt, grouping.Statement):
+ self._curr_stmt = stmt
+ self._process(stmt)
+ if isinstance(stmt, grouping.Statement):
+ if self._last_stmt is not None:
+ if self._last_stmt.to_unicode().endswith('\n'):
+ nl = '\n'
+ else:
+ nl = '\n\n'
+ stmt.tokens.insert(0,
+ grouping.Token(T.Whitespace, nl))
+ if self._last_stmt != stmt:
+ self._last_stmt = stmt
+
+
+# FIXME: Doesn't work ;)
+class RightMarginFilter(Filter):
+
+ keep_together = (
+# grouping.TypeCast, grouping.Identifier, grouping.Alias,
+ )
+
+ def __init__(self, width=79):
+ self.width = width
+ self.line = ''
+
+ def _process(self, stack, group, stream):
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ if token.value.endswith('\n'):
+ self.line = ''
+ else:
+ self.line = token.value.splitlines()[-1]
+ elif (token.is_group()
+ and not token.__class__ in self.keep_together):
+ token.tokens = self._process(stack, token, token.tokens)
+ else:
+ val = token.to_unicode()
+ if len(self.line) + len(val) > self.width:
+ match = re.search('^ +', self.line)
+ if match is not None:
+ indent = match.group()
+ else:
+ indent = ''
+ yield grouping.Token(T.Whitespace, '\n%s' % indent)
+ self.line = indent
+ self.line += val
+ yield token
+
+ def process(self, stack, group):
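+        # Disabled for now (see FIXME above); returning early leaves the
+        # token stream untouched.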
+ return
+ group.tokens = self._process(stack, group, group.tokens)
+
+
+# ---------------------------
+# postprocess
+
+class SerializerUnicode(Filter):
+
+ def process(self, stack, stmt):
+ raw = stmt.to_unicode()
+ add_nl = raw.endswith('\n')
+ res = '\n'.join(line.rstrip() for line in raw.splitlines())
+ if add_nl:
+ res += '\n'
+ return res
+
+
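+# Serializes a statement as a Python variable assignment, e.g. roughly
+#   sql = 'select * from foo'
+# Multi-line statements are wrapped in parentheses with one quoted
+# string per line.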
+class OutputPythonFilter(Filter):
+
+ def __init__(self, varname='sql'):
+ self.varname = varname
+ self.cnt = 0
+
+ def _process(self, stream, varname, count, has_nl):
+ if count > 1:
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield grouping.Token(T.Operator, '(')
+ yield grouping.Token(T.Text, "'")
+ cnt = 0
+ for token in stream:
+ cnt += 1
+ if token.is_whitespace() and '\n' in token.value:
+ if cnt == 1:
+ continue
+ after_lb = token.value.split('\n', 1)[1]
+ yield grouping.Token(T.Text, "'")
+ yield grouping.Token(T.Whitespace, '\n')
+ for i in range(len(varname)+4):
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, "'")
+                if after_lb:  # it's the indentation
+ yield grouping.Token(T.Whitespace, after_lb)
+ continue
+ elif token.value and "'" in token.value:
+ token.value = token.value.replace("'", "\\'")
+ yield grouping.Token(T.Text, token.value or '')
+ yield grouping.Token(T.Text, "'")
+ if has_nl:
+ yield grouping.Token(T.Operator, ')')
+
+ def process(self, stack, stmt):
+ self.cnt += 1
+ if self.cnt > 1:
+ varname = '%s%d' % (self.varname, self.cnt)
+ else:
+ varname = self.varname
+ has_nl = len(stmt.to_unicode().strip().splitlines()) > 1
+ stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl)
+ return stmt
+
+
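+# The PHP counterpart: serializes a statement roughly as
+#   $sql = "select * from foo";
+# with follow-up lines appended via the .= operator.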
+class OutputPHPFilter(Filter):
+
+ def __init__(self, varname='sql'):
+ self.varname = '$%s' % varname
+ self.count = 0
+
+ def _process(self, stream, varname):
+ if self.count > 1:
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, '"')
+ cnt = 0
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ cnt += 1
+ if cnt == 1:
+ continue
+ after_lb = token.value.split('\n', 1)[1]
+ yield grouping.Token(T.Text, '"')
+ yield grouping.Token(T.Operator, ';')
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Punctuation, '.')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, '"')
+ if after_lb:
+ yield grouping.Token(T.Text, after_lb)
+ continue
+ elif '"' in token.value:
+ token.value = token.value.replace('"', '\\"')
+ yield grouping.Token(T.Text, token.value)
+ yield grouping.Token(T.Text, '"')
+ yield grouping.Token(T.Punctuation, ';')
+
+ def process(self, stack, stmt):
+ self.count += 1
+ if self.count > 1:
+ varname = '%s%d' % (self.varname, self.count)
+ else:
+ varname = self.varname
+ stmt.tokens = tuple(self._process(stmt.tokens, varname))
+ return stmt
+
diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py
new file mode 100644
index 0000000..9d443ca
--- /dev/null
+++ b/sqlparse/formatter.py
@@ -0,0 +1,163 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL formatter"""
+
+import logging
+
+from sqlparse import SQLParseError
+from sqlparse import filters
+
+
+def validate_options(options):
+ """Validates options."""
+ kwcase = options.get('keyword_case', None)
+ if kwcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
+
+ idcase = options.get('identifier_case', None)
+ if idcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
+
+ ofrmt = options.get('output_format', None)
+ if ofrmt not in [None, 'sql', 'python', 'php']:
+ raise SQLParseError('Unknown output format: %r' % ofrmt)
+
+ strip_comments = options.get('strip_comments', False)
+ if strip_comments not in [True, False]:
+ raise SQLParseError('Invalid value for strip_comments: %r'
+ % strip_comments)
+
+ strip_ws = options.get('strip_whitespace', False)
+ if strip_ws not in [True, False]:
+ raise SQLParseError('Invalid value for strip_whitespace: %r'
+ % strip_ws)
+
+ reindent = options.get('reindent', False)
+ if reindent not in [True, False]:
+ raise SQLParseError('Invalid value for reindent: %r'
+ % reindent)
+ elif reindent:
+ options['strip_whitespace'] = True
+ indent_tabs = options.get('indent_tabs', False)
+ if indent_tabs not in [True, False]:
+        raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
+ elif indent_tabs:
+ options['indent_char'] = '\t'
+ else:
+ options['indent_char'] = ' '
+ indent_width = options.get('indent_width', 2)
+ try:
+ indent_width = int(indent_width)
+ except (TypeError, ValueError):
+ raise SQLParseError('indent_width requires an integer')
+ if indent_width < 1:
+        raise SQLParseError('indent_width requires a positive integer')
+ options['indent_width'] = indent_width
+
+ right_margin = options.get('right_margin', None)
+ if right_margin is not None:
+ try:
+ right_margin = int(right_margin)
+ except (TypeError, ValueError):
+ raise SQLParseError('right_margin requires an integer')
+ if right_margin < 10:
+            raise SQLParseError('right_margin requires an integer >= 10')
+ options['right_margin'] = right_margin
+
+ return options
+
+
+def build_filter_stack(stack, options):
+ """Setup and return a filter stack.
+
+ Args:
+ stack: :class:`~sqlparse.filters.FilterStack` instance
+ options: Dictionary with options validated by validate_options.
+ """
+ # Token filter
+ if 'keyword_case' in options:
+ stack.preprocess.append(
+ filters.KeywordCaseFilter(options['keyword_case']))
+
+ if 'identifier_case' in options:
+ stack.preprocess.append(
+ filters.IdentifierCaseFilter(options['identifier_case']))
+
+ # After grouping
+ if options.get('strip_comments', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripCommentsFilter())
+
+ if (options.get('strip_whitespace', False)
+ or options.get('reindent', False)):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+ if options.get('reindent', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.ReindentFilter(char=options['indent_char'],
+ width=options['indent_width']))
+
+ if options.get('right_margin', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.RightMarginFilter(width=options['right_margin']))
+
+ # Serializer
+ if options.get('output_format'):
+ frmt = options['output_format']
+ if frmt.lower() == 'php':
+ fltr = filters.OutputPHPFilter()
+ elif frmt.lower() == 'python':
+ fltr = filters.OutputPythonFilter()
+ else:
+ fltr = None
+ if fltr is not None:
+ stack.postprocess.append(fltr)
+
+ return stack
+
+
+# FIXME: Legacy implementation, probably superseded by build_filter_stack
+# above; it references filter classes (GroupFilter, IndentFilter,
+# LTrimFilter, UngroupFilter) that are not defined in this code base.
+def format(statement, **options):
+    from sqlparse.lexer import Lexer
+ logging.info('OPTIONS %r', options)
+ lexer = Lexer()
+# lexer.add_filter('whitespace')
+ lexer.add_filter(filters.GroupFilter())
+ if options.get('reindent', False):
+ lexer.add_filter(filters.StripWhitespaceFilter())
+ lexer.add_filter(filters.IndentFilter(
+ n_indents=options.get('n_indents', 2)))
+ if options.get('ltrim', False):
+ lexer.add_filter(filters.LTrimFilter())
+ keyword_case = options.get('keyword_case', None)
+ if keyword_case is not None:
+ assert keyword_case in ('lower', 'upper', 'capitalize')
+ lexer.add_filter(filters.KeywordCaseFilter(case=keyword_case))
+ identifier_case = options.get('identifier_case', None)
+ if identifier_case is not None:
+ assert identifier_case in ('lower', 'upper', 'capitalize')
+ lexer.add_filter(filters.IdentifierCaseFilter(case=identifier_case))
+ if options.get('strip_comments', False):
+ lexer.add_filter(filters.StripCommentsFilter())
+ right_margin = options.get('right_margin', None)
+ if right_margin is not None:
+ right_margin = int(right_margin)
+ assert right_margin > 0
+ lexer.add_filter(filters.RightMarginFilter(margin=right_margin))
+ lexer.add_filter(filters.UngroupFilter())
+ if options.get('output_format', None):
+ ofrmt = options['output_format']
+ assert ofrmt in ('sql', 'python', 'php')
+ if ofrmt == 'python':
+ lexer.add_filter(filters.OutputPythonFilter())
+ elif ofrmt == 'php':
+ lexer.add_filter(filters.OutputPHPFilter())
+ tokens = []
+ for ttype, value in lexer.get_tokens(unicode(statement)):
+ tokens.append((ttype, value))
+ return statement.__class__(tokens)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
new file mode 100644
index 0000000..3f0632e
--- /dev/null
+++ b/sqlparse/keywords.py
@@ -0,0 +1,589 @@
+from sqlparse.tokens import *
+
+KEYWORDS = {
+ 'ABORT': Keyword,
+ 'ABS': Keyword,
+ 'ABSOLUTE': Keyword,
+ 'ACCESS': Keyword,
+ 'ADA': Keyword,
+ 'ADD': Keyword,
+ 'ADMIN': Keyword,
+ 'AFTER': Keyword,
+ 'AGGREGATE': Keyword,
+ 'ALIAS': Keyword,
+ 'ALL': Keyword,
+ 'ALLOCATE': Keyword,
+ 'ANALYSE': Keyword,
+ 'ANALYZE': Keyword,
+ 'AND': Keyword,
+ 'ANY': Keyword,
+ 'ARE': Keyword,
+ 'AS': Keyword,
+ 'ASC': Keyword,
+ 'ASENSITIVE': Keyword,
+ 'ASSERTION': Keyword,
+ 'ASSIGNMENT': Keyword,
+ 'ASYMMETRIC': Keyword,
+ 'AT': Keyword,
+ 'ATOMIC': Keyword,
+ 'AUTHORIZATION': Keyword,
+ 'AVG': Keyword,
+
+ 'BACKWARD': Keyword,
+ 'BEFORE': Keyword,
+ 'BEGIN': Keyword,
+ 'BETWEEN': Keyword,
+ 'BITVAR': Keyword,
+ 'BIT_LENGTH': Keyword,
+ 'BOTH': Keyword,
+ 'BREADTH': Keyword,
+ 'BY': Keyword,
+
+ 'C': Keyword,
+ 'CACHE': Keyword,
+ 'CALL': Keyword,
+ 'CALLED': Keyword,
+ 'CARDINALITY': Keyword,
+ 'CASCADE': Keyword,
+ 'CASCADED': Keyword,
+ 'CASE': Keyword,
+ 'CAST': Keyword,
+ 'CATALOG': Keyword,
+ 'CATALOG_NAME': Keyword,
+ 'CHAIN': Keyword,
+ 'CHARACTERISTICS': Keyword,
+ 'CHARACTER_LENGTH': Keyword,
+ 'CHARACTER_SET_CATALOG': Keyword,
+ 'CHARACTER_SET_NAME': Keyword,
+ 'CHARACTER_SET_SCHEMA': Keyword,
+ 'CHAR_LENGTH': Keyword,
+ 'CHECK': Keyword,
+ 'CHECKED': Keyword,
+ 'CHECKPOINT': Keyword,
+ 'CLASS': Keyword,
+ 'CLASS_ORIGIN': Keyword,
+ 'CLOB': Keyword,
+ 'CLOSE': Keyword,
+ 'CLUSTER': Keyword,
+    'COALESCE': Keyword,
+ 'COBOL': Keyword,
+ 'COLLATE': Keyword,
+ 'COLLATION': Keyword,
+ 'COLLATION_CATALOG': Keyword,
+ 'COLLATION_NAME': Keyword,
+ 'COLLATION_SCHEMA': Keyword,
+ 'COLUMN': Keyword,
+ 'COLUMN_NAME': Keyword,
+ 'COMMAND_FUNCTION': Keyword,
+ 'COMMAND_FUNCTION_CODE': Keyword,
+ 'COMMENT': Keyword,
+ 'COMMIT': Keyword,
+ 'COMMITTED': Keyword,
+ 'COMPLETION': Keyword,
+ 'CONDITION_NUMBER': Keyword,
+ 'CONNECT': Keyword,
+ 'CONNECTION': Keyword,
+ 'CONNECTION_NAME': Keyword,
+ 'CONSTRAINT': Keyword,
+ 'CONSTRAINTS': Keyword,
+ 'CONSTRAINT_CATALOG': Keyword,
+ 'CONSTRAINT_NAME': Keyword,
+ 'CONSTRAINT_SCHEMA': Keyword,
+ 'CONSTRUCTOR': Keyword,
+ 'CONTAINS': Keyword,
+ 'CONTINUE': Keyword,
+ 'CONVERSION': Keyword,
+ 'CONVERT': Keyword,
+ 'COPY': Keyword,
+    'CORRESPONDING': Keyword,
+ 'COUNT': Keyword,
+ 'CREATEDB': Keyword,
+ 'CREATEUSER': Keyword,
+ 'CROSS': Keyword,
+ 'CUBE': Keyword,
+ 'CURRENT': Keyword,
+ 'CURRENT_DATE': Keyword,
+ 'CURRENT_PATH': Keyword,
+ 'CURRENT_ROLE': Keyword,
+ 'CURRENT_TIME': Keyword,
+ 'CURRENT_TIMESTAMP': Keyword,
+ 'CURRENT_USER': Keyword,
+ 'CURSOR': Keyword,
+ 'CURSOR_NAME': Keyword,
+ 'CYCLE': Keyword,
+
+ 'DATA': Keyword,
+ 'DATABASE': Keyword,
+ 'DATETIME_INTERVAL_CODE': Keyword,
+ 'DATETIME_INTERVAL_PRECISION': Keyword,
+ 'DAY': Keyword,
+ 'DEALLOCATE': Keyword,
+ 'DECLARE': Keyword,
+ 'DEFAULT': Keyword,
+ 'DEFAULTS': Keyword,
+ 'DEFERRABLE': Keyword,
+ 'DEFERRED': Keyword,
+ 'DEFINED': Keyword,
+ 'DEFINER': Keyword,
+ 'DELIMITER': Keyword,
+ 'DELIMITERS': Keyword,
+ 'DEREF': Keyword,
+ 'DESC': Keyword,
+ 'DESCRIBE': Keyword,
+ 'DESCRIPTOR': Keyword,
+ 'DESTROY': Keyword,
+ 'DESTRUCTOR': Keyword,
+ 'DETERMINISTIC': Keyword,
+ 'DIAGNOSTICS': Keyword,
+ 'DICTIONARY': Keyword,
+ 'DISCONNECT': Keyword,
+ 'DISPATCH': Keyword,
+ 'DISTINCT': Keyword,
+ 'DO': Keyword,
+ 'DOMAIN': Keyword,
+ 'DYNAMIC': Keyword,
+ 'DYNAMIC_FUNCTION': Keyword,
+ 'DYNAMIC_FUNCTION_CODE': Keyword,
+
+ 'EACH': Keyword,
+ 'ELSE': Keyword,
+ 'ENCODING': Keyword,
+ 'ENCRYPTED': Keyword,
+ 'END': Keyword,
+ 'END-EXEC': Keyword,
+ 'EQUALS': Keyword,
+ 'ESCAPE': Keyword,
+ 'EVERY': Keyword,
+ 'EXCEPT': Keyword,
+    'EXCEPTION': Keyword,
+ 'EXCLUDING': Keyword,
+ 'EXCLUSIVE': Keyword,
+ 'EXEC': Keyword,
+ 'EXECUTE': Keyword,
+ 'EXISTING': Keyword,
+ 'EXISTS': Keyword,
+ 'EXTERNAL': Keyword,
+ 'EXTRACT': Keyword,
+
+ 'FALSE': Keyword,
+ 'FETCH': Keyword,
+ 'FINAL': Keyword,
+ 'FIRST': Keyword,
+ 'FOR': Keyword,
+ 'FORCE': Keyword,
+ 'FOREIGN': Keyword,
+ 'FORTRAN': Keyword,
+ 'FORWARD': Keyword,
+ 'FOUND': Keyword,
+ 'FREE': Keyword,
+ 'FREEZE': Keyword,
+ 'FROM': Keyword,
+ 'FULL': Keyword,
+ 'FUNCTION': Keyword,
+
+ 'G': Keyword,
+ 'GENERAL': Keyword,
+ 'GENERATED': Keyword,
+ 'GET': Keyword,
+ 'GLOBAL': Keyword,
+ 'GO': Keyword,
+ 'GOTO': Keyword,
+ 'GRANT': Keyword,
+ 'GRANTED': Keyword,
+ 'GROUP': Keyword,
+ 'GROUPING': Keyword,
+
+ 'HANDLER': Keyword,
+ 'HAVING': Keyword,
+ 'HIERARCHY': Keyword,
+ 'HOLD': Keyword,
+ 'HOST': Keyword,
+
+ 'IDENTITY': Keyword,
+ 'IF': Keyword,
+ 'IGNORE': Keyword,
+ 'ILIKE': Keyword,
+ 'IMMEDIATE': Keyword,
+ 'IMMUTABLE': Keyword,
+
+ 'IMPLEMENTATION': Keyword,
+ 'IMPLICIT': Keyword,
+ 'IN': Keyword,
+ 'INCLUDING': Keyword,
+ 'INCREMENT': Keyword,
+ 'INDEX': Keyword,
+
+    'INDICATOR': Keyword,
+ 'INFIX': Keyword,
+ 'INHERITS': Keyword,
+ 'INITIALIZE': Keyword,
+ 'INITIALLY': Keyword,
+ 'INNER': Keyword,
+ 'INOUT': Keyword,
+ 'INPUT': Keyword,
+ 'INSENSITIVE': Keyword,
+ 'INSTANTIABLE': Keyword,
+ 'INSTEAD': Keyword,
+ 'INTERSECT': Keyword,
+ 'INTO': Keyword,
+ 'INVOKER': Keyword,
+ 'IS': Keyword,
+ 'ISNULL': Keyword,
+ 'ISOLATION': Keyword,
+ 'ITERATE': Keyword,
+
+ 'JOIN': Keyword,
+
+ 'K': Keyword,
+ 'KEY': Keyword,
+ 'KEY_MEMBER': Keyword,
+ 'KEY_TYPE': Keyword,
+
+ 'LANCOMPILER': Keyword,
+ 'LANGUAGE': Keyword,
+ 'LARGE': Keyword,
+ 'LAST': Keyword,
+ 'LATERAL': Keyword,
+ 'LEADING': Keyword,
+ 'LEFT': Keyword,
+ 'LENGTH': Keyword,
+ 'LESS': Keyword,
+ 'LEVEL': Keyword,
+ 'LIKE': Keyword,
+    'LIMIT': Keyword,
+ 'LISTEN': Keyword,
+ 'LOAD': Keyword,
+ 'LOCAL': Keyword,
+ 'LOCALTIME': Keyword,
+ 'LOCALTIMESTAMP': Keyword,
+ 'LOCATION': Keyword,
+ 'LOCATOR': Keyword,
+ 'LOCK': Keyword,
+ 'LOWER': Keyword,
+
+ 'M': Keyword,
+ 'MAP': Keyword,
+ 'MATCH': Keyword,
+ 'MAX': Keyword,
+ 'MAXVALUE': Keyword,
+ 'MESSAGE_LENGTH': Keyword,
+ 'MESSAGE_OCTET_LENGTH': Keyword,
+ 'MESSAGE_TEXT': Keyword,
+ 'METHOD': Keyword,
+ 'MIN': Keyword,
+ 'MINUTE': Keyword,
+ 'MINVALUE': Keyword,
+ 'MOD': Keyword,
+ 'MODE': Keyword,
+ 'MODIFIES': Keyword,
+ 'MODIFY': Keyword,
+ 'MONTH': Keyword,
+ 'MORE': Keyword,
+ 'MOVE': Keyword,
+ 'MUMPS': Keyword,
+
+ 'NAMES': Keyword,
+ 'NATIONAL': Keyword,
+ 'NATURAL': Keyword,
+ 'NCHAR': Keyword,
+ 'NCLOB': Keyword,
+ 'NEW': Keyword,
+ 'NEXT': Keyword,
+ 'NO': Keyword,
+ 'NOCREATEDB': Keyword,
+ 'NOCREATEUSER': Keyword,
+ 'NONE': Keyword,
+ 'NOT': Keyword,
+ 'NOTHING': Keyword,
+ 'NOTIFY': Keyword,
+ 'NOTNULL': Keyword,
+ 'NULL': Keyword,
+ 'NULLABLE': Keyword,
+ 'NULLIF': Keyword,
+
+ 'OBJECT': Keyword,
+ 'OCTET_LENGTH': Keyword,
+ 'OF': Keyword,
+ 'OFF': Keyword,
+ 'OFFSET': Keyword,
+ 'OIDS': Keyword,
+ 'OLD': Keyword,
+ 'ON': Keyword,
+ 'ONLY': Keyword,
+ 'OPEN': Keyword,
+ 'OPERATION': Keyword,
+ 'OPERATOR': Keyword,
+ 'OPTION': Keyword,
+ 'OPTIONS': Keyword,
+ 'OR': Keyword,
+ 'ORDER': Keyword,
+ 'ORDINALITY': Keyword,
+ 'OUT': Keyword,
+ 'OUTER': Keyword,
+ 'OUTPUT': Keyword,
+ 'OVERLAPS': Keyword,
+ 'OVERLAY': Keyword,
+ 'OVERRIDING': Keyword,
+ 'OWNER': Keyword,
+
+ 'PAD': Keyword,
+ 'PARAMETER': Keyword,
+ 'PARAMETERS': Keyword,
+ 'PARAMETER_MODE': Keyword,
+    'PARAMETER_NAME': Keyword,
+    'PARAMETER_ORDINAL_POSITION': Keyword,
+    'PARAMETER_SPECIFIC_CATALOG': Keyword,
+    'PARAMETER_SPECIFIC_NAME': Keyword,
+    'PARAMETER_SPECIFIC_SCHEMA': Keyword,
+ 'PARTIAL': Keyword,
+ 'PASCAL': Keyword,
+ 'PENDANT': Keyword,
+ 'PLACING': Keyword,
+ 'PLI': Keyword,
+ 'POSITION': Keyword,
+ 'POSTFIX': Keyword,
+ 'PRECISION': Keyword,
+ 'PREFIX': Keyword,
+ 'PREORDER': Keyword,
+ 'PREPARE': Keyword,
+ 'PRESERVE': Keyword,
+ 'PRIMARY': Keyword,
+ 'PRIOR': Keyword,
+ 'PRIVILEGES': Keyword,
+ 'PROCEDURAL': Keyword,
+ 'PROCEDURE': Keyword,
+ 'PUBLIC': Keyword,
+
+ 'RAISE': Keyword,
+ 'READ': Keyword,
+ 'READS': Keyword,
+ 'RECHECK': Keyword,
+ 'RECURSIVE': Keyword,
+ 'REF': Keyword,
+ 'REFERENCES': Keyword,
+ 'REFERENCING': Keyword,
+ 'REINDEX': Keyword,
+ 'RELATIVE': Keyword,
+ 'RENAME': Keyword,
+ 'REPEATABLE': Keyword,
+ 'REPLACE': Keyword,
+ 'RESET': Keyword,
+ 'RESTART': Keyword,
+ 'RESTRICT': Keyword,
+ 'RESULT': Keyword,
+ 'RETURN': Keyword,
+ 'RETURNED_LENGTH': Keyword,
+ 'RETURNED_OCTET_LENGTH': Keyword,
+ 'RETURNED_SQLSTATE': Keyword,
+ 'RETURNS': Keyword,
+ 'REVOKE': Keyword,
+ 'RIGHT': Keyword,
+ 'ROLE': Keyword,
+ 'ROLLBACK': Keyword,
+ 'ROLLUP': Keyword,
+ 'ROUTINE': Keyword,
+ 'ROUTINE_CATALOG': Keyword,
+ 'ROUTINE_NAME': Keyword,
+ 'ROUTINE_SCHEMA': Keyword,
+ 'ROW': Keyword,
+ 'ROWS': Keyword,
+ 'ROW_COUNT': Keyword,
+ 'RULE': Keyword,
+
+    'SAVEPOINT': Keyword,
+ 'SCALE': Keyword,
+ 'SCHEMA': Keyword,
+ 'SCHEMA_NAME': Keyword,
+ 'SCOPE': Keyword,
+ 'SCROLL': Keyword,
+ 'SEARCH': Keyword,
+ 'SECOND': Keyword,
+ 'SECURITY': Keyword,
+ 'SELF': Keyword,
+ 'SENSITIVE': Keyword,
+ 'SERIALIZABLE': Keyword,
+ 'SERVER_NAME': Keyword,
+ 'SESSION': Keyword,
+ 'SESSION_USER': Keyword,
+ 'SETOF': Keyword,
+ 'SETS': Keyword,
+ 'SHARE': Keyword,
+ 'SHOW': Keyword,
+ 'SIMILAR': Keyword,
+ 'SIMPLE': Keyword,
+ 'SIZE': Keyword,
+ 'SOME': Keyword,
+ 'SOURCE': Keyword,
+ 'SPACE': Keyword,
+ 'SPECIFIC': Keyword,
+ 'SPECIFICTYPE': Keyword,
+ 'SPECIFIC_NAME': Keyword,
+ 'SQL': Keyword,
+ 'SQLCODE': Keyword,
+ 'SQLERROR': Keyword,
+ 'SQLEXCEPTION': Keyword,
+ 'SQLSTATE': Keyword,
+    'SQLWARNING': Keyword,
+ 'STABLE': Keyword,
+ 'START': Keyword,
+ 'STATE': Keyword,
+ 'STATEMENT': Keyword,
+ 'STATIC': Keyword,
+ 'STATISTICS': Keyword,
+ 'STDIN': Keyword,
+ 'STDOUT': Keyword,
+ 'STORAGE': Keyword,
+ 'STRICT': Keyword,
+ 'STRUCTURE': Keyword,
+ 'STYPE': Keyword,
+ 'SUBCLASS_ORIGIN': Keyword,
+ 'SUBLIST': Keyword,
+ 'SUBSTRING': Keyword,
+ 'SUM': Keyword,
+ 'SYMMETRIC': Keyword,
+ 'SYSID': Keyword,
+ 'SYSTEM': Keyword,
+ 'SYSTEM_USER': Keyword,
+
+ 'TABLE': Keyword,
+ 'TABLE_NAME': Keyword,
+    'TEMP': Keyword,
+ 'TEMPLATE': Keyword,
+ 'TEMPORARY': Keyword,
+ 'TERMINATE': Keyword,
+ 'THAN': Keyword,
+ 'THEN': Keyword,
+ 'TIMESTAMP': Keyword,
+ 'TIMEZONE_HOUR': Keyword,
+ 'TIMEZONE_MINUTE': Keyword,
+ 'TO': Keyword,
+ 'TOAST': Keyword,
+ 'TRAILING': Keyword,
+    'TRANSACTION': Keyword,
+    'TRANSACTIONS_COMMITTED': Keyword,
+    'TRANSACTIONS_ROLLED_BACK': Keyword,
+    'TRANSACTION_ACTIVE': Keyword,
+ 'TRANSFORM': Keyword,
+ 'TRANSFORMS': Keyword,
+ 'TRANSLATE': Keyword,
+ 'TRANSLATION': Keyword,
+ 'TREAT': Keyword,
+ 'TRIGGER': Keyword,
+ 'TRIGGER_CATALOG': Keyword,
+ 'TRIGGER_NAME': Keyword,
+ 'TRIGGER_SCHEMA': Keyword,
+ 'TRIM': Keyword,
+ 'TRUE': Keyword,
+ 'TRUNCATE': Keyword,
+ 'TRUSTED': Keyword,
+ 'TYPE': Keyword,
+
+ 'UNCOMMITTED': Keyword,
+ 'UNDER': Keyword,
+ 'UNENCRYPTED': Keyword,
+ 'UNION': Keyword,
+ 'UNIQUE': Keyword,
+ 'UNKNOWN': Keyword,
+ 'UNLISTEN': Keyword,
+ 'UNNAMED': Keyword,
+ 'UNNEST': Keyword,
+ 'UNTIL': Keyword,
+ 'UPPER': Keyword,
+ 'USAGE': Keyword,
+ 'USER': Keyword,
+ 'USER_DEFINED_TYPE_CATALOG': Keyword,
+ 'USER_DEFINED_TYPE_NAME': Keyword,
+ 'USER_DEFINED_TYPE_SCHEMA': Keyword,
+ 'USING': Keyword,
+
+ 'VACUUM': Keyword,
+ 'VALID': Keyword,
+ 'VALIDATOR': Keyword,
+ 'VALUES': Keyword,
+ 'VARIABLE': Keyword,
+ 'VERBOSE': Keyword,
+ 'VERSION': Keyword,
+ 'VIEW': Keyword,
+ 'VOLATILE': Keyword,
+
+ 'WHEN': Keyword,
+ 'WHENEVER': Keyword,
+ 'WHERE': Keyword,
+ 'WITH': Keyword,
+ 'WITHOUT': Keyword,
+ 'WORK': Keyword,
+ 'WRITE': Keyword,
+
+ 'YEAR': Keyword,
+
+ 'ZONE': Keyword,
+
+
+ 'ARRAY': Name.Builtin,
+ 'BIGINT': Name.Builtin,
+ 'BINARY': Name.Builtin,
+ 'BIT': Name.Builtin,
+ 'BLOB': Name.Builtin,
+ 'BOOLEAN': Name.Builtin,
+ 'CHAR': Name.Builtin,
+ 'CHARACTER': Name.Builtin,
+ 'DATE': Name.Builtin,
+ 'DEC': Name.Builtin,
+ 'DECIMAL': Name.Builtin,
+ 'FLOAT': Name.Builtin,
+ 'INT': Name.Builtin,
+ 'INTEGER': Name.Builtin,
+ 'INTERVAL': Name.Builtin,
+ 'NUMBER': Name.Builtin,
+ 'NUMERIC': Name.Builtin,
+ 'REAL': Name.Builtin,
+ 'SERIAL': Name.Builtin,
+ 'SMALLINT': Name.Builtin,
+ 'VARCHAR': Name.Builtin,
+ 'VARYING': Name.Builtin,
+ 'INT8': Name.Builtin,
+ 'SERIAL8': Name.Builtin,
+ 'TEXT': Name.Builtin,
+ }
+
+
+KEYWORDS_COMMON = {
+ 'SELECT': Keyword.DML,
+ 'INSERT': Keyword.DML,
+ 'DELETE': Keyword.DML,
+ 'UPDATE': Keyword.DML,
+ 'DROP': Keyword.DDL,
+ 'CREATE': Keyword.DDL,
+ 'ALTER': Keyword.DDL,
+
+ 'WHERE': Keyword,
+ 'FROM': Keyword,
+ 'INNER': Keyword,
+ 'JOIN': Keyword,
+ 'AND': Keyword,
+ 'OR': Keyword,
+ 'LIKE': Keyword,
+ 'ON': Keyword,
+ 'IN': Keyword,
+
+ 'BY': Keyword,
+ 'GROUP': Keyword,
+ 'ORDER': Keyword,
+ 'LEFT': Keyword,
+ 'OUTER': Keyword,
+
+ 'IF': Keyword,
+ 'END': Keyword,
+ 'THEN': Keyword,
+ 'LOOP': Keyword,
+ 'AS': Keyword,
+ 'ELSE': Keyword,
+ 'FOR': Keyword,
+
+ 'CASE': Keyword,
+ 'WHEN': Keyword,
+ 'MIN': Keyword,
+ 'MAX': Keyword,
+ 'DISTINCT': Keyword,
+
+ }
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
new file mode 100644
index 0000000..b635fc6
--- /dev/null
+++ b/sqlparse/lexer.py
@@ -0,0 +1,310 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL Lexer"""
+
+# This code is based on the SqlLexer in pygments.
+# http://pygments.org/
+# It's separated from the rest of pygments to increase performance
+# and to allow some customizations.
+
+import re
+
+from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
+from sqlparse.tokens import *
+from sqlparse.tokens import _TokenType
+
+
+class include(str):
+ pass
+
+class combined(tuple):
+ """Indicates a state combined from multiple states."""
+
+ def __new__(cls, *args):
+ return tuple.__new__(cls, args)
+
+ def __init__(self, *args):
+ # tuple.__init__ doesn't do anything
+ pass
+
+def is_keyword(value):
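+    # Case-insensitive lookup: common keywords take precedence over the
+    # full keyword table; anything unknown is returned as a plain Name.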
+ test = value.upper()
+ return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, Name)), value
+
+
+def apply_filters(stream, filters, lexer=None):
+ """
+ Use this method to apply an iterable of filters to
+ a stream. If lexer is given it's forwarded to the
+ filter, otherwise the filter receives `None`.
+ """
+ def _apply(filter_, stream):
+ for token in filter_.filter(lexer, stream):
+ yield token
+ for filter_ in filters:
+ stream = _apply(filter_, stream)
+ return stream
+
+
+class LexerMeta(type):
+ """
+ Metaclass for Lexer, creates the self._tokens attribute from
+ self.tokens on the first instantiation.
+ """
+
+ def _process_state(cls, unprocessed, processed, state):
+ assert type(state) is str, "wrong state name %r" % state
+ assert state[0] != '#', "invalid state name %r" % state
+ if state in processed:
+ return processed[state]
+ tokens = processed[state] = []
+ rflags = cls.flags
+ for tdef in unprocessed[state]:
+ if isinstance(tdef, include):
+ # it's a state reference
+ assert tdef != state, "circular state reference %r" % state
+ tokens.extend(cls._process_state(unprocessed, processed, str(tdef)))
+ continue
+
+ assert type(tdef) is tuple, "wrong rule def %r" % tdef
+
+ try:
+ rex = re.compile(tdef[0], rflags).match
+ except Exception, err:
+ raise ValueError("uncompilable regex %r in state %r of %r: %s" %
+ (tdef[0], state, cls, err))
+
+ assert type(tdef[1]) is _TokenType or callable(tdef[1]), \
+ 'token type must be simple type or callable, not %r' % (tdef[1],)
+
+ if len(tdef) == 2:
+ new_state = None
+ else:
+ tdef2 = tdef[2]
+ if isinstance(tdef2, str):
+ # an existing state
+ if tdef2 == '#pop':
+ new_state = -1
+ elif tdef2 in unprocessed:
+ new_state = (tdef2,)
+ elif tdef2 == '#push':
+ new_state = tdef2
+ elif tdef2[:5] == '#pop:':
+ new_state = -int(tdef2[5:])
+ else:
+ assert False, 'unknown new state %r' % tdef2
+ elif isinstance(tdef2, combined):
+ # combine a new state from existing ones
+ new_state = '_tmp_%d' % cls._tmpname
+ cls._tmpname += 1
+ itokens = []
+ for istate in tdef2:
+ assert istate != state, 'circular state ref %r' % istate
+ itokens.extend(cls._process_state(unprocessed,
+ processed, istate))
+ processed[new_state] = itokens
+ new_state = (new_state,)
+ elif isinstance(tdef2, tuple):
+ # push more than one state
+ for state in tdef2:
+ assert (state in unprocessed or
+ state in ('#pop', '#push')), \
+ 'unknown new state ' + state
+ new_state = tdef2
+ else:
+ assert False, 'unknown new state def %r' % tdef2
+ tokens.append((rex, tdef[1], new_state))
+ return tokens
+
+ def process_tokendef(cls):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ processed = cls._all_tokens[cls.__name__] = {}
+ #tokendefs = tokendefs or cls.tokens[name]
+ for state in cls.tokens.keys():
+ cls._process_state(cls.tokens, processed, state)
+ return processed
+
+ def __call__(cls, *args, **kwds):
+ if not hasattr(cls, '_tokens'):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ if hasattr(cls, 'token_variants') and cls.token_variants:
+ # don't process yet
+ pass
+ else:
+ cls._tokens = cls.process_tokendef()
+
+ return type.__call__(cls, *args, **kwds)
+
+
+
+
+class Lexer:
+
+ __metaclass__ = LexerMeta
+
+ encoding = 'utf-8'
+ stripall = False
+ stripnl = False
+ tabsize = 0
+ flags = re.IGNORECASE
+
+ tokens = {
+ 'root': [
+ (r'--.*?(\r|\n|\r\n)', Comment.Single),
+ (r'(\r|\n|\r\n)', Newline),
+ (r'\s+', Whitespace),
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r':=', Assignment),
+ (r'::', Punctuation),
+ (r'[*]', Wildcard),
+            (r'[+/<>=~!@#%^&|`?-]', Operator),
+ (r'[0-9]+', Number.Integer),
+ # TODO: Backslash escapes?
+ (r"'(''|[^'])*'", String.Single),
+ (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL
+ (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN', Keyword),
+ (r'END( IF| LOOP)?', Keyword),
+ (r'CREATE( OR REPLACE)?', Keyword.DDL),
+ (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword),
+ (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', Name.Builtin),
+ (r'[;:()\[\],\.]', Punctuation),
+ ],
+ 'multiline-comments': [
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r'\*/', Comment.Multiline, '#pop'),
+ (r'[^/\*]+', Comment.Multiline),
+ (r'[/*]', Comment.Multiline)
+ ]
+ }
+
+ def __init__(self):
+ self.filters = []
+
+ def add_filter(self, filter_, **options):
+ from sqlparse.filters import Filter
+ if not isinstance(filter_, Filter):
+ filter_ = filter_(**options)
+ self.filters.append(filter_)
+
+ def get_tokens(self, text, unfiltered=False):
+ """
+ Return an iterable of (tokentype, value) pairs generated from
+ `text`. If `unfiltered` is set to `True`, the filtering mechanism
+ is bypassed even if filters are defined.
+
+        Also preprocesses the text: tabs are expanded, the text is
+        stripped if requested, and registered filters are applied.
+ """
+ if not isinstance(text, unicode):
+ if self.encoding == 'guess':
+ try:
+ text = text.decode('utf-8')
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+ except UnicodeDecodeError:
+ text = text.decode('latin1')
+ elif self.encoding == 'chardet':
+ try:
+ import chardet
+ except ImportError:
+ raise ImportError('To enable chardet encoding guessing, '
+ 'please install the chardet library '
+ 'from http://chardet.feedparser.org/')
+ enc = chardet.detect(text)
+ text = text.decode(enc['encoding'])
+ else:
+ text = text.decode(self.encoding)
+ if self.stripall:
+ text = text.strip()
+ elif self.stripnl:
+ text = text.strip('\n')
+ if self.tabsize > 0:
+ text = text.expandtabs(self.tabsize)
+# if not text.endswith('\n'):
+# text += '\n'
+
+ def streamer():
+ for i, t, v in self.get_tokens_unprocessed(text):
+ yield t, v
+ stream = streamer()
+ if not unfiltered:
+ stream = apply_filters(stream, self.filters, self)
+ return stream
+
+
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """
+ Split ``text`` into (tokentype, text) pairs.
+
+        ``stack`` is the initial stack (default: ``['root']``)
+ """
+ pos = 0
+ tokendefs = self._tokens
+ statestack = list(stack)
+ statetokens = tokendefs[statestack[-1]]
+ known_names = {}
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ m = rexmatch(text, pos)
+ if m:
+ # print rex.pattern
+ value = m.group()
+ if value in known_names:
+ yield pos, known_names[value], value
+ elif type(action) is _TokenType:
+ yield pos, action, value
+ elif hasattr(action, '__call__'):
+ ttype, value = action(value)
+ known_names[value] = ttype
+ yield pos, ttype, value
+ else:
+ for item in action(self, m):
+ yield item
+ pos = m.end()
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ statestack.pop()
+ elif state == '#push':
+ statestack.append(statestack[-1])
+ else:
+ statestack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del statestack[new_state:]
+ elif new_state == '#push':
+ statestack.append(statestack[-1])
+ else:
+ assert False, "wrong state def: %r" % new_state
+ statetokens = tokendefs[statestack[-1]]
+ break
+ else:
+ try:
+ if text[pos] == '\n':
+ # at EOL, reset state to "root"
+ pos += 1
+ statestack = ['root']
+ statetokens = tokendefs['root']
+ yield pos, Text, u'\n'
+ continue
+ yield pos, Error, text[pos]
+ pos += 1
+ except IndexError:
+ break
+
+
+def tokenize(sql):
+ """Tokenize sql.
+
+    Tokenize *sql* using the :class:`Lexer` and return a stream of
+    ``(token type, value)`` 2-tuples.
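+
+    A rough example of the resulting stream (illustrative only)::
+
+        >>> list(tokenize('select 1'))[:2]  # doctest: +SKIP
+        [(Token.Keyword.DML, u'select'), (Token.Text.Whitespace, u' ')]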
+ """
+ lexer = Lexer()
+ return lexer.get_tokens(sql)
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
new file mode 100644
index 0000000..2c63c41
--- /dev/null
+++ b/sqlparse/tokens.py
@@ -0,0 +1,131 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+# The Token implementation is based on pygment's token system written
+# by Georg Brandl.
+# http://pygments.org/
+
+"""Tokens"""
+
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+
+class _TokenType(tuple):
+ parent = None
+
+ def split(self):
+ buf = []
+ node = self
+ while node is not None:
+ buf.append(node)
+ node = node.parent
+ buf.reverse()
+ return buf
+
+ def __init__(self, *args):
+ # no need to call super.__init__
+ self.subtypes = set()
+
+ def __contains__(self, val):
+ return self is val or (
+ type(val) is self.__class__ and
+ val[:len(self)] == self
+ )
+
+ def __getattr__(self, val):
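+        # Subtypes are created lazily: the first access to e.g.
+        # Token.Keyword.DML creates, caches and returns a new child type.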
+ if not val or not val[0].isupper():
+ return tuple.__getattribute__(self, val)
+ new = _TokenType(self + (val,))
+ setattr(self, val, new)
+ self.subtypes.add(new)
+ new.parent = self
+ return new
+
+ def __hash__(self):
+ return hash(tuple(self))
+
+ def __repr__(self):
+ return 'Token' + (self and '.' or '') + '.'.join(self)
+
+
+Token = _TokenType()
+
+# Special token types
+Text = Token.Text
+Whitespace = Text.Whitespace
+Newline = Whitespace.Newline
+Error = Token.Error
+# Text that doesn't belong to this lexer (e.g. HTML in PHP)
+Other = Token.Other
+
+# Common token types for source code
+Keyword = Token.Keyword
+Name = Token.Name
+Literal = Token.Literal
+String = Literal.String
+Number = Literal.Number
+Punctuation = Token.Punctuation
+Operator = Token.Operator
+Wildcard = Token.Wildcard
+Comment = Token.Comment
+Assignment = Token.Assignment
+
+# Generic types for non-source code
+Generic = Token.Generic
+
+# String and some others are not direct children of Token.
+# Alias them:
+Token.Token = Token
+Token.String = String
+Token.Number = Number
+
+# SQL specific tokens
+DML = Keyword.DML
+DDL = Keyword.DDL
+Command = Keyword.Command
+
+Group = Token.Group
+Group.Parenthesis = Token.Group.Parenthesis
+Group.Comment = Token.Group.Comment
+Group.Where = Token.Group.Where
+
+
+def is_token_subtype(ttype, other):
+ """
+ Return True if ``ttype`` is a subtype of ``other``.
+
+    Exists for backwards compatibility; use ``ttype in other`` instead.
+ """
+ return ttype in other
+
+
+def string_to_tokentype(s):
+ """
+ Convert a string into a token type::
+
+        >>> string_to_tokentype('String.Double')
+        Token.Literal.String.Double
+        >>> string_to_tokentype('Token.Literal.Number')
+        Token.Literal.Number
+        >>> string_to_tokentype('')
+        Token
+
+    Tokens that are already tokens are returned unchanged:
+
+        >>> string_to_tokentype(String)
+        Token.Literal.String
+ """
+ if isinstance(s, _TokenType):
+ return s
+ if not s:
+ return Token
+ node = Token
+ for item in s.split('.'):
+ node = getattr(node, item)
+ return node
+
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/files/begintag.sql b/tests/files/begintag.sql
new file mode 100644
index 0000000..699b365
--- /dev/null
+++ b/tests/files/begintag.sql
@@ -0,0 +1,4 @@
+begin;
+update foo
+ set bar = 1;
+commit; \ No newline at end of file
diff --git a/tests/files/dashcomment.sql b/tests/files/dashcomment.sql
new file mode 100644
index 0000000..0d5ac62
--- /dev/null
+++ b/tests/files/dashcomment.sql
@@ -0,0 +1,5 @@
+select * from user;
+--select * from host;
+select * from user;
+select * -- foo;
+from foo; \ No newline at end of file
diff --git a/tests/files/function.sql b/tests/files/function.sql
new file mode 100644
index 0000000..d19227f
--- /dev/null
+++ b/tests/files/function.sql
@@ -0,0 +1,13 @@
+CREATE OR REPLACE FUNCTION foo(
+ p_in1 VARCHAR
+ , p_in2 INTEGER
+) RETURNS INTEGER AS
+
+ DECLARE
+ v_foo INTEGER;
+ BEGIN
+ SELECT *
+ FROM foo
+ INTO v_foo;
+ RETURN v_foo.id;
+ END; \ No newline at end of file
diff --git a/tests/files/function_psql.sql b/tests/files/function_psql.sql
new file mode 100644
index 0000000..e485f7a
--- /dev/null
+++ b/tests/files/function_psql.sql
@@ -0,0 +1,72 @@
+CREATE OR REPLACE FUNCTION public.delete_data (
+ p_tabelle VARCHAR
+ , p_key VARCHAR
+ , p_value INTEGER
+) RETURNS INTEGER AS
+$$
+DECLARE
+ p_retval INTEGER;
+ v_constraint RECORD;
+ v_count INTEGER;
+ v_data RECORD;
+ v_fieldname VARCHAR;
+ v_sql VARCHAR;
+ v_key VARCHAR;
+ v_value INTEGER;
+BEGIN
+ v_sql := 'SELECT COUNT(*) FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value;
+ --RAISE NOTICE '%', v_sql;
+ EXECUTE v_sql INTO v_count;
+ IF v_count::integer != 0 THEN
+ SELECT att.attname
+ INTO v_key
+ FROM pg_attribute att
+ LEFT JOIN pg_constraint con ON con.conrelid = att.attrelid
+ AND con.conkey[1] = att.attnum
+ AND con.contype = 'p', pg_type typ, pg_class rel, pg_namespace ns
+ WHERE att.attrelid = rel.oid
+ AND att.attnum > 0
+ AND typ.oid = att.atttypid
+ AND att.attisdropped = false
+ AND rel.relname = p_tabelle
+ AND con.conkey[1] = 1
+ AND ns.oid = rel.relnamespace
+ AND ns.nspname = 'public'
+ ORDER BY att.attnum;
+ v_sql := 'SELECT ' || v_key || ' AS id FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value;
+ FOR v_data IN EXECUTE v_sql
+ LOOP
+ --RAISE NOTICE ' -> % %', p_tabelle, v_data.id;
+ FOR v_constraint IN SELECT t.constraint_name
+ , t.constraint_type
+ , t.table_name
+ , c.column_name
+ FROM public.v_table_constraints t
+ , public.v_constraint_columns c
+ WHERE t.constraint_name = c.constraint_name
+ AND t.constraint_type = 'FOREIGN KEY'
+ AND c.table_name = p_tabelle
+ AND t.table_schema = 'public'
+ AND c.table_schema = 'public'
+ LOOP
+ v_fieldname := substring(v_constraint.constraint_name from 1 for length(v_constraint.constraint_name) - length(v_constraint.column_name) - 1);
+ IF (v_constraint.table_name = p_tabelle) AND (p_value = v_data.id) THEN
+                --RAISE NOTICE 'Skip (self-reference)';
+ CONTINUE;
+ ELSE
+ PERFORM delete_data(v_constraint.table_name::varchar, v_fieldname::varchar, v_data.id::integer);
+ END IF;
+ END LOOP;
+ END LOOP;
+ v_sql := 'DELETE FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value;
+ --RAISE NOTICE '%', v_sql;
+ EXECUTE v_sql;
+ p_retval := 1;
+ ELSE
+        --RAISE NOTICE ' -> no records found';
+ p_retval := 0;
+ END IF;
+ RETURN p_retval;
+END;
+$$
+LANGUAGE plpgsql; \ No newline at end of file
diff --git a/tests/files/function_psql2.sql b/tests/files/function_psql2.sql
new file mode 100644
index 0000000..b5d494c
--- /dev/null
+++ b/tests/files/function_psql2.sql
@@ -0,0 +1,7 @@
+CREATE OR REPLACE FUNCTION update_something() RETURNS void AS
+$body$
+BEGIN
+ raise notice 'foo';
+END;
+$body$
+LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER; \ No newline at end of file
diff --git a/tests/files/function_psql3.sql b/tests/files/function_psql3.sql
new file mode 100644
index 0000000..b25d818
--- /dev/null
+++ b/tests/files/function_psql3.sql
@@ -0,0 +1,8 @@
+CREATE OR REPLACE FUNCTION foo() RETURNS integer AS
+$body$
+DECLARE
+BEGIN
+ select * from foo;
+END;
+$body$
+LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER; \ No newline at end of file
diff --git a/tests/run_tests.py b/tests/run_tests.py
new file mode 100755
index 0000000..1c7960e
--- /dev/null
+++ b/tests/run_tests.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Test runner for sqlparse."""
+
+import os
+import sys
+import unittest
+
+sys.path.insert(1, os.path.join(os.path.dirname(__file__), '../'))
+
+
+def main():
+ """Create a TestSuite and run it."""
+ loader = unittest.TestLoader()
+ suite = unittest.TestSuite()
+ fnames = [os.path.split(f)[-1] for f in sys.argv[1:]]
+ for fname in os.listdir(os.path.dirname(__file__)):
+ if (not fname.startswith('test_') or not fname.endswith('.py')
+ or (fnames and fname not in fnames)):
+ continue
+        modname = os.path.splitext(fname)[0]
+        mod = __import__(modname)
+ suite.addTests(loader.loadTestsFromModule(mod))
+ unittest.TextTestRunner(verbosity=2).run(suite)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tests/test_format.py b/tests/test_format.py
new file mode 100644
index 0000000..5748704
--- /dev/null
+++ b/tests/test_format.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+
+from tests.utils import TestCaseBase
+
+import sqlparse
+
+
+class TestFormat(TestCaseBase):
+
+ def test_keywordcase(self):
+ sql = 'select * from bar; -- select foo\n'
+ res = sqlparse.format(sql, keyword_case='upper')
+ self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- select foo\n')
+ res = sqlparse.format(sql, keyword_case='capitalize')
+ self.ndiffAssertEqual(res, 'Select * From bar; -- select foo\n')
+ res = sqlparse.format(sql.upper(), keyword_case='lower')
+ self.ndiffAssertEqual(res, 'select * from BAR; -- SELECT FOO\n')
+ self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql,
+ keyword_case='foo')
+
+ def test_identifiercase(self):
+ sql = 'select * from bar; -- select foo\n'
+ res = sqlparse.format(sql, identifier_case='upper')
+ self.ndiffAssertEqual(res, 'select * from BAR; -- select foo\n')
+ res = sqlparse.format(sql, identifier_case='capitalize')
+ self.ndiffAssertEqual(res, 'select * from Bar; -- select foo\n')
+ res = sqlparse.format(sql.upper(), identifier_case='lower')
+ self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- SELECT FOO\n')
+ self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql,
+ identifier_case='foo')
+ sql = 'select * from "foo"."bar"'
+ res = sqlparse.format(sql, identifier_case="upper")
+ self.ndiffAssertEqual(res, 'select * from "FOO"."BAR"')
+
+ def test_strip_comments_single(self):
+ sql = 'select *-- statement starts here\nfrom foo'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select * from foo')
+ sql = 'select * -- statement starts here\nfrom foo'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select * from foo')
+ sql = 'select-- foo\nfrom -- bar\nwhere'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select from where')
+
+ def test_strip_comments_multi(self):
+ sql = '/* sql starts here */\nselect'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select')
+ sql = '/* sql starts here */ select'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select')
+ sql = '/*\n * sql starts here\n */\nselect'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select')
+ sql = 'select (/* sql starts here */ select 2)'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select (select 2)')
+
+ def test_strip_ws(self):
+ f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
+ s = 'select\n* from foo\n\twhere ( 1 = 2 )\n'
+ self.ndiffAssertEqual(f(s), 'select * from foo where (1 = 2)')
+ s = 'select -- foo\nfrom bar\n'
+ self.ndiffAssertEqual(f(s), 'select -- foo\nfrom bar')
+
+
+class TestFormatReindent(TestCaseBase):
+
+ def test_stmts(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select foo; select bar'
+ self.ndiffAssertEqual(f(s), 'select foo;\n\nselect bar')
+ s = 'select foo'
+ self.ndiffAssertEqual(f(s), 'select foo')
+ s = 'select foo; -- test\n select bar'
+ self.ndiffAssertEqual(f(s), 'select foo; -- test\n\nselect bar')
+
+ def test_keywords(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select * from foo union select * from bar;'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'union',
+ 'select *',
+ 'from bar;']))
+
+ def test_parenthesis(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select count(*) from (select * from foo);'
+ self.ndiffAssertEqual(f(s),
+ '\n'.join(['select count(*)',
+ 'from',
+ ' (select *',
+ ' from foo);',
+ ])
+ )
+
+ def test_where(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select * from foo where bar = 1 and baz = 2 or bzz = 3;'
+ self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n'
+ 'where bar = 1\n'
+ ' and baz = 2\n'
+ ' or bzz = 3;'))
+ s = 'select * from foo where bar = 1 and (baz = 2 or bzz = 3);'
+ self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n'
+ 'where bar = 1\n'
+ ' and (baz = 2\n'
+ ' or bzz = 3);'))
+
+ def test_join(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select * from foo join bar on 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'join bar on 1 = 2']))
+ s = 'select * from foo inner join bar on 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'inner join bar on 1 = 2']))
+ s = 'select * from foo left outer join bar on 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'left outer join bar on 1 = 2']
+ ))
+
+ def test_identifier_list(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select foo, bar, baz from table1, table2 where 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select foo,',
+ ' bar,',
+ ' baz',
+ 'from table1,',
+ ' table2',
+ 'where 1 = 2']))
+
+ def test_case(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'case when foo = 1 then 2 when foo = 3 then 4 else 5 end'
+ self.ndiffAssertEqual(f(s), '\n'.join(['case when foo = 1 then 2',
+ ' when foo = 3 then 4',
+ ' else 5',
+ 'end']))
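
TestFormat and TestFormatReindent together pin down how the individual formatting options behave; nothing above asserts how they compose, so the following is only a hedged sketch of combined use, with the output guessed from the individual rules:

    import sqlparse

    sql = 'select foo, bar from table1 where foo = 1'
    res = sqlparse.format(sql, reindent=True, keyword_case='upper')
    # Expected, if the keyword and reindent rules compose:
    #   SELECT foo,
    #          bar
    #   FROM table1
    #   WHERE foo = 1
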
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
new file mode 100644
index 0000000..fc3bea5
--- /dev/null
+++ b/tests/test_grouping.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+
+import sqlparse
+from sqlparse.engine.grouping import Identifier, Parenthesis, Where
+
+from tests.utils import TestCaseBase
+
+
+class TestGrouping(TestCaseBase):
+
+ def test_parenthesis(self):
+ s = 'x1 (x2 (x3) x2) foo (y2) bar'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, str(parsed))
+ self.assertEqual(len(parsed.tokens), 9)
+ self.assert_(isinstance(parsed.tokens[2], Parenthesis))
+ self.assert_(isinstance(parsed.tokens[-3], Parenthesis))
+ self.assertEqual(len(parsed.tokens[2].tokens), 7)
+ self.assert_(isinstance(parsed.tokens[2].tokens[3], Parenthesis))
+ self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3)
+
+ def test_comments(self):
+ s = '/*\n * foo\n */ \n bar'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, unicode(parsed))
+ self.assertEqual(len(parsed.tokens), 2)
+
+ def test_identifiers(self):
+ s = 'select foo.bar from "myscheme"."table" where fail. order'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, parsed.to_unicode())
+ self.assert_(isinstance(parsed.tokens[2], Identifier))
+ self.assert_(isinstance(parsed.tokens[6], Identifier))
+ self.assert_(isinstance(parsed.tokens[8], Where))
+ s = 'select * from foo where foo.id = 1'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, parsed.to_unicode())
+ self.assert_(isinstance(parsed.tokens[-1].tokens[2], Identifier))
+ s = 'select * from (select "foo"."id" from foo)'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, parsed.to_unicode())
+ self.assert_(isinstance(parsed.tokens[-1].tokens[3], Identifier))
+
+ def test_where(self):
+ s = 'select * from foo where bar = 1 order by id desc'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(len(p.tokens), 16)
+ s = 'select x from (select y from foo where bar = 1) z'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertTrue(isinstance(p.tokens[-3].tokens[-1], Where))
+
+ def test_typecast(self):
+ s = 'select foo::integer from bar'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_typecast(), 'integer')
+ self.assertEqual(p.tokens[2].get_name(), 'foo')
+ s = 'select (current_database())::information_schema.sql_identifier'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_typecast(),
+ 'information_schema.sql_identifier')
+
+ def test_alias(self):
+ s = 'select foo as bar from mytable'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_real_name(), 'foo')
+ self.assertEqual(p.tokens[2].get_alias(), 'bar')
+ s = 'select foo from mytable t1'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[6].get_real_name(), 'mytable')
+ self.assertEqual(p.tokens[6].get_alias(), 't1')
+ s = 'select foo::integer as bar from mytable'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_alias(), 'bar')
+ s = ('SELECT DISTINCT '
+ '(current_database())::information_schema.sql_identifier AS view')
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[4].get_alias(), 'view')
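
The grouping tests treat a parsed statement as a token tree whose grouped nodes carry accessors. A short sketch of the traversal they imply, reusing the indices from test_alias:

    import sqlparse

    p = sqlparse.parse('select foo as bar from mytable')[0]
    ident = p.tokens[2]             # the grouped Identifier node
    print ident.get_real_name()     # 'foo'
    print ident.get_alias()         # 'bar'
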
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..536b6f6
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+"""Tests sqlparse function."""
+
+from utils import TestCaseBase
+
+import sqlparse
+
+
+class SQLParseTest(TestCaseBase):
+ """Tests sqlparse.parse()."""
+
+ def test_tokenize(self):
+ sql = 'select * from foo;'
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.assertEqual(str(stmts[0]), sql)
+
+ def test_multistatement(self):
+ sql1 = 'select * from foo;'
+ sql2 = 'select * from bar;'
+ stmts = sqlparse.parse(sql1+sql2)
+ self.assertEqual(len(stmts), 2)
+ self.assertEqual(str(stmts[0]), sql1)
+ self.assertEqual(str(stmts[1]), sql2)
+
+ def test_newlines(self):
+ sql = u'select\n*from foo;'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
+ sql = u'select\r\n*from foo'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
+ sql = u'select\r*from foo'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
+ sql = u'select\r\n*from foo\n'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
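
test_newlines pins down that parsing is lossless for every line-ending style; a compact restatement of the same guarantee:

    import sqlparse

    for sql in (u'select\n1', u'select\r\n1', u'select\r1'):
        assert unicode(sqlparse.parse(sql)[0]) == sql
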
diff --git a/tests/test_split.py b/tests/test_split.py
new file mode 100644
index 0000000..782b226
--- /dev/null
+++ b/tests/test_split.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+"""Tests splitting functions."""
+
+from utils import load_file, TestCaseBase
+
+import sqlparse
+
+
+class SQLSplitTest(TestCaseBase):
+ """Tests sqlparse.sqlsplit()."""
+
+ _sql1 = 'select * from foo;'
+ _sql2 = 'select * from bar;'
+
+ def test_split_semicolon(self):
+ sql2 = 'select * from foo where bar = \'foo;bar\';'
+ stmts = sqlparse.parse(''.join([self._sql1, sql2]))
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(unicode(stmts[0]), self._sql1)
+ self.ndiffAssertEqual(unicode(stmts[1]), sql2)
+
+ def test_create_function(self):
+ sql = load_file('function.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_create_function_psql(self):
+ sql = load_file('function_psql.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_create_function_psql3(self):
+ sql = load_file('function_psql3.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_create_function_psql2(self):
+ sql = load_file('function_psql2.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_dashcomments(self):
+ sql = load_file('dashcomment.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 3)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_begintag(self):
+ sql = load_file('begintag.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 3)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_dropif(self):
+ sql = 'DROP TABLE IF EXISTS FOO;\n\nSELECT * FROM BAR;'
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_comment_with_umlaut(self):
+ sql = (u'select * from foo;\n'
+ u'-- Testing an umlaut: ä\n'
+ u'select * from bar;')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_comment_end_of_line(self):
+ sql = ('select * from foo; -- foo\n'
+ 'select * from bar;')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+ # make sure the comment belongs to first query
+ self.ndiffAssertEqual(unicode(stmts[0]), 'select * from foo; -- foo\n')
+
+ def test_casewhen(self):
+ sql = ('SELECT case when val = 1 then 2 else null end as foo;\n'
+ 'comment on table actor is \'The actor table.\';')
+ stmts = sqlparse.split(sql)
+ self.assertEqual(len(stmts), 2)
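
test_casewhen uses sqlparse.split() rather than parse(): the string-level counterpart for callers that only need statement boundaries. A minimal sketch, with the exact return format (presumably the raw statement strings) an assumption:

    import sqlparse

    sql = 'select * from foo; select * from bar;'
    for stmt in sqlparse.split(sql):
        print stmt   # one statement per iteration
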
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
new file mode 100644
index 0000000..7106b3c
--- /dev/null
+++ b/tests/test_tokenize.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import types
+
+from sqlparse import lexer
+from sqlparse.tokens import Keyword, Punctuation
+
+
+class TestTokenize(unittest.TestCase):
+
+ def test_simple(self):
+ sql = 'select * from foo;'
+ stream = lexer.tokenize(sql)
+ self.assert_(type(stream) is types.GeneratorType)
+ tokens = list(stream)
+ self.assertEqual(len(tokens), 8)
+ self.assertEqual(len(tokens[0]), 2)
+ self.assertEqual(tokens[0], (Keyword.DML, u'select'))
+ self.assertEqual(tokens[-1], (Punctuation, u';'))
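
As test_simple shows, lexer.tokenize() yields (tokentype, value) pairs lazily, so a caller can pull tokens one at a time instead of materializing the full list:

    from sqlparse import lexer

    stream = lexer.tokenize('select * from foo;')
    first = stream.next()   # (Keyword.DML, u'select'); nothing else is lexed yet
    rest = list(stream)     # the remaining seven (tokentype, value) pairs
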
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..a78b460
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+"""Helpers for testing."""
+
+import codecs
+import difflib
+import os
+import unittest
+from StringIO import StringIO
+
+NL = '\n'
+DIR_PATH = os.path.abspath(os.path.dirname(__file__))
+PARENT_DIR = os.path.dirname(DIR_PATH)
+FILES_DIR = os.path.join(DIR_PATH, 'files')
+
+
+def load_file(filename, encoding='utf-8'):
+ """Opens filename with encoding and return it's contents."""
+ f = codecs.open(os.path.join(FILES_DIR, filename), 'r', encoding)
+ data = f.read()
+ f.close()
+ return data
+
+
+class TestCaseBase(unittest.TestCase):
+ """Base class for test cases with some additional checks."""
+
+ # Adapted from Python's tests.
+ def ndiffAssertEqual(self, first, second):
+ """Like failUnlessEqual except use ndiff for readable output."""
+ if first != second:
+ sfirst = unicode(first)
+ ssecond = unicode(second)
+ diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines())
+ fp = StringIO()
+ print >> fp, NL, NL.join(diff)
+ print fp.getvalue()
+ raise self.failureException(fp.getvalue())
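
A new test module only needs to subclass TestCaseBase to get the diff-based assertion; a minimal sketch:

    from tests.utils import TestCaseBase

    import sqlparse


    class TestExample(TestCaseBase):

        def test_roundtrip(self):
            # On mismatch, ndiffAssertEqual raises with a readable ndiff.
            sql = 'select * from foo;'
            self.ndiffAssertEqual(unicode(sqlparse.parse(sql)[0]), sql)
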