-rw-r--r--  AUTHORS | 5
-rw-r--r--  CHANGES | 3
-rw-r--r--  COPYING | 25
-rw-r--r--  MANIFEST.in | 3
-rw-r--r--  Makefile | 18
-rw-r--r--  README | 45
-rw-r--r--  TODO | 1
-rwxr-xr-x  bin/sqlformat | 103
-rw-r--r--  docs/Makefile | 88
-rw-r--r--  docs/source/api.rst | 12
-rw-r--r--  docs/source/changes.rst | 7
-rw-r--r--  docs/source/cmdline.rst | 8
-rw-r--r--  docs/source/conf.py | 197
-rw-r--r--  docs/source/index.rst | 26
-rw-r--r--  docs/source/intro.rst | 89
-rw-r--r--  docs/tango/static/bgfooter.png | bin 0 -> 434 bytes
-rw-r--r--  docs/tango/static/bgtop.png | bin 0 -> 430 bytes
-rw-r--r--  docs/tango/static/default.css_t | 90
-rw-r--r--  docs/tango/theme.conf | 28
-rw-r--r--  extras/appengine/Makefile | 51
-rw-r--r--  extras/appengine/README | 22
-rw-r--r--  extras/appengine/__init__.py | 0
-rw-r--r--  extras/appengine/app.yaml | 27
-rw-r--r--  extras/appengine/examples/customers.sql | 1
-rw-r--r--  extras/appengine/examples/multiple_inserts.sql | 1
-rw-r--r--  extras/appengine/examples/pg_view.sql | 1
-rw-r--r--  extras/appengine/examples/subquery.sql | 1
-rw-r--r--  extras/appengine/examples/subquery2.sql | 1
-rw-r--r--  extras/appengine/index.yaml | 0
-rw-r--r--  extras/appengine/main.py | 131
-rwxr-xr-x  extras/appengine/make_release.sh | 49
-rw-r--r--  extras/appengine/settings.py | 37
-rw-r--r--  extras/appengine/sqlformat/__init__.py | 0
-rw-r--r--  extras/appengine/sqlformat/urls.py | 11
-rw-r--r--  extras/appengine/sqlformat/views.py | 204
-rw-r--r--  extras/appengine/static/bg_options.png | bin 0 -> 202 bytes
-rw-r--r--  extras/appengine/static/bgfieldset.png | bin 0 -> 227 bytes
-rw-r--r--  extras/appengine/static/bgfooter.png | bin 0 -> 434 bytes
-rw-r--r--  extras/appengine/static/bgtop.png | bin 0 -> 430 bytes
-rw-r--r--  extras/appengine/static/blank.gif | bin 0 -> 64 bytes
-rw-r--r--  extras/appengine/static/canvas.html | 114
-rw-r--r--  extras/appengine/static/hotkeys.js | 1
-rw-r--r--  extras/appengine/static/img_loading.gif | bin 0 -> 1348 bytes
-rw-r--r--  extras/appengine/static/jquery.textarearesizer.compressed.js | 1
-rw-r--r--  extras/appengine/static/loading.gif | bin 0 -> 4331 bytes
-rw-r--r--  extras/appengine/static/lynx_screenshot.png | bin 0 -> 66017 bytes
-rw-r--r--  extras/appengine/static/pygments.css | 59
-rw-r--r--  extras/appengine/static/resize-grip.png | bin 0 -> 167 bytes
-rw-r--r--  extras/appengine/static/robots.txt | 8
-rw-r--r--  extras/appengine/static/rpc_relay.html | 1
-rw-r--r--  extras/appengine/static/script.js | 103
-rw-r--r--  extras/appengine/static/sqlformat_client_example.py | 17
-rw-r--r--  extras/appengine/static/styles.css | 245
-rw-r--r--  extras/appengine/templates/about.html | 44
-rw-r--r--  extras/appengine/templates/api.html | 50
-rw-r--r--  extras/appengine/templates/index.html | 107
-rw-r--r--  extras/appengine/templates/master.html | 103
-rw-r--r--  extras/appengine/templates/python-client-example.html | 17
-rw-r--r--  extras/appengine/templates/source.html | 56
-rw-r--r--  extras/sqlformat.png | bin 0 -> 5359 bytes
-rw-r--r--  extras/sqlformat.svg | 115
-rwxr-xr-x  setup.py | 28
-rw-r--r--  sqlparse/__init__.py | 65
-rw-r--r--  sqlparse/dialects.py | 88
-rw-r--r--  sqlparse/engine/__init__.py | 81
-rw-r--r--  sqlparse/engine/_grouping.py | 499
-rw-r--r--  sqlparse/engine/filter.py | 98
-rw-r--r--  sqlparse/engine/grouping.py | 537
-rw-r--r--  sqlparse/filters.py | 432
-rw-r--r--  sqlparse/formatter.py | 163
-rw-r--r--  sqlparse/keywords.py | 589
-rw-r--r--  sqlparse/lexer.py | 310
-rw-r--r--  sqlparse/tokens.py | 131
-rw-r--r--  tests/__init__.py | 0
-rw-r--r--  tests/files/begintag.sql | 4
-rw-r--r--  tests/files/dashcomment.sql | 5
-rw-r--r--  tests/files/function.sql | 13
-rw-r--r--  tests/files/function_psql.sql | 72
-rw-r--r--  tests/files/function_psql2.sql | 7
-rw-r--r--  tests/files/function_psql3.sql | 8
-rwxr-xr-x  tests/run_tests.py | 31
-rw-r--r--  tests/test_format.py | 146
-rw-r--r--  tests/test_grouping.py | 86
-rw-r--r--  tests/test_parse.py | 39
-rw-r--r--  tests/test_split.py | 88
-rw-r--r--  tests/test_tokenize.py | 21
-rw-r--r--  tests/utils.py | 38
87 files changed, 5905 insertions(+), 0 deletions(-)
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..4aec012
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,5 @@
+python-sqlparse is written and maintained by Andi Albrecht <albrecht.andi@gmail.com>.
+
+This module contains code (namely the lexer and filter mechanism) from
+the pygments project that was written by Georg Brandl.
+
diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..4dfa2e5
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,3 @@
+Release 0.1.0 (In Development)
+------------------------------
+ * Initial. \ No newline at end of file
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..0334d9f
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,25 @@
+Copyright (c) 2008, Andi Albrecht <albrecht.andi@gmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the authors nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..60ab652
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include COPYING
+include test.py
+recursive-include docs *.rst \ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..868043f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+# Makefile to simplify some common development tasks.
+# Run 'make help' for a list of commands.
+
+PYTHON=`which python`
+
+default: help
+
+help:
+ @echo "Available commands:"
+ @sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' <Makefile | sort
+
+test:
+ $(PYTHON) tests/run_tests.py
+
+clean:
+ $(PYTHON) setup.py clean
+ find . -name '*.pyc' -delete
+ find . -name '*~' -delete \ No newline at end of file
diff --git a/README b/README
new file mode 100644
index 0000000..10c8c1a
--- /dev/null
+++ b/README
@@ -0,0 +1,45 @@
+python-sqlparse - Parse SQL statements
+======================================
+
+sqlparse currently supports only one feature: splitting a series
+of SQL statements into single statements.
+
+Parts of the code are based on pygments written by Georg Brandl and others.
+pygments-Homepage: http://pygments.org/
+
+Run Tests
+---------
+
+ python tests/run_tests.py
+
+
+Create Documentation
+--------------------
+
+You need `Sphinx`_ installed on your system to build the documentation.
+
+ cd docs/
+ make html
+
+
+Install
+-------
+
+Run
+
+ python setup.py install
+
+with root privileges to install python-sqlparse on your system.
+
+
+Links
+-----
+
+Source code: http://github.com/andialbrecht/python-sqlparse
+
+
+python-sqlparse is licensed under the BSD license:
+http://www.opensource.org/licenses/bsd-license.php
+
+
+.. _Sphinx: http://sphinx.pocoo.org/
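[Editor's note: the splitting feature mentioned at the top of this README maps to
sqlparse.split; a minimal sketch based on the usage shown in docs/source/intro.rst
further down in this diff (Python 2, matching the codebase):]

    >>> import sqlparse
    >>> sqlparse.split('select * from foo; select * from bar;')
    [u'select * from foo; ', u'select * from bar;']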
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..77f09f5
--- /dev/null
+++ b/TODO
@@ -0,0 +1 @@
+- Add GQL dialect. \ No newline at end of file
diff --git a/bin/sqlformat b/bin/sqlformat
new file mode 100755
index 0000000..8d219a5
--- /dev/null
+++ b/bin/sqlformat
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+import optparse
+import os
+import sys
+
+import sqlparse
+
+
+_CASE_CHOICES = ['upper', 'lower', 'capitalize']
+
+
+parser = optparse.OptionParser(usage='%prog [OPTIONS] FILE, ...',
+ version='%%prog %s' % sqlparse.__version__)
+parser.set_description(('Format FILE according to OPTIONS. Use "-" as FILE '
+ 'to read from stdin.'))
+parser.add_option('-v', '--verbose', dest='verbose', action='store_true')
+parser.add_option('-o', '--outfile', dest='outfile', metavar='FILE',
+ help='write output to FILE (defaults to stdout)')
+group = parser.add_option_group('Formatting Options')
+group.add_option('-k', '--keywords', metavar='CHOICE',
+ dest='keyword_case', choices=_CASE_CHOICES,
+ help=('change case of keywords, CHOICE is one of %s'
+ % ', '.join('"%s"' % x for x in _CASE_CHOICES)))
+group.add_option('-i', '--identifiers', metavar='CHOICE',
+ dest='identifier_case', choices=_CASE_CHOICES,
+ help=('change case of identifiers, CHOICE is one of %s'
+ % ', '.join('"%s"' % x for x in _CASE_CHOICES)))
+group.add_option('-l', '--language', metavar='LANG',
+ dest='output_format', choices=['python', 'php'],
+ help=('output a snippet in programming language LANG, '
+ 'choices are "python", "php"'))
+group.add_option('--strip-comments', dest='strip_comments',
+ action='store_true', default=False,
+ help='remove comments')
+group.add_option('-r', '--reindent', dest='reindent',
+ action='store_true', default=False,
+ help='reindent statements')
+group.add_option('--indent_width', dest='indent_width', default=2,
+ help='indentation width (defaults to 2 spaces)')
+
+_FORMATTING_GROUP = group
+
+
+def _error(msg, exit_=None):
+ """Print msg and optionally exit with return code exit_."""
+ print >>sys.stderr, '[ERROR] %s' % msg
+ if exit_ is not None:
+ sys.exit(exit_)
+
+
+def _build_formatter_opts(options):
+ """Convert command line options to dictionary."""
+ d = {}
+ for option in _FORMATTING_GROUP.option_list:
+ d[option.dest] = getattr(options, option.dest)
+ return d
+
+
+def main():
+ options, args = parser.parse_args()
+ if options.verbose:
+ print >>sys.stderr, 'Verbose mode'
+
+ if len(args) != 1:
+ _error('No input data.')
+ parser.print_usage()
+ sys.exit(1)
+
+ if '-' in args: # read from stdin
+ data = sys.stdin.read()
+ else:
+ try:
+ data = open(args[0]).read()
+ except IOError, err:
+ _error('Failed to read %s: %s' % (args[0], err), exit_=1)
+
+ if options.outfile:
+ try:
+ stream = open(options.outfile, 'w')
+ except IOError, err:
+ _error('Failed to open %s: %s' % (options.outfile, err), exit_=1)
+ else:
+ stream = sys.stdout
+
+ formatter_opts = _build_formatter_opts(options)
+ try:
+ formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
+ except sqlparse.SQLParseError, err:
+ _error('Invalid options: %s' % err, exit_=1)
+
+ stream.write(sqlparse.format(data, **formatter_opts).encode('utf-8',
+ 'replace'))
+ stream.flush()
+
+
+if __name__ == '__main__':
+ main()
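[Editor's note: the script above is a thin wrapper around the library — it collects
the formatting option group into a dict, validates it with
sqlparse.formatter.validate_options, and hands the result to sqlparse.format (on the
command line, e.g. `sqlformat -r -k upper query.sql`, where query.sql is a
hypothetical input file). A minimal programmatic sketch of the same pipeline,
assuming the modules shown elsewhere in this diff:]

    import sqlparse
    import sqlparse.formatter

    opts = {'reindent': True, 'keyword_case': 'upper', 'indent_width': 2}
    # validate_options raises sqlparse.SQLParseError on bad values
    opts = sqlparse.formatter.validate_options(opts)
    sql = 'select * from foo where id in (select id from bar);'
    print sqlparse.format(sql, **opts)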
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..cb23107
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,88 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ -rm -rf build/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html
+ @echo
+ @echo "Build finished. The HTML pages are in build/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) build/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in build/dirhtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in build/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) build/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in build/qthelp, like this:"
+ @echo "# qcollectiongenerator build/qthelp/python-sqlparse.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile build/qthelp/python-sqlparse.qhc"
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in build/latex."
+ @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+ "run these through (pdf)latex."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes
+ @echo
+ @echo "The overview file is in build/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in build/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) build/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in build/doctest/output.txt."
diff --git a/docs/source/api.rst b/docs/source/api.rst
new file mode 100644
index 0000000..3bce389
--- /dev/null
+++ b/docs/source/api.rst
@@ -0,0 +1,12 @@
+:mod:`sqlparse` -- Parse SQL statements
+=======================================
+
+The :mod:`sqlparse` module provides the following functions on module-level.
+
+.. autofunction:: sqlparse.split
+
+.. autofunction:: sqlparse.format
+
+.. autofunction:: sqlparse.parse
+
+
diff --git a/docs/source/changes.rst b/docs/source/changes.rst
new file mode 100644
index 0000000..5811c57
--- /dev/null
+++ b/docs/source/changes.rst
@@ -0,0 +1,7 @@
+.. _changes:
+
+Changes in python-sqlparse
+==========================
+
+.. include:: ../../CHANGES
+
diff --git a/docs/source/cmdline.rst b/docs/source/cmdline.rst
new file mode 100644
index 0000000..93a35a7
--- /dev/null
+++ b/docs/source/cmdline.rst
@@ -0,0 +1,8 @@
+``sqlformat`` -- Command Line Script
+===========================================
+
+The :mod:`sqlparse` module is shipped with the script
+:program:`sqlformat` which provides a command line interface to the formatting
+functions.
+
+.. todo:: Describe options and example usage.
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..b84245a
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+#
+# python-sqlparse documentation build configuration file, created by
+# sphinx-quickstart on Thu Feb 26 08:19:28 2009.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.append(os.path.abspath('.'))
+sys.path.append(os.path.abspath('../../'))
+
+# -- General configuration -----------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage',
+ 'sphinx.ext.autosummary']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'python-sqlparse'
+copyright = u'2009, Andi Albrecht'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.1.0'
+# The full version, including alpha/beta/rc tags.
+release = '0.1.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of documents that shouldn't be included in the build.
+#unused_docs = []
+
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. Major themes that come with
+# Sphinx are currently 'default' and 'sphinxdoc'.
+html_theme = 'sphinxdoc'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = [os.path.abspath('../')]
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_use_modindex = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'python-sqlparsedoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('index', 'python-sqlparse.tex', ur'python-sqlparse Documentation',
+ ur'Andi Albrecht', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_use_modindex = True
+todo_include_todos = True
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..40e99e0
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,26 @@
+.. python-sqlparse documentation master file, created by
+ sphinx-quickstart on Thu Feb 26 08:19:28 2009.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome to python-sqlparse's documentation!
+===========================================
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+ intro
+ api
+ cmdline
+ changes
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/source/intro.rst b/docs/source/intro.rst
new file mode 100644
index 0000000..3d7a888
--- /dev/null
+++ b/docs/source/intro.rst
@@ -0,0 +1,89 @@
+Introduction
+============
+
+:mod:`sqlparse` is a non-validating SQL parser for Python.
+
+It provides support for parsing, splitting and formatting SQL statements.
+
+:mod:`sqlparse` is released under the terms of the
+`New BSD license <http://www.opensource.org/licenses/bsd-license.php>`_.
+
+Visit http://sqlformat.appspot.com to try its formatting features.
+
+
+Download & Installation
+-----------------------
+
+To download and install :mod:`sqlparse` on your system run the following
+commands:
+
+.. code-block:: bash
+
+ $ git clone git://github.com/andialbrecht/python-sqlparse.git
+ $ cd python-sqlparse/
+ $ sudo python setup.py install
+
+A tarball of the current sources is available under the following URL:
+http://github.com/andialbrecht/python-sqlparse/tarball/master
+
+
+Example Usage
+-------------
+
+Here are some usage examples of this module.
+
+Splitting statements::
+
+ >>> import sqlparse
+ >>> sql = 'select * from foo; select * from bar;'
+ >>> sqlparse.split(sql)
+ <<< [u'select * from foo; ', u'select * from bar;']
+
+Formatting statements::
+
+ >>> sql = 'select * from foo where id in (select id from bar);'
+ >>> print sqlparse.format(sql, reindent=True, keyword_case='upper')
+ SELECT *
+ FROM foo
+ WHERE id IN
+ (SELECT id
+ FROM bar);
+
+Now, let's have a deeper look at the internals::
+
+ >>> sql = 'select * from "someschema"."mytable" where id = 1'
+ >>> parsed = sqlparse.parse(sql)
+ >>> parsed
+ <<< (<Statement 'select...' at 0x9ad08ec>,)
+ >>> stmt = parsed[0]
+ >>> stmt.to_unicode() # converting it back to unicode
+ <<< u'select * from "someschema"."mytable" where id = 1'
+ >>> # This is what the internal representation looks like:
+ >>> stmt.tokens
+ <<<
+ (<DML 'select' at 0x9b63c34>,
+ <Whitespace ' ' at 0x9b63e8c>,
+ <Operator '*' at 0x9b63e64>,
+ <Whitespace ' ' at 0x9b63c5c>,
+ <Keyword 'from' at 0x9b63c84>,
+ <Whitespace ' ' at 0x9b63cd4>,
+ <Identifier '"somes...' at 0x9b5c62c>,
+ <Whitespace ' ' at 0x9b63f04>,
+ <Where 'where ...' at 0x9b5caac>)
+ >>>
+
+
+.. todo:: Describe general concepts:
+   Why non-validating? Processing stages (tokens, groups, ...), filters.
+
+
+Development & Contributing
+--------------------------
+
+Please file bug reports and feature requests on the project site at
+http://code.google.com/p/python-sqlparse/issues/entry or if you have
+code to contribute upload it to http://codereview.appspot.com and
+add albrecht.andi@googlemail.com as reviewer.
+
+For more information about the review tool and how to use it, visit
+its project page: http://code.google.com/p/rietveld.
diff --git a/docs/tango/static/bgfooter.png b/docs/tango/static/bgfooter.png
new file mode 100644
index 0000000..9ce5bdd
--- /dev/null
+++ b/docs/tango/static/bgfooter.png
Binary files differ
diff --git a/docs/tango/static/bgtop.png b/docs/tango/static/bgtop.png
new file mode 100644
index 0000000..a0d4709
--- /dev/null
+++ b/docs/tango/static/bgtop.png
Binary files differ
diff --git a/docs/tango/static/default.css_t b/docs/tango/static/default.css_t
new file mode 100644
index 0000000..c86759a
--- /dev/null
+++ b/docs/tango/static/default.css_t
@@ -0,0 +1,90 @@
+/**
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+@import url("basic.css");
+
+body {
+ color: #000000;
+ font-family: "Free Sans", Arial, Verdana, sans;
+ font-size: 11pt;
+ width: 70em;
+ margin: 0;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+
+div.related {
+ background: #eeeeec; /* #d3d7cf; */
+ color: white;
+ padding-bottom: .6em;
+ padding-top: .6em;
+}
+
+div.related a {
+ color: #2e3436;
+ text-decoration: none;
+}
+
+div.document {
+ background: #eeeeec;
+}
+
+div.body {
+ background: white;
+ padding: .5em;
+ -moz-border-radius: 0px 10px 10px 10px;
+ border-right: 1px solid #eeeeec;
+}
+
+div.document h1, h2, h3, h4 {
+ color: #204a87;
+ font-weight: normal;
+ letter-spacing: .05em;
+}
+
+div.document p, dl, div.highlight {
+ margin-left: 25px;
+}
+
+div.document dd p, dd {
+ margin-left: .5em;
+}
+
+div.document a {
+ color: #f57900;
+}
+
+div.sphinxsidebarwrapper {
+ background: url(bgfooter.png) top left repeat-x #eeeeec;
+ padding-right: .5em;
+}
+
+div.sphinxsidebar ul {
+ margin: 0;
+ padding: 0;
+}
+
+div.sphinxsidebar a {
+ color: #888a85;
+ text-decoration: none;
+}
+
+div.footer {
+ font-size: .8em;
+ color: #888a85;
+ text-align: right;
+ padding: 10px;
+}
+
+div.footer a {
+ color: #888a85;
+}
+
+div.highlight pre {
+ background-color: #eeeeec;
+ border: 1px solid #babdb6;
+ padding: 7px;
+} \ No newline at end of file
diff --git a/docs/tango/theme.conf b/docs/tango/theme.conf
new file mode 100644
index 0000000..812330f
--- /dev/null
+++ b/docs/tango/theme.conf
@@ -0,0 +1,28 @@
+[theme]
+inherit = basic
+stylesheet = default.css
+pygments_style = sphinx
+
+[options]
+rightsidebar = false
+stickysidebar = false
+
+footerbgcolor = #11303d
+footertextcolor = #ffffff
+sidebarbgcolor = #1c4e63
+sidebartextcolor = #ffffff
+sidebarlinkcolor = #98dbcc
+relbarbgcolor = #133f52
+relbartextcolor = #ffffff
+relbarlinkcolor = #ffffff
+bgcolor = #ffffff
+textcolor = #000000
+headbgcolor = #f2f2f2
+headtextcolor = #20435c
+headlinkcolor = #c60f0f
+linkcolor = #355f7c
+codebgcolor = #eeffcc
+codetextcolor = #333333
+
+bodyfont = sans-serif
+headfont = 'Trebuchet MS', sans-serif
diff --git a/extras/appengine/Makefile b/extras/appengine/Makefile
new file mode 100644
index 0000000..28380cf
--- /dev/null
+++ b/extras/appengine/Makefile
@@ -0,0 +1,51 @@
+# Makefile to simplify some common AppEngine actions.
+# Use 'make help' for a list of commands.
+
+PYTHON=`which python2.5`
+DEV_APPSERVER=$(PYTHON) `which dev_appserver.py`
+APPCFG=$(PYTHON) `which appcfg.py`
+PORT=8080
+
+
+default: help
+
+help:
+ @echo "Available commands:"
+ @sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' <Makefile | sort
+
+serve:
+ $(DEV_APPSERVER) --port=$(PORT) .
+
+serve_remote:
+ $(DEV_APPSERVER) --port=$(PORT) --address 0.0.0.0 .
+
+serve_email:
+ $(DEV_APPSERVER) --port=$(PORT) --enable_sendmail .
+
+serve_remote_email:
+ $(DEV_APPSERVER) --port=$(PORT) --enable_sendmail --address 0.0.0.0 .
+
+release: make_release.sh django/.svn
+ sh make_release.sh
+
+update: release
+ $(APPCFG) update release
+
+upload: update
+
+update_indexes:
+ $(APPCFG) update_indexes .
+
+vacuum_indexes:
+ $(APPCFG) vacuum_indexes .
+
+all: django pygments sqlparse
+
+django:
+ svn co http://code.djangoproject.com/svn/django/trunk/django
+
+pygments:
+ ln -s `python -c "import pygments,os; print os.path.dirname(pygments.__file__)"` .
+
+sqlparse:
+ ln -s ../../sqlparse .
diff --git a/extras/appengine/README b/extras/appengine/README
new file mode 100644
index 0000000..4762faa
--- /dev/null
+++ b/extras/appengine/README
@@ -0,0 +1,22 @@
+gae-sqlformat - An SQL formatting tool running on App Engine
+=============================================================
+
+
+To set up this application run
+
+ make all
+
+This command fetches Django from Subversion and symlinks Pygments
+and sqlparse. Note: You'll need Pygments installed somewhere in your
+PYTHONPATH.
+
+For a manual setup have a look at the Makefile ;-)
+
+
+To run the development server run
+
+ make serve
+
+
+Homepage: http://sqlformat.appspot.com
+
diff --git a/extras/appengine/__init__.py b/extras/appengine/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extras/appengine/__init__.py
diff --git a/extras/appengine/app.yaml b/extras/appengine/app.yaml
new file mode 100644
index 0000000..6aa08b2
--- /dev/null
+++ b/extras/appengine/app.yaml
@@ -0,0 +1,27 @@
+application: sqlformat
+version: 2
+runtime: python
+api_version: 1
+
+default_expiration: 7d # This is good for images, which never change
+
+handlers:
+
+- url: /(robots.txt|favicon.ico)
+ static_files: static/\1
+ upload: static/(robots.txt|favicon.ico)
+
+- url: /google7a062e78b56854c0.html
+ static_files: static/robots.txt
+ upload: static/robots.txt
+
+- url: /static/(script.js|styles.css|upload.py)
+ static_files: static/\1
+ upload: static/(script.js|styles.css|upload.py)
+ expiration: 1h # Shorter expiration, these change often
+
+- url: /static
+ static_dir: static
+
+- url: .*
+ script: main.py
diff --git a/extras/appengine/examples/customers.sql b/extras/appengine/examples/customers.sql
new file mode 100644
index 0000000..8b73850
--- /dev/null
+++ b/extras/appengine/examples/customers.sql
@@ -0,0 +1 @@
+USE mydatabase;SELECT orders.customer, orders.day_of_order, orders.product, orders.quantity as number_ordered, inventory.quantity as number_instock, inventory.price FROM orders JOIN inventory ON orders.product = inventory.product; \ No newline at end of file
diff --git a/extras/appengine/examples/multiple_inserts.sql b/extras/appengine/examples/multiple_inserts.sql
new file mode 100644
index 0000000..cf49d5d
--- /dev/null
+++ b/extras/appengine/examples/multiple_inserts.sql
@@ -0,0 +1 @@
+insert into customer (id, name) values (1, 'John');insert into customer (id, name) values (2, 'Jack');insert into customer (id, name) values (3, 'Jane');insert into customer (id, name) values (4, 'Jim');insert into customer (id, name) values (5, 'Jerry');insert into customer (id, name) values (1, 'Joe'); \ No newline at end of file
diff --git a/extras/appengine/examples/pg_view.sql b/extras/appengine/examples/pg_view.sql
new file mode 100644
index 0000000..edf9f06
--- /dev/null
+++ b/extras/appengine/examples/pg_view.sql
@@ -0,0 +1 @@
+SELECT DISTINCT (current_database())::information_schema.sql_identifier AS view_catalog, (nv.nspname)::information_schema.sql_identifier AS view_schema, (v.relname)::information_schema.sql_identifier AS view_name, (current_database())::information_schema.sql_identifier AS table_catalog, (nt.nspname)::information_schema.sql_identifier AS table_schema, (t.relname)::information_schema.sql_identifier AS table_name FROM pg_namespace nv, pg_class v, pg_depend dv, pg_depend dt, pg_class t, pg_namespace nt WHERE ((((((((((((((nv.oid = v.relnamespace) AND (v.relkind = 'v'::"char")) AND (v.oid = dv.refobjid)) AND (dv.refclassid = ('pg_class'::regclass)::oid)) AND (dv.classid = ('pg_rewrite'::regclass)::oid)) AND (dv.deptype = 'i'::"char")) AND (dv.objid = dt.objid)) AND (dv.refobjid <> dt.refobjid)) AND (dt.classid = ('pg_rewrite'::regclass)::oid)) AND (dt.refclassid = ('pg_class'::regclass)::oid)) AND (dt.refobjid = t.oid)) AND (t.relnamespace = nt.oid)) AND (t.relkind = ANY (ARRAY['r'::"char", 'v'::"char"]))) AND pg_has_role(t.relowner, 'USAGE'::text)) ORDER BY (current_database())::information_schema.sql_identifier, (nv.nspname)::information_schema.sql_identifier, (v.relname)::information_schema.sql_identifier, (current_database())::information_schema.sql_identifier, (nt.nspname)::information_schema.sql_identifier, (t.relname)::information_schema.sql_identifier;
diff --git a/extras/appengine/examples/subquery.sql b/extras/appengine/examples/subquery.sql
new file mode 100644
index 0000000..dd4bbc1
--- /dev/null
+++ b/extras/appengine/examples/subquery.sql
@@ -0,0 +1 @@
+select sum(a1.Sales) from Store_Information a1 where a1.Store_name in (select store_name from Geography a2 where a2.store_name = a1.store_name); \ No newline at end of file
diff --git a/extras/appengine/examples/subquery2.sql b/extras/appengine/examples/subquery2.sql
new file mode 100644
index 0000000..6c00a87
--- /dev/null
+++ b/extras/appengine/examples/subquery2.sql
@@ -0,0 +1 @@
+select user_id, count(*) as how_many from bboard where not exists (select 1 from bboard_authorized_maintainers bam where bam.user_id = bboard.user_id) and posting_time + 60 > sysdate group by user_id order by how_many desc; \ No newline at end of file
diff --git a/extras/appengine/index.yaml b/extras/appengine/index.yaml
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extras/appengine/index.yaml
diff --git a/extras/appengine/main.py b/extras/appengine/main.py
new file mode 100644
index 0000000..d0a8418
--- /dev/null
+++ b/extras/appengine/main.py
@@ -0,0 +1,131 @@
+# Copyright 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Main program for Rietveld.
+
+This is also a template for running a Django app under Google App
+Engine, especially when using a newer version of Django than provided
+in the App Engine standard library.
+
+The site-specific code is all in other files: urls.py, models.py,
+views.py, settings.py.
+"""
+
+# Standard Python imports.
+import os
+import sys
+import logging
+
+# Log a message each time this module get loaded.
+logging.info('Loading %s, app version = %s',
+ __name__, os.getenv('CURRENT_VERSION_ID'))
+
+# Delete the preloaded copy of Django.
+for key in [key for key in sys.modules if key.startswith('django')]:
+ del sys.modules[key]
+
+os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
+
+# Force sys.path to have our own directory first, so we can import from it.
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+# Import Django from a zipfile.
+sys.path.insert(0, os.path.abspath('django.zip'))
+
+# Fail early if we can't import Django. Log identifying information.
+import django
+logging.info('django.__file__ = %r, django.VERSION = %r',
+ django.__file__, django.VERSION)
+assert django.VERSION[0] >= 1, "This Django version is too old"
+
+# AppEngine imports.
+from google.appengine.ext.webapp import util
+
+
+# Helper to enter the debugger. This passes in __stdin__ and
+# __stdout__, because stdin and stdout are connected to the request
+# and response streams. You must import this from __main__ to use it.
+# (I tried to make it universally available via __builtin__, but that
+# doesn't seem to work for some reason.)
+def BREAKPOINT():
+ import pdb
+ p = pdb.Pdb(None, sys.__stdin__, sys.__stdout__)
+ p.set_trace()
+
+
+# Custom Django configuration.
+from django.conf import settings
+settings._target = None
+
+# Import various parts of Django.
+import django.core.handlers.wsgi
+import django.core.signals
+import django.db
+import django.dispatch.dispatcher
+import django.forms
+
+# Work-around to avoid warning about django.newforms in djangoforms.
+django.newforms = django.forms
+
+
+def log_exception(*args, **kwds):
+ """Django signal handler to log an exception."""
+ cls, err = sys.exc_info()[:2]
+ logging.exception('Exception in request: %s: %s', cls.__name__, err)
+
+
+# Log all exceptions detected by Django.
+django.core.signals.got_request_exception.connect(log_exception)
+
+# Unregister Django's default rollback event handler.
+django.core.signals.got_request_exception.disconnect(
+ django.db._rollback_on_exception)
+
+
+def real_main():
+ """Main program."""
+ # Create a Django application for WSGI.
+ application = django.core.handlers.wsgi.WSGIHandler()
+ # Run the WSGI CGI handler with that application.
+ util.run_wsgi_app(application)
+
+
+def profile_main():
+ """Main program for profiling."""
+ import cProfile
+ import pstats
+ import StringIO
+
+ prof = cProfile.Profile()
+ prof = prof.runctx('real_main()', globals(), locals())
+ stream = StringIO.StringIO()
+ stats = pstats.Stats(prof, stream=stream)
+ # stats.strip_dirs() # Don't; too many modules are named __init__.py.
+ stats.sort_stats('time') # 'time', 'cumulative' or 'calls'
+ stats.print_stats() # Optional arg: how many to print
+ # The rest is optional.
+ # stats.print_callees()
+ # stats.print_callers()
+ print '\n<hr>'
+ print '<h1>Profile</h1>'
+ print '<pre>'
+ print stream.getvalue()[:1000000]
+ print '</pre>'
+
+# Set this to profile_main to enable profiling.
+main = real_main
+
+
+if __name__ == '__main__':
+ main()
diff --git a/extras/appengine/make_release.sh b/extras/appengine/make_release.sh
new file mode 100755
index 0000000..adb8a5c
--- /dev/null
+++ b/extras/appengine/make_release.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# Script to create a "release" subdirectory. This is a subdirectory
+# containing a bunch of symlinks, from which the app can be updated.
+# The main reason for this is to import Django from a zipfile, which
+# saves dramatically in upload time: statting and computing the SHA1
+# for 1000s of files is slow. Even if most of those files don't
+# actually need to be uploaded, they still add to the work done for
+# each update.
+
+ZIPFILE=django.zip
+RELEASE=release
+FILES="app.yaml index.yaml __init__.py main.py settings.py"
+DIRS="static templates sqlparse pygments sqlformat examples"
+
+# Remove old $ZIPFILE file.
+rm -rf $ZIPFILE
+
+# Create new $ZIPFILE file.
+# We prune:
+# - .svn subdirectories for obvious reasons.
+# - contrib/gis/ and related files because it's huge and unneeded.
+# - *.po and *.mo files because they are bulky and unneeded.
+# - *.pyc and *.pyo because they aren't used by App Engine anyway.
+zip -q $ZIPFILE `find django/ \
+ -name .svn -prune -o \
+ -name gis -prune -o \
+ -name admin -prune -o \
+ -name localflavor -prune -o \
+ -name mysql -prune -o \
+ -name mysql_old -prune -o \
+ -name oracle -prune -o \
+ -name postgresql -prune -o \
+ -name postgresql_psycopg2 -prune -o \
+ -name sqlite3 -prune -o \
+ -name test -prune -o \
+ -type f ! -name \*.py[co] ! -name \*.[pm]o -print`
+
+# Remove old $RELEASE directory.
+rm -rf $RELEASE
+
+# Create new $RELEASE directory.
+mkdir $RELEASE
+
+# Create symbolic links.
+for x in $FILES $DIRS $ZIPFILE
+do
+ ln -s ../$x $RELEASE/$x
+done
diff --git a/extras/appengine/settings.py b/extras/appengine/settings.py
new file mode 100644
index 0000000..fad6bb5
--- /dev/null
+++ b/extras/appengine/settings.py
@@ -0,0 +1,37 @@
+# Copyright 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Minimal Django settings."""
+
+import os
+
+APPEND_SLASH = False
+DEBUG = os.environ['SERVER_SOFTWARE'].startswith('Dev')
+INSTALLED_APPS = (
+ 'sqlformat',
+)
+MIDDLEWARE_CLASSES = (
+ 'django.middleware.common.CommonMiddleware',
+ 'django.middleware.http.ConditionalGetMiddleware',
+# 'codereview.middleware.AddUserToRequestMiddleware',
+)
+ROOT_URLCONF = 'sqlformat.urls'
+TEMPLATE_CONTEXT_PROCESSORS = ()
+TEMPLATE_DEBUG = DEBUG
+TEMPLATE_DIRS = (
+ os.path.join(os.path.dirname(__file__), 'templates'),
+ )
+TEMPLATE_LOADERS = (
+ 'django.template.loaders.filesystem.load_template_source',
+ )
diff --git a/extras/appengine/sqlformat/__init__.py b/extras/appengine/sqlformat/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/extras/appengine/sqlformat/__init__.py
diff --git a/extras/appengine/sqlformat/urls.py b/extras/appengine/sqlformat/urls.py
new file mode 100644
index 0000000..c83290e
--- /dev/null
+++ b/extras/appengine/sqlformat/urls.py
@@ -0,0 +1,11 @@
+from django.conf.urls.defaults import *
+
+urlpatterns = patterns(
+ 'sqlformat.views',
+ (r'^$', 'index'),
+ (r'^source/$', 'source'),
+ (r'^about/$', 'about'),
+ (r'^api/$', 'api'),
+ (r'^format/$', 'format'),
+ (r'^load_example', 'load_example'),
+)
diff --git a/extras/appengine/sqlformat/views.py b/extras/appengine/sqlformat/views.py
new file mode 100644
index 0000000..d135c44
--- /dev/null
+++ b/extras/appengine/sqlformat/views.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import md5
+import os
+import time
+
+from django import forms
+from django.http import HttpResponse
+from django.shortcuts import render_to_response
+from django.utils import simplejson as json
+
+from google.appengine.api import users
+
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import SqlLexer, PythonLexer, PhpLexer
+
+import sqlparse
+
+
+INITIAL_SQL = "select * from foo join bar on val1 = val2 where id = 123;"
+EXAMPLES_DIR = os.path.join(os.path.dirname(__file__), '../examples')
+
+def _get_user_image(user):
+ if user is None:
+ return None
+ digest = md5.new(user.email().lower()).hexdigest()
+ if os.environ['SERVER_SOFTWARE'].startswith('Dev'):
+ host = 'localhost%3A8080'
+ else:
+ host = 'sqlformat.appspot.com'
+ default = 'http%3A%2F%2F'+host+'%2Fstatic%2Fblank.gif'
+ return 'http://gravatar.com/avatar/%s?s=32&d=%s' % (digest, default)
+
+def _get_examples():
+ fnames = os.listdir(EXAMPLES_DIR)
+ fnames.sort()
+ return fnames
+
+
+class FormOptions(forms.Form):
+ data = forms.CharField(widget=forms.Textarea({'class': 'resizable'}),
+ initial=INITIAL_SQL, required=False)
+ datafile = forms.FileField(required=False)
+ highlight = forms.BooleanField(initial=True, required=False,
+ widget=forms.CheckboxInput(),
+ label='Enable syntax highlighting')
+ remove_comments = forms.BooleanField(initial=False, required=False,
+ widget=forms.CheckboxInput(),
+ label='Remove comments')
+ keyword_case = forms.CharField(
+ widget=forms.Select(choices=(('', 'Unchanged'),
+ ('lower', 'Lower case'),
+ ('upper', 'Upper case'),
+ ('capitalize', 'Capitalize'))),
+ required=False, initial='upper', label='Keywords')
+ identifier_case = forms.CharField(
+ widget=forms.Select(choices=(('', 'Unchanged'),
+ ('lower', 'Lower case'),
+ ('upper', 'Upper case'),
+ ('capitalize', 'Capitalize'))),
+ required=False, initial='', label='Identifiers')
+ n_indents = forms.IntegerField(min_value=0, max_value=30,
+ initial=2, required=False,
+ label='spaces',
+ widget=forms.TextInput({'size': 2,
+ 'maxlength': 2}))
+# right_margin = forms.IntegerField(min_value=10, max_value=500,
+# initial=60, required=False,
+# label='characters',
+# widget=forms.TextInput({'size': 3,
+# 'maxlength': 3}))
+ output_format = forms.CharField(
+ widget=forms.Select(choices=(('sql', 'SQL'),
+ ('python', 'Python'),
+ ('php', 'PHP'),
+ )),
+ required=False, initial='sql', label='Language')
+
+ def clean(self):
+ super(FormOptions, self).clean()
+ data = self.cleaned_data.get('data')
+ logging.info(self.files)
+ if 'datafile' in self.files:
+ self._datafile = self.files['datafile'].read()
+ else:
+ self._datafile = None
+ if not data and not self._datafile:
+ raise forms.ValidationError('Whoops, I need a file or text!')
+ elif data and self._datafile:
+ raise forms.ValidationError('Whoops, I need a file OR text!')
+ return self.cleaned_data
+
+ def get_data(self):
+ data = self.cleaned_data.get('data')
+ if self._datafile:
+ return self._datafile
+ else:
+ return data
+
+
+def format_sql(form, format='html'):
+ data = form.cleaned_data
+ popts = {}
+ sql = form.get_data()
+ if data.get('remove_comments'):
+ popts['strip_comments'] = True
+ if data.get('keyword_case'):
+ popts['keyword_case'] = data.get('keyword_case')
+ if data.get('identifier_case'):
+ popts['identifier_case'] = data.get('identifier_case')
+ if data.get('n_indents', None) is not None:
+ popts['reindent'] = True
+ popts['indent_width'] = data.get('n_indents')
+ if data.get('right_margin', None) is not None:
+ popts['right_margin'] = data.get('right_margin')
+ if data.get('output_format', None) is not None:
+ popts['output_format'] = data.get('output_format')
+ sql = sqlparse.format(sql, **popts)
+ if format in ('html', 'json'):
+ if data.get('highlight', False):
+ if popts['output_format'] == 'python':
+ lexer = PythonLexer()
+ elif popts['output_format'] == 'php':
+ lexer = PhpLexer()
+ else:
+ lexer = SqlLexer()
+ sql = highlight(sql, lexer, HtmlFormatter())
+ else:
+ sql = ('<textarea class="resizable" '
+ 'style="height: 350px; margin-top: 1em;">%s</textarea>'
+ % sql)
+ return sql
+
+
+def index(request):
+ output = None
+ data = {}
+ proc_time = None
+ if request.method == 'POST':
+ logging.debug(request.POST)
+ form = FormOptions(request.POST, request.FILES)
+ if form.is_valid():
+ start = time.time()
+ output = format_sql(form,
+ format=request.POST.get('format', 'html'))
+ proc_time = time.time()-start
+ else:
+ form = FormOptions()
+ if request.POST.get('format', None) == 'json':
+ logging.warning(form.errors)
+ data['errors'] = str(form.errors)
+ data['output'] = output
+ logging.info('%r', proc_time)
+ data['proc_time'] = '%.3f' % (proc_time or 0.0)
+ data = json.dumps(data)
+ return HttpResponse(data, content_type='text/x-json')
+ elif request.POST.get('format', None) == 'text':
+ if not form.is_valid():
+ data = str(form.errors) # XXX convert to plain text
+ else:
+ data = output
+ return HttpResponse(data, content_type='text/plain')
+ return render_to_response('index.html',
+ {'form': form, 'output': output,
+ 'proc_time': proc_time and '%.3f' % proc_time or None,
+ 'user': users.get_current_user(),
+ 'login_url': users.create_login_url('/'),
+ 'logout_url': users.create_logout_url('/'),
+ 'userimg': _get_user_image(users.get_current_user()),
+ 'examples': _get_examples()})
+
+
+def format(request):
+ if request.method == 'POST':
+ form = FormOptions(request.POST)
+ if form.is_valid():
+ response = format_sql(form, format='text')
+ else:
+ response = 'ERROR: %s' % str(form.errors)
+ else:
+ response = 'POST request required'
+ return HttpResponse(response, content_type='text/plain')
+
+def source(request):
+ return render_to_response('source.html')
+
+def about(request):
+ return render_to_response('about.html')
+
+def api(request):
+ return render_to_response('api.html')
+
+def load_example(request):
+ fname = request.POST.get('fname')
+ if fname is None:
+ answer = 'Oops, I\'ve got no filename...'
+ elif fname not in _get_examples():
+ answer = 'Hmm, I think you don\'t want to do that.'
+ else:
+ answer = open(os.path.join(EXAMPLES_DIR, fname)).read()
+ data = json.dumps({'answer': answer})
+ return HttpResponse(data, content_type='text/x-json')
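[Editor's note: the format view above takes a plain POST and returns text/plain, so
the formatter can be scripted against the running app. A minimal client sketch
(Python 2; the field names are taken from FormOptions above, and the host is the
App Engine app named in the README):]

    import urllib

    params = urllib.urlencode({
        'data': 'select * from foo where id in (select id from bar);',
        'keyword_case': 'upper',
        'n_indents': 2,
        'output_format': 'sql',
    })
    # POST to the /format/ URL mapped in sqlformat/urls.py
    print urllib.urlopen('http://sqlformat.appspot.com/format/', params).read()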
diff --git a/extras/appengine/static/bg_options.png b/extras/appengine/static/bg_options.png
new file mode 100644
index 0000000..bc1a6ed
--- /dev/null
+++ b/extras/appengine/static/bg_options.png
Binary files differ
diff --git a/extras/appengine/static/bgfieldset.png b/extras/appengine/static/bgfieldset.png
new file mode 100644
index 0000000..4d55f4a
--- /dev/null
+++ b/extras/appengine/static/bgfieldset.png
Binary files differ
diff --git a/extras/appengine/static/bgfooter.png b/extras/appengine/static/bgfooter.png
new file mode 100644
index 0000000..9ce5bdd
--- /dev/null
+++ b/extras/appengine/static/bgfooter.png
Binary files differ
diff --git a/extras/appengine/static/bgtop.png b/extras/appengine/static/bgtop.png
new file mode 100644
index 0000000..a0d4709
--- /dev/null
+++ b/extras/appengine/static/bgtop.png
Binary files differ
diff --git a/extras/appengine/static/blank.gif b/extras/appengine/static/blank.gif
new file mode 100644
index 0000000..3be2119
--- /dev/null
+++ b/extras/appengine/static/blank.gif
Binary files differ
diff --git a/extras/appengine/static/canvas.html b/extras/appengine/static/canvas.html
new file mode 100644
index 0000000..ab642d0
--- /dev/null
+++ b/extras/appengine/static/canvas.html
@@ -0,0 +1,114 @@
+<html>
+
+<head>
+<style type="text/css">
+ /*
+ These styles are customizable.
+ Provide .canvas-gadget (the div that holds the canvas mode gadget)
+ at least 500px width so that the gadget has sufficient screen real estate
+*/
+body {
+ margin: 0;
+ font-family:arial, sans-serif;
+ text-align:center;
+}
+.container {
+ width:652px;
+ margin:0 auto;
+ text-align:left;
+}
+.fc-sign-in-header {
+ text-align:left;
+ font-size: 13px;
+ padding:3px 10px;
+ border-bottom:1px solid #000000;
+}
+.signin {
+ text-align:left;
+ float:right;
+ font-size: 13px;
+ height: 32px;
+}
+.go-back {
+ text-align:left;
+ margin:5px auto 15px auto;
+}
+.go-back a, .go-back a:visited {
+ font-weight:bold;
+}
+.canvas-gadget {
+ text-align:left;
+ width:650px; /* ALLOW AT LEAST 500px WIDTH*/
+ margin:10px auto 10px auto;
+ border:1px solid #cccccc;
+}
+.site-header {
+ margin-top: 10px;
+}
+.section-title {
+ font-size: 2em;
+}
+.clear {
+ clear:both;
+ font-size:1px;
+ height:1px;
+ line-height:0;
+ margin:0;
+ padding:0;
+}
+</style>
+<script type="text/javascript" src="http://www.google.com/friendconnect/script/friendconnect.js"></script>
+</head>
+<body>
+<div class="container">
+ <div class="fc-sign-in-header">
+ <!--REQUIRED SO VISITORS CAN SIGN IN-->
+ <div class="signin" id="gadget-signin"></div>
+ <script type="text/javascript">
+ var skin = {};
+ skin['BORDER_COLOR'] = '#cccccc';
+ skin['ENDCAP_BG_COLOR'] = '#e0ecff';
+ skin['ENDCAP_TEXT_COLOR'] = '#333333';
+ skin['ENDCAP_LINK_COLOR'] = '#0000cc';
+ skin['ALTERNATE_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_LINK_COLOR'] = '#0000cc';
+ skin['CONTENT_TEXT_COLOR'] = '#333333';
+ skin['CONTENT_SECONDARY_LINK_COLOR'] = '#7777cc';
+ skin['CONTENT_SECONDARY_TEXT_COLOR'] = '#666666';
+ skin['CONTENT_HEADLINE_COLOR'] = '#333333';
+ skin['ALIGNMENT'] = 'right';
+ google.friendconnect.container.renderCanvasSignInGadget({'id': 'gadget-signin'}, skin);
+ </script>
+ <!--END REQUIRED-->
+ <div class="clear"></div>
+ </div>
+
+ <div class="site-header"><span class="section-title">Site Name</span></div>
+ <div class="go-back">
+ <!--REQUIRED SO VISITORS CAN RETURN TO REFERRING PAGE-->
+ <a href="javascript:google.friendconnect.container.goBackToSite();">
+ &lsaquo;&lsaquo; Return home</a>
+ <!--END REQUIRED-->
+ </div>
+ <!-- REQUIRED - THIS IS WHERE THE GADGET IS PRESENTED. ALLOW AT LEAST 500px WIDTH -->
+ <div id="gadget-canvas" class="canvas-gadget"></div>
+ <script type="text/javascript">
+ var skin = {};
+ skin['BORDER_COLOR'] = '#cccccc';
+ skin['ENDCAP_BG_COLOR'] = '#e0ecff';
+ skin['ENDCAP_TEXT_COLOR'] = '#333333';
+ skin['ENDCAP_LINK_COLOR'] = '#0000cc';
+ skin['ALTERNATE_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_BG_COLOR'] = '#ffffff';
+ skin['CONTENT_LINK_COLOR'] = '#0000cc';
+ skin['CONTENT_TEXT_COLOR'] = '#333333';
+ skin['CONTENT_SECONDARY_LINK_COLOR'] = '#7777cc';
+ skin['CONTENT_SECONDARY_TEXT_COLOR'] = '#666666';
+ skin['CONTENT_HEADLINE_COLOR'] = '#333333';
+ google.friendconnect.container.renderUrlCanvasGadget({'id': 'gadget-canvas'}, skin);
+ </script>
+ <!--END REQUIRED-->
+</div>
+</body>
+</html> \ No newline at end of file
diff --git a/extras/appengine/static/hotkeys.js b/extras/appengine/static/hotkeys.js
new file mode 100644
index 0000000..0e62a92
--- /dev/null
+++ b/extras/appengine/static/hotkeys.js
@@ -0,0 +1 @@
+(function(B){B.fn.__bind__=B.fn.bind;B.fn.__unbind__=B.fn.unbind;B.fn.__find__=B.fn.find;var A={version:"0.7.8",override:/keydown|keypress|keyup/g,triggersMap:{},specialKeys:{27:"esc",9:"tab",32:"space",13:"return",8:"backspace",145:"scroll",20:"capslock",144:"numlock",19:"pause",45:"insert",36:"home",46:"del",35:"end",33:"pageup",34:"pagedown",37:"left",38:"up",39:"right",40:"down",112:"f1",113:"f2",114:"f3",115:"f4",116:"f5",117:"f6",118:"f7",119:"f8",120:"f9",121:"f10",122:"f11",123:"f12"},shiftNums:{"`":"~","1":"!","2":"@","3":"#","4":"$","5":"%","6":"^","7":"&","8":"*","9":"(","0":")","-":"_","=":"+",";":":","'":'"',",":"<",".":">","/":"?","\\":"|"},newTrigger:function(E,D,F){var C={};C[E]={};C[E][D]={cb:F,disableInInput:false};return C}};if(B.browser.mozilla){A.specialKeys=B.extend(A.specialKeys,{96:"0",97:"1",98:"2",99:"3",100:"4",101:"5",102:"6",103:"7",104:"8",105:"9"})}B.fn.find=function(C){this.query=C;return B.fn.__find__.apply(this,arguments)};B.fn.unbind=function(H,E,G){if(B.isFunction(E)){G=E;E=null}if(E&&typeof E==="string"){var F=((this.prevObject&&this.prevObject.query)||(this[0].id&&this[0].id)||this[0]).toString();var D=H.split(" ");for(var C=0;C<D.length;C++){delete A.triggersMap[F][D[C]][E]}}return this.__unbind__(H,G)};B.fn.bind=function(J,F,K){var H=J.match(A.override);if(B.isFunction(F)||!H){return this.__bind__(J,F,K)}else{var N=null,I=B.trim(J.replace(A.override,""));if(I){N=this.__bind__(I,F,K)}if(typeof F==="string"){F={combi:F}}if(F.combi){for(var M=0;M<H.length;M++){var D=H[M];var G=F.combi.toLowerCase(),E=A.newTrigger(D,G,K),L=((this.prevObject&&this.prevObject.query)||(this[0].id&&this[0].id)||this[0]).toString();E[D][G].disableInInput=F.disableInInput;if(!A.triggersMap[L]){A.triggersMap[L]=E}else{if(!A.triggersMap[L][D]){A.triggersMap[L][D]=E[D]}}var C=A.triggersMap[L][D][G];if(!C){A.triggersMap[L][D][G]=[E[D][G]]}else{if(C.constructor!==Array){A.triggersMap[L][D][G]=[C]}else{A.triggersMap[L][D][G][C.length]=E[D][G]}}this.each(function(){var O=B(this);if(O.attr("hkId")&&O.attr("hkId")!==L){L=O.attr("hkId")+";"+L}O.attr("hkId",L)});N=this.__bind__(H.join(" "),F,A.handler)}}return N}};A.findElement=function(C){if(!B(C).attr("hkId")){if(B.browser.opera||B.browser.safari){while(!B(C).attr("hkId")&&C.parentNode){C=C.parentNode}}}return C};A.handler=function(E){var O=A.findElement(E.currentTarget),I=B(O),D=I.attr("hkId");if(D){D=D.split(";");var G=E.which,Q=E.type,P=A.specialKeys[G],N=!P&&String.fromCharCode(G).toLowerCase(),H=E.shiftKey,C=E.ctrlKey,M=E.altKey||E.originalEvent.altKey,F=null;for(var R=0;R<D.length;R++){if(A.triggersMap[D[R]][Q]){F=A.triggersMap[D[R]][Q];break}}if(F){var J;if(!H&&!C&&!M){J=F[P]||(N&&F[N])}else{var L="";if(M){L+="alt+"}if(C){L+="ctrl+"}if(H){L+="shift+"}J=F[L+P];if(!J){if(N){J=F[L+N]||F[L+A.shiftNums[N]]||(L==="shift+"&&F[A.shiftNums[N]])}}}if(J){var S=false;for(var R=0;R<J.length;R++){if(J[R].disableInInput){var K=B(E.target);if(I.is("input")||I.is("textarea")||K.is("input")||K.is("textarea")){return true}}S=S||J[R].cb.apply(this,[E])}return S}}}};window.hotkeys=A;return B})(jQuery); \ No newline at end of file
diff --git a/extras/appengine/static/img_loading.gif b/extras/appengine/static/img_loading.gif
new file mode 100644
index 0000000..6465823
--- /dev/null
+++ b/extras/appengine/static/img_loading.gif
Binary files differ
diff --git a/extras/appengine/static/jquery.textarearesizer.compressed.js b/extras/appengine/static/jquery.textarearesizer.compressed.js
new file mode 100644
index 0000000..5464ae6
--- /dev/null
+++ b/extras/appengine/static/jquery.textarearesizer.compressed.js
@@ -0,0 +1 @@
+(function($){var textarea,staticOffset;var iLastMousePos=0;var iMin=32;var grip;$.fn.TextAreaResizer=function(){return this.each(function(){textarea=$(this).addClass('processed'),staticOffset=null;$(this).wrap('<div class="resizable-textarea"><span></span></div>').parent().append($('<div class="grippie"></div>').bind("mousedown",{el:this},startDrag));var grippie=$('div.grippie',$(this).parent())[0];grippie.style.marginRight=(grippie.offsetWidth-$(this)[0].offsetWidth)+'px'})};function startDrag(e){textarea=$(e.data.el);textarea.blur();iLastMousePos=mousePosition(e).y;staticOffset=textarea.height()-iLastMousePos;textarea.css('opacity',0.25);$(document).mousemove(performDrag).mouseup(endDrag);return false}function performDrag(e){var iThisMousePos=mousePosition(e).y;var iMousePos=staticOffset+iThisMousePos;if(iLastMousePos>=(iThisMousePos)){iMousePos-=5}iLastMousePos=iThisMousePos;iMousePos=Math.max(iMin,iMousePos);textarea.height(iMousePos+'px');if(iMousePos<iMin){endDrag(e)}return false}function endDrag(e){$(document).unbind('mousemove',performDrag).unbind('mouseup',endDrag);textarea.css('opacity',1);textarea.focus();textarea=null;staticOffset=null;iLastMousePos=0}function mousePosition(e){return{x:e.clientX+document.documentElement.scrollLeft,y:e.clientY+document.documentElement.scrollTop}}})(jQuery); \ No newline at end of file
diff --git a/extras/appengine/static/loading.gif b/extras/appengine/static/loading.gif
new file mode 100644
index 0000000..a879bed
--- /dev/null
+++ b/extras/appengine/static/loading.gif
Binary files differ
diff --git a/extras/appengine/static/lynx_screenshot.png b/extras/appengine/static/lynx_screenshot.png
new file mode 100644
index 0000000..d1592ac
--- /dev/null
+++ b/extras/appengine/static/lynx_screenshot.png
Binary files differ
diff --git a/extras/appengine/static/pygments.css b/extras/appengine/static/pygments.css
new file mode 100644
index 0000000..da02807
--- /dev/null
+++ b/extras/appengine/static/pygments.css
@@ -0,0 +1,59 @@
+.c { color: #408080; font-style: italic } /* Comment */
+.err { border: 1px solid #FF0000 } /* Error */
+.k { color: #008000; font-weight: bold } /* Keyword */
+.o { color: #666666 } /* Operator */
+.cm { color: #408080; font-style: italic } /* Comment.Multiline */
+.cp { color: #BC7A00 } /* Comment.Preproc */
+.c1 { color: #408080; font-style: italic } /* Comment.Single */
+.cs { color: #408080; font-style: italic } /* Comment.Special */
+.gd { color: #A00000 } /* Generic.Deleted */
+.ge { font-style: italic } /* Generic.Emph */
+.gr { color: #FF0000 } /* Generic.Error */
+.gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.gi { color: #00A000 } /* Generic.Inserted */
+.go { color: #808080 } /* Generic.Output */
+.gp { color: #000080; font-weight: bold } /* Generic.Prompt */
+.gs { font-weight: bold } /* Generic.Strong */
+.gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.gt { color: #0040D0 } /* Generic.Traceback */
+.kc { color: #008000; font-weight: bold } /* Keyword.Constant */
+.kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
+.kp { color: #008000 } /* Keyword.Pseudo */
+.kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
+.kt { color: #B00040 } /* Keyword.Type */
+.m { color: #666666 } /* Literal.Number */
+.s { color: #BA2121 } /* Literal.String */
+.na { color: #7D9029 } /* Name.Attribute */
+.nb { color: #008000 } /* Name.Builtin */
+.nc { color: #0000FF; font-weight: bold } /* Name.Class */
+.no { color: #880000 } /* Name.Constant */
+.nd { color: #AA22FF } /* Name.Decorator */
+.ni { color: #999999; font-weight: bold } /* Name.Entity */
+.ne { color: #D2413A; font-weight: bold } /* Name.Exception */
+.nf { color: #0000FF } /* Name.Function */
+.nl { color: #A0A000 } /* Name.Label */
+.nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
+.nt { color: #008000; font-weight: bold } /* Name.Tag */
+.nv { color: #19177C } /* Name.Variable */
+.ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
+.w { color: #bbbbbb } /* Text.Whitespace */
+.mf { color: #666666 } /* Literal.Number.Float */
+.mh { color: #666666 } /* Literal.Number.Hex */
+.mi { color: #666666 } /* Literal.Number.Integer */
+.mo { color: #666666 } /* Literal.Number.Oct */
+.sb { color: #BA2121 } /* Literal.String.Backtick */
+.sc { color: #BA2121 } /* Literal.String.Char */
+.sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
+.s2 { color: #BA2121 } /* Literal.String.Double */
+.se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
+.sh { color: #BA2121 } /* Literal.String.Heredoc */
+.si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
+.sx { color: #008000 } /* Literal.String.Other */
+.sr { color: #BB6688 } /* Literal.String.Regex */
+.s1 { color: #BA2121 } /* Literal.String.Single */
+.ss { color: #19177C } /* Literal.String.Symbol */
+.bp { color: #008000 } /* Name.Builtin.Pseudo */
+.vc { color: #19177C } /* Name.Variable.Class */
+.vg { color: #19177C } /* Name.Variable.Global */
+.vi { color: #19177C } /* Name.Variable.Instance */
+.il { color: #666666 } /* Literal.Number.Integer.Long */
diff --git a/extras/appengine/static/resize-grip.png b/extras/appengine/static/resize-grip.png
new file mode 100644
index 0000000..cae2a4e
--- /dev/null
+++ b/extras/appengine/static/resize-grip.png
Binary files differ
diff --git a/extras/appengine/static/robots.txt b/extras/appengine/static/robots.txt
new file mode 100644
index 0000000..c033917
--- /dev/null
+++ b/extras/appengine/static/robots.txt
@@ -0,0 +1,8 @@
+# Directions for web crawlers.
+# See http://www.robotstxt.org/wc/norobots.html.
+
+User-agent: HTTrack
+User-agent: puf
+User-agent: MSIECrawler
+User-agent: Nutch
+Disallow: /
diff --git a/extras/appengine/static/rpc_relay.html b/extras/appengine/static/rpc_relay.html
new file mode 100644
index 0000000..c602043
--- /dev/null
+++ b/extras/appengine/static/rpc_relay.html
@@ -0,0 +1 @@
+<html><head><script type="text/javascript" src="http://www.google.com/friendconnect/script/rpc_relay.js"></script></head></html> \ No newline at end of file
diff --git a/extras/appengine/static/script.js b/extras/appengine/static/script.js
new file mode 100644
index 0000000..71bbabb
--- /dev/null
+++ b/extras/appengine/static/script.js
@@ -0,0 +1,103 @@
+var initialized = false;
+
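+/* Collect the form options, POST them to the application and put the
+   formatted result (or an error message) into #response. */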
+function update_output() {
+  var data = {};
+  data.data = $('#id_data').val();
+  data.format = 'json';
+  if ( $('#id_remove_comments').attr('checked') ) {
+    data.remove_comments = 1;
+  }
+  if ( $('#id_highlight').attr('checked') ) { data.highlight = 1; }
+ data.keyword_case = $('#id_keyword_case').val();
+ data.identifier_case = $('#id_identifier_case').val();
+ data.n_indents = $('#id_n_indents').val();
+ data.right_margin = $('#id_right_margin').val();
+ data.output_format = $('#id_output_format').val();
+  var form = document.getElementById('form_options');
+ $(form.elements).attr('disabled', 'disabled');
+ $('#response').addClass('loading');
+  $.post('/', data,
+    function(data) {
+      var proc_time = '';
+      if ( data.output ) {
+        $('#response').html(data.output);
+        proc_time = 'Processed in '+data.proc_time+' seconds.';
+      } else {
+        $('#response').html('An error occurred: '+data.errors);
+      }
+ $('#proc_time').html(proc_time);
+ $(form.elements).each( function(idx) {
+        var obj = $(this);
+ if ( ! obj.is('.keep-disabled') ) {
+ obj.removeAttr('disabled');
+ }
+ });
+ $('#response').removeClass('loading');
+ }, 'json');
+ return false;
+}
+
+function toggle_fieldset(event) {
+  var id = $(this).attr('id');
+ $('#'+id+'_content').slideDown();
+ $('legend').each(function(idx) {
+    var obj = $('#'+this.id+'_content');
+ if ( this.id != id ) {
+ obj.slideUp();
+ }
+ });
+}
+
+
+function textarea_grab_focus(evt) {
+ evt.stopPropagation();
+ evt.preventDefault();
+ $('#id_data').focus();
+ return false;
+}
+
+
+function show_help() {
+ $('#help').toggle();
+ return false;
+}
+
+
+function hide_help() {
+ $('#help').hide();
+ return false;
+}
+
+function load_example() {
+  var fname = $('#sel_example').val();
+  var data = {fname: fname};
+ $.post('/load_example', data,
+ function(data) {
+ $('#id_data').val(data.answer);
+ }, 'json');
+}
+
+
+function init() {
+ if (initialized) { return }
+ //$('legend').bind('click', toggle_fieldset);
+ // $('legend').each(function(idx) {
+ // obj = $('#'+this.id+'_content');
+ // if ( this.id != 'general' ) {
+ // obj.hide();
+ // }
+ // });
+ $(document).bind('keydown', {combi:'Ctrl+f'},
+ update_output);
+ $('#btn_format').val('Format SQL [Ctrl+F]');
+ $(document).bind('keydown', {combi: 'h', disableInInput: true},
+ show_help);
+ $(document).bind('keydown', 'Esc', hide_help);
+ $(document).bind('keydown', {combi: 't', disableInInput: true},
+ textarea_grab_focus);
+ initialized = true;
+ /* jQuery textarea resizer plugin usage */
+ $(document).ready(function() {
+ $('textarea.resizable:not(.processed)').TextAreaResizer();
+ });
+} \ No newline at end of file
diff --git a/extras/appengine/static/sqlformat_client_example.py b/extras/appengine/static/sqlformat_client_example.py
new file mode 100644
index 0000000..3b3bf0f
--- /dev/null
+++ b/extras/appengine/static/sqlformat_client_example.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+import urllib
+import urllib2
+
+payload = (
+ ('data', 'select * from foo join bar on val1 = val2 where id = 123;'),
+ ('format', 'text'),
+ ('keyword_case', 'upper'),
+ ('reindent', True),
+ ('n_indents', 2),
+ )
+
+response = urllib2.urlopen('http://sqlformat.appspot.com/format/',
+ urllib.urlencode(payload))
+print response.read()
+
diff --git a/extras/appengine/static/styles.css b/extras/appengine/static/styles.css
new file mode 100644
index 0000000..41a540a
--- /dev/null
+++ b/extras/appengine/static/styles.css
@@ -0,0 +1,245 @@
+body {
+ color: #000000;
+ background: #eeeeec;
+  font-family: "Free Sans", Arial, Verdana, sans-serif;
+ font-size: 10pt;
+ margin: 0;
+ padding: 0;
+}
+
+#header {
+ background: url(/static/bgtop.png) top left repeat-x;
+ border-bottom: 3px solid #2e3436;
+}
+
+#header-inner, #main-inner, #footer-inner {
+ width: 70em;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+
+#header-inner h1 {
+ margin: 0;
+ padding: 0;
+ margin-bottom: .2em;
+ font-weight: normal;
+ float: left;
+ font-size: 2em;
+ letter-spacing: .07em;
+}
+
+#header-inner .q {
+ color: #f57900;
+ padding-right: 3px;
+}
+
+#header-inner .q2 {
+ font-family: Georgia, "Times New Roman", serif;
+}
+
+#header-inner h1 a {
+ text-decoration: none;
+ color: #eeeeec;
+}
+
+#header-inner #slogan {
+ float: left;
+ color: #babdb6;
+ font-size: 1.4em;
+ margin-left: 1em;
+ letter-spacing: .18em;
+ padding-top: .2em;
+}
+
+
+#topmenu {
+ color: #729fcf;
+ clear: left;
+ padding-top: .5em;
+ padding-bottom: .5em;
+ font-size: 1.1em;
+}
+
+#topmenu a {
+ color: #eeeeec;
+ text-decoration: none;
+}
+
+#topmenu a:hover {
+ color: #ce5c00;
+}
+
+
+#main {
+ padding: 10px;
+ background: white;
+ line-height: 1.5em;
+ text-align: justify;
+}
+
+#main form ul {
+ margin: 0;
+ padding: 0;
+ list-style-type: none;
+}
+
+#main p, #main ol, #main .example, #main dl {
+ font-size: 12pt;
+ margin-left: 2em;
+}
+
+#main dt {
+ font-weight: bold;
+}
+
+#main li {
+ margin-bottom: .7em;
+}
+
+#main a {
+ color: #f57900;
+}
+
+#main h1, #main h2, #main h3, #main h4 {
+ color: #204a87;
+ font-weight: normal;
+ letter-spacing: .05em;
+}
+
+#main pre, #main code.pre {
+ font-size: 10pt;
+ line-height: 1em;
+ padding: 4px;
+ background-color: #eeeeec;
+ border: 1px solid #babdb6;
+}
+
+#input {
+ width: 50em;
+ float: right;
+ margin-left: 2em;
+}
+
+#options {
+ width: 18em;
+ float: left;
+ color: #2e3436;
+ margin-top: .75em;
+ text-align: left;
+}
+
+#options fieldset {
+ border: 1px solid #dddddd;
+ margin-bottom: .6em;
+ background: url(/static/bgfieldset.png) bottom left repeat-x;
+ -moz-border-radius: 3px;
+}
+
+
+#options input, #options select {
+ border: 1px solid #dddddd;
+}
+
+#options .help {
+ font-size: .9em;
+ color: #888a85;
+ margin-bottom: .6em;
+}
+
+
+#footer {
+ background: url(/static/bgfooter.png) top left repeat-x;
+ padding: 10px;
+ min-height: 80px;
+ border-top: 4px solid #babdb6;
+}
+
+#footer-inner {
+ width: 70em;
+ margin-left: auto;
+ margin-right: auto;
+ color: #888a85;
+}
+
+#footer-inner a {
+ color: #888a85;
+}
+
+#footer-inner a:hover {
+ color: #555753;
+}
+
+.clearfix {
+ clear: both;
+}
+
+.skip {
+ display: none;
+}
+
+textarea {
+ border: 1px solid #cccccc;
+ border-bottom: none;
+ padding: 4px;
+ font-size: 12pt;
+ width: 100%;
+}
+
+textarea:focus {
+ background-color: #eeeeec;
+}
+
+div.grippie {
+ background: url(/static/resize-grip.png) bottom right no-repeat #eeeeec;
+ border-color: #cccccc;
+ border-style: solid;
+ border-width: 0pt 1px 1px;
+ cursor: se-resize;
+ height: 14px;
+ overflow: hidden;
+}
+
+#help {
+ display: none;
+ position: fixed;
+ right: 10%;
+ left: 10%;
+ top: 0;
+ opacity: 0.85;
+ -moz-opacity: 0.85;
+ -khtml-opacity: 0.85;
+ filter: alpha(opacity=85);
+ -moz-border-radius: 0px 0px 10px 10px;
+
+ background: #2e3436;
+ color: white;
+ font-weight: bold;
+
+ padding: 1em;
+ z-index: 1;
+ overflow-x: hidden;
+ overflow-y: auto;
+}
+
+#help .shortcut {
+ color: #f57900;
+ font-weight: bold;
+ width: 20px;
+ display: inline;
+}
+
+.loading {
+ background: url(/static/loading.gif) top left no-repeat;
+}
+
+.dev {
+ color: #cc0000;
+ font-size: .9em;
+  letter-spacing: 1px;
+}
+
+#proc_time {
+ color: #888a85;
+ font-size: .85em;
+} \ No newline at end of file
diff --git a/extras/appengine/templates/about.html b/extras/appengine/templates/about.html
new file mode 100644
index 0000000..3222b97
--- /dev/null
+++ b/extras/appengine/templates/about.html
@@ -0,0 +1,44 @@
+{% extends "master.html" %}
+
+{% block main %}
+<h1>About this Application</h1>
+<p>
+  This application is an online SQL formatting tool.
+</p>
+<p>
+  Basically it's a playground for a Python module that parses and
+  formats SQL statements. Sometimes it's easier to combine the
+  available options and see the resulting output in a web front-end
+  than on the command line ;-)
+</p>
+<p>
+ To get started, enter a SQL statement in the text box on the top,
+ choose some options and click on &quot;Format SQL&quot; (Ctrl+F)
+ to see the result.
+</p>
+<p>
+ <em>Note:</em> The SQL formatter and parser is in an early stage
+ of development. If you're looking for a mature tool, try one of
+ <a href="http://www.google.com/search?q=online+sql+formatter">these</a>.
+</p>
+<h2>Using it from the Command Line</h2>
+<p>
+ There are three ways to use this SQL formatter from the command line:
+</p>
+<ol>
+ <li>Grab the <a href="/source/">sources</a> and use the module in your
+ Python scripts.</li>
+ <li>
+ Write a little script in your favorite language that sends a POST
+ request to this application.<br/>
+ Read the <a href="/api/">API Documentation</a> for more information.
+ </li>
+ <li>Use
+ <a href="/static/lynx_screenshot.png"
+ alt="Lynx Screenshot" target="_blank"
+ title="Screenshot: sqlformat.appspot.com on Lynx">Lynx
+ </a>
+ </li>
+</ol>
+
+{% endblock %}
diff --git a/extras/appengine/templates/api.html b/extras/appengine/templates/api.html
new file mode 100644
index 0000000..b9aaae7
--- /dev/null
+++ b/extras/appengine/templates/api.html
@@ -0,0 +1,50 @@
+{% extends "master.html" %}
+
+{% block main %}
+<h1>API Documentation</h1>
+
+<p>
+ Using the API for this application is pretty simple. Just send a
+ <code>POST</code> request to
+</p>
+<p>
+ <code>http://sqlformat.appspot.com/format/</code>
+</p>
+
+<h2>Options</h2>
+<p>
+ The <code>POST</code> request accepts various options to control
+ formatting. Only the <em>data</em> option is required. All others
+ are optional.
+</p>
+
+<dl>
+ <dt>data</dt>
+ <dd>The SQL statement to format.</dd>
+ <dt>remove_comments</dt>
+ <dd>Set to 1 to remove comments.</dd>
+ <dt>keyword_case</dt>
+ <dd>How to convert keywords. Allowed values are 'lower', 'upper',
+ 'capitalize'.</dd>
+ <dt>identifier_case</dt>
+ <dd>How to convert identifiers. Allowed values are 'lower', 'upper',
+ 'capitalize'.</dd>
+ <dt>n_indents</dt>
+  <dd>An integer indicating the indentation depth.</dd>
+ <dt>right_margin</dt>
+ <dd>An integer indicating the maximum line length.</dd>
+ <dt>output_format</dt>
+  <dd>Translate the statement into another programming language.
+      Allowed values are 'python' and 'php'.</dd>
+</dl>
+
+<h2>Example</h2>
+<p>
+  Here's an example in Python:
+</p>
+{% include "python-client-example.html" %}
+<p>
+ <a href="/static/sqlformat_client_example.py">Download sqlformat_example_client.py</a>
+</p>
+
+{% endblock %}
diff --git a/extras/appengine/templates/index.html b/extras/appengine/templates/index.html
new file mode 100644
index 0000000..57e7ed3
--- /dev/null
+++ b/extras/appengine/templates/index.html
@@ -0,0 +1,107 @@
+{% extends "master.html" %}
+
+{% block main %}
+
+{% if output %}
+ <a href="#output" class="skip">Jump to formatted query</a>
+{% endif %}
+
+<form method="post" action="" id="form_options" enctype="multipart/form-data">
+ <div id="input">
+ {% if form.non_field_errors %}{{form.non_field_errors}}{% endif %}
+ <div>
+ <strong>Type your SQL here:</strong><br />
+ {{form.data}}
+ {% if form.data.errors %}{{form.data.errors}}{% endif %}
+ </div>
+ <div style="margin-top: .5em;">
+ <strong>...or upload a file:</strong>
+ {{form.datafile}}
+ </div>
+ <div id="examples" style="margin-top: .5em;"></div>
+ <div id="actions" style="margin-top: .5em;">
+ <input type="submit" value="Format SQL" id="btn_format" />
+ </div>
+ {% if output %}<a name="output"></a>
+ <div id="response">{{output|safe}}</div>
+ {% else %}
+ <div id="response"></div>
+ {% endif %}
+ <div id="proc_time">
+ {% if proc_time %}Processed in {{proc_time}} seconds.{% endif %}
+ </div>
+ <div style="margin-top: 1em;">
+ <script type="text/javascript">
+ <!--
+ google_ad_client = "pub-8870624642249726";
+ /* 468x60, Erstellt 07.03.09 */
+ google_ad_slot = "9840041509";
+ google_ad_width = 468;
+ google_ad_height = 60;
+ //-->
+ </script>
+ <script type="text/javascript"
+ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
+ </script>
+ </div>
+
+ </div>
+ <div id="options">
+ <h1 class="skip">Options</h1>
+ <fieldset><legend id="general"><strong>General Options</strong></legend>
+ <div id="general_content" class="content">
+ {{form.remove_comments}}
+ <label for="id_remove_comments">{{form.remove_comments.label}}</label>
+ <br />
+ {{form.highlight}}
+ <label for="id_highlight">{{form.highlight.label}}</label>
+ {% if form.highlight.errors %}
+ <ul class="errors">{{form.highlight.errors</ul>
+ {% endif %}
+ </div>
+ </fieldset>
+ <fieldset><legend id="kwcase">
+ <strong>Keywords &amp; Identifiers</strong></legend>
+ <div>
+ {{form.keyword_case.label}}: {{form.keyword_case}}
+ </div>
+ <div>
+ {{form.identifier_case.label}}: {{form.identifier_case}}
+ </div>
+ </fieldset>
+ <fieldset><legend id="indent"><strong>Indentation &amp; Margins</strong>
+ </legend>
+ <div id="indent_content" class="content">
+ <label for="id_n_indents">Indentation: </label>
+ {{form.n_indents}} {{form.n_indents.label}}
+ <div class="help">Empty field means leave indentation unchanged.</div>
+<!--
+ <label for="id_right_margin">Right margin: </label>
+ {{form.right_margin}} {{form.right_margin.label}}
+ <div class="help">Empty field means don't mind right margin.</div>
+-->
+ </div>
+ </fieldset>
+ <fieldset><legend id="output"><strong>Output Format</strong></legend>
+ <label for="id_output_format">Language: </label>
+ {{form.output_format}}
+ </fieldset>
+
+ <div class="dev">This software is in development.</div>
+
+ </div>
+ <div class="clearfix"></div>
+</form>
+
+<script language="javascript">
+var html = '<strong>...or select an example:</strong> ';
+html += '<select onchange="load_example();" id="sel_example">';
+html += '<option value="">-- Choose Example --</option>';
+{% for ex in examples %}
+  html += '<option value="{{ex}}">{{ex}}</option>';
+{% endfor %}
+html += '</select>';
+$('#examples').html(html);
+</script>
+{% endblock %}
+
diff --git a/extras/appengine/templates/master.html b/extras/appengine/templates/master.html
new file mode 100644
index 0000000..4294cf4
--- /dev/null
+++ b/extras/appengine/templates/master.html
@@ -0,0 +1,103 @@
+<html>
+ <head>
+ <title>SQLFormat - Online SQL Formatting Service</title>
+ <meta name="keywords" content="SQL, format, parse, python, beautify" />
+ <link rel="stylesheet" href="/static/pygments.css" />
+ <link rel="stylesheet" href="/static/styles.css" />
+ <script src="http://www.google.com/jsapi"></script>
+ <script>
+ google.load("jquery", "1.2.6");
+ </script>
+ <script src="/static/hotkeys.js"></script>
+ <script type="text/javascript"
+ src="/static/jquery.textarearesizer.compressed.js"></script>
+ <script src="/static/script.js"></script>
+ </head>
+ <body>
+
+ <div id="help">
+ <p>Keyboard Shortcuts</p>
+ <p>
+ <span class="shortcut">H</span> - Show / hide this help window<br/>
+ <span class="shortcut">Ctrl+F</span> - Format SQL and display result<br/>
+ <span class="shortcut">O</span> - Show / hide options<br/>
+ <span class="shortcut">T</span> - Set focus on SQL input<br/>
+ </p>
+ </div>
+
+ <div id="header">
+ <div id="header-inner">
+ {% if user %}<img src="{{userimg}}" border="0" align="right" style="padding-top:.4em;"/>{% endif %}
+ <h1>
+ <a href="/">
+ <span class="q">S<span class="q2">Q</span>L</span>Format
+ </a>
+ </h1>
+ <div id="slogan">Online SQL formatting service</div>
+ <div id="topmenu">
+ <a href="/">Home</a>
+ |
+ <a href="/about/">About</a>
+ |
+ <a href="/source/">Source Code</a>
+ |
+ <a href="/api/">API</a>
+<!--
+ |
+ {% if user %}
+ <a href="{{logout_url}}">Sign out</a>
+ {% else %}
+ <a href="{{login_url}}">Sign in</a>
+ <span style="color: #babdb6;">with your Google account
+ to save preferred settings.</span>
+ {% endif %}
+-->
+ </div>
+ </div>
+ </div>
+
+ <div id="main">
+ <div id="main-inner">
+ {% block main %}MAIN CONTENT GOES HERE{% endblock %}
+ </div>
+ </div>
+
+ <div id="footer">
+ <div id="footer-inner">
+ <div style="float: left; font-size: .85em;">
+ <div>&copy; 2009 Andi Albrecht&nbsp;&nbsp;
+          <code>&lt;albrecht dot andi at gmail&gt;</code>
+ </div>
+ <div>
+ <a href="/">Home</a>
+ |
+ <a href="/about/">About</a>
+ |
+ <a href="/source/">Source Code</a>
+ |
+ <a href="/api/">API</a>
+ |
+ <a href="http://andialbrecht.wordpress.com/">Blog</a>
+ </div>
+ </div>
+ <div style="float: right;">
+ <img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif"
+ alt="Powered by Google App Engine" />
+ </div>
+ <div class="clearfix"></div>
+ </div>
+ </div>
+
+ <script type="text/javascript">
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+ </script>
+ <script type="text/javascript">
+ try {
+ var pageTracker = _gat._getTracker("UA-3535525-2");
+ pageTracker._trackPageview();
+ } catch(err) {}</script>
+ <script>init();</script>
+
+ </body>
+</html>
diff --git a/extras/appengine/templates/python-client-example.html b/extras/appengine/templates/python-client-example.html
new file mode 100644
index 0000000..68bf820
--- /dev/null
+++ b/extras/appengine/templates/python-client-example.html
@@ -0,0 +1,17 @@
+<div class="highlight example"><pre><span class="c">#!/usr/bin/env python</span>
+
+<span class="k">import</span> <span class="nn">urllib</span>
+<span class="k">import</span> <span class="nn">urllib2</span>
+
+<span class="n">payload</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="p">(</span><span class="s">&#39;data&#39;</span><span class="p">,</span> <span class="s">&#39;select * from foo join bar on val1 = val2 where id = 123;&#39;</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;format&#39;</span><span class="p">,</span> <span class="s">&#39;text&#39;</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;keyword_case&#39;</span><span class="p">,</span> <span class="s">&#39;upper&#39;</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;reindent&#39;</span><span class="p">,</span> <span class="bp">True</span><span class="p">),</span>
+ <span class="p">(</span><span class="s">&#39;n_indents&#39;</span><span class="p">,</span> <span class="mf">2</span><span class="p">),</span>
+ <span class="p">)</span>
+
+<span class="n">response</span> <span class="o">=</span> <span class="n">urllib2</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="s">&#39;http://sqlformat.appspot.com/format/&#39;</span><span class="p">,</span>
+ <span class="n">urllib</span><span class="o">.</span><span class="n">urlencode</span><span class="p">(</span><span class="n">payload</span><span class="p">))</span>
+<span class="k">print</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+</pre></div>
diff --git a/extras/appengine/templates/source.html b/extras/appengine/templates/source.html
new file mode 100644
index 0000000..4988f2f
--- /dev/null
+++ b/extras/appengine/templates/source.html
@@ -0,0 +1,56 @@
+{% extends "master.html" %}
+
+{% block main %}
+<div id="response">
+ <h1>Source Code</h1>
+
+ <h2>Python Module</h2>
+  <p>
+    The sources for the SQL parser and formatter module are hosted
+    in a public Git repository. To clone the repository run:
+  </p>
+  <p>
+ <code class="pre">git clone git://github.com/andialbrecht/python-sqlparse.git</code>
+ </p>
+ <p>
+ <a href="http://python-sqlparse.googlecode.com">Visit the project page</a>
+ |
+ <a href="http://gitorious.org/projects/python-sqlparse/repos/mainline/trees/master">Browse the sources online</a>
+ </p>
+ <p>
+ Some relevant parts of the Python module contain code from the
+ <a href="http://pygments.org/">pygments</a> syntax highlighter.
+ The underlying Python module uses a non-validating SQL parser.
+ This approach makes it possible to parse even syntactically incorrect
+ SQL statements.
+ </p>
+
+ <p>
+ Currently the parser module is used by
+ <a href="http://crunchyfrog.googlecode.com/">CrunchyFrog</a> - a
+    database front-end for GNOME.
+ </p>
+
+ <p>
+ The <code>sqlparse</code> module is released under the terms of the
+ <a href="http://www.opensource.org/licenses/bsd-license.php">New BSD License</a>.
+ </p>
+
+ <h2>App Engine Application</h2>
+ <p>
+    The source code for this App Engine application is available in the
+    <code>extras/appengine</code> directory of the Python module's sources
+    (but it's really nothing special ;-).
+ </p>
+
+ <h2>Contributing</h2>
+ <p>
+ Please file bug reports and feature requests on the project site at
+ <a href="http://code.google.com/p/python-sqlparse/issues/entry">http://code.google.com/p/python-sqlparse/issues/entry</a>
+ or if you have code to contribute upload it to
+ <a href="http://codereview.appspot.com">http://codereview.appspot.com</a>
+ and add albrecht.andi@googlemail.com as reviewer.
+ </p>
+
+</div>
+{% endblock %}
diff --git a/extras/sqlformat.png b/extras/sqlformat.png
new file mode 100644
index 0000000..4189bc4
--- /dev/null
+++ b/extras/sqlformat.png
Binary files differ
diff --git a/extras/sqlformat.svg b/extras/sqlformat.svg
new file mode 100644
index 0000000..59e1183
--- /dev/null
+++ b/extras/sqlformat.svg
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:xlink="http://www.w3.org/1999/xlink"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="120"
+ height="60"
+ id="svg2"
+ sodipodi:version="0.32"
+ inkscape:version="0.46"
+ version="1.0"
+ sodipodi:docname="sqlformat.svg"
+ inkscape:output_extension="org.inkscape.output.svg.inkscape">
+ <defs
+ id="defs4">
+ <linearGradient
+ id="linearGradient3163">
+ <stop
+ id="stop3165"
+ offset="0"
+ style="stop-color:#888a85;stop-opacity:1;" />
+ <stop
+ id="stop3167"
+ offset="1"
+ style="stop-color:#565854;stop-opacity:1;" />
+ </linearGradient>
+ <inkscape:perspective
+ sodipodi:type="inkscape:persp3d"
+ inkscape:vp_x="0 : 526.18109 : 1"
+ inkscape:vp_y="0 : 1000 : 0"
+ inkscape:vp_z="744.09448 : 526.18109 : 1"
+ inkscape:persp3d-origin="372.04724 : 350.78739 : 1"
+ id="perspective10" />
+ <linearGradient
+ inkscape:collect="always"
+ xlink:href="#linearGradient3163"
+ id="linearGradient3161"
+ x1="57.357143"
+ y1="1.6964277"
+ x2="57.464291"
+ y2="58.125"
+ gradientUnits="userSpaceOnUse" />
+ </defs>
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="5.6"
+ inkscape:cx="58.539628"
+ inkscape:cy="24.488765"
+ inkscape:document-units="px"
+ inkscape:current-layer="layer1"
+ showgrid="false"
+ inkscape:window-width="1436"
+ inkscape:window-height="720"
+ inkscape:window-x="209"
+ inkscape:window-y="171" />
+ <metadata
+ id="metadata7">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Ebene 1"
+ inkscape:groupmode="layer"
+ id="layer1">
+ <rect
+ style="fill:url(#linearGradient3161);fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.5103448;fill-opacity:1"
+ id="rect2383"
+ width="118.21429"
+ height="58.035713"
+ x="1.0714285"
+ y="1.0714284"
+ ry="3.5714285" />
+ <text
+ xml:space="preserve"
+ style="font-size:41.68727112px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#f57900;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:FreeSans;-inkscape-font-specification:FreeSans"
+ x="3.8212376"
+ y="38.731842"
+ id="text3169"
+ transform="scale(1.3303696,0.7516708)"><tspan
+ sodipodi:role="line"
+ id="tspan3171"
+ x="3.8212376"
+ y="38.731842">S<tspan
+ style="font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Georgia;-inkscape-font-specification:Georgia Italic"
+ id="tspan3173">Q</tspan>L</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:29.59413719px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#babdb6;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:FreeSans;-inkscape-font-specification:FreeSans"
+ x="5.0176215"
+ y="60.359047"
+ id="text3177"
+ transform="scale(1.1410205,0.8764084)"><tspan
+ sodipodi:role="line"
+ id="tspan3179"
+ x="5.0176215"
+ y="60.359047">Format</tspan></text>
+ </g>
+</svg>
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..6ed5178
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This setup script is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+from distutils.core import setup
+
+setup(
+ name='sqlparse',
+ version='0.1.0',
+    packages=['sqlparse', 'sqlparse.engine'],
+ description='Provides simple SQL parsing',
+ author='Andi Albrecht',
+ author_email='albrecht.andi@gmail.com',
+ #long_description=release.long_description,
+ license='BSD',
+ url='http://python-sqlparse.googlecode.com/',
+ classifiers = [
+ 'Development Status :: 3 - Alpha',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: BSD License',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python',
+ 'Topic :: Database',
+ 'Topic :: Software Development'
+ ],
+ scripts=['bin/sqlformat'],
+)
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
new file mode 100644
index 0000000..01b3bd8
--- /dev/null
+++ b/sqlparse/__init__.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""Parse SQL statements."""
+
+__version__ = '0.1.0'
+
+import logging
+import os
+
+
+if 'SQLPARSE_DEBUG' in os.environ:
+ logging.basicConfig(level=logging.DEBUG)
+
+
+class SQLParseError(Exception):
+ """Base class for exceptions in this module."""
+
+
+# Setup namespace
+from sqlparse import engine
+from sqlparse import filters
+from sqlparse import formatter
+
+
+def parse(sql):
+ """Parse sql and return a list of statements.
+
+ *sql* is a single string containting one or more SQL statements.
+
+ The returned :class:`~sqlparse.parser.Statement` are fully analyzed.
+
+ Returns a list of :class:`~sqlparse.parser.Statement` instances.
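+
+    A minimal usage sketch::
+
+        import sqlparse
+        stmts = sqlparse.parse('select * from foo; select 1;')
+        # 'stmts' holds two Statement instances, one per statement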
+ """
+ stack = engine.FilterStack()
+ stack.full_analyze()
+ return tuple(stack.run(sql))
+
+
+def format(sql, **options):
+ """Format *sql* according to *options*.
+
+    The statements are parsed as with :meth:`parse`, formatted
+    according to *options* and returned as a single string.
+
+    Available options are documented in the :mod:`~sqlparse.formatter`
+    module.
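+
+    A short sketch (the exact output depends on the installed filters)::
+
+        import sqlparse
+        sqlparse.format('select * from foo', keyword_case='upper')
+        # roughly u'SELECT * FROM foo'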
+ """
+ stack = engine.FilterStack()
+ options = formatter.validate_options(options)
+ stack = formatter.build_filter_stack(stack, options)
+ stack.postprocess.append(filters.SerializerUnicode())
+ return ''.join(stack.run(sql))
+
+
+def split(sql):
+ """Split *sql* into separate statements.
+
+ Returns a list of strings.
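+
+    Example::
+
+        sqlparse.split('select * from foo; select 1;')
+        # -> two strings, one per statement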
+ """
+ stack = engine.FilterStack()
+ stack.split_statements = True
+ return [unicode(stmt) for stmt in stack.run(sql)]
+
diff --git a/sqlparse/dialects.py b/sqlparse/dialects.py
new file mode 100644
index 0000000..cabe503
--- /dev/null
+++ b/sqlparse/dialects.py
@@ -0,0 +1,88 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""This module contains classes that represent SQL dialects."""
+
+from sqlparse.tokens import *
+
+
+class Dialect(object):
+ """Base class for SQL dialect implementations."""
+
+ def handle_token(self, tokentype, text):
+ """Handle a token.
+
+ Arguments:
+ tokentype: A token type.
+ text: Text representation of the token.
+
+ Returns:
+ A tuple of three items: tokentype, text, splitlevel.
+            splitlevel is either -1, 0 or 1 and describes a change of the nesting level.
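+
+        For example, DECLARE opens a block in the default dialect, so
+        ``handle_token(Keyword, 'DECLARE')`` returns
+        ``(Keyword, 'DECLARE', 1)``; END is returned with splitlevel -1.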
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """Reset Dialect state."""
+ pass
+
+
+class DefaultDialect(Dialect):
+
+ def __init__(self):
+ self._in_declare = False
+ self._stmt_type = None
+
+ def get_statement_type(self):
+ return self._stmt_type
+
+ def set_statement_type(self, type_):
+ self._stmt_type = type_
+
+ def handle_token(self, tokentype, text):
+ if not tokentype == Keyword:
+ return tokentype, text, 0
+ unified = text.upper()
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return tokentype, text, 1
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return tokentype, text, 0
+ return tokentype, text, 0
+ if unified == 'END':
+ return tokentype, text, -1
+ # TODO: Use a constant here
+ if unified in ('IF', 'FOR') and self._stmt_type == 6:
+ return tokentype, text, 1
+ return tokentype, text, 0
+
+ def reset(self):
+ self._in_declare = False
+
+
+class PSQLDialect(DefaultDialect):
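+    """Dialect for PostgreSQL.
+
+    Tracks dollar-quoted blocks (e.g. ``$$ ... $$``) so that everything
+    between the markers is treated as one nested block.
+    """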
+
+ def __init__(self):
+ super(PSQLDialect, self).__init__()
+ self._in_dbldollar = False
+
+ def handle_token(self, tokentype, text):
+ if (tokentype == Name.Builtin
+ and text.startswith('$') and text.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return tokentype, text, -1
+ else:
+ self._in_dbldollar = True
+ return tokentype, text, 1
+ elif self._in_dbldollar:
+ return tokentype, text, 0
+ else:
+ return super(PSQLDialect, self).handle_token(tokentype, text)
+
+ def reset(self):
+ self._dollar_started = False
+ self._in_dbldollar = False
diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py
new file mode 100644
index 0000000..5cac528
--- /dev/null
+++ b/sqlparse/engine/__init__.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""filter"""
+
+import logging
+import re
+
+from sqlparse import lexer, SQLParseError
+from sqlparse.engine import grouping
+from sqlparse.engine.filter import StatementFilter
+
+# XXX remove this when cleanup is complete
+Filter = object
+
+
+class FilterStack(object):
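+    """Runs a token stream through a stack of filters.
+
+    Filters are applied in three phases: ``preprocess`` works on the
+    raw token stream, ``stmtprocess`` on each split statement, and
+    ``postprocess`` on the flattened statements before output.
+    """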
+
+ def __init__(self):
+ self.preprocess = []
+ self.stmtprocess = []
+ self.postprocess = []
+ self.split_statements = False
+ self._grouping = False
+
+ def _flatten(self, stream):
+ for token in stream:
+ if token.is_group():
+ for t in self._flatten(token.tokens):
+ yield t
+ else:
+ yield token
+
+ def enable_grouping(self):
+ self._grouping = True
+
+ def full_analyze(self):
+ self.enable_grouping()
+
+ def run(self, sql):
+ stream = lexer.tokenize(sql)
+ # Process token stream
+ if self.preprocess:
+ for filter_ in self.preprocess:
+ stream = filter_.process(self, stream)
+
+ if (self.stmtprocess or self.postprocess or self.split_statements
+ or self._grouping):
+ splitter = StatementFilter()
+ stream = splitter.process(self, stream)
+
+ if self._grouping:
+ def _group(stream):
+ for stmt in stream:
+ grouping.group(stmt)
+ yield stmt
+ stream = _group(stream)
+
+ if self.stmtprocess:
+ def _run(stream):
+ ret = []
+ for stmt in stream:
+ for filter_ in self.stmtprocess:
+ filter_.process(self, stmt)
+ ret.append(stmt)
+ return ret
+ stream = _run(stream)
+
+ if self.postprocess:
+ def _run(stream):
+ for stmt in stream:
+ stmt.tokens = list(self._flatten(stmt.tokens))
+ for filter_ in self.postprocess:
+ stmt = filter_.process(self, stmt)
+ yield stmt
+ stream = _run(stream)
+
+ return stream
+
diff --git a/sqlparse/engine/_grouping.py b/sqlparse/engine/_grouping.py
new file mode 100644
index 0000000..512c590
--- /dev/null
+++ b/sqlparse/engine/_grouping.py
@@ -0,0 +1,499 @@
+# -*- coding: utf-8 -*-
+
+import re
+import types
+
+from sqlparse.engine.filter import TokenFilter
+from sqlparse import tokens as T
+
+class _Base(object):
+
+    __slots__ = ()  # listing method names here would conflict with the definitions below
+
+ def __unicode__(self):
+        return 'Unknown _Base object'
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def __repr__(self):
+ raw = unicode(self)
+ if len(raw) > 7:
+ short = raw[:6]+u'...'
+ else:
+ short = raw
+        short = re.sub(r'\s+', ' ', short)
+ return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+ short, id(self))
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def to_unicode(self):
+ return unicode(self)
+
+ def to_str(self):
+ return str(self)
+
+
+class Token(_Base):
+
+ __slots__ = ('value', 'ttype')
+
+ def __init__(self, ttype, value):
+ self.value = value
+ self.ttype = ttype
+
+ def __unicode__(self):
+ return self.value
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def match(self, ttype, values):
+ if self.ttype is not ttype:
+ return False
+ if isinstance(values, basestring):
+ values = [values]
+ if self.ttype is T.Keyword:
+ return self.value.upper() in [v.upper() for v in values]
+ else:
+ return self.value in values
+
+ def is_group(self):
+ return False
+
+ def is_whitespace(self):
+ return self.ttype and self.ttype is T.Whitespace
+
+
+class _Group(Token):
+
+    __slots__ = ('_tokens',)  # 'tokens' is a property; the data lives in _tokens
+
+ def __init__(self, tokens=None):
+ super(_Group, self).__init__(None, None)
+ if tokens is None:
+ tokens = []
+ self._tokens = tokens
+
+ def _set_tokens(self, tokens):
+ self._tokens = tokens
+ def _get_tokens(self):
+ if type(self._tokens) is not types.TupleType:
+ self._tokens = tuple(self._tokens)
+ return self._tokens
+ tokens = property(fget=_get_tokens, fset=_set_tokens)
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def _pprint_tree(self, depth=0):
+ """Pretty-print the object tree."""
+ indent = ' '*(depth*2)
+ for token in self.tokens:
+ print '%s%r' % (indent, token)
+ if token.is_group():
+ token._pprint_tree(depth+1)
+
+ def __unicode__(self):
+ return u''.join(unicode(t) for t in self.tokens)
+
+ @property
+ def subgroups(self):
+ #return [x for x in self.tokens if isinstance(x, _Group)]
+ for item in self.tokens:
+ if item.is_group():
+ yield item
+
+ def is_group(self):
+ return True
+
+
+class Statement(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Parenthesis(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Where(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class CommentMulti(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Identifier(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class TypeCast(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+ @property
+ def casted_object(self):
+ return self.tokens[0]
+
+ @property
+ def casted_type(self):
+ return self.tokens[-1]
+
+
+class Alias(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+ @property
+ def aliased_object(self):
+ return self.tokens[0]
+
+ @property
+ def alias(self):
+ return self.tokens[-1]
+
+
+
+
+# - Filter
+
+class StatementFilter(TokenFilter):
+
+ def __init__(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _reset(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _change_splitlevel(self, ttype, value):
+ # PostgreSQL
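+        # Dollar-quoted blocks ($$ ... $$) may contain semicolons that
+        # must not split the statement, so they change the split level.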
+ if (ttype == T.Name.Builtin
+ and value.startswith('$') and value.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return -1
+ else:
+ self._in_dbldollar = True
+ return 1
+ elif self._in_dbldollar:
+ return 0
+
+ # ANSI
+ if ttype is not T.Keyword:
+ return 0
+
+ unified = value.upper()
+
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return 0
+ return 0
+
+ if unified == 'END':
+ return -1
+
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+
+ if unified in ('IF', 'FOR') and self._is_create:
+ return 1
+
+ # Default
+ return 0
+
+ def process(self, stack, stream):
+ splitlevel = 0
+ stmt = None
+ consume_ws = False
+ stmt_tokens = []
+ for ttype, value in stream:
+ # Before appending the token
+ if (consume_ws and ttype is not T.Whitespace
+ and ttype is not T.Comment.Single):
+ consume_ws = False
+ stmt.tokens = stmt_tokens
+ yield stmt
+ self._reset()
+ stmt = None
+ splitlevel = 0
+ if stmt is None:
+ stmt = Statement()
+ stmt_tokens = []
+ splitlevel += self._change_splitlevel(ttype, value)
+ # Append the token
+ stmt_tokens.append(Token(ttype, value))
+ # After appending the token
+ if (not splitlevel and ttype is T.Punctuation
+ and value == ';'):
+ consume_ws = True
+ if stmt is not None:
+ stmt.tokens = stmt_tokens
+ yield stmt
+
+
+class GroupFilter(object):
+
+ def process(self, stream):
+ pass
+
+
+class GroupParenthesis(GroupFilter):
+ """Group parenthesis groups."""
+
+ def _finish_group(self, group):
+ start = group[0]
+ end = group[-1]
+ tokens = list(self._process(group[1:-1]))
+ return [start]+tokens+[end]
+
+ def _process(self, stream):
+ group = None
+ depth = 0
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.match(T.Punctuation, '('):
+ if depth == 0:
+ group = []
+ depth += 1
+ if group is not None:
+ group.append(token)
+ if token.match(T.Punctuation, ')'):
+ depth -= 1
+ if depth == 0:
+ yield Parenthesis(self._finish_group(group))
+ group = None
+ continue
+ if group is None:
+ yield token
+
+ def process(self, group):
+ if not isinstance(group, Parenthesis):
+ group.tokens = self._process(group.tokens)
+
+
+class GroupWhere(GroupFilter):
+
+ def _process(self, stream):
+ group = None
+ depth = 0
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.match(T.Keyword, 'WHERE'):
+ if depth == 0:
+ group = []
+ depth += 1
+ # Process conditions here? E.g. "A =|!=|in|is|... B"...
+ elif (token.ttype is T.Keyword
+ and token.value.upper() in ('ORDER', 'GROUP',
+ 'LIMIT', 'UNION')):
+ depth -= 1
+ if depth == 0:
+ yield Where(group)
+ group = None
+ if depth < 0:
+ depth = 0
+ if group is not None:
+ group.append(token)
+ else:
+ yield token
+ if group is not None:
+ yield Where(group)
+
+ def process(self, group):
+ if not isinstance(group, Where):
+ group.tokens = self._process(group.tokens)
+
+
+class GroupMultiComments(GroupFilter):
+ """Groups Comment.Multiline and adds trailing whitespace up to first lb."""
+
+ def _process(self, stream):
+ new_tokens = []
+ grp = None
+ consume_ws = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.ttype is T.Comment.Multiline:
+ if grp is None:
+ grp = []
+ consume_ws = True
+ grp.append(token)
+ elif consume_ws and token.ttype is not T.Whitespace:
+ yield CommentMulti(grp)
+ grp = None
+ consume_ws = False
+ yield token
+ elif consume_ws:
+ lines = token.value.splitlines(True)
+ grp.append(Token(T.Whitespace, lines[0]))
+ if lines[0].endswith('\n'):
+ yield CommentMulti(grp)
+ grp = None
+ consume_ws = False
+ if lines[1:]:
+ yield Token(T.Whitespace, ''.join(lines[1:]))
+ else:
+ yield token
+
+ def process(self, group):
+ if not isinstance(group, CommentMulti):
+ group.tokens = self._process(group.tokens)
+
+
+## class GroupIdentifier(GroupFilter):
+
+## def _process(self, stream):
+## buff = []
+## expect_dot = False
+## for token in stream:
+## if token.is_group():
+## token.tokens = self._process(token.tokens)
+## if (token.ttype is T.String.Symbol or token.ttype is T.Name
+## and not expect_dot):
+## buff.append(token)
+## expect_dot = True
+## elif expect_dot and token.match(T.Punctuation, '.'):
+## buff.append(token)
+## expect_dot = False
+## else:
+## if expect_dot == False:
+## # something's wrong, it ends with a dot...
+## while buff:
+## yield buff.pop(0)
+## expect_dot = False
+## elif buff:
+## idt = Identifier()
+## idt.tokens = buff
+## yield idt
+## buff = []
+## yield token
+## if buff and expect_dot:
+## idt = Identifier()
+## idt.tokens = buff
+## yield idt
+## buff = []
+## while buff:
+## yield buff.pop(0)
+
+## def process(self, group):
+## if not isinstance(group, Identifier):
+## group.tokens = self._process(group.tokens)
+
+
+class AddTypeCastFilter(GroupFilter):
+
+ def _process(self, stream):
+ buff = []
+ expect_colon = False
+ has_colons = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if ((isinstance(token, Parenthesis)
+ or isinstance(token, Identifier))
+ and not expect_colon):
+ buff.append(token)
+ expect_colon = True
+ elif expect_colon and token.match(T.Punctuation, ':'):
+ buff.append(token)
+ has_colons = True
+ elif (expect_colon
+ and (token.ttype in T.Name
+ or isinstance(token, Identifier))
+ ):
+ if not has_colons:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ else:
+ buff.append(token)
+ grp = TypeCast()
+ grp.tokens = buff
+ buff = []
+ yield grp
+                    expect_colon = has_colons = False
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ while buff:
+ yield buff.pop(0)
+
+ def process(self, group):
+ if not isinstance(group, TypeCast):
+ group.tokens = self._process(group.tokens)
+
+
+class AddAliasFilter(GroupFilter):
+
+ def _process(self, stream):
+ buff = []
+ search_alias = False
+ lazy = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if search_alias and (isinstance(token, Identifier)
+ or token.ttype in (T.Name,
+ T.String.Symbol)
+ or (lazy and not token.is_whitespace())):
+ buff.append(token)
+ search_alias = lazy = False
+ grp = Alias()
+ grp.tokens = buff
+ buff = []
+ yield grp
+ elif (isinstance(token, (Identifier, TypeCast))
+ or token.ttype in (T.Name, T.String.Symbol)):
+ buff.append(token)
+ search_alias = True
+ elif search_alias and (token.is_whitespace()
+ or token.match(T.Keyword, 'as')):
+ buff.append(token)
+ if token.match(T.Keyword, 'as'):
+ lazy = True
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ search_alias = False
+ while buff:
+ yield buff.pop(0)
+
+ def process(self, group):
+ if not isinstance(group, Alias):
+ group.tokens = self._process(group.tokens)
+
+
+GROUP_FILTER = (GroupParenthesis(),
+                GroupMultiComments(),
+                GroupWhere(),
+                # GroupIdentifier is commented out above; instantiating
+                # it here would raise a NameError on import.
+                AddTypeCastFilter(),
+                AddAliasFilter(),
+                )
+
+def group_tokens(group):
+ def _materialize(g):
+ if type(g.tokens) is not types.TupleType:
+ g.tokens = tuple(g.tokens)
+ for sg in g.subgroups:
+ _materialize(sg)
+ for groupfilter in GROUP_FILTER:
+ groupfilter.process(group)
+# _materialize(group)
+# group.tokens = tuple(group.tokens)
+# for subgroup in group.subgroups:
+# group_tokens(subgroup)
diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py
new file mode 100644
index 0000000..146690c
--- /dev/null
+++ b/sqlparse/engine/filter.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+from sqlparse import tokens as T
+from sqlparse.engine.grouping import Statement, Token
+
+
+class TokenFilter(object):
+
+ def __init__(self, **options):
+ self.options = options
+
+ def process(self, stack, stream):
+ """Process token stream."""
+ raise NotImplementedError
+
+
+class StatementFilter(TokenFilter):
+
+ def __init__(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _reset(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _change_splitlevel(self, ttype, value):
+ # PostgreSQL
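+        # Dollar-quoted blocks ($$ ... $$) may contain semicolons that
+        # must not split the statement, so they change the split level.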
+ if (ttype == T.Name.Builtin
+ and value.startswith('$') and value.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return -1
+ else:
+ self._in_dbldollar = True
+ return 1
+ elif self._in_dbldollar:
+ return 0
+
+ # ANSI
+ if ttype is not T.Keyword:
+ return 0
+
+ unified = value.upper()
+
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return 0
+ return 0
+
+ if unified == 'END':
+            # Should this respect a preceding BEGIN?
+ # In CASE ... WHEN ... END this results in a split level -1.
+ return -1
+
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+
+ if unified in ('IF', 'FOR') and self._is_create:
+ return 1
+
+ # Default
+ return 0
+
+ def process(self, stack, stream):
+ splitlevel = 0
+ stmt = None
+ consume_ws = False
+ stmt_tokens = []
+ for ttype, value in stream:
+ # Before appending the token
+ if (consume_ws and ttype is not T.Whitespace
+ and ttype is not T.Comment.Single):
+ consume_ws = False
+ stmt.tokens = stmt_tokens
+ yield stmt
+ self._reset()
+ stmt = None
+ splitlevel = 0
+ if stmt is None:
+ stmt = Statement()
+ stmt_tokens = []
+ splitlevel += self._change_splitlevel(ttype, value)
+ # Append the token
+ stmt_tokens.append(Token(ttype, value))
+ # After appending the token
+ if (splitlevel <= 0 and ttype is T.Punctuation
+ and value == ';'):
+ consume_ws = True
+ if stmt is not None:
+ stmt.tokens = stmt_tokens
+ yield stmt
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
new file mode 100644
index 0000000..433f539
--- /dev/null
+++ b/sqlparse/engine/grouping.py
@@ -0,0 +1,537 @@
+# -*- coding: utf-8 -*-
+
+import itertools
+import re
+import types
+
+from sqlparse import tokens as T
+
+
+class Token(object):
+
+ __slots__ = ('value', 'ttype')
+
+ def __init__(self, ttype, value):
+ self.value = value
+ self.ttype = ttype
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def __repr__(self):
+ short = self._get_repr_value()
+ return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+ short, id(self))
+
+ def __unicode__(self):
+ return self.value
+
+ def to_unicode(self):
+ return unicode(self)
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def _get_repr_value(self):
+ raw = unicode(self)
+ if len(raw) > 7:
+ short = raw[:6]+u'...'
+ else:
+ short = raw
+        return re.sub(r'\s+', ' ', short)
+
+ def match(self, ttype, values, regex=False):
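+        """Return True if the token matches *ttype* and *values*.
+
+        *values* may be None (matches on type only), a string or a list
+        of strings; if *regex* is set the values are used as patterns.
+        Keyword values are compared case-insensitively.
+        """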
+ if self.ttype is not ttype:
+ return False
+ if values is None:
+ return self.ttype is ttype
+ if isinstance(values, basestring):
+ values = [values]
+ if regex:
+ if self.ttype is T.Keyword:
+ values = [re.compile(v, re.IGNORECASE) for v in values]
+ else:
+ values = [re.compile(v) for v in values]
+ for pattern in values:
+ if pattern.search(self.value):
+ return True
+ return False
+ else:
+ if self.ttype is T.Keyword:
+ return self.value.upper() in [v.upper() for v in values]
+ else:
+ return self.value in values
+
+ def is_group(self):
+ return False
+
+ def is_whitespace(self):
+ return self.ttype and self.ttype in T.Whitespace
+
+
+class TokenList(Token):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def __init__(self, tokens=None):
+ if tokens is None:
+ tokens = []
+ self.tokens = tokens
+ Token.__init__(self, None, None)
+
+ def __unicode__(self):
+ return ''.join(unicode(x) for x in self.flatten())
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def _pprint_tree(self, max_depth=None, depth=0):
+ """Pretty-print the object tree."""
+ indent = ' '*(depth*2)
+ for token in self.tokens:
+            pre = ' | '
+ print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(),
+ token._get_repr_value())
+ if (token.is_group() and max_depth is not None
+ and depth < max_depth):
+ token._pprint_tree(max_depth, depth+1)
+
+ def flatten(self):
+ for token in self.tokens:
+ if isinstance(token, TokenList):
+ for item in token.flatten():
+ yield item
+ else:
+ yield token
+
+ def is_group(self):
+ return True
+
+ def get_sublists(self):
+ return [x for x in self.tokens if isinstance(x, TokenList)]
+
+ def token_first(self, ignore_whitespace=True):
+ for token in self.tokens:
+ if ignore_whitespace and token.is_whitespace():
+ continue
+ return token
+ return None
+
+ def token_next_by_instance(self, idx, clss):
+        if not isinstance(clss, (types.ListType, types.TupleType)):
+            clss = (clss,)
+        clss = tuple(clss)
+ for token in self.tokens[idx:]:
+ if isinstance(token, clss):
+ return token
+ return None
+
+ def token_next_by_type(self, idx, ttypes):
+ if not isinstance(ttypes, (types.TupleType, types.ListType)):
+ ttypes = [ttypes]
+ for token in self.tokens[idx:]:
+ if token.ttype in ttypes:
+ return token
+ return None
+
+ def token_next_match(self, idx, ttype, value, regex=False):
+ if type(idx) != types.IntType:
+ idx = self.token_index(idx)
+ for token in self.tokens[idx:]:
+ if token.match(ttype, value, regex):
+ return token
+ return None
+
+ def token_not_matching(self, idx, funcs):
+ for token in self.tokens[idx:]:
+ passed = False
+ for func in funcs:
+ if func(token):
+ passed = True
+ break
+ if not passed:
+ return token
+ return None
+
+ def token_prev(self, idx, skip_ws=True):
+ while idx != 0:
+ idx -= 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_next(self, idx, skip_ws=True):
+ while idx < len(self.tokens)-1:
+ idx += 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_index(self, token):
+ """Return list index of token."""
+ return self.tokens.index(token)
+
+ def tokens_between(self, start, end, exclude_end=False):
+ """Return all tokens between (and including) start and end."""
+ if exclude_end:
+ offset = 0
+ else:
+ offset = 1
+ return self.tokens[self.token_index(start):self.token_index(end)+offset]
+
+ def group_tokens(self, grp_cls, tokens):
+ """Replace tokens by instance of grp_cls."""
+ idx = self.token_index(tokens[0])
+ for t in tokens:
+ self.tokens.remove(t)
+ grp = grp_cls(tokens)
+ self.tokens.insert(idx, grp)
+ return grp
+
+ def insert_before(self, where, token):
+ self.tokens.insert(self.token_index(where), token)
+
+
+class Statement(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_type(self):
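+        """Return the statement type (e.g. 'SELECT' or 'CREATE'), or
+        'UNKNOWN' if the first keyword is neither DML nor DDL."""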
+        first_token = self.token_first()
+        if first_token is None:
+            return 'UNKNOWN'
+        if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ return first_token.value.upper()
+ else:
+ return 'UNKNOWN'
+
+
+class Identifier(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def has_alias(self):
+ return self.get_alias() is not None
+
+ def get_alias(self):
+ kw = self.token_next_match(0, T.Keyword, 'AS')
+ if kw is not None:
+ alias = self.token_next(self.token_index(kw))
+ if alias is None:
+ return None
+ else:
+ next_ = self.token_next(0)
+ if next_ is None or not isinstance(next_, Identifier):
+ return None
+ alias = next_
+ if isinstance(alias, Identifier):
+ return alias.get_name()
+ else:
+ return alias.to_unicode()
+
+ def get_name(self):
+ alias = self.get_alias()
+ if alias is not None:
+ return alias
+ return self.get_real_name()
+
+    def get_real_name(self):
+        token = self.token_next_by_type(0, T.Name)
+        return token.value if token is not None else None
+
+ def get_typecast(self):
+ marker = self.token_next_match(0, T.Punctuation, '::')
+ if marker is None:
+ return None
+ next_ = self.token_next(self.token_index(marker), False)
+ if next_ is None:
+ return None
+ return next_.to_unicode()
+
+
+class IdentifierList(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_identifiers(self):
+ return [x for x in self.tokens if isinstance(x, Identifier)]
+
+
+class Parenthesis(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Assignment(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class If(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class For(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class Comparison(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+class Comment(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class Where(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Case(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_cases(self):
+ """Returns a list of 2-tuples (condition, value).
+
+ If an ELSE exists condition is None.
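+
+        Note that the WHEN/THEN/ELSE keyword tokens themselves end up in
+        the returned lists. Roughly (tokens shown by their SQL text)::
+
+            CASE WHEN x > 0 THEN 'pos' ELSE 'neg' END
+            -> [(['WHEN', 'x > 0'], ['THEN', "'pos'"]),
+                (None, ['ELSE', "'neg'"])]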
+ """
+ ret = []
+ in_condition = in_value = False
+ for token in self.tokens:
+ if token.match(T.Keyword, 'WHEN'):
+ ret.append(([], []))
+ in_condition = True
+ in_value = False
+ elif token.match(T.Keyword, 'ELSE'):
+ ret.append((None, []))
+ in_condition = False
+ in_value = True
+ elif token.match(T.Keyword, 'THEN'):
+ in_condition = False
+ in_value = True
+ elif token.match(T.Keyword, 'END'):
+ in_condition = False
+ in_value = False
+ if in_condition:
+ ret[-1][0].append(token)
+ elif in_value:
+ ret[-1][1].append(token)
+ return ret
+
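+# Helper for grouping "<left> <marker> <right>" constructs (e.g. "x AS y",
+# "x := y"): the marker and right-hand tokens are attached to the token on
+# the left, which is wrapped in *cls* if it isn't an instance already.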
+def _group_left_right(tlist, ttype, value, cls,
+ check_right=lambda t: True,
+ include_semicolon=False):
+# [_group_left_right(sgroup, ttype, value, cls, check_right,
+# include_semicolon) for sgroup in tlist.get_sublists()
+# if not isinstance(sgroup, cls)]
+ idx = 0
+ token = tlist.token_next_match(idx, ttype, value)
+ while token:
+ right = tlist.token_next(tlist.token_index(token))
+ left = tlist.token_prev(tlist.token_index(token))
+ if (right is None or not check_right(right)
+ or left is None):
+ token = tlist.token_next_match(tlist.token_index(token)+1,
+ ttype, value)
+ else:
+ if include_semicolon:
+ right = tlist.token_next_match(tlist.token_index(right),
+ T.Punctuation, ';')
+ tokens = tlist.tokens_between(left, right)[1:]
+ if not isinstance(left, cls):
+ new = cls([left])
+ new_idx = tlist.token_index(left)
+ tlist.tokens.remove(left)
+ tlist.tokens.insert(new_idx, new)
+ left = new
+ left.tokens.extend(tokens)
+ for t in tokens:
+ tlist.tokens.remove(t)
+ token = tlist.token_next_match(tlist.token_index(left)+1,
+ ttype, value)
+
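+# Helper for grouping everything from a start token (e.g. "(") up to its
+# matching end token (e.g. ")") into *cls*. A depth counter makes sure
+# nested start/end pairs don't close the group too early.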
+def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
+ cls, include_semicolon=False, recurse=False):
+ def _find_matching(i, tl, stt, sva, ett, eva):
+ depth = 1
+ for t in tl.tokens[i:]:
+ if t.match(stt, sva):
+ depth += 1
+ elif t.match(ett, eva):
+ depth -= 1
+ if depth == 1:
+ return t
+ return None
+ [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
+ cls, include_semicolon) for sgroup in tlist.get_sublists()
+ if recurse]
+ if isinstance(tlist, cls):
+ idx = 1
+ else:
+ idx = 0
+ token = tlist.token_next_match(idx, start_ttype, start_value)
+ while token:
+ tidx = tlist.token_index(token)
+ end = _find_matching(tidx, tlist, start_ttype, start_value,
+ end_ttype, end_value)
+ if end is None:
+ idx = tidx+1
+ else:
+ if include_semicolon:
+ next_ = tlist.token_next(tlist.token_index(end))
+ if next_ and next_.match(T.Punctuation, ';'):
+ end = next_
+ group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
+ _group_matching(group, start_ttype, start_value,
+ end_ttype, end_value, cls, include_semicolon)
+ idx = tlist.token_index(group)+1
+ token = tlist.token_next_match(idx, start_ttype, start_value)
+
+def group_if(tlist):
+ _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True)
+
+def group_for(tlist):
+ _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True)
+
+def group_as(tlist):
+ _group_left_right(tlist, T.Keyword, 'AS', Identifier)
+
+def group_assignment(tlist):
+ _group_left_right(tlist, T.Assignment, ':=', Assignment,
+ include_semicolon=True)
+
+def group_comparison(tlist):
+    _group_left_right(tlist, T.Operator, None, Comparison)
+
+
+def group_case(tlist):
+ _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case, True)
+
+
+def group_identifier(tlist):
+ def _consume_cycle(tl, i):
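+        # Alternately expect a dot and another name or quoted symbol
+        # (e.g. "schema"."table"), starting right after the initial name;
+        # the first token breaking the pattern ends the identifier.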
+ x = itertools.cycle((lambda y: y.match(T.Punctuation, '.'),
+ lambda y: y.ttype in (T.String.Symbol, T.Name)))
+ for t in tl.tokens[i:]:
+ if x.next()(t):
+ yield t
+ else:
+ raise StopIteration
+
+ # bottom up approach: group subgroups first
+ [group_identifier(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Identifier)]
+
+ # real processing
+ idx = 0
+ token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+ while token:
+ identifier_tokens = [token]+list(
+ _consume_cycle(tlist,
+ tlist.token_index(token)+1))
+ group = tlist.group_tokens(Identifier, identifier_tokens)
+ idx = tlist.token_index(group)+1
+ token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+
+
+def group_identifier_list(tlist):
+ [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, IdentifierList)]
+ idx = 0
+ token = tlist.token_next_by_instance(idx, Identifier)
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_not_matching(tidx+1,
+ [lambda t: isinstance(t, Identifier),
+ lambda t: t.is_whitespace(),
+ lambda t: t.match(T.Punctuation,
+ ',')
+ ])
+ if end is None:
+ idx = tidx + 1
+ else:
+ grp_tokens = tlist.tokens_between(token, end, exclude_end=True)
+ while grp_tokens and (grp_tokens[-1].is_whitespace()
+ or grp_tokens[-1].match(T.Punctuation, ',')):
+ grp_tokens.pop()
+ if len(grp_tokens) <= 1:
+ idx = tidx + 1
+ else:
+ group = tlist.group_tokens(IdentifierList, grp_tokens)
+ idx = tlist.token_index(group)
+ token = tlist.token_next_by_instance(idx, Identifier)
+
+
+def group_parenthesis(tlist):
+ _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis)
+
+def group_comments(tlist):
+ [group_comments(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Comment)]
+ idx = 0
+ token = tlist.token_next_by_type(idx, T.Comment)
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_not_matching(tidx+1,
+ [lambda t: t.ttype in T.Comment,
+ lambda t: t.is_whitespace()])
+ if end is None:
+ idx = tidx + 1
+ else:
+ eidx = tlist.token_index(end)
+ grp_tokens = tlist.tokens_between(token,
+ tlist.token_prev(eidx, False))
+ group = tlist.group_tokens(Comment, grp_tokens)
+ idx = tlist.token_index(group)
+ token = tlist.token_next_by_type(idx, T.Comment)
+
+def group_where(tlist):
+ [group_where(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Where)]
+ idx = 0
+ token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+ stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION')
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_next_match(tidx+1, T.Keyword, stopwords)
+ if end is None:
+ end = tlist.tokens[-1]
+ else:
+ end = tlist.tokens[tlist.token_index(end)-1]
+ group = tlist.group_tokens(Where, tlist.tokens_between(token, end))
+ idx = tlist.token_index(group)
+ token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+
+def group_aliased(tlist):
+ [group_aliased(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Identifier)]
+ idx = 0
+ token = tlist.token_next_by_instance(idx, Identifier)
+ while token:
+ next_ = tlist.token_next(tlist.token_index(token))
+ if next_ is not None and isinstance(next_, Identifier):
+ grp = tlist.tokens_between(token, next_)[1:]
+ token.tokens.extend(grp)
+ for t in grp:
+ tlist.tokens.remove(t)
+ idx = tlist.token_index(token)+1
+ token = tlist.token_next_by_instance(idx, Identifier)
+
+
+def group_typecasts(tlist):
+ _group_left_right(tlist, T.Punctuation, '::', Identifier)
+
+
+def group(tlist):
+ for func in [group_parenthesis,
+ group_comments,
+ group_where,
+ group_case,
+ group_identifier,
+ group_typecasts,
+ group_as,
+ group_aliased,
+ group_assignment,
+                 group_comparison,
+ group_identifier_list,
+ group_if,
+ group_for,]:
+ func(tlist)
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
new file mode 100644
index 0000000..695b298
--- /dev/null
+++ b/sqlparse/filters.py
@@ -0,0 +1,432 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from sqlparse.engine import grouping
+from sqlparse import tokens as T
+
+
+class Filter(object):
+
+ def process(self, *args):
+ raise NotImplementedError
+
+
+class TokenFilter(Filter):
+
+ def process(self, stack, stream):
+ raise NotImplementedError
+
+
+# FIXME: Should be removed
+def rstrip(stream):
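+    # Buffers trailing whitespace tokens and drops them once a newline
+    # comes along, i.e. right-strips each line of the token stream.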
+ buff = []
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ # assuming there's only one \n in value
+ before, rest = token.value.split('\n', 1)
+ token.value = '\n%s' % rest
+ buff = []
+ yield token
+ elif token.is_whitespace():
+ buff.append(token)
+ elif token.is_group():
+ token.tokens = list(rstrip(token.tokens))
+ # process group and look if it starts with a nl
+ if token.tokens and token.tokens[0].is_whitespace():
+ before, rest = token.tokens[0].value.split('\n', 1)
+ token.tokens[0].value = '\n%s' % rest
+ buff = []
+ while buff:
+ yield buff.pop(0)
+ yield token
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+
+
+# --------------------------
+# token process
+
+class _CaseFilter(TokenFilter):
+
+ ttype = None
+
+ def __init__(self, case=None):
+ if case is None:
+ case = 'upper'
+ assert case in ['lower', 'upper', 'capitalize']
+ self.convert = getattr(unicode, case)
+
+ def process(self, stack, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype:
+ value = self.convert(value)
+ yield ttype, value
+
+
+class KeywordCaseFilter(_CaseFilter):
+ ttype = T.Keyword
+
+
+class IdentifierCaseFilter(_CaseFilter):
+ ttype = (T.Name, T.String.Symbol)
+
+
+# ----------------------
+# statement process
+
+class StripCommentsFilter(Filter):
+
+ def _process(self, tlist):
+        clss = set([x.__class__ for x in tlist.tokens])
+ while grouping.Comment in clss:
+ token = tlist.token_next_by_instance(0, grouping.Comment)
+ tidx = tlist.token_index(token)
+ prev = tlist.token_prev(tidx, False)
+ next_ = tlist.token_next(tidx, False)
+            # Replace the comment with whitespace if prev and next exist and
+            # are not whitespace themselves. This doesn't apply if prev or
+            # next is a parenthesis.
+ if (prev is not None and next_ is not None
+ and not prev.is_whitespace() and not next_.is_whitespace()
+ and not (prev.match(T.Punctuation, '(')
+ or next_.match(T.Punctuation, ')'))):
+ tlist.tokens[tidx] = grouping.Token(T.Whitespace, ' ')
+ else:
+ tlist.tokens.pop(tidx)
+ clss = set([x.__class__ for x in tlist.tokens])
+
+ def process(self, stack, stmt):
+ [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
+ self._process(stmt)
+
+
+class StripWhitespaceFilter(Filter):
+
+ def _stripws(self, tlist):
+ func_name = '_stripws_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._stripws_default)
+ func(tlist)
+
+ def _stripws_default(self, tlist):
+ last_was_ws = False
+ for token in tlist.tokens:
+ if token.is_whitespace():
+ if last_was_ws:
+ token.value = ''
+ else:
+ token.value = ' '
+ last_was_ws = token.is_whitespace()
+
+ def _stripws_parenthesis(self, tlist):
+ if tlist.tokens[1].is_whitespace():
+ tlist.tokens.pop(1)
+ if tlist.tokens[-2].is_whitespace():
+ tlist.tokens.pop(-2)
+ self._stripws_default(tlist)
+
+ def process(self, stack, stmt):
+ [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
+ self._stripws(stmt)
+ if stmt.tokens[-1].is_whitespace():
+ stmt.tokens.pop(-1)
+
+
+class ReindentFilter(Filter):
+
+ def __init__(self, width=2, char=' ', line_width=None):
+ self.width = width
+ self.char = char
+ self.indent = 0
+ self.offset = 0
+ self.line_width = line_width
+ self._curr_stmt = None
+ self._last_stmt = None
+
+ def _get_offset(self, token):
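+        # Column of *token* on its line, minus the columns already
+        # covered by the current indentation level and offset.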
+ all_ = list(self._curr_stmt.flatten())
+ idx = all_.index(token)
+ raw = ''.join(unicode(x) for x in all_[:idx+1])
+ line = raw.splitlines()[-1]
+ # Now take current offset into account and return relative offset.
+ full_offset = len(line)-(len(self.char*(self.width*self.indent)))
+ return full_offset - self.offset
+
+ def nl(self):
+ # TODO: newline character should be configurable
+ ws = '\n'+(self.char*((self.indent*self.width)+self.offset))
+ return grouping.Token(T.Whitespace, ws)
+
+ def _split_kwds(self, tlist):
+ split_words = ('FROM', 'JOIN$', 'AND', 'OR',
+ 'GROUP', 'ORDER', 'UNION', 'VALUES')
+ idx = 0
+ token = tlist.token_next_match(idx, T.Keyword, split_words,
+ regex=True)
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ offset = 1
+ if prev and prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ offset += 1
+ nl = self.nl()
+ tlist.insert_before(token, nl)
+ token = tlist.token_next_match(tlist.token_index(nl)+offset,
+ T.Keyword, split_words, regex=True)
+
+ def _split_statements(self, tlist):
+ idx = 0
+ token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ if prev and prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ # only break if it's not the first token
+ if prev:
+ nl = self.nl()
+ tlist.insert_before(token, nl)
+ token = tlist.token_next_by_type(tlist.token_index(token)+1,
+ (T.Keyword.DDL, T.Keyword.DML))
+
+ def _process(self, tlist):
+ func_name = '_process_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._process_default)
+ func(tlist)
+
+ def _process_where(self, tlist):
+ token = tlist.token_next_match(0, T.Keyword, 'WHERE')
+ tlist.insert_before(token, self.nl())
+ self.indent += 1
+ self._process_default(tlist)
+ self.indent -= 1
+
+ def _process_parenthesis(self, tlist):
+ first = tlist.token_next(0)
+ indented = False
+ if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ self.indent += 1
+ tlist.tokens.insert(0, self.nl())
+ indented = True
+ num_offset = self._get_offset(tlist.token_next_match(0,
+ T.Punctuation, '('))
+ self.offset += num_offset
+ self._process_default(tlist, stmts=not indented)
+ if indented:
+ self.indent -= 1
+ self.offset -= num_offset
+
+ def _process_identifierlist(self, tlist):
+ identifiers = tlist.get_identifiers()
+ if len(identifiers) > 1:
+ first = list(identifiers[0].flatten())[0]
+ num_offset = self._get_offset(first)-len(first.value)
+ self.offset += num_offset
+ for token in identifiers[1:]:
+ tlist.insert_before(token, self.nl())
+ self.offset -= num_offset
+ self._process_default(tlist)
+
+    def _process_case(self, tlist):
+        cases = tlist.get_cases()
+        is_first = True
+        num_offset = None
+        case = tlist.tokens[0]
+        outer_offset = self._get_offset(case)-len(case.value)
+        self.offset += outer_offset
+        for cond, value in cases:
+ if is_first:
+ is_first = False
+ num_offset = self._get_offset(cond[0])-len(cond[0].value)
+ self.offset += num_offset
+ continue
+ if cond is None:
+ token = value[0]
+ else:
+ token = cond[0]
+ tlist.insert_before(token, self.nl())
+ # Line breaks on group level are done. Now let's add an offset of
+ # 5 (=length of "when", "then", "else") and process subgroups.
+ self.offset += 5
+ self._process_default(tlist)
+ self.offset -= 5
+ if num_offset is not None:
+ self.offset -= num_offset
+ end = tlist.token_next_match(0, T.Keyword, 'END')
+ tlist.insert_before(end, self.nl())
+ self.offset -= outer_offset
+
+ def _process_default(self, tlist, stmts=True, kwds=True):
+ if stmts:
+ self._split_statements(tlist)
+ if kwds:
+ self._split_kwds(tlist)
+ [self._process(sgroup) for sgroup in tlist.get_sublists()]
+
+ def process(self, stack, stmt):
+ if isinstance(stmt, grouping.Statement):
+ self._curr_stmt = stmt
+ self._process(stmt)
+ if isinstance(stmt, grouping.Statement):
+ if self._last_stmt is not None:
+ if self._last_stmt.to_unicode().endswith('\n'):
+ nl = '\n'
+ else:
+ nl = '\n\n'
+ stmt.tokens.insert(0,
+ grouping.Token(T.Whitespace, nl))
+ if self._last_stmt != stmt:
+ self._last_stmt = stmt
+
+
+# FIXME: Doesn't work ;)
+class RightMarginFilter(Filter):
+
+ keep_together = (
+# grouping.TypeCast, grouping.Identifier, grouping.Alias,
+ )
+
+ def __init__(self, width=79):
+ self.width = width
+ self.line = ''
+
+ def _process(self, stack, group, stream):
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ if token.value.endswith('\n'):
+ self.line = ''
+ else:
+ self.line = token.value.splitlines()[-1]
+ elif (token.is_group()
+ and not token.__class__ in self.keep_together):
+ token.tokens = self._process(stack, token, token.tokens)
+ else:
+ val = token.to_unicode()
+ if len(self.line) + len(val) > self.width:
+ match = re.search('^ +', self.line)
+ if match is not None:
+ indent = match.group()
+ else:
+ indent = ''
+ yield grouping.Token(T.Whitespace, '\n%s' % indent)
+ self.line = indent
+ self.line += val
+ yield token
+
+ def process(self, stack, group):
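+        # Disabled for now (see FIXME above); returning early leaves the
+        # token stream untouched.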
+ return
+ group.tokens = self._process(stack, group, group.tokens)
+
+
+# ---------------------------
+# postprocess
+
+class SerializerUnicode(Filter):
+
+ def process(self, stack, stmt):
+ raw = stmt.to_unicode()
+ add_nl = raw.endswith('\n')
+ res = '\n'.join(line.rstrip() for line in raw.splitlines())
+ if add_nl:
+ res += '\n'
+ return res
+
+
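+# Serializes a statement as a Python variable assignment, e.g. roughly
+#   sql = 'select * from foo'
+# Multi-line statements are wrapped in parentheses with one quoted
+# string per line.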
+class OutputPythonFilter(Filter):
+
+ def __init__(self, varname='sql'):
+ self.varname = varname
+ self.cnt = 0
+
+ def _process(self, stream, varname, count, has_nl):
+ if count > 1:
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield grouping.Token(T.Operator, '(')
+ yield grouping.Token(T.Text, "'")
+ cnt = 0
+ for token in stream:
+ cnt += 1
+ if token.is_whitespace() and '\n' in token.value:
+ if cnt == 1:
+ continue
+ after_lb = token.value.split('\n', 1)[1]
+ yield grouping.Token(T.Text, "'")
+ yield grouping.Token(T.Whitespace, '\n')
+ for i in range(len(varname)+4):
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, "'")
+                if after_lb:  # it's the indentation
+ yield grouping.Token(T.Whitespace, after_lb)
+ continue
+ elif token.value and "'" in token.value:
+ token.value = token.value.replace("'", "\\'")
+ yield grouping.Token(T.Text, token.value or '')
+ yield grouping.Token(T.Text, "'")
+ if has_nl:
+ yield grouping.Token(T.Operator, ')')
+
+ def process(self, stack, stmt):
+ self.cnt += 1
+ if self.cnt > 1:
+ varname = '%s%d' % (self.varname, self.cnt)
+ else:
+ varname = self.varname
+ has_nl = len(stmt.to_unicode().strip().splitlines()) > 1
+ stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl)
+ return stmt
+
+
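+# The PHP counterpart: serializes a statement roughly as
+#   $sql = "select * from foo";
+# with follow-up lines appended via the .= operator.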
+class OutputPHPFilter(Filter):
+
+ def __init__(self, varname='sql'):
+ self.varname = '$%s' % varname
+ self.count = 0
+
+ def _process(self, stream, varname):
+ if self.count > 1:
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, '"')
+ cnt = 0
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ cnt += 1
+ if cnt == 1:
+ continue
+ after_lb = token.value.split('\n', 1)[1]
+ yield grouping.Token(T.Text, '"')
+ yield grouping.Token(T.Operator, ';')
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Punctuation, '.')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, '"')
+ if after_lb:
+ yield grouping.Token(T.Text, after_lb)
+ continue
+ elif '"' in token.value:
+ token.value = token.value.replace('"', '\\"')
+ yield grouping.Token(T.Text, token.value)
+ yield grouping.Token(T.Text, '"')
+ yield grouping.Token(T.Punctuation, ';')
+
+ def process(self, stack, stmt):
+ self.count += 1
+ if self.count > 1:
+ varname = '%s%d' % (self.varname, self.count)
+ else:
+ varname = self.varname
+ stmt.tokens = tuple(self._process(stmt.tokens, varname))
+ return stmt
+
diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py
new file mode 100644
index 0000000..9d443ca
--- /dev/null
+++ b/sqlparse/formatter.py
@@ -0,0 +1,163 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL formatter"""
+
+import logging
+
+from sqlparse import SQLParseError
+from sqlparse import filters
+
+
+def validate_options(options):
+ """Validates options."""
+ kwcase = options.get('keyword_case', None)
+ if kwcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
+
+ idcase = options.get('identifier_case', None)
+ if idcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
+
+ ofrmt = options.get('output_format', None)
+ if ofrmt not in [None, 'sql', 'python', 'php']:
+ raise SQLParseError('Unknown output format: %r' % ofrmt)
+
+ strip_comments = options.get('strip_comments', False)
+ if strip_comments not in [True, False]:
+ raise SQLParseError('Invalid value for strip_comments: %r'
+ % strip_comments)
+
+ strip_ws = options.get('strip_whitespace', False)
+ if strip_ws not in [True, False]:
+ raise SQLParseError('Invalid value for strip_whitespace: %r'
+ % strip_ws)
+
+ reindent = options.get('reindent', False)
+ if reindent not in [True, False]:
+ raise SQLParseError('Invalid value for reindent: %r'
+ % reindent)
+ elif reindent:
+ options['strip_whitespace'] = True
+ indent_tabs = options.get('indent_tabs', False)
+ if indent_tabs not in [True, False]:
+        raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
+ elif indent_tabs:
+ options['indent_char'] = '\t'
+ else:
+ options['indent_char'] = ' '
+ indent_width = options.get('indent_width', 2)
+ try:
+ indent_width = int(indent_width)
+ except (TypeError, ValueError):
+ raise SQLParseError('indent_width requires an integer')
+ if indent_width < 1:
+        raise SQLParseError('indent_width requires a positive integer')
+ options['indent_width'] = indent_width
+
+ right_margin = options.get('right_margin', None)
+ if right_margin is not None:
+ try:
+ right_margin = int(right_margin)
+ except (TypeError, ValueError):
+ raise SQLParseError('right_margin requires an integer')
+ if right_margin < 10:
+            raise SQLParseError('right_margin requires an integer >= 10')
+ options['right_margin'] = right_margin
+
+ return options
+
+
+def build_filter_stack(stack, options):
+ """Setup and return a filter stack.
+
+ Args:
+ stack: :class:`~sqlparse.filters.FilterStack` instance
+ options: Dictionary with options validated by validate_options.
+ """
+ # Token filter
+ if 'keyword_case' in options:
+ stack.preprocess.append(
+ filters.KeywordCaseFilter(options['keyword_case']))
+
+ if 'identifier_case' in options:
+ stack.preprocess.append(
+ filters.IdentifierCaseFilter(options['identifier_case']))
+
+ # After grouping
+ if options.get('strip_comments', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripCommentsFilter())
+
+ if (options.get('strip_whitespace', False)
+ or options.get('reindent', False)):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+ if options.get('reindent', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.ReindentFilter(char=options['indent_char'],
+ width=options['indent_width']))
+
+ if options.get('right_margin', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.RightMarginFilter(width=options['right_margin']))
+
+ # Serializer
+ if options.get('output_format'):
+ frmt = options['output_format']
+ if frmt.lower() == 'php':
+ fltr = filters.OutputPHPFilter()
+ elif frmt.lower() == 'python':
+ fltr = filters.OutputPythonFilter()
+ else:
+ fltr = None
+ if fltr is not None:
+ stack.postprocess.append(fltr)
+
+ return stack
+
+
+# FIXME: Legacy implementation, probably superseded by build_filter_stack
+# above; it references filter classes (GroupFilter, IndentFilter,
+# LTrimFilter, UngroupFilter) that are not defined in this code base.
+def format(statement, **options):
+    from sqlparse.lexer import Lexer
+ logging.info('OPTIONS %r', options)
+ lexer = Lexer()
+# lexer.add_filter('whitespace')
+ lexer.add_filter(filters.GroupFilter())
+ if options.get('reindent', False):
+ lexer.add_filter(filters.StripWhitespaceFilter())
+ lexer.add_filter(filters.IndentFilter(
+ n_indents=options.get('n_indents', 2)))
+ if options.get('ltrim', False):
+ lexer.add_filter(filters.LTrimFilter())
+ keyword_case = options.get('keyword_case', None)
+ if keyword_case is not None:
+ assert keyword_case in ('lower', 'upper', 'capitalize')
+ lexer.add_filter(filters.KeywordCaseFilter(case=keyword_case))
+ identifier_case = options.get('identifier_case', None)
+ if identifier_case is not None:
+ assert identifier_case in ('lower', 'upper', 'capitalize')
+ lexer.add_filter(filters.IdentifierCaseFilter(case=identifier_case))
+ if options.get('strip_comments', False):
+ lexer.add_filter(filters.StripCommentsFilter())
+ right_margin = options.get('right_margin', None)
+ if right_margin is not None:
+ right_margin = int(right_margin)
+ assert right_margin > 0
+ lexer.add_filter(filters.RightMarginFilter(margin=right_margin))
+ lexer.add_filter(filters.UngroupFilter())
+ if options.get('output_format', None):
+ ofrmt = options['output_format']
+ assert ofrmt in ('sql', 'python', 'php')
+ if ofrmt == 'python':
+ lexer.add_filter(filters.OutputPythonFilter())
+ elif ofrmt == 'php':
+ lexer.add_filter(filters.OutputPHPFilter())
+ tokens = []
+ for ttype, value in lexer.get_tokens(unicode(statement)):
+ tokens.append((ttype, value))
+ return statement.__class__(tokens)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
new file mode 100644
index 0000000..3f0632e
--- /dev/null
+++ b/sqlparse/keywords.py
@@ -0,0 +1,589 @@
+from sqlparse.tokens import *
+
+KEYWORDS = {
+ 'ABORT': Keyword,
+ 'ABS': Keyword,
+ 'ABSOLUTE': Keyword,
+ 'ACCESS': Keyword,
+ 'ADA': Keyword,
+ 'ADD': Keyword,
+ 'ADMIN': Keyword,
+ 'AFTER': Keyword,
+ 'AGGREGATE': Keyword,
+ 'ALIAS': Keyword,
+ 'ALL': Keyword,
+ 'ALLOCATE': Keyword,
+ 'ANALYSE': Keyword,
+ 'ANALYZE': Keyword,
+ 'AND': Keyword,
+ 'ANY': Keyword,
+ 'ARE': Keyword,
+ 'AS': Keyword,
+ 'ASC': Keyword,
+ 'ASENSITIVE': Keyword,
+ 'ASSERTION': Keyword,
+ 'ASSIGNMENT': Keyword,
+ 'ASYMMETRIC': Keyword,
+ 'AT': Keyword,
+ 'ATOMIC': Keyword,
+ 'AUTHORIZATION': Keyword,
+ 'AVG': Keyword,
+
+ 'BACKWARD': Keyword,
+ 'BEFORE': Keyword,
+ 'BEGIN': Keyword,
+ 'BETWEEN': Keyword,
+ 'BITVAR': Keyword,
+ 'BIT_LENGTH': Keyword,
+ 'BOTH': Keyword,
+ 'BREADTH': Keyword,
+ 'BY': Keyword,
+
+ 'C': Keyword,
+ 'CACHE': Keyword,
+ 'CALL': Keyword,
+ 'CALLED': Keyword,
+ 'CARDINALITY': Keyword,
+ 'CASCADE': Keyword,
+ 'CASCADED': Keyword,
+ 'CASE': Keyword,
+ 'CAST': Keyword,
+ 'CATALOG': Keyword,
+ 'CATALOG_NAME': Keyword,
+ 'CHAIN': Keyword,
+ 'CHARACTERISTICS': Keyword,
+ 'CHARACTER_LENGTH': Keyword,
+ 'CHARACTER_SET_CATALOG': Keyword,
+ 'CHARACTER_SET_NAME': Keyword,
+ 'CHARACTER_SET_SCHEMA': Keyword,
+ 'CHAR_LENGTH': Keyword,
+ 'CHECK': Keyword,
+ 'CHECKED': Keyword,
+ 'CHECKPOINT': Keyword,
+ 'CLASS': Keyword,
+ 'CLASS_ORIGIN': Keyword,
+ 'CLOB': Keyword,
+ 'CLOSE': Keyword,
+ 'CLUSTER': Keyword,
+    'COALESCE': Keyword,
+ 'COBOL': Keyword,
+ 'COLLATE': Keyword,
+ 'COLLATION': Keyword,
+ 'COLLATION_CATALOG': Keyword,
+ 'COLLATION_NAME': Keyword,
+ 'COLLATION_SCHEMA': Keyword,
+ 'COLUMN': Keyword,
+ 'COLUMN_NAME': Keyword,
+ 'COMMAND_FUNCTION': Keyword,
+ 'COMMAND_FUNCTION_CODE': Keyword,
+ 'COMMENT': Keyword,
+ 'COMMIT': Keyword,
+ 'COMMITTED': Keyword,
+ 'COMPLETION': Keyword,
+ 'CONDITION_NUMBER': Keyword,
+ 'CONNECT': Keyword,
+ 'CONNECTION': Keyword,
+ 'CONNECTION_NAME': Keyword,
+ 'CONSTRAINT': Keyword,
+ 'CONSTRAINTS': Keyword,
+ 'CONSTRAINT_CATALOG': Keyword,
+ 'CONSTRAINT_NAME': Keyword,
+ 'CONSTRAINT_SCHEMA': Keyword,
+ 'CONSTRUCTOR': Keyword,
+ 'CONTAINS': Keyword,
+ 'CONTINUE': Keyword,
+ 'CONVERSION': Keyword,
+ 'CONVERT': Keyword,
+ 'COPY': Keyword,
+    'CORRESPONDING': Keyword,
+ 'COUNT': Keyword,
+ 'CREATEDB': Keyword,
+ 'CREATEUSER': Keyword,
+ 'CROSS': Keyword,
+ 'CUBE': Keyword,
+ 'CURRENT': Keyword,
+ 'CURRENT_DATE': Keyword,
+ 'CURRENT_PATH': Keyword,
+ 'CURRENT_ROLE': Keyword,
+ 'CURRENT_TIME': Keyword,
+ 'CURRENT_TIMESTAMP': Keyword,
+ 'CURRENT_USER': Keyword,
+ 'CURSOR': Keyword,
+ 'CURSOR_NAME': Keyword,
+ 'CYCLE': Keyword,
+
+ 'DATA': Keyword,
+ 'DATABASE': Keyword,
+ 'DATETIME_INTERVAL_CODE': Keyword,
+ 'DATETIME_INTERVAL_PRECISION': Keyword,
+ 'DAY': Keyword,
+ 'DEALLOCATE': Keyword,
+ 'DECLARE': Keyword,
+ 'DEFAULT': Keyword,
+ 'DEFAULTS': Keyword,
+ 'DEFERRABLE': Keyword,
+ 'DEFERRED': Keyword,
+ 'DEFINED': Keyword,
+ 'DEFINER': Keyword,
+ 'DELIMITER': Keyword,
+ 'DELIMITERS': Keyword,
+ 'DEREF': Keyword,
+ 'DESC': Keyword,
+ 'DESCRIBE': Keyword,
+ 'DESCRIPTOR': Keyword,
+ 'DESTROY': Keyword,
+ 'DESTRUCTOR': Keyword,
+ 'DETERMINISTIC': Keyword,
+ 'DIAGNOSTICS': Keyword,
+ 'DICTIONARY': Keyword,
+ 'DISCONNECT': Keyword,
+ 'DISPATCH': Keyword,
+ 'DISTINCT': Keyword,
+ 'DO': Keyword,
+ 'DOMAIN': Keyword,
+ 'DYNAMIC': Keyword,
+ 'DYNAMIC_FUNCTION': Keyword,
+ 'DYNAMIC_FUNCTION_CODE': Keyword,
+
+ 'EACH': Keyword,
+ 'ELSE': Keyword,
+ 'ENCODING': Keyword,
+ 'ENCRYPTED': Keyword,
+ 'END': Keyword,
+ 'END-EXEC': Keyword,
+ 'EQUALS': Keyword,
+ 'ESCAPE': Keyword,
+ 'EVERY': Keyword,
+ 'EXCEPT': Keyword,
+    'EXCEPTION': Keyword,
+ 'EXCLUDING': Keyword,
+ 'EXCLUSIVE': Keyword,
+ 'EXEC': Keyword,
+ 'EXECUTE': Keyword,
+ 'EXISTING': Keyword,
+ 'EXISTS': Keyword,
+ 'EXTERNAL': Keyword,
+ 'EXTRACT': Keyword,
+
+ 'FALSE': Keyword,
+ 'FETCH': Keyword,
+ 'FINAL': Keyword,
+ 'FIRST': Keyword,
+ 'FOR': Keyword,
+ 'FORCE': Keyword,
+ 'FOREIGN': Keyword,
+ 'FORTRAN': Keyword,
+ 'FORWARD': Keyword,
+ 'FOUND': Keyword,
+ 'FREE': Keyword,
+ 'FREEZE': Keyword,
+ 'FROM': Keyword,
+ 'FULL': Keyword,
+ 'FUNCTION': Keyword,
+
+ 'G': Keyword,
+ 'GENERAL': Keyword,
+ 'GENERATED': Keyword,
+ 'GET': Keyword,
+ 'GLOBAL': Keyword,
+ 'GO': Keyword,
+ 'GOTO': Keyword,
+ 'GRANT': Keyword,
+ 'GRANTED': Keyword,
+ 'GROUP': Keyword,
+ 'GROUPING': Keyword,
+
+ 'HANDLER': Keyword,
+ 'HAVING': Keyword,
+ 'HIERARCHY': Keyword,
+ 'HOLD': Keyword,
+ 'HOST': Keyword,
+
+ 'IDENTITY': Keyword,
+ 'IF': Keyword,
+ 'IGNORE': Keyword,
+ 'ILIKE': Keyword,
+ 'IMMEDIATE': Keyword,
+ 'IMMUTABLE': Keyword,
+
+ 'IMPLEMENTATION': Keyword,
+ 'IMPLICIT': Keyword,
+ 'IN': Keyword,
+ 'INCLUDING': Keyword,
+ 'INCREMENT': Keyword,
+ 'INDEX': Keyword,
+
+    'INDICATOR': Keyword,
+ 'INFIX': Keyword,
+ 'INHERITS': Keyword,
+ 'INITIALIZE': Keyword,
+ 'INITIALLY': Keyword,
+ 'INNER': Keyword,
+ 'INOUT': Keyword,
+ 'INPUT': Keyword,
+ 'INSENSITIVE': Keyword,
+ 'INSTANTIABLE': Keyword,
+ 'INSTEAD': Keyword,
+ 'INTERSECT': Keyword,
+ 'INTO': Keyword,
+ 'INVOKER': Keyword,
+ 'IS': Keyword,
+ 'ISNULL': Keyword,
+ 'ISOLATION': Keyword,
+ 'ITERATE': Keyword,
+
+ 'JOIN': Keyword,
+
+ 'K': Keyword,
+ 'KEY': Keyword,
+ 'KEY_MEMBER': Keyword,
+ 'KEY_TYPE': Keyword,
+
+ 'LANCOMPILER': Keyword,
+ 'LANGUAGE': Keyword,
+ 'LARGE': Keyword,
+ 'LAST': Keyword,
+ 'LATERAL': Keyword,
+ 'LEADING': Keyword,
+ 'LEFT': Keyword,
+ 'LENGTH': Keyword,
+ 'LESS': Keyword,
+ 'LEVEL': Keyword,
+ 'LIKE': Keyword,
+    'LIMIT': Keyword,
+ 'LISTEN': Keyword,
+ 'LOAD': Keyword,
+ 'LOCAL': Keyword,
+ 'LOCALTIME': Keyword,
+ 'LOCALTIMESTAMP': Keyword,
+ 'LOCATION': Keyword,
+ 'LOCATOR': Keyword,
+ 'LOCK': Keyword,
+ 'LOWER': Keyword,
+
+ 'M': Keyword,
+ 'MAP': Keyword,
+ 'MATCH': Keyword,
+ 'MAX': Keyword,
+ 'MAXVALUE': Keyword,
+ 'MESSAGE_LENGTH': Keyword,
+ 'MESSAGE_OCTET_LENGTH': Keyword,
+ 'MESSAGE_TEXT': Keyword,
+ 'METHOD': Keyword,
+ 'MIN': Keyword,
+ 'MINUTE': Keyword,
+ 'MINVALUE': Keyword,
+ 'MOD': Keyword,
+ 'MODE': Keyword,
+ 'MODIFIES': Keyword,
+ 'MODIFY': Keyword,
+ 'MONTH': Keyword,
+ 'MORE': Keyword,
+ 'MOVE': Keyword,
+ 'MUMPS': Keyword,
+
+ 'NAMES': Keyword,
+ 'NATIONAL': Keyword,
+ 'NATURAL': Keyword,
+ 'NCHAR': Keyword,
+ 'NCLOB': Keyword,
+ 'NEW': Keyword,
+ 'NEXT': Keyword,
+ 'NO': Keyword,
+ 'NOCREATEDB': Keyword,
+ 'NOCREATEUSER': Keyword,
+ 'NONE': Keyword,
+ 'NOT': Keyword,
+ 'NOTHING': Keyword,
+ 'NOTIFY': Keyword,
+ 'NOTNULL': Keyword,
+ 'NULL': Keyword,
+ 'NULLABLE': Keyword,
+ 'NULLIF': Keyword,
+
+ 'OBJECT': Keyword,
+ 'OCTET_LENGTH': Keyword,
+ 'OF': Keyword,
+ 'OFF': Keyword,
+ 'OFFSET': Keyword,
+ 'OIDS': Keyword,
+ 'OLD': Keyword,
+ 'ON': Keyword,
+ 'ONLY': Keyword,
+ 'OPEN': Keyword,
+ 'OPERATION': Keyword,
+ 'OPERATOR': Keyword,
+ 'OPTION': Keyword,
+ 'OPTIONS': Keyword,
+ 'OR': Keyword,
+ 'ORDER': Keyword,
+ 'ORDINALITY': Keyword,
+ 'OUT': Keyword,
+ 'OUTER': Keyword,
+ 'OUTPUT': Keyword,
+ 'OVERLAPS': Keyword,
+ 'OVERLAY': Keyword,
+ 'OVERRIDING': Keyword,
+ 'OWNER': Keyword,
+
+ 'PAD': Keyword,
+ 'PARAMETER': Keyword,
+ 'PARAMETERS': Keyword,
+ 'PARAMETER_MODE': Keyword,
+    'PARAMETER_NAME': Keyword,
+    'PARAMETER_ORDINAL_POSITION': Keyword,
+    'PARAMETER_SPECIFIC_CATALOG': Keyword,
+    'PARAMETER_SPECIFIC_NAME': Keyword,
+    'PARAMETER_SPECIFIC_SCHEMA': Keyword,
+ 'PARTIAL': Keyword,
+ 'PASCAL': Keyword,
+ 'PENDANT': Keyword,
+ 'PLACING': Keyword,
+ 'PLI': Keyword,
+ 'POSITION': Keyword,
+ 'POSTFIX': Keyword,
+ 'PRECISION': Keyword,
+ 'PREFIX': Keyword,
+ 'PREORDER': Keyword,
+ 'PREPARE': Keyword,
+ 'PRESERVE': Keyword,
+ 'PRIMARY': Keyword,
+ 'PRIOR': Keyword,
+ 'PRIVILEGES': Keyword,
+ 'PROCEDURAL': Keyword,
+ 'PROCEDURE': Keyword,
+ 'PUBLIC': Keyword,
+
+ 'RAISE': Keyword,
+ 'READ': Keyword,
+ 'READS': Keyword,
+ 'RECHECK': Keyword,
+ 'RECURSIVE': Keyword,
+ 'REF': Keyword,
+ 'REFERENCES': Keyword,
+ 'REFERENCING': Keyword,
+ 'REINDEX': Keyword,
+ 'RELATIVE': Keyword,
+ 'RENAME': Keyword,
+ 'REPEATABLE': Keyword,
+ 'REPLACE': Keyword,
+ 'RESET': Keyword,
+ 'RESTART': Keyword,
+ 'RESTRICT': Keyword,
+ 'RESULT': Keyword,
+ 'RETURN': Keyword,
+ 'RETURNED_LENGTH': Keyword,
+ 'RETURNED_OCTET_LENGTH': Keyword,
+ 'RETURNED_SQLSTATE': Keyword,
+ 'RETURNS': Keyword,
+ 'REVOKE': Keyword,
+ 'RIGHT': Keyword,
+ 'ROLE': Keyword,
+ 'ROLLBACK': Keyword,
+ 'ROLLUP': Keyword,
+ 'ROUTINE': Keyword,
+ 'ROUTINE_CATALOG': Keyword,
+ 'ROUTINE_NAME': Keyword,
+ 'ROUTINE_SCHEMA': Keyword,
+ 'ROW': Keyword,
+ 'ROWS': Keyword,
+ 'ROW_COUNT': Keyword,
+ 'RULE': Keyword,
+
+    'SAVEPOINT': Keyword,
+ 'SCALE': Keyword,
+ 'SCHEMA': Keyword,
+ 'SCHEMA_NAME': Keyword,
+ 'SCOPE': Keyword,
+ 'SCROLL': Keyword,
+ 'SEARCH': Keyword,
+ 'SECOND': Keyword,
+ 'SECURITY': Keyword,
+ 'SELF': Keyword,
+ 'SENSITIVE': Keyword,
+ 'SERIALIZABLE': Keyword,
+ 'SERVER_NAME': Keyword,
+ 'SESSION': Keyword,
+ 'SESSION_USER': Keyword,
+ 'SETOF': Keyword,
+ 'SETS': Keyword,
+ 'SHARE': Keyword,
+ 'SHOW': Keyword,
+ 'SIMILAR': Keyword,
+ 'SIMPLE': Keyword,
+ 'SIZE': Keyword,
+ 'SOME': Keyword,
+ 'SOURCE': Keyword,
+ 'SPACE': Keyword,
+ 'SPECIFIC': Keyword,
+ 'SPECIFICTYPE': Keyword,
+ 'SPECIFIC_NAME': Keyword,
+ 'SQL': Keyword,
+ 'SQLCODE': Keyword,
+ 'SQLERROR': Keyword,
+ 'SQLEXCEPTION': Keyword,
+ 'SQLSTATE': Keyword,
+    'SQLWARNING': Keyword,
+ 'STABLE': Keyword,
+ 'START': Keyword,
+ 'STATE': Keyword,
+ 'STATEMENT': Keyword,
+ 'STATIC': Keyword,
+ 'STATISTICS': Keyword,
+ 'STDIN': Keyword,
+ 'STDOUT': Keyword,
+ 'STORAGE': Keyword,
+ 'STRICT': Keyword,
+ 'STRUCTURE': Keyword,
+ 'STYPE': Keyword,
+ 'SUBCLASS_ORIGIN': Keyword,
+ 'SUBLIST': Keyword,
+ 'SUBSTRING': Keyword,
+ 'SUM': Keyword,
+ 'SYMMETRIC': Keyword,
+ 'SYSID': Keyword,
+ 'SYSTEM': Keyword,
+ 'SYSTEM_USER': Keyword,
+
+ 'TABLE': Keyword,
+ 'TABLE_NAME': Keyword,
+    'TEMP': Keyword,
+ 'TEMPLATE': Keyword,
+ 'TEMPORARY': Keyword,
+ 'TERMINATE': Keyword,
+ 'THAN': Keyword,
+ 'THEN': Keyword,
+ 'TIMESTAMP': Keyword,
+ 'TIMEZONE_HOUR': Keyword,
+ 'TIMEZONE_MINUTE': Keyword,
+ 'TO': Keyword,
+ 'TOAST': Keyword,
+ 'TRAILING': Keyword,
+    'TRANSACTION': Keyword,
+    'TRANSACTIONS_COMMITTED': Keyword,
+    'TRANSACTIONS_ROLLED_BACK': Keyword,
+    'TRANSACTION_ACTIVE': Keyword,
+ 'TRANSFORM': Keyword,
+ 'TRANSFORMS': Keyword,
+ 'TRANSLATE': Keyword,
+ 'TRANSLATION': Keyword,
+ 'TREAT': Keyword,
+ 'TRIGGER': Keyword,
+ 'TRIGGER_CATALOG': Keyword,
+ 'TRIGGER_NAME': Keyword,
+ 'TRIGGER_SCHEMA': Keyword,
+ 'TRIM': Keyword,
+ 'TRUE': Keyword,
+ 'TRUNCATE': Keyword,
+ 'TRUSTED': Keyword,
+ 'TYPE': Keyword,
+
+ 'UNCOMMITTED': Keyword,
+ 'UNDER': Keyword,
+ 'UNENCRYPTED': Keyword,
+ 'UNION': Keyword,
+ 'UNIQUE': Keyword,
+ 'UNKNOWN': Keyword,
+ 'UNLISTEN': Keyword,
+ 'UNNAMED': Keyword,
+ 'UNNEST': Keyword,
+ 'UNTIL': Keyword,
+ 'UPPER': Keyword,
+ 'USAGE': Keyword,
+ 'USER': Keyword,
+ 'USER_DEFINED_TYPE_CATALOG': Keyword,
+ 'USER_DEFINED_TYPE_NAME': Keyword,
+ 'USER_DEFINED_TYPE_SCHEMA': Keyword,
+ 'USING': Keyword,
+
+ 'VACUUM': Keyword,
+ 'VALID': Keyword,
+ 'VALIDATOR': Keyword,
+ 'VALUES': Keyword,
+ 'VARIABLE': Keyword,
+ 'VERBOSE': Keyword,
+ 'VERSION': Keyword,
+ 'VIEW': Keyword,
+ 'VOLATILE': Keyword,
+
+ 'WHEN': Keyword,
+ 'WHENEVER': Keyword,
+ 'WHERE': Keyword,
+ 'WITH': Keyword,
+ 'WITHOUT': Keyword,
+ 'WORK': Keyword,
+ 'WRITE': Keyword,
+
+ 'YEAR': Keyword,
+
+ 'ZONE': Keyword,
+
+
+ 'ARRAY': Name.Builtin,
+ 'BIGINT': Name.Builtin,
+ 'BINARY': Name.Builtin,
+ 'BIT': Name.Builtin,
+ 'BLOB': Name.Builtin,
+ 'BOOLEAN': Name.Builtin,
+ 'CHAR': Name.Builtin,
+ 'CHARACTER': Name.Builtin,
+ 'DATE': Name.Builtin,
+ 'DEC': Name.Builtin,
+ 'DECIMAL': Name.Builtin,
+ 'FLOAT': Name.Builtin,
+ 'INT': Name.Builtin,
+ 'INTEGER': Name.Builtin,
+ 'INTERVAL': Name.Builtin,
+ 'NUMBER': Name.Builtin,
+ 'NUMERIC': Name.Builtin,
+ 'REAL': Name.Builtin,
+ 'SERIAL': Name.Builtin,
+ 'SMALLINT': Name.Builtin,
+ 'VARCHAR': Name.Builtin,
+ 'VARYING': Name.Builtin,
+ 'INT8': Name.Builtin,
+ 'SERIAL8': Name.Builtin,
+ 'TEXT': Name.Builtin,
+ }
+
+
+KEYWORDS_COMMON = {
+ 'SELECT': Keyword.DML,
+ 'INSERT': Keyword.DML,
+ 'DELETE': Keyword.DML,
+ 'UPDATE': Keyword.DML,
+ 'DROP': Keyword.DDL,
+ 'CREATE': Keyword.DDL,
+ 'ALTER': Keyword.DDL,
+
+ 'WHERE': Keyword,
+ 'FROM': Keyword,
+ 'INNER': Keyword,
+ 'JOIN': Keyword,
+ 'AND': Keyword,
+ 'OR': Keyword,
+ 'LIKE': Keyword,
+ 'ON': Keyword,
+ 'IN': Keyword,
+
+ 'BY': Keyword,
+ 'GROUP': Keyword,
+ 'ORDER': Keyword,
+ 'LEFT': Keyword,
+ 'OUTER': Keyword,
+
+ 'IF': Keyword,
+ 'END': Keyword,
+ 'THEN': Keyword,
+ 'LOOP': Keyword,
+ 'AS': Keyword,
+ 'ELSE': Keyword,
+ 'FOR': Keyword,
+
+ 'CASE': Keyword,
+ 'WHEN': Keyword,
+ 'MIN': Keyword,
+ 'MAX': Keyword,
+ 'DISTINCT': Keyword,
+
+ }
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
new file mode 100644
index 0000000..b635fc6
--- /dev/null
+++ b/sqlparse/lexer.py
@@ -0,0 +1,310 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL Lexer"""
+
+# This code is based on the SqlLexer in pygments.
+# http://pygments.org/
+# It's separated from the rest of pygments to increase performance
+# and to allow some customizations.
+
+import re
+
+from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
+from sqlparse.tokens import *
+from sqlparse.tokens import _TokenType
+
+
+class include(str):
+ pass
+
+class combined(tuple):
+ """Indicates a state combined from multiple states."""
+
+ def __new__(cls, *args):
+ return tuple.__new__(cls, args)
+
+ def __init__(self, *args):
+ # tuple.__init__ doesn't do anything
+ pass
+
+def is_keyword(value):
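+    # Case-insensitive lookup: common keywords take precedence over the
+    # full keyword table; anything unknown is returned as a plain Name.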
+ test = value.upper()
+ return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, Name)), value
+
+
+def apply_filters(stream, filters, lexer=None):
+ """
+ Use this method to apply an iterable of filters to
+ a stream. If lexer is given it's forwarded to the
+ filter, otherwise the filter receives `None`.
+ """
+ def _apply(filter_, stream):
+ for token in filter_.filter(lexer, stream):
+ yield token
+ for filter_ in filters:
+ stream = _apply(filter_, stream)
+ return stream
+
+
+class LexerMeta(type):
+ """
+ Metaclass for Lexer, creates the self._tokens attribute from
+ self.tokens on the first instantiation.
+ """
+
+ def _process_state(cls, unprocessed, processed, state):
+ assert type(state) is str, "wrong state name %r" % state
+ assert state[0] != '#', "invalid state name %r" % state
+ if state in processed:
+ return processed[state]
+ tokens = processed[state] = []
+ rflags = cls.flags
+ for tdef in unprocessed[state]:
+ if isinstance(tdef, include):
+ # it's a state reference
+ assert tdef != state, "circular state reference %r" % state
+ tokens.extend(cls._process_state(unprocessed, processed, str(tdef)))
+ continue
+
+ assert type(tdef) is tuple, "wrong rule def %r" % tdef
+
+ try:
+ rex = re.compile(tdef[0], rflags).match
+ except Exception, err:
+ raise ValueError("uncompilable regex %r in state %r of %r: %s" %
+ (tdef[0], state, cls, err))
+
+ assert type(tdef[1]) is _TokenType or callable(tdef[1]), \
+ 'token type must be simple type or callable, not %r' % (tdef[1],)
+
+ if len(tdef) == 2:
+ new_state = None
+ else:
+ tdef2 = tdef[2]
+ if isinstance(tdef2, str):
+ # an existing state
+ if tdef2 == '#pop':
+ new_state = -1
+ elif tdef2 in unprocessed:
+ new_state = (tdef2,)
+ elif tdef2 == '#push':
+ new_state = tdef2
+ elif tdef2[:5] == '#pop:':
+ new_state = -int(tdef2[5:])
+ else:
+ assert False, 'unknown new state %r' % tdef2
+ elif isinstance(tdef2, combined):
+ # combine a new state from existing ones
+ new_state = '_tmp_%d' % cls._tmpname
+ cls._tmpname += 1
+ itokens = []
+ for istate in tdef2:
+ assert istate != state, 'circular state ref %r' % istate
+ itokens.extend(cls._process_state(unprocessed,
+ processed, istate))
+ processed[new_state] = itokens
+ new_state = (new_state,)
+ elif isinstance(tdef2, tuple):
+ # push more than one state
+ for state in tdef2:
+ assert (state in unprocessed or
+ state in ('#pop', '#push')), \
+ 'unknown new state ' + state
+ new_state = tdef2
+ else:
+ assert False, 'unknown new state def %r' % tdef2
+ tokens.append((rex, tdef[1], new_state))
+ return tokens
+
+ def process_tokendef(cls):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ processed = cls._all_tokens[cls.__name__] = {}
+ #tokendefs = tokendefs or cls.tokens[name]
+ for state in cls.tokens.keys():
+ cls._process_state(cls.tokens, processed, state)
+ return processed
+
+ def __call__(cls, *args, **kwds):
+ if not hasattr(cls, '_tokens'):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ if hasattr(cls, 'token_variants') and cls.token_variants:
+ # don't process yet
+ pass
+ else:
+ cls._tokens = cls.process_tokendef()
+
+ return type.__call__(cls, *args, **kwds)
+
+
+
+
+class Lexer:
+
+ __metaclass__ = LexerMeta
+
+ encoding = 'utf-8'
+ stripall = False
+ stripnl = False
+ tabsize = 0
+ flags = re.IGNORECASE
+
+ tokens = {
+ 'root': [
+ (r'--.*?(\r|\n|\r\n)', Comment.Single),
+ (r'(\r|\n|\r\n)', Newline),
+ (r'\s+', Whitespace),
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r':=', Assignment),
+ (r'::', Punctuation),
+ (r'[*]', Wildcard),
+            (r'[+/<>=~!@#%^&|`?-]', Operator),
+ (r'[0-9]+', Number.Integer),
+ # TODO: Backslash escapes?
+ (r"'(''|[^'])*'", String.Single),
+ (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL
+ (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN', Keyword),
+ (r'END( IF| LOOP)?', Keyword),
+ (r'CREATE( OR REPLACE)?', Keyword.DDL),
+ (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword),
+ (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', Name.Builtin),
+ (r'[;:()\[\],\.]', Punctuation),
+ ],
+ 'multiline-comments': [
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r'\*/', Comment.Multiline, '#pop'),
+ (r'[^/\*]+', Comment.Multiline),
+ (r'[/*]', Comment.Multiline)
+ ]
+ }
+
+ def __init__(self):
+ self.filters = []
+
+ def add_filter(self, filter_, **options):
+ from sqlparse.filters import Filter
+ if not isinstance(filter_, Filter):
+ filter_ = filter_(**options)
+ self.filters.append(filter_)
+
+ def get_tokens(self, text, unfiltered=False):
+ """
+ Return an iterable of (tokentype, value) pairs generated from
+ `text`. If `unfiltered` is set to `True`, the filtering mechanism
+ is bypassed even if filters are defined.
+
+        Also preprocesses the text: tabs are expanded, the text is
+        stripped if requested, and registered filters are applied.
+ """
+ if not isinstance(text, unicode):
+ if self.encoding == 'guess':
+ try:
+ text = text.decode('utf-8')
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+ except UnicodeDecodeError:
+ text = text.decode('latin1')
+ elif self.encoding == 'chardet':
+ try:
+ import chardet
+ except ImportError:
+ raise ImportError('To enable chardet encoding guessing, '
+ 'please install the chardet library '
+ 'from http://chardet.feedparser.org/')
+ enc = chardet.detect(text)
+ text = text.decode(enc['encoding'])
+ else:
+ text = text.decode(self.encoding)
+ if self.stripall:
+ text = text.strip()
+ elif self.stripnl:
+ text = text.strip('\n')
+ if self.tabsize > 0:
+ text = text.expandtabs(self.tabsize)
+# if not text.endswith('\n'):
+# text += '\n'
+
+ def streamer():
+ for i, t, v in self.get_tokens_unprocessed(text):
+ yield t, v
+ stream = streamer()
+ if not unfiltered:
+ stream = apply_filters(stream, self.filters, self)
+ return stream
+
+
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """
+ Split ``text`` into (tokentype, text) pairs.
+
+        ``stack`` is the initial stack (default: ``['root']``)
+ """
+ pos = 0
+ tokendefs = self._tokens
+ statestack = list(stack)
+ statetokens = tokendefs[statestack[-1]]
+ known_names = {}
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ m = rexmatch(text, pos)
+ if m:
+ # print rex.pattern
+ value = m.group()
+ if value in known_names:
+ yield pos, known_names[value], value
+ elif type(action) is _TokenType:
+ yield pos, action, value
+ elif hasattr(action, '__call__'):
+ ttype, value = action(value)
+ known_names[value] = ttype
+ yield pos, ttype, value
+ else:
+ for item in action(self, m):
+ yield item
+ pos = m.end()
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ statestack.pop()
+ elif state == '#push':
+ statestack.append(statestack[-1])
+ else:
+ statestack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del statestack[new_state:]
+ elif new_state == '#push':
+ statestack.append(statestack[-1])
+ else:
+ assert False, "wrong state def: %r" % new_state
+ statetokens = tokendefs[statestack[-1]]
+ break
+ else:
+ try:
+ if text[pos] == '\n':
+ # at EOL, reset state to "root"
+ pos += 1
+ statestack = ['root']
+ statetokens = tokendefs['root']
+ yield pos, Text, u'\n'
+ continue
+ yield pos, Error, text[pos]
+ pos += 1
+ except IndexError:
+ break
+
+
+def tokenize(sql):
+ """Tokenize sql.
+
+    Tokenize *sql* using the :class:`Lexer` and return a stream of
+    ``(token type, value)`` 2-tuples.
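+
+    A rough example of the resulting stream (illustrative only)::
+
+        >>> list(tokenize('select 1'))[:2]  # doctest: +SKIP
+        [(Token.Keyword.DML, u'select'), (Token.Text.Whitespace, u' ')]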
+ """
+ lexer = Lexer()
+ return lexer.get_tokens(sql)
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
new file mode 100644
index 0000000..2c63c41
--- /dev/null
+++ b/sqlparse/tokens.py
@@ -0,0 +1,131 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+# The Token implementation is based on pygment's token system written
+# by Georg Brandl.
+# http://pygments.org/
+
+"""Tokens"""
+
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+
+class _TokenType(tuple):
+ parent = None
+
+ def split(self):
+ buf = []
+ node = self
+ while node is not None:
+ buf.append(node)
+ node = node.parent
+ buf.reverse()
+ return buf
+
+ def __init__(self, *args):
+ # no need to call super.__init__
+ self.subtypes = set()
+
+ def __contains__(self, val):
+ return self is val or (
+ type(val) is self.__class__ and
+ val[:len(self)] == self
+ )
+
+ def __getattr__(self, val):
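+        # Subtypes are created lazily: the first access to e.g.
+        # Token.Keyword.DML creates, caches and returns a new child type.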
+ if not val or not val[0].isupper():
+ return tuple.__getattribute__(self, val)
+ new = _TokenType(self + (val,))
+ setattr(self, val, new)
+ self.subtypes.add(new)
+ new.parent = self
+ return new
+
+ def __hash__(self):
+ return hash(tuple(self))
+
+ def __repr__(self):
+ return 'Token' + (self and '.' or '') + '.'.join(self)
+
+
+Token = _TokenType()
+
+# Special token types
+Text = Token.Text
+Whitespace = Text.Whitespace
+Newline = Whitespace.Newline
+Error = Token.Error
+# Text that doesn't belong to this lexer (e.g. HTML in PHP)
+Other = Token.Other
+
+# Common token types for source code
+Keyword = Token.Keyword
+Name = Token.Name
+Literal = Token.Literal
+String = Literal.String
+Number = Literal.Number
+Punctuation = Token.Punctuation
+Operator = Token.Operator
+Wildcard = Token.Wildcard
+Comment = Token.Comment
+Assignment = Token.Assignment
+
+# Generic types for non-source code
+Generic = Token.Generic
+
+# String and some others are not direct children of Token.
+# Alias them:
+Token.Token = Token
+Token.String = String
+Token.Number = Number
+
+# SQL specific tokens
+DML = Keyword.DML
+DDL = Keyword.DDL
+Command = Keyword.Command
+
+Group = Token.Group
+Group.Parenthesis = Token.Group.Parenthesis
+Group.Comment = Token.Group.Comment
+Group.Where = Token.Group.Where
+
+
+def is_token_subtype(ttype, other):
+ """
+ Return True if ``ttype`` is a subtype of ``other``.
+
+    Exists for backwards compatibility; use ``ttype in other`` instead.
+ """
+ return ttype in other
+
+
+def string_to_tokentype(s):
+ """
+ Convert a string into a token type::
+
+        >>> string_to_tokentype('String.Double')
+        Token.Literal.String.Double
+        >>> string_to_tokentype('Token.Literal.Number')
+        Token.Literal.Number
+        >>> string_to_tokentype('')
+        Token
+
+    Tokens that are already tokens are returned unchanged:
+
+        >>> string_to_tokentype(String)
+        Token.Literal.String
+ """
+ if isinstance(s, _TokenType):
+ return s
+ if not s:
+ return Token
+ node = Token
+ for item in s.split('.'):
+ node = getattr(node, item)
+ return node
+
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/files/begintag.sql b/tests/files/begintag.sql
new file mode 100644
index 0000000..699b365
--- /dev/null
+++ b/tests/files/begintag.sql
@@ -0,0 +1,4 @@
+begin;
+update foo
+ set bar = 1;
+commit; \ No newline at end of file
diff --git a/tests/files/dashcomment.sql b/tests/files/dashcomment.sql
new file mode 100644
index 0000000..0d5ac62
--- /dev/null
+++ b/tests/files/dashcomment.sql
@@ -0,0 +1,5 @@
+select * from user;
+--select * from host;
+select * from user;
+select * -- foo;
+from foo; \ No newline at end of file
diff --git a/tests/files/function.sql b/tests/files/function.sql
new file mode 100644
index 0000000..d19227f
--- /dev/null
+++ b/tests/files/function.sql
@@ -0,0 +1,13 @@
+CREATE OR REPLACE FUNCTION foo(
+ p_in1 VARCHAR
+ , p_in2 INTEGER
+) RETURNS INTEGER AS
+
+ DECLARE
+ v_foo INTEGER;
+ BEGIN
+ SELECT *
+ FROM foo
+ INTO v_foo;
+ RETURN v_foo.id;
+ END; \ No newline at end of file
diff --git a/tests/files/function_psql.sql b/tests/files/function_psql.sql
new file mode 100644
index 0000000..e485f7a
--- /dev/null
+++ b/tests/files/function_psql.sql
@@ -0,0 +1,72 @@
+CREATE OR REPLACE FUNCTION public.delete_data (
+ p_tabelle VARCHAR
+ , p_key VARCHAR
+ , p_value INTEGER
+) RETURNS INTEGER AS
+$$
+DECLARE
+ p_retval INTEGER;
+ v_constraint RECORD;
+ v_count INTEGER;
+ v_data RECORD;
+ v_fieldname VARCHAR;
+ v_sql VARCHAR;
+ v_key VARCHAR;
+ v_value INTEGER;
+BEGIN
+ v_sql := 'SELECT COUNT(*) FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value;
+ --RAISE NOTICE '%', v_sql;
+ EXECUTE v_sql INTO v_count;
+ IF v_count::integer != 0 THEN
+ SELECT att.attname
+ INTO v_key
+ FROM pg_attribute att
+ LEFT JOIN pg_constraint con ON con.conrelid = att.attrelid
+ AND con.conkey[1] = att.attnum
+ AND con.contype = 'p', pg_type typ, pg_class rel, pg_namespace ns
+ WHERE att.attrelid = rel.oid
+ AND att.attnum > 0
+ AND typ.oid = att.atttypid
+ AND att.attisdropped = false
+ AND rel.relname = p_tabelle
+ AND con.conkey[1] = 1
+ AND ns.oid = rel.relnamespace
+ AND ns.nspname = 'public'
+ ORDER BY att.attnum;
+ v_sql := 'SELECT ' || v_key || ' AS id FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value;
+ FOR v_data IN EXECUTE v_sql
+ LOOP
+ --RAISE NOTICE ' -> % %', p_tabelle, v_data.id;
+ FOR v_constraint IN SELECT t.constraint_name
+ , t.constraint_type
+ , t.table_name
+ , c.column_name
+ FROM public.v_table_constraints t
+ , public.v_constraint_columns c
+ WHERE t.constraint_name = c.constraint_name
+ AND t.constraint_type = 'FOREIGN KEY'
+ AND c.table_name = p_tabelle
+ AND t.table_schema = 'public'
+ AND c.table_schema = 'public'
+ LOOP
+ v_fieldname := substring(v_constraint.constraint_name from 1 for length(v_constraint.constraint_name) - length(v_constraint.column_name) - 1);
+ IF (v_constraint.table_name = p_tabelle) AND (p_value = v_data.id) THEN
+                --RAISE NOTICE 'Skip (self-reference)';
+ CONTINUE;
+ ELSE
+ PERFORM delete_data(v_constraint.table_name::varchar, v_fieldname::varchar, v_data.id::integer);
+ END IF;
+ END LOOP;
+ END LOOP;
+ v_sql := 'DELETE FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value;
+ --RAISE NOTICE '%', v_sql;
+ EXECUTE v_sql;
+ p_retval := 1;
+ ELSE
+        --RAISE NOTICE ' -> no records found';
+ p_retval := 0;
+ END IF;
+ RETURN p_retval;
+END;
+$$
+LANGUAGE plpgsql; \ No newline at end of file
diff --git a/tests/files/function_psql2.sql b/tests/files/function_psql2.sql
new file mode 100644
index 0000000..b5d494c
--- /dev/null
+++ b/tests/files/function_psql2.sql
@@ -0,0 +1,7 @@
+CREATE OR REPLACE FUNCTION update_something() RETURNS void AS
+$body$
+BEGIN
+ raise notice 'foo';
+END;
+$body$
+LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER; \ No newline at end of file
diff --git a/tests/files/function_psql3.sql b/tests/files/function_psql3.sql
new file mode 100644
index 0000000..b25d818
--- /dev/null
+++ b/tests/files/function_psql3.sql
@@ -0,0 +1,8 @@
+CREATE OR REPLACE FUNCTION foo() RETURNS integer AS
+$body$
+DECLARE
+BEGIN
+ select * from foo;
+END;
+$body$
+LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER; \ No newline at end of file
diff --git a/tests/run_tests.py b/tests/run_tests.py
new file mode 100755
index 0000000..1c7960e
--- /dev/null
+++ b/tests/run_tests.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Test runner for sqlparse."""
+
+import os
+import sys
+import unittest
+
+sys.path.insert(1, os.path.join(os.path.dirname(__file__), '../'))
+
+
+def main():
+ """Create a TestSuite and run it."""
+ loader = unittest.TestLoader()
+ suite = unittest.TestSuite()
+ fnames = [os.path.split(f)[-1] for f in sys.argv[1:]]
+ for fname in os.listdir(os.path.dirname(__file__)):
+ if (not fname.startswith('test_') or not fname.endswith('.py')
+ or (fnames and fname not in fnames)):
+ continue
+        modname = os.path.splitext(fname)[0]
+        mod = __import__(modname)
+ suite.addTests(loader.loadTestsFromModule(mod))
+ unittest.TextTestRunner(verbosity=2).run(suite)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tests/test_format.py b/tests/test_format.py
new file mode 100644
index 0000000..5748704
--- /dev/null
+++ b/tests/test_format.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+
+from tests.utils import TestCaseBase
+
+import sqlparse
+
+
+class TestFormat(TestCaseBase):
+
+ def test_keywordcase(self):
+ sql = 'select * from bar; -- select foo\n'
+ res = sqlparse.format(sql, keyword_case='upper')
+ self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- select foo\n')
+ res = sqlparse.format(sql, keyword_case='capitalize')
+ self.ndiffAssertEqual(res, 'Select * From bar; -- select foo\n')
+ res = sqlparse.format(sql.upper(), keyword_case='lower')
+ self.ndiffAssertEqual(res, 'select * from BAR; -- SELECT FOO\n')
+ self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql,
+ keyword_case='foo')
+
+ def test_identifiercase(self):
+ sql = 'select * from bar; -- select foo\n'
+ res = sqlparse.format(sql, identifier_case='upper')
+ self.ndiffAssertEqual(res, 'select * from BAR; -- select foo\n')
+ res = sqlparse.format(sql, identifier_case='capitalize')
+ self.ndiffAssertEqual(res, 'select * from Bar; -- select foo\n')
+ res = sqlparse.format(sql.upper(), identifier_case='lower')
+ self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- SELECT FOO\n')
+ self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql,
+ identifier_case='foo')
+ sql = 'select * from "foo"."bar"'
+ res = sqlparse.format(sql, identifier_case="upper")
+ self.ndiffAssertEqual(res, 'select * from "FOO"."BAR"')
+
+ def test_strip_comments_single(self):
+ sql = 'select *-- statement starts here\nfrom foo'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select * from foo')
+ sql = 'select * -- statement starts here\nfrom foo'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select * from foo')
+ sql = 'select-- foo\nfrom -- bar\nwhere'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select from where')
+
+ def test_strip_comments_multi(self):
+ sql = '/* sql starts here */\nselect'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select')
+ sql = '/* sql starts here */ select'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select')
+ sql = '/*\n * sql starts here\n */\nselect'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select')
+ sql = 'select (/* sql starts here */ select 2)'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select (select 2)')
+
+ def test_strip_ws(self):
+ f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
+ s = 'select\n* from foo\n\twhere ( 1 = 2 )\n'
+ self.ndiffAssertEqual(f(s), 'select * from foo where (1 = 2)')
+ s = 'select -- foo\nfrom bar\n'
+ self.ndiffAssertEqual(f(s), 'select -- foo\nfrom bar')
+
+
+class TestFormatReindent(TestCaseBase):
+
+ def test_stmts(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select foo; select bar'
+ self.ndiffAssertEqual(f(s), 'select foo;\n\nselect bar')
+ s = 'select foo'
+ self.ndiffAssertEqual(f(s), 'select foo')
+ s = 'select foo; -- test\n select bar'
+ self.ndiffAssertEqual(f(s), 'select foo; -- test\n\nselect bar')
+
+ def test_keywords(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select * from foo union select * from bar;'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'union',
+ 'select *',
+ 'from bar;']))
+
+ def test_parenthesis(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select count(*) from (select * from foo);'
+ self.ndiffAssertEqual(f(s),
+ '\n'.join(['select count(*)',
+ 'from',
+ ' (select *',
+ ' from foo);',
+ ])
+ )
+
+ def test_where(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select * from foo where bar = 1 and baz = 2 or bzz = 3;'
+ self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n'
+ 'where bar = 1\n'
+ ' and baz = 2\n'
+ ' or bzz = 3;'))
+ s = 'select * from foo where bar = 1 and (baz = 2 or bzz = 3);'
+ self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n'
+ 'where bar = 1\n'
+ ' and (baz = 2\n'
+ ' or bzz = 3);'))
+
+ def test_join(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select * from foo join bar on 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'join bar on 1 = 2']))
+ s = 'select * from foo inner join bar on 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'inner join bar on 1 = 2']))
+ s = 'select * from foo left outer join bar on 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select *',
+ 'from foo',
+ 'left outer join bar on 1 = 2']
+ ))
+
+ def test_identifier_list(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'select foo, bar, baz from table1, table2 where 1 = 2'
+ self.ndiffAssertEqual(f(s), '\n'.join(['select foo,',
+ ' bar,',
+ ' baz',
+ 'from table1,',
+ ' table2',
+ 'where 1 = 2']))
+
+ def test_case(self):
+ f = lambda sql: sqlparse.format(sql, reindent=True)
+ s = 'case when foo = 1 then 2 when foo = 3 then 4 else 5 end'
+ self.ndiffAssertEqual(f(s), '\n'.join(['case when foo = 1 then 2',
+ ' when foo = 3 then 4',
+ ' else 5',
+ 'end']))
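
TestFormat and TestFormatReindent together pin down how the individual formatting options behave; nothing above asserts how they compose, so the following is only a hedged sketch of combined use, with the output guessed from the individual rules:

    import sqlparse

    sql = 'select foo, bar from table1 where foo = 1'
    res = sqlparse.format(sql, reindent=True, keyword_case='upper')
    # Expected, if the keyword and reindent rules compose:
    #   SELECT foo,
    #          bar
    #   FROM table1
    #   WHERE foo = 1
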
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
new file mode 100644
index 0000000..fc3bea5
--- /dev/null
+++ b/tests/test_grouping.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+
+import sqlparse
+from sqlparse.engine.grouping import Identifier, Parenthesis, Where
+
+from tests.utils import TestCaseBase
+
+
+class TestGrouping(TestCaseBase):
+
+ def test_parenthesis(self):
+ s = 'x1 (x2 (x3) x2) foo (y2) bar'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, str(parsed))
+ self.assertEqual(len(parsed.tokens), 9)
+ self.assert_(isinstance(parsed.tokens[2], Parenthesis))
+ self.assert_(isinstance(parsed.tokens[-3], Parenthesis))
+ self.assertEqual(len(parsed.tokens[2].tokens), 7)
+ self.assert_(isinstance(parsed.tokens[2].tokens[3], Parenthesis))
+ self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3)
+
+ def test_comments(self):
+ s = '/*\n * foo\n */ \n bar'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, unicode(parsed))
+ self.assertEqual(len(parsed.tokens), 2)
+
+ def test_identifiers(self):
+ s = 'select foo.bar from "myscheme"."table" where fail. order'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, parsed.to_unicode())
+ self.assert_(isinstance(parsed.tokens[2], Identifier))
+ self.assert_(isinstance(parsed.tokens[6], Identifier))
+ self.assert_(isinstance(parsed.tokens[8], Where))
+ s = 'select * from foo where foo.id = 1'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, parsed.to_unicode())
+ self.assert_(isinstance(parsed.tokens[-1].tokens[2], Identifier))
+ s = 'select * from (select "foo"."id" from foo)'
+ parsed = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, parsed.to_unicode())
+ self.assert_(isinstance(parsed.tokens[-1].tokens[3], Identifier))
+
+ def test_where(self):
+ s = 'select * from foo where bar = 1 order by id desc'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(len(p.tokens), 16)
+ s = 'select x from (select y from foo where bar = 1) z'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertTrue(isinstance(p.tokens[-3].tokens[-1], Where))
+
+ def test_typecast(self):
+ s = 'select foo::integer from bar'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_typecast(), 'integer')
+ self.assertEqual(p.tokens[2].get_name(), 'foo')
+ s = 'select (current_database())::information_schema.sql_identifier'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_typecast(),
+ 'information_schema.sql_identifier')
+
+ def test_alias(self):
+ s = 'select foo as bar from mytable'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_real_name(), 'foo')
+ self.assertEqual(p.tokens[2].get_alias(), 'bar')
+ s = 'select foo from mytable t1'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[6].get_real_name(), 'mytable')
+ self.assertEqual(p.tokens[6].get_alias(), 't1')
+ s = 'select foo::integer as bar from mytable'
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[2].get_alias(), 'bar')
+ s = ('SELECT DISTINCT '
+ '(current_database())::information_schema.sql_identifier AS view')
+ p = sqlparse.parse(s)[0]
+ self.ndiffAssertEqual(s, p.to_unicode())
+ self.assertEqual(p.tokens[4].get_alias(), 'view')
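
The grouping tests treat a parsed statement as a token tree whose grouped nodes carry accessors. A short sketch of the traversal they imply, reusing the indices from test_alias:

    import sqlparse

    p = sqlparse.parse('select foo as bar from mytable')[0]
    ident = p.tokens[2]             # the grouped Identifier node
    print ident.get_real_name()     # 'foo'
    print ident.get_alias()         # 'bar'
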
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..536b6f6
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+"""Tests sqlparse function."""
+
+from utils import TestCaseBase
+
+import sqlparse
+
+
+class SQLParseTest(TestCaseBase):
+ """Tests sqlparse.parse()."""
+
+ def test_tokenize(self):
+ sql = 'select * from foo;'
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.assertEqual(str(stmts[0]), sql)
+
+ def test_multistatement(self):
+ sql1 = 'select * from foo;'
+ sql2 = 'select * from bar;'
+ stmts = sqlparse.parse(sql1+sql2)
+ self.assertEqual(len(stmts), 2)
+ self.assertEqual(str(stmts[0]), sql1)
+ self.assertEqual(str(stmts[1]), sql2)
+
+ def test_newlines(self):
+ sql = u'select\n*from foo;'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
+ sql = u'select\r\n*from foo'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
+ sql = u'select\r*from foo'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
+ sql = u'select\r\n*from foo\n'
+ p = sqlparse.parse(sql)[0]
+ self.assertEqual(unicode(p), sql)
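
test_newlines pins down that parsing is lossless for every line-ending style; a compact restatement of the same guarantee:

    import sqlparse

    for sql in (u'select\n1', u'select\r\n1', u'select\r1'):
        assert unicode(sqlparse.parse(sql)[0]) == sql
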
diff --git a/tests/test_split.py b/tests/test_split.py
new file mode 100644
index 0000000..782b226
--- /dev/null
+++ b/tests/test_split.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+"""Tests splitting functions."""
+
+from utils import load_file, TestCaseBase
+
+import sqlparse
+
+
+class SQLSplitTest(TestCaseBase):
+ """Tests sqlparse.sqlsplit()."""
+
+ _sql1 = 'select * from foo;'
+ _sql2 = 'select * from bar;'
+
+ def test_split_semicolon(self):
+ sql2 = 'select * from foo where bar = \'foo;bar\';'
+ stmts = sqlparse.parse(''.join([self._sql1, sql2]))
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(unicode(stmts[0]), self._sql1)
+ self.ndiffAssertEqual(unicode(stmts[1]), sql2)
+
+ def test_create_function(self):
+ sql = load_file('function.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_create_function_psql(self):
+ sql = load_file('function_psql.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_create_function_psql3(self):
+ sql = load_file('function_psql3.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_create_function_psql2(self):
+ sql = load_file('function_psql2.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 1)
+ self.ndiffAssertEqual(unicode(stmts[0]), sql)
+
+ def test_dashcomments(self):
+ sql = load_file('dashcomment.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 3)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_begintag(self):
+ sql = load_file('begintag.sql')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 3)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_dropif(self):
+ sql = 'DROP TABLE IF EXISTS FOO;\n\nSELECT * FROM BAR;'
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_comment_with_umlaut(self):
+ sql = (u'select * from foo;\n'
+ u'-- Testing an umlaut: ä\n'
+ u'select * from bar;')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+
+ def test_comment_end_of_line(self):
+ sql = ('select * from foo; -- foo\n'
+ 'select * from bar;')
+ stmts = sqlparse.parse(sql)
+ self.assertEqual(len(stmts), 2)
+ self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
+ # make sure the comment belongs to first query
+ self.ndiffAssertEqual(unicode(stmts[0]), 'select * from foo; -- foo\n')
+
+ def test_casewhen(self):
+ sql = ('SELECT case when val = 1 then 2 else null end as foo;\n'
+ 'comment on table actor is \'The actor table.\';')
+ stmts = sqlparse.split(sql)
+ self.assertEqual(len(stmts), 2)
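
test_casewhen uses sqlparse.split() rather than parse(): the string-level counterpart for callers that only need statement boundaries. A minimal sketch, with the exact return format (presumably the raw statement strings) an assumption:

    import sqlparse

    sql = 'select * from foo; select * from bar;'
    for stmt in sqlparse.split(sql):
        print stmt   # one statement per iteration
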
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
new file mode 100644
index 0000000..7106b3c
--- /dev/null
+++ b/tests/test_tokenize.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import types
+
+from sqlparse import lexer
+from sqlparse.tokens import Keyword, Punctuation
+
+
+class TestTokenize(unittest.TestCase):
+
+ def test_simple(self):
+ sql = 'select * from foo;'
+ stream = lexer.tokenize(sql)
+ self.assert_(type(stream) is types.GeneratorType)
+ tokens = list(stream)
+ self.assertEqual(len(tokens), 8)
+ self.assertEqual(len(tokens[0]), 2)
+ self.assertEqual(tokens[0], (Keyword.DML, u'select'))
+ self.assertEqual(tokens[-1], (Punctuation, u';'))
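
As test_simple shows, lexer.tokenize() yields (tokentype, value) pairs lazily, so a caller can pull tokens one at a time instead of materializing the full list:

    from sqlparse import lexer

    stream = lexer.tokenize('select * from foo;')
    first = stream.next()   # (Keyword.DML, u'select'); nothing else is lexed yet
    rest = list(stream)     # the remaining seven (tokentype, value) pairs
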
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..a78b460
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+"""Helpers for testing."""
+
+import codecs
+import difflib
+import os
+import unittest
+from StringIO import StringIO
+
+NL = '\n'
+DIR_PATH = os.path.abspath(os.path.dirname(__file__))
+PARENT_DIR = os.path.dirname(DIR_PATH)
+FILES_DIR = os.path.join(DIR_PATH, 'files')
+
+
+def load_file(filename, encoding='utf-8'):
+ """Opens filename with encoding and return it's contents."""
+ f = codecs.open(os.path.join(FILES_DIR, filename), 'r', encoding)
+ data = f.read()
+ f.close()
+ return data
+
+
+class TestCaseBase(unittest.TestCase):
+ """Base class for test cases with some additional checks."""
+
+ # Adapted from Python's tests.
+ def ndiffAssertEqual(self, first, second):
+ """Like failUnlessEqual except use ndiff for readable output."""
+ if first != second:
+ sfirst = unicode(first)
+ ssecond = unicode(second)
+ diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines())
+ fp = StringIO()
+ print >> fp, NL, NL.join(diff)
+ print fp.getvalue()
+ raise self.failureException(fp.getvalue())
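
A new test module only needs to subclass TestCaseBase to get the diff-based assertion; a minimal sketch:

    from tests.utils import TestCaseBase

    import sqlparse


    class TestExample(TestCaseBase):

        def test_roundtrip(self):
            # On mismatch, ndiffAssertEqual raises with a readable ndiff.
            sql = 'select * from foo;'
            self.ndiffAssertEqual(unicode(sqlparse.parse(sql)[0]), sql)
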