87 files changed, 5905 insertions, 0 deletions
@@ -0,0 +1,5 @@ +python-sqlparse is written and maintained by Andi Albrecht <albrecht.andi@gmail.com>. + +This module contains code (namely the lexer and filter mechanism) from +the pygments project that was written by Georg Brandl. + @@ -0,0 +1,3 @@ +Release 0.1.0 (In Development) +------------------------------ + * Initial.
\ No newline at end of file @@ -0,0 +1,25 @@ +Copyright (c) 2008, Andi Albrecht <albrecht.andi@gmail.com> +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the authors nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..60ab652 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include COPYING +include test.py +recursive-include docs *.rst
\ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..868043f --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +# Makefile to simplify some common development tasks. +# Run 'make help' for a list of commands. + +PYTHON=`which python` + +default: help + +help: + @echo "Available commands:" + @sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' <Makefile | sort + +test: + $(PYTHON) tests/run_tests.py + +clean: + $(PYTHON) setup.py clean + find . -name '*.pyc' -delete + find . -name '*~' -delete
\ No newline at end of file @@ -0,0 +1,45 @@ +python-sqlparse - Parse SQL statements +====================================== + +sqlparse currently supports only one feature: You can split a series +of SQL statements into single statements. + +Parts of the code are based on pygments, written by Georg Brandl and others. +pygments homepage: http://pygments.org/ + +Run Tests +--------- + + python tests/run_tests.py + + +Create Documentation +-------------------- + +You need `Sphinx`_ installed on your system to build the documentation. + + cd docs/ + make html + + +Install +------- + +Run + + python setup.py install + +with root privileges to install python-sqlparse on your system. + + +Links +----- + +Source code: http://github.com/andialbrecht/python-sqlparse + + +python-sqlparse is licensed under the New BSD license: +http://www.opensource.org/licenses/bsd-license.php + + +.. _Sphinx: http://sphinx.pocoo.org/ @@ -0,0 +1 @@ +- Add GQL dialect.
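As a quick illustration of the module-level API the README above describes (and docs/source/api.rst below documents), here is a minimal sketch of splitting and re-formatting statements. It only uses calls that appear elsewhere in this commit (sqlparse.split, and sqlparse.format with keyword_case/reindent); the exact output is shown in the intro.rst doctests and is not repeated here.

    import sqlparse

    sql = 'select * from foo; select * from bar;'

    # Split the series of statements into a list of single statements.
    for statement in sqlparse.split(sql):
        print statement.strip()

    # Re-format a statement; keyword_case and reindent mirror the options
    # accepted by bin/sqlformat and shown in the intro.rst examples.
    print sqlparse.format('select * from foo where id in (select id from bar);',
                          keyword_case='upper', reindent=True)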
\ No newline at end of file diff --git a/bin/sqlformat b/bin/sqlformat new file mode 100755 index 0000000..8d219a5 --- /dev/null +++ b/bin/sqlformat @@ -0,0 +1,103 @@ +#!/usr/bin/env python + +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +import optparse +import os +import sys + +import sqlparse + + +_CASE_CHOICES = ['upper', 'lower', 'capitalize'] + + +parser = optparse.OptionParser(usage='%prog [OPTIONS] FILE, ...', + version='%%prog %s' % sqlparse.__version__) +parser.set_description(('Format FILE according to OPTIONS. Use "-" as FILE ' + 'to read from stdin.')) +parser.add_option('-v', '--verbose', dest='verbose', action='store_true') +parser.add_option('-o', '--outfile', dest='outfile', metavar='FILE', + help='write output to FILE (defaults to stdout)') +group = parser.add_option_group('Formatting Options') +group.add_option('-k', '--keywords', metavar='CHOICE', + dest='keyword_case', choices=_CASE_CHOICES, + help=('change case of keywords, CHOICE is one of %s' + % ', '.join('"%s"' % x for x in _CASE_CHOICES))) +group.add_option('-i', '--identifiers', metavar='CHOICE', + dest='identifier_case', choices=_CASE_CHOICES, + help=('change case of identifiers, CHOICE is one of %s' + % ', '.join('"%s"' % x for x in _CASE_CHOICES))) +group.add_option('-l', '--language', metavar='LANG', + dest='output_format', choices=['python', 'php'], + help=('output a snippet in programming language LANG, ' + 'choices are "python", "php"')) +group.add_option('--strip-comments', dest='strip_comments', + action='store_true', default=False, + help='remove comments') +group.add_option('-r', '--reindent', dest='reindent', + action='store_true', default=False, + help='reindent statements') +group.add_option('--indent_width', dest='indent_width', default=2, + help='indentation width (defaults to 2 spaces)') + +_FORMATTING_GROUP = group + + +def _error(msg, exit_=None): + """Print msg and optionally exit with return code exit_.""" + print >>sys.stderr, '[ERROR] %s' % msg + if exit_ is not None: + sys.exit(exit_) + + +def _build_formatter_opts(options): + """Convert command line options to dictionary.""" + d = {} + for option in _FORMATTING_GROUP.option_list: + d[option.dest] = getattr(options, option.dest) + return d + + +def main(): + options, args = parser.parse_args() + if options.verbose: + print >>sys.stderr, 'Verbose mode' + + if len(args) != 1: + _error('No input data.') + parser.print_usage() + sys.exit(1) + + if '-' in args: # read from stdin + data = sys.stdin.read() + else: + try: + data = '\n'.join(open(args[0]).readlines()) + except OSError, err: + _error('Failed to read %s: %s' % (args[0], err), exit_=1) + + if options.outfile: + try: + stream = open(options.outfile, 'w') + except OSError, err: + _error('Failed to open %s: %s' % (options.outfile, err), exit_=1) + else: + stream = sys.stdout + + formatter_opts = _build_formatter_opts(options) + try: + formatter_opts = sqlparse.formatter.validate_options(formatter_opts) + except sqlparse.SQLParseError, err: + _error('Invalid options: %s' % err, exit_=1) + + stream.write(sqlparse.format(data, **formatter_opts).encode('utf-8', + 'replace')) + stream.flush() + + +if __name__ == '__main__': + main() diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..cb23107 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,88 @@ +# Makefile for Sphinx documentation +# + +# You can set these 
variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf build/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html + @echo + @echo "Build finished. The HTML pages are in build/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) build/dirhtml + @echo + @echo "Build finished. The HTML pages are in build/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in build/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) build/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in build/qthelp, like this:" + @echo "# qcollectiongenerator build/qthelp/python-sqlparse.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile build/qthelp/python-sqlparse.qhc" + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex + @echo + @echo "Build finished; the LaTeX files are in build/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes + @echo + @echo "The overview file is in build/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in build/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) build/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in build/doctest/output.txt." diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..3bce389 --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,12 @@ +:mod:`sqlparse` -- Parse SQL statements +======================================= + +The :mod:`sqlparse` module provides the following functions on module-level. + +.. autofunction:: sqlparse.split + +.. autofunction:: sqlparse.format + +.. autofunction:: sqlparse.parse + + diff --git a/docs/source/changes.rst b/docs/source/changes.rst new file mode 100644 index 0000000..5811c57 --- /dev/null +++ b/docs/source/changes.rst @@ -0,0 +1,7 @@ +.. 
_changes: + +Changes in python-sqlparse +========================== + +.. include:: ../../CHANGES + diff --git a/docs/source/cmdline.rst b/docs/source/cmdline.rst new file mode 100644 index 0000000..93a35a7 --- /dev/null +++ b/docs/source/cmdline.rst @@ -0,0 +1,8 @@ +``sqlformat`` -- Command Line Script +=========================================== + +The :mod:`sqlparse` module is shipped with the script +:program:`sqlformat` which provides a command line interface to the formatting +functions. + +.. todo:: Describe options and example usage. diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..b84245a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# +# python-sqlparse documentation build configuration file, created by +# sphinx-quickstart on Thu Feb 26 08:19:28 2009. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.append(os.path.abspath('.')) +sys.path.append(os.path.abspath('../../')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', + 'sphinx.ext.autosummary'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'python-sqlparse' +copyright = u'2009, Andi Albrecht' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.1.0' +# The full version, including alpha/beta/rc tags. +release = '0.1.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. 
+#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'sphinxdoc' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = [os.path.abspath('../')] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'python-sqlparsedoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'python-sqlparse.tex', ur'python-sqlparse Documentation', + ur'Andi Albrecht', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. 
+#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True +todo_include_todos = True diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..40e99e0 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,26 @@ +.. python-sqlparse documentation master file, created by + sphinx-quickstart on Thu Feb 26 08:19:28 2009. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to python-sqlparse's documentation! +=========================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + api + cmdline + changes + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/source/intro.rst b/docs/source/intro.rst new file mode 100644 index 0000000..3d7a888 --- /dev/null +++ b/docs/source/intro.rst @@ -0,0 +1,89 @@ +Introduction +============ + +:mod:`sqlparse` is a non-validating SQL parser for Python. + +It provides support for parsing, splitting and formatting SQL statements. + +:mod:`sqlparse` is released under the terms of the +`New BSD license <http://www.opensource.org/licenses/bsd-license.php>`_. + +Visit http://sqlformat.appspot.com to try its formatting features. + + +Download & Installation +----------------------- + +To download and install :mod:`sqlparse` on your system run the following +commands: + +.. code-block:: bash + + $ git clone git://github.com/andialbrecht/python-sqlparse.git + $ cd python-sqlparse/ + $ sudo python setup.py install + +A tarball of the current sources is available at the following URL: +http://github.com/andialbrecht/python-sqlparse/tarball/master + + +Example Usage +------------- + +Here are some usage examples of this module. + +Splitting statements:: + + >>> import sqlparse + >>> sql = 'select * from foo; select * from bar;' + >>> sqlparse.split(sql) + <<< [u'select * from foo; ', u'select * from bar;'] + +Formatting statements:: + + >>> sql = 'select * from foo where id in (select id from bar);' + >>> print sqlparse.format(sql, reindent=True, keyword_case='upper') + SELECT * + FROM foo + WHERE id IN + (SELECT id + FROM bar); + +Now, let's have a deeper look at the internals:: + + >>> sql = 'select * from "someschema"."mytable" where id = 1' + >>> parsed = sqlparse.parse(sql) + >>> parsed + <<< (<Statement 'select...' at 0x9ad08ec>,) + >>> stmt = parsed[0] + >>> stmt.to_unicode() # converting it back to unicode + <<< u'select * from "someschema"."mytable" where id = 1' + >>> # This is what the internal representation looks like: + >>> stmt.tokens + <<< + (<DML 'select' at 0x9b63c34>, + <Whitespace ' ' at 0x9b63e8c>, + <Operator '*' at 0x9b63e64>, + <Whitespace ' ' at 0x9b63c5c>, + <Keyword 'from' at 0x9b63c84>, + <Whitespace ' ' at 0x9b63cd4>, + <Identifier '"somes...' at 0x9b5c62c>, + <Whitespace ' ' at 0x9b63f04>, + <Where 'where ...' at 0x9b5caac>) + >>> + + +.. todo:: Describe general concepts + Why non-validating?
Processing stages (tokens, groups,...), filter, + + +Development & Contributing +-------------------------- + +Please file bug reports and feature requests on the project site at +http://code.google.com/p/python-sqlparse/issues/entry. If you have +code to contribute, upload it to http://codereview.appspot.com and +add albrecht.andi@googlemail.com as reviewer. + +For more information about the review tool and how to use it, visit +its project page: http://code.google.com/p/rietveld. diff --git a/docs/tango/static/bgfooter.png b/docs/tango/static/bgfooter.png Binary files differnew file mode 100644 index 0000000..9ce5bdd --- /dev/null +++ b/docs/tango/static/bgfooter.png diff --git a/docs/tango/static/bgtop.png b/docs/tango/static/bgtop.png Binary files differnew file mode 100644 index 0000000..a0d4709 --- /dev/null +++ b/docs/tango/static/bgtop.png diff --git a/docs/tango/static/default.css_t b/docs/tango/static/default.css_t new file mode 100644 index 0000000..c86759a --- /dev/null +++ b/docs/tango/static/default.css_t @@ -0,0 +1,90 @@ +/** + * Sphinx stylesheet -- default theme + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +@import url("basic.css"); + +body { + color: #000000; + font-family: "Free Sans", Arial, Verdana, sans; + font-size: 11pt; + width: 70em; + margin: 0; + margin-left: auto; + margin-right: auto; +} + + +div.related { + background: #eeeeec; /* #d3d7cf; */ + color: white; + padding-bottom: .6em; + padding-top: .6em; +} + +div.related a { + color: #2e3436; + text-decoration: none; +} + +div.document { + background: #eeeeec; +} + +div.body { + background: white; + padding: .5em; + -moz-border-radius: 0px 10px 10px 10px; + border-right: 1px solid #eeeeec; +} + +div.document h1, h2, h3, h4 { + color: #204a87; + font-weight: normal; + letter-spacing: .05em; +} + +div.document p, dl, div.highlight { + margin-left: 25px; +} + +div.document dd p, dd { + margin-left: .5em; +} + +div.document a { + color: #f57900; +} + +div.sphinxsidebarwrapper { + background: url(bgfooter.png) top left repeat-x #eeeeec; + padding-right: .5em; +} + +div.sphinxsidebar ul { + margin: 0; + padding: 0; +} + +div.sphinxsidebar a { + color: #888a85; + text-decoration: none; +} + +div.footer { + font-size: .8em; + color: #888a85; + text-align: right; + padding: 10px; +} + +div.footer a { + color: #888a85; +} + +div.highlight pre { + background-color: #eeeeec; + border: 1px solid #babdb6; + padding: 7px; +}
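Following on from the "deeper look at the internals" walkthrough in docs/source/intro.rst above, here is a small sketch of walking the token stream of a parsed statement. It relies only on what the intro doctest shows (sqlparse.parse(), Statement.to_unicode() and Statement.tokens); the token reprs and memory addresses will of course differ from run to run.

    import sqlparse

    parsed = sqlparse.parse('select * from "someschema"."mytable" where id = 1')
    stmt = parsed[0]

    # A Statement can be converted back to unicode ...
    print stmt.to_unicode()

    # ... and exposes its internal representation as a sequence of tokens
    # and token groups (Identifier, Where, ...).
    for token in stmt.tokens:
        print repr(token)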
\ No newline at end of file diff --git a/docs/tango/theme.conf b/docs/tango/theme.conf new file mode 100644 index 0000000..812330f --- /dev/null +++ b/docs/tango/theme.conf @@ -0,0 +1,28 @@ +[theme] +inherit = basic +stylesheet = default.css +pygments_style = sphinx + +[options] +rightsidebar = false +stickysidebar = false + +footerbgcolor = #11303d +footertextcolor = #ffffff +sidebarbgcolor = #1c4e63 +sidebartextcolor = #ffffff +sidebarlinkcolor = #98dbcc +relbarbgcolor = #133f52 +relbartextcolor = #ffffff +relbarlinkcolor = #ffffff +bgcolor = #ffffff +textcolor = #000000 +headbgcolor = #f2f2f2 +headtextcolor = #20435c +headlinkcolor = #c60f0f +linkcolor = #355f7c +codebgcolor = #eeffcc +codetextcolor = #333333 + +bodyfont = sans-serif +headfont = 'Trebuchet MS', sans-serif diff --git a/extras/appengine/Makefile b/extras/appengine/Makefile new file mode 100644 index 0000000..28380cf --- /dev/null +++ b/extras/appengine/Makefile @@ -0,0 +1,51 @@ +# Makefile to simplify some common AppEngine actions. +# Use 'make help' for a list of commands. + +PYTHON=`which python2.5` +DEV_APPSERVER=$(PYTHON) `which dev_appserver.py` +APPCFG=$(PYTHON) `which appcfg.py` +PORT=8080 + + +default: help + +help: + @echo "Available commands:" + @sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' <Makefile | sort + +serve: + $(DEV_APPSERVER) --port=$(PORT) . + +serve_remote: + $(DEV_APPSERVER) --port=$(PORT) --address 0.0.0.0 . + +serve_email: + $(DEV_APPSERVER) --port=$(PORT) --enable_sendmail . + +serve_remote_email: + $(DEV_APPSERVER) --port=$(PORT) --enable_sendmail --address 0.0.0.0 . + +release: make_release.sh django/.svn + sh make_release.sh + +update: release + $(APPCFG) update release + +upload: update + +update_indexes: + $(APPCFG) update_indexes . + +vacuum_indexes: + $(APPCFG) vacuum_indexes . + +all: django pygments sqlparse + +django: + svn co http://code.djangoproject.com/svn/django/trunk/django + +pygments: + ln -s `python -c "import pygments,os; print os.path.dirname(pygments.__file__)"` . + +sqlparse: + ln -s ../../sqlparse . diff --git a/extras/appengine/README b/extras/appengine/README new file mode 100644 index 0000000..4762faa --- /dev/null +++ b/extras/appengine/README @@ -0,0 +1,22 @@ +gae-sqlformat - An SQL formatting tool runnging on App Engine +============================================================= + + +To set up this application run + + make all + +This command fetches Django from Subversion and symlinks Pygments +and sqlparse. Note: You'll need Pygments installed somewhere in your +PYTHONPATH. 
+ +For a manual setup have a look at the Makefile ;-) + + +To run the development server run + + make serve + + +Homepage: http://sqlformat.appspot.com + diff --git a/extras/appengine/__init__.py b/extras/appengine/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/extras/appengine/__init__.py diff --git a/extras/appengine/app.yaml b/extras/appengine/app.yaml new file mode 100644 index 0000000..6aa08b2 --- /dev/null +++ b/extras/appengine/app.yaml @@ -0,0 +1,27 @@ +application: sqlformat +version: 2 +runtime: python +api_version: 1 + +default_expiration: 7d # This is good for images, which never change + +handlers: + +- url: /(robots.txt|favicon.ico) + static_files: static/\1 + upload: static/(robots.txt|favicon.ico) + +- url: /google7a062e78b56854c0.html + static_files: static/robots.txt + upload: static/robots.txt + +- url: /static/(script.js|styles.css|upload.py) + static_files: static/\1 + upload: static/(script.js|styles.css|upload.py) + expiration: 1h # Shorter expiration, these change often + +- url: /static + static_dir: static + +- url: .* + script: main.py diff --git a/extras/appengine/examples/customers.sql b/extras/appengine/examples/customers.sql new file mode 100644 index 0000000..8b73850 --- /dev/null +++ b/extras/appengine/examples/customers.sql @@ -0,0 +1 @@ +USE mydatabase;SELECT orders.customer, orders.day_of_order, orders.product, orders.quantity as number_ordered, inventory.quantity as number_instock, inventory.price FROM orders JOIN inventory ON orders.product = inventory.product;
\ No newline at end of file diff --git a/extras/appengine/examples/multiple_inserts.sql b/extras/appengine/examples/multiple_inserts.sql new file mode 100644 index 0000000..cf49d5d --- /dev/null +++ b/extras/appengine/examples/multiple_inserts.sql @@ -0,0 +1 @@ +insert into customer (id, name) values (1, 'John');insert into customer (id, name) values (2, 'Jack');insert into customer (id, name) values (3, 'Jane');insert into customer (id, name) values (4, 'Jim');insert into customer (id, name) values (5, 'Jerry');insert into customer (id, name) values (1, 'Joe');
\ No newline at end of file diff --git a/extras/appengine/examples/pg_view.sql b/extras/appengine/examples/pg_view.sql new file mode 100644 index 0000000..edf9f06 --- /dev/null +++ b/extras/appengine/examples/pg_view.sql @@ -0,0 +1 @@ +SELECT DISTINCT (current_database())::information_schema.sql_identifier AS view_catalog, (nv.nspname)::information_schema.sql_identifier AS view_schema, (v.relname)::information_schema.sql_identifier AS view_name, (current_database())::information_schema.sql_identifier AS table_catalog, (nt.nspname)::information_schema.sql_identifier AS table_schema, (t.relname)::information_schema.sql_identifier AS table_name FROM pg_namespace nv, pg_class v, pg_depend dv, pg_depend dt, pg_class t, pg_namespace nt WHERE ((((((((((((((nv.oid = v.relnamespace) AND (v.relkind = 'v'::"char")) AND (v.oid = dv.refobjid)) AND (dv.refclassid = ('pg_class'::regclass)::oid)) AND (dv.classid = ('pg_rewrite'::regclass)::oid)) AND (dv.deptype = 'i'::"char")) AND (dv.objid = dt.objid)) AND (dv.refobjid <> dt.refobjid)) AND (dt.classid = ('pg_rewrite'::regclass)::oid)) AND (dt.refclassid = ('pg_class'::regclass)::oid)) AND (dt.refobjid = t.oid)) AND (t.relnamespace = nt.oid)) AND (t.relkind = ANY (ARRAY['r'::"char", 'v'::"char"]))) AND pg_has_role(t.relowner, 'USAGE'::text)) ORDER BY (current_database())::information_schema.sql_identifier, (nv.nspname)::information_schema.sql_identifier, (v.relname)::information_schema.sql_identifier, (current_database())::information_schema.sql_identifier, (nt.nspname)::information_schema.sql_identifier, (t.relname)::information_schema.sql_identifier; diff --git a/extras/appengine/examples/subquery.sql b/extras/appengine/examples/subquery.sql new file mode 100644 index 0000000..dd4bbc1 --- /dev/null +++ b/extras/appengine/examples/subquery.sql @@ -0,0 +1 @@ +select sum(a1.Sales) from Store_Information a1 where a1.Store_name in (select store_name from Geography a2 where a2.store_name = a1.store_name);
\ No newline at end of file diff --git a/extras/appengine/examples/subquery2.sql b/extras/appengine/examples/subquery2.sql new file mode 100644 index 0000000..6c00a87 --- /dev/null +++ b/extras/appengine/examples/subquery2.sql @@ -0,0 +1 @@ +select user_id, count(*) as how_many from bboard where not exists (select 1 from bboard_authorized_maintainers bam where bam.user_id = bboard.user_id) and posting_time + 60 > sysdate group by user_id order by how_many desc;
\ No newline at end of file diff --git a/extras/appengine/index.yaml b/extras/appengine/index.yaml new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/extras/appengine/index.yaml diff --git a/extras/appengine/main.py b/extras/appengine/main.py new file mode 100644 index 0000000..d0a8418 --- /dev/null +++ b/extras/appengine/main.py @@ -0,0 +1,131 @@ +# Copyright 2008 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Main program for Rietveld. + +This is also a template for running a Django app under Google App +Engine, especially when using a newer version of Django than provided +in the App Engine standard library. + +The site-specific code is all in other files: urls.py, models.py, +views.py, settings.py. +""" + +# Standard Python imports. +import os +import sys +import logging + +# Log a message each time this module get loaded. +logging.info('Loading %s, app version = %s', + __name__, os.getenv('CURRENT_VERSION_ID')) + +# Delete the preloaded copy of Django. +for key in [key for key in sys.modules if key.startswith('django')]: + del sys.modules[key] + +os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' + +# Force sys.path to have our own directory first, so we can import from it. +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) + +# Import Django from a zipfile. +sys.path.insert(0, os.path.abspath('django.zip')) + +# Fail early if we can't import Django. Log identifying information. +import django +logging.info('django.__file__ = %r, django.VERSION = %r', + django.__file__, django.VERSION) +assert django.VERSION[0] >= 1, "This Django version is too old" + +# AppEngine imports. +from google.appengine.ext.webapp import util + + +# Helper to enter the debugger. This passes in __stdin__ and +# __stdout__, because stdin and stdout are connected to the request +# and response streams. You must import this from __main__ to use it. +# (I tried to make it universally available via __builtin__, but that +# doesn't seem to work for some reason.) +def BREAKPOINT(): + import pdb + p = pdb.Pdb(None, sys.__stdin__, sys.__stdout__) + p.set_trace() + + +# Custom Django configuration. +from django.conf import settings +settings._target = None + +# Import various parts of Django. +import django.core.handlers.wsgi +import django.core.signals +import django.db +import django.dispatch.dispatcher +import django.forms + +# Work-around to avoid warning about django.newforms in djangoforms. +django.newforms = django.forms + + +def log_exception(*args, **kwds): + """Django signal handler to log an exception.""" + cls, err = sys.exc_info()[:2] + logging.exception('Exception in request: %s: %s', cls.__name__, err) + + +# Log all exceptions detected by Django. +django.core.signals.got_request_exception.connect(log_exception) + +# Unregister Django's default rollback event handler. +django.core.signals.got_request_exception.disconnect( + django.db._rollback_on_exception) + + +def real_main(): + """Main program.""" + # Create a Django application for WSGI. 
+ application = django.core.handlers.wsgi.WSGIHandler() + # Run the WSGI CGI handler with that application. + util.run_wsgi_app(application) + + +def profile_main(): + """Main program for profiling.""" + import cProfile + import pstats + import StringIO + + prof = cProfile.Profile() + prof = prof.runctx('real_main()', globals(), locals()) + stream = StringIO.StringIO() + stats = pstats.Stats(prof, stream=stream) + # stats.strip_dirs() # Don't; too many modules are named __init__.py. + stats.sort_stats('time') # 'time', 'cumulative' or 'calls' + stats.print_stats() # Optional arg: how many to print + # The rest is optional. + # stats.print_callees() + # stats.print_callers() + print '\n<hr>' + print '<h1>Profile</h1>' + print '<pre>' + print stream.getvalue()[:1000000] + print '</pre>' + +# Set this to profile_main to enable profiling. +main = real_main + + +if __name__ == '__main__': + main() diff --git a/extras/appengine/make_release.sh b/extras/appengine/make_release.sh new file mode 100755 index 0000000..adb8a5c --- /dev/null +++ b/extras/appengine/make_release.sh @@ -0,0 +1,49 @@ +#!/bin/sh + +# Script to create a "release" subdirectory. This is a subdirectory +# containing a bunch of symlinks, from which the app can be updated. +# The main reason for this is to import Django from a zipfile, which +# saves dramatically in upload time: statting and computing the SHA1 +# for 1000s of files is slow. Even if most of those files don't +# actually need to be uploaded, they still add to the work done for +# each update. + +ZIPFILE=django.zip +RELEASE=release +FILES="app.yaml index.yaml __init__.py main.py settings.py" +DIRS="static templates sqlparse pygments sqlformat examples" + +# Remove old $ZIPFILE file. +rm -rf $ZIPFILE + +# Create new $ZIPFILE file. +# We prune: +# - .svn subdirectories for obvious reasons. +# - contrib/gis/ and related files because it's huge and unneeded. +# - *.po and *.mo files because they are bulky and unneeded. +# - *.pyc and *.pyo because they aren't used by App Engine anyway. +zip -q $ZIPFILE `find django/ \ + -name .svn -prune -o \ + -name gis -prune -o \ + -name admin -prune -o \ + -name localflavor -prune -o \ + -name mysql -prune -o \ + -name mysql_old -prune -o \ + -name oracle -prune -o \ + -name postgresql-prune -o \ + -name postgresql_psycopg2 -prune -o \ + -name sqlite3 -prune -o \ + -name test -prune -o \ + -type f ! -name \*.py[co] ! -name \*.[pm]o -print` + +# Remove old $RELEASE directory. +rm -rf $RELEASE + +# Create new $RELEASE directory. +mkdir $RELEASE + +# Create symbolic links. +for x in $FILES $DIRS $ZIPFILE +do + ln -s ../$x $RELEASE/$x +done diff --git a/extras/appengine/settings.py b/extras/appengine/settings.py new file mode 100644 index 0000000..fad6bb5 --- /dev/null +++ b/extras/appengine/settings.py @@ -0,0 +1,37 @@ +# Copyright 2008 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Minimal Django settings.""" + +import os + +APPEND_SLASH = False +DEBUG = os.environ['SERVER_SOFTWARE'].startswith('Dev') +INSTALLED_APPS = ( + 'sqlformat', +) +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.middleware.http.ConditionalGetMiddleware', +# 'codereview.middleware.AddUserToRequestMiddleware', +) +ROOT_URLCONF = 'sqlformat.urls' +TEMPLATE_CONTEXT_PROCESSORS = () +TEMPLATE_DEBUG = DEBUG +TEMPLATE_DIRS = ( + os.path.join(os.path.dirname(__file__), 'templates'), + ) +TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.load_template_source', + ) diff --git a/extras/appengine/sqlformat/__init__.py b/extras/appengine/sqlformat/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/extras/appengine/sqlformat/__init__.py diff --git a/extras/appengine/sqlformat/urls.py b/extras/appengine/sqlformat/urls.py new file mode 100644 index 0000000..c83290e --- /dev/null +++ b/extras/appengine/sqlformat/urls.py @@ -0,0 +1,11 @@ +from django.conf.urls.defaults import * + +urlpatterns = patterns( + 'sqlformat.views', + (r'^$', 'index'), + (r'^source/$', 'source'), + (r'^about/$', 'about'), + (r'^api/$', 'api'), + (r'^format/$', 'format'), + (r'^load_example', 'load_example'), +) diff --git a/extras/appengine/sqlformat/views.py b/extras/appengine/sqlformat/views.py new file mode 100644 index 0000000..d135c44 --- /dev/null +++ b/extras/appengine/sqlformat/views.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- + +import logging +import md5 +import os +import time + +from django import forms +from django.http import HttpResponse +from django.shortcuts import render_to_response +from django.utils import simplejson as json + +from google.appengine.api import users + +from pygments import highlight +from pygments.formatters import HtmlFormatter +from pygments.lexers import SqlLexer, PythonLexer, PhpLexer + +import sqlparse + + +INITIAL_SQL = "select * from foo join bar on val1 = val2 where id = 123;" +EXAMPLES_DIR = os.path.join(os.path.dirname(__file__), '../examples') + +def _get_user_image(user): + if user is None: + return None + digest = md5.new(user.email().lower()).hexdigest() + if os.environ['SERVER_SOFTWARE'].startswith('Dev'): + host = 'localhost%3A8080' + else: + host = 'sqlformat.appspot.com' + default = 'http%3A%2F%2F'+host+'%2Fstatic%2Fblank.gif' + return 'http://gravatar.com/avatar/%s?s=32&d=%s' % (digest, default) + +def _get_examples(): + fnames = os.listdir(EXAMPLES_DIR) + fnames.sort() + return fnames + + +class FormOptions(forms.Form): + data = forms.CharField(widget=forms.Textarea({'class': 'resizable'}), + initial=INITIAL_SQL, required=False) + datafile = forms.FileField(required=False) + highlight = forms.BooleanField(initial=True, required=False, + widget=forms.CheckboxInput(), + label='Enable syntax highlighting') + remove_comments = forms.BooleanField(initial=False, required=False, + widget=forms.CheckboxInput(), + label='Remove comments') + keyword_case = forms.CharField( + widget=forms.Select(choices=(('', 'Unchanged'), + ('lower', 'Lower case'), + ('upper', 'Upper case'), + ('capitalize', 'Capitalize'))), + required=False, initial='upper', label='Keywords') + identifier_case = forms.CharField( + widget=forms.Select(choices=(('', 'Unchanged'), + ('lower', 'Lower case'), + ('upper', 'Upper case'), + ('capitalize', 'Capitalize'))), + required=False, initial='', label='Identifiers') + n_indents = forms.IntegerField(min_value=0, max_value=30, + initial=2, required=False, + label='spaces', + 
widget=forms.TextInput({'size': 2, + 'maxlength': 2})) +# right_margin = forms.IntegerField(min_value=10, max_value=500, +# initial=60, required=False, +# label='characters', +# widget=forms.TextInput({'size': 3, +# 'maxlength': 3})) + output_format = forms.CharField( + widget=forms.Select(choices=(('sql', 'SQL'), + ('python', 'Python'), + ('php', 'PHP'), + )), + required=False, initial='sql', label='Language') + + def clean(self): + super(FormOptions, self).clean() + data = self.cleaned_data.get('data') + logging.info(self.files) + if 'datafile' in self.files: + self._datafile = self.files['datafile'].read() + else: + self._datafile = None + if not data and not self._datafile: + raise forms.ValidationError('Whoops, I need a file or text!') + elif data and self._datafile: + raise forms.ValidationError('Whoops, I need a file OR text!') + return self.cleaned_data + + def get_data(self): + data = self.cleaned_data.get('data') + if self._datafile: + return self._datafile + else: + return data + + +def format_sql(form, format='html'): + data = form.cleaned_data + popts = {} + sql = form.get_data() + if data.get('remove_comments'): + popts['strip_comments'] = True + if data.get('keyword_case'): + popts['keyword_case'] = data.get('keyword_case') + if data.get('identifier_case'): + popts['identifier_case'] = data.get('identifier_case') + if data.get('n_indents', None) is not None: + popts['reindent'] = True + popts['indent_width'] = data.get('n_indents') + if data.get('right_margin', None) is not None: + popts['right_margin'] = data.get('right_margin') + if data.get('output_format', None) is not None: + popts['output_format'] = data.get('output_format') + sql = sqlparse.format(sql, **popts) + if format in ('html', 'json'): + if data.get('highlight', False): + if popts['output_format'] == 'python': + lexer = PythonLexer() + elif popts['output_format'] == 'php': + lexer = PhpLexer() + else: + lexer = SqlLexer() + sql = highlight(sql, lexer, HtmlFormatter()) + else: + sql = ('<textarea class="resizable" ' + 'style="height: 350px; margin-top: 1em;">%s</textarea>' + % sql) + return sql + + +def index(request): + output = None + data = {} + proc_time = None + if request.method == 'POST': + logging.debug(request.POST) + form = FormOptions(request.POST, request.FILES) + if form.is_valid(): + start = time.time() + output = format_sql(form, + format=request.POST.get('format', 'html')) + proc_time = time.time()-start + else: + form = FormOptions() + if request.POST.get('format', None) == 'json': + logging.warning(form.errors) + data['errors'] = str(form.errors) + data['output'] = output + logging.info('%r', proc_time) + data['proc_time'] = '%.3f' % proc_time or 0.0 + data = json.dumps(data) + return HttpResponse(data, content_type='text/x-json') + elif request.POST.get('format', None) == 'text': + if not form.is_valid(): + data = str(form.errors) # XXX convert to plain text + else: + data = output + return HttpResponse(data, content_type='text/plain') + return render_to_response('index.html', + {'form': form, 'output': output, + 'proc_time': proc_time and '%.3f' % proc_time or None, + 'user': users.get_current_user(), + 'login_url': users.create_login_url('/'), + 'logout_url': users.create_logout_url('/'), + 'userimg': _get_user_image(users.get_current_user()), + 'examples': _get_examples()}) + + +def format(request): + if request.method == 'POST': + form = FormOptions(request.POST) + if form.is_valid(): + response = format_sql(form, format='text') + else: + response = 'ERROR: %s' % str(form.errors) + else: 
+ response = 'POST request required' + return HttpResponse(response, content_type='text/plain') + +def source(request): + return render_to_response('source.html') + +def about(request): + return render_to_response('about.html') + +def api(request): + return render_to_response('api.html') + +def load_example(request): + fname = request.POST.get('fname') + if fname is None: + answer = 'Uups, I\'ve got no filename...' + elif fname not in _get_examples(): + answer = 'Hmm, I think you don\'t want to do that.' + else: + answer = open(os.path.join(EXAMPLES_DIR, fname)).read() + data = json.dumps({'answer': answer}) + return HttpResponse(data, content_type='text/x-json') diff --git a/extras/appengine/static/bg_options.png b/extras/appengine/static/bg_options.png Binary files differnew file mode 100644 index 0000000..bc1a6ed --- /dev/null +++ b/extras/appengine/static/bg_options.png diff --git a/extras/appengine/static/bgfieldset.png b/extras/appengine/static/bgfieldset.png Binary files differnew file mode 100644 index 0000000..4d55f4a --- /dev/null +++ b/extras/appengine/static/bgfieldset.png diff --git a/extras/appengine/static/bgfooter.png b/extras/appengine/static/bgfooter.png Binary files differnew file mode 100644 index 0000000..9ce5bdd --- /dev/null +++ b/extras/appengine/static/bgfooter.png diff --git a/extras/appengine/static/bgtop.png b/extras/appengine/static/bgtop.png Binary files differnew file mode 100644 index 0000000..a0d4709 --- /dev/null +++ b/extras/appengine/static/bgtop.png diff --git a/extras/appengine/static/blank.gif b/extras/appengine/static/blank.gif Binary files differnew file mode 100644 index 0000000..3be2119 --- /dev/null +++ b/extras/appengine/static/blank.gif diff --git a/extras/appengine/static/canvas.html b/extras/appengine/static/canvas.html new file mode 100644 index 0000000..ab642d0 --- /dev/null +++ b/extras/appengine/static/canvas.html @@ -0,0 +1,114 @@ +<html> + +<head> +<style type="text/css"> + /* + These styles are customizable. 
+ Provide .canvas-gadget (the div that holds the canvas mode gadget) + at least 500px width so that the gadget has sufficient screen real estate +*/ +body { + margin: 0; + font-family:arial, sans-serif; + text-align:center; +} +.container { + width:652px; + margin:0 auto; + text-align:left; +} +.fc-sign-in-header { + text-align:left; + font-size: 13px; + padding:3px 10px; + border-bottom:1px solid #000000; +} +.signin { + text-align:left; + float:right; + font-size: 13px; + height: 32px; +} +.go-back { + text-align:left; + margin:5px auto 15px auto; +} +.go-back a, .go-back a:visited { + font-weight:bold; +} +.canvas-gadget { + text-align:left; + width:650px; /* ALLOW AT LEAST 500px WIDTH*/ + margin:10px auto 10px auto; + border:1px solid #cccccc; +} +.site-header { + margin-top: 10px; +} +.section-title { + font-size: 2em; +} +.clear { + clear:both; + font-size:1px; + height:1px; + line-height:0; + margin:0; + padding:0; +} +</style> +<script type="text/javascript" src="http://www.google.com/friendconnect/script/friendconnect.js"></script> +</head> +<body> +<div class="container"> + <div class="fc-sign-in-header"> + <!--REQUIRED SO VISITORS CAN SIGN IN--> + <div class="signin" id="gadget-signin"></div> + <script type="text/javascript"> + var skin = {}; + skin['BORDER_COLOR'] = '#cccccc'; + skin['ENDCAP_BG_COLOR'] = '#e0ecff'; + skin['ENDCAP_TEXT_COLOR'] = '#333333'; + skin['ENDCAP_LINK_COLOR'] = '#0000cc'; + skin['ALTERNATE_BG_COLOR'] = '#ffffff'; + skin['CONTENT_BG_COLOR'] = '#ffffff'; + skin['CONTENT_LINK_COLOR'] = '#0000cc'; + skin['CONTENT_TEXT_COLOR'] = '#333333'; + skin['CONTENT_SECONDARY_LINK_COLOR'] = '#7777cc'; + skin['CONTENT_SECONDARY_TEXT_COLOR'] = '#666666'; + skin['CONTENT_HEADLINE_COLOR'] = '#333333'; + skin['ALIGNMENT'] = 'right'; + google.friendconnect.container.renderCanvasSignInGadget({'id': 'gadget-signin'}, skin); + </script> + <!--END REQUIRED--> + <div class="clear"></div> + </div> + + <div class="site-header"><span class="section-title">Site Name</span></div> + <div class="go-back"> + <!--REQUIRED SO VISITORS CAN RETURN TO REFERRING PAGE--> + <a href="javascript:google.friendconnect.container.goBackToSite();"> + ‹‹ Return home</a> + <!--END REQUIRED--> + </div> + <!-- REQUIRED - THIS IS WHERE THE GADGET IS PRESENTED. ALLOW AT LEAST 500px WIDTH --> + <div id="gadget-canvas" class="canvas-gadget"></div> + <script type="text/javascript"> + var skin = {}; + skin['BORDER_COLOR'] = '#cccccc'; + skin['ENDCAP_BG_COLOR'] = '#e0ecff'; + skin['ENDCAP_TEXT_COLOR'] = '#333333'; + skin['ENDCAP_LINK_COLOR'] = '#0000cc'; + skin['ALTERNATE_BG_COLOR'] = '#ffffff'; + skin['CONTENT_BG_COLOR'] = '#ffffff'; + skin['CONTENT_LINK_COLOR'] = '#0000cc'; + skin['CONTENT_TEXT_COLOR'] = '#333333'; + skin['CONTENT_SECONDARY_LINK_COLOR'] = '#7777cc'; + skin['CONTENT_SECONDARY_TEXT_COLOR'] = '#666666'; + skin['CONTENT_HEADLINE_COLOR'] = '#333333'; + google.friendconnect.container.renderUrlCanvasGadget({'id': 'gadget-canvas'}, skin); + </script> + <!--END REQUIRED--> +</div> +</body> +</html>
\ No newline at end of file diff --git a/extras/appengine/static/hotkeys.js b/extras/appengine/static/hotkeys.js new file mode 100644 index 0000000..0e62a92 --- /dev/null +++ b/extras/appengine/static/hotkeys.js @@ -0,0 +1 @@ +(function(B){B.fn.__bind__=B.fn.bind;B.fn.__unbind__=B.fn.unbind;B.fn.__find__=B.fn.find;var A={version:"0.7.8",override:/keydown|keypress|keyup/g,triggersMap:{},specialKeys:{27:"esc",9:"tab",32:"space",13:"return",8:"backspace",145:"scroll",20:"capslock",144:"numlock",19:"pause",45:"insert",36:"home",46:"del",35:"end",33:"pageup",34:"pagedown",37:"left",38:"up",39:"right",40:"down",112:"f1",113:"f2",114:"f3",115:"f4",116:"f5",117:"f6",118:"f7",119:"f8",120:"f9",121:"f10",122:"f11",123:"f12"},shiftNums:{"`":"~","1":"!","2":"@","3":"#","4":"$","5":"%","6":"^","7":"&","8":"*","9":"(","0":")","-":"_","=":"+",";":":","'":'"',",":"<",".":">","/":"?","\\":"|"},newTrigger:function(E,D,F){var C={};C[E]={};C[E][D]={cb:F,disableInInput:false};return C}};if(B.browser.mozilla){A.specialKeys=B.extend(A.specialKeys,{96:"0",97:"1",98:"2",99:"3",100:"4",101:"5",102:"6",103:"7",104:"8",105:"9"})}B.fn.find=function(C){this.query=C;return B.fn.__find__.apply(this,arguments)};B.fn.unbind=function(H,E,G){if(B.isFunction(E)){G=E;E=null}if(E&&typeof E==="string"){var F=((this.prevObject&&this.prevObject.query)||(this[0].id&&this[0].id)||this[0]).toString();var D=H.split(" ");for(var C=0;C<D.length;C++){delete A.triggersMap[F][D[C]][E]}}return this.__unbind__(H,G)};B.fn.bind=function(J,F,K){var H=J.match(A.override);if(B.isFunction(F)||!H){return this.__bind__(J,F,K)}else{var N=null,I=B.trim(J.replace(A.override,""));if(I){N=this.__bind__(I,F,K)}if(typeof F==="string"){F={combi:F}}if(F.combi){for(var M=0;M<H.length;M++){var D=H[M];var G=F.combi.toLowerCase(),E=A.newTrigger(D,G,K),L=((this.prevObject&&this.prevObject.query)||(this[0].id&&this[0].id)||this[0]).toString();E[D][G].disableInInput=F.disableInInput;if(!A.triggersMap[L]){A.triggersMap[L]=E}else{if(!A.triggersMap[L][D]){A.triggersMap[L][D]=E[D]}}var C=A.triggersMap[L][D][G];if(!C){A.triggersMap[L][D][G]=[E[D][G]]}else{if(C.constructor!==Array){A.triggersMap[L][D][G]=[C]}else{A.triggersMap[L][D][G][C.length]=E[D][G]}}this.each(function(){var O=B(this);if(O.attr("hkId")&&O.attr("hkId")!==L){L=O.attr("hkId")+";"+L}O.attr("hkId",L)});N=this.__bind__(H.join(" "),F,A.handler)}}return N}};A.findElement=function(C){if(!B(C).attr("hkId")){if(B.browser.opera||B.browser.safari){while(!B(C).attr("hkId")&&C.parentNode){C=C.parentNode}}}return C};A.handler=function(E){var O=A.findElement(E.currentTarget),I=B(O),D=I.attr("hkId");if(D){D=D.split(";");var G=E.which,Q=E.type,P=A.specialKeys[G],N=!P&&String.fromCharCode(G).toLowerCase(),H=E.shiftKey,C=E.ctrlKey,M=E.altKey||E.originalEvent.altKey,F=null;for(var R=0;R<D.length;R++){if(A.triggersMap[D[R]][Q]){F=A.triggersMap[D[R]][Q];break}}if(F){var J;if(!H&&!C&&!M){J=F[P]||(N&&F[N])}else{var L="";if(M){L+="alt+"}if(C){L+="ctrl+"}if(H){L+="shift+"}J=F[L+P];if(!J){if(N){J=F[L+N]||F[L+A.shiftNums[N]]||(L==="shift+"&&F[A.shiftNums[N]])}}}if(J){var S=false;for(var R=0;R<J.length;R++){if(J[R].disableInInput){var K=B(E.target);if(I.is("input")||I.is("textarea")||K.is("input")||K.is("textarea")){return true}}S=S||J[R].cb.apply(this,[E])}return S}}}};window.hotkeys=A;return B})(jQuery);
\ No newline at end of file diff --git a/extras/appengine/static/img_loading.gif b/extras/appengine/static/img_loading.gif Binary files differnew file mode 100644 index 0000000..6465823 --- /dev/null +++ b/extras/appengine/static/img_loading.gif diff --git a/extras/appengine/static/jquery.textarearesizer.compressed.js b/extras/appengine/static/jquery.textarearesizer.compressed.js new file mode 100644 index 0000000..5464ae6 --- /dev/null +++ b/extras/appengine/static/jquery.textarearesizer.compressed.js @@ -0,0 +1 @@ +(function($){var textarea,staticOffset;var iLastMousePos=0;var iMin=32;var grip;$.fn.TextAreaResizer=function(){return this.each(function(){textarea=$(this).addClass('processed'),staticOffset=null;$(this).wrap('<div class="resizable-textarea"><span></span></div>').parent().append($('<div class="grippie"></div>').bind("mousedown",{el:this},startDrag));var grippie=$('div.grippie',$(this).parent())[0];grippie.style.marginRight=(grippie.offsetWidth-$(this)[0].offsetWidth)+'px'})};function startDrag(e){textarea=$(e.data.el);textarea.blur();iLastMousePos=mousePosition(e).y;staticOffset=textarea.height()-iLastMousePos;textarea.css('opacity',0.25);$(document).mousemove(performDrag).mouseup(endDrag);return false}function performDrag(e){var iThisMousePos=mousePosition(e).y;var iMousePos=staticOffset+iThisMousePos;if(iLastMousePos>=(iThisMousePos)){iMousePos-=5}iLastMousePos=iThisMousePos;iMousePos=Math.max(iMin,iMousePos);textarea.height(iMousePos+'px');if(iMousePos<iMin){endDrag(e)}return false}function endDrag(e){$(document).unbind('mousemove',performDrag).unbind('mouseup',endDrag);textarea.css('opacity',1);textarea.focus();textarea=null;staticOffset=null;iLastMousePos=0}function mousePosition(e){return{x:e.clientX+document.documentElement.scrollLeft,y:e.clientY+document.documentElement.scrollTop}}})(jQuery);
\ No newline at end of file diff --git a/extras/appengine/static/loading.gif b/extras/appengine/static/loading.gif Binary files differnew file mode 100644 index 0000000..a879bed --- /dev/null +++ b/extras/appengine/static/loading.gif diff --git a/extras/appengine/static/lynx_screenshot.png b/extras/appengine/static/lynx_screenshot.png Binary files differnew file mode 100644 index 0000000..d1592ac --- /dev/null +++ b/extras/appengine/static/lynx_screenshot.png diff --git a/extras/appengine/static/pygments.css b/extras/appengine/static/pygments.css new file mode 100644 index 0000000..da02807 --- /dev/null +++ b/extras/appengine/static/pygments.css @@ -0,0 +1,59 @@ +.c { color: #408080; font-style: italic } /* Comment */ +.err { border: 1px solid #FF0000 } /* Error */ +.k { color: #008000; font-weight: bold } /* Keyword */ +.o { color: #666666 } /* Operator */ +.cm { color: #408080; font-style: italic } /* Comment.Multiline */ +.cp { color: #BC7A00 } /* Comment.Preproc */ +.c1 { color: #408080; font-style: italic } /* Comment.Single */ +.cs { color: #408080; font-style: italic } /* Comment.Special */ +.gd { color: #A00000 } /* Generic.Deleted */ +.ge { font-style: italic } /* Generic.Emph */ +.gr { color: #FF0000 } /* Generic.Error */ +.gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.gi { color: #00A000 } /* Generic.Inserted */ +.go { color: #808080 } /* Generic.Output */ +.gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.gs { font-weight: bold } /* Generic.Strong */ +.gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.gt { color: #0040D0 } /* Generic.Traceback */ +.kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.kp { color: #008000 } /* Keyword.Pseudo */ +.kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.kt { color: #B00040 } /* Keyword.Type */ +.m { color: #666666 } /* Literal.Number */ +.s { color: #BA2121 } /* Literal.String */ +.na { color: #7D9029 } /* Name.Attribute */ +.nb { color: #008000 } /* Name.Builtin */ +.nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.no { color: #880000 } /* Name.Constant */ +.nd { color: #AA22FF } /* Name.Decorator */ +.ni { color: #999999; font-weight: bold } /* Name.Entity */ +.ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +.nf { color: #0000FF } /* Name.Function */ +.nl { color: #A0A000 } /* Name.Label */ +.nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.nt { color: #008000; font-weight: bold } /* Name.Tag */ +.nv { color: #19177C } /* Name.Variable */ +.ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.w { color: #bbbbbb } /* Text.Whitespace */ +.mf { color: #666666 } /* Literal.Number.Float */ +.mh { color: #666666 } /* Literal.Number.Hex */ +.mi { color: #666666 } /* Literal.Number.Integer */ +.mo { color: #666666 } /* Literal.Number.Oct */ +.sb { color: #BA2121 } /* Literal.String.Backtick */ +.sc { color: #BA2121 } /* Literal.String.Char */ +.sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.s2 { color: #BA2121 } /* Literal.String.Double */ +.se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.sh { color: #BA2121 } /* Literal.String.Heredoc */ +.si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.sx { color: #008000 } /* Literal.String.Other */ +.sr { color: #BB6688 } /* Literal.String.Regex */ +.s1 { color: #BA2121 } /* Literal.String.Single */ +.ss { color: #19177C } /* 
Literal.String.Symbol */ +.bp { color: #008000 } /* Name.Builtin.Pseudo */ +.vc { color: #19177C } /* Name.Variable.Class */ +.vg { color: #19177C } /* Name.Variable.Global */ +.vi { color: #19177C } /* Name.Variable.Instance */ +.il { color: #666666 } /* Literal.Number.Integer.Long */ diff --git a/extras/appengine/static/resize-grip.png b/extras/appengine/static/resize-grip.png Binary files differnew file mode 100644 index 0000000..cae2a4e --- /dev/null +++ b/extras/appengine/static/resize-grip.png diff --git a/extras/appengine/static/robots.txt b/extras/appengine/static/robots.txt new file mode 100644 index 0000000..c033917 --- /dev/null +++ b/extras/appengine/static/robots.txt @@ -0,0 +1,8 @@ +# Directions for web crawlers. +# See http://www.robotstxt.org/wc/norobots.html. + +User-agent: HTTrack +User-agent: puf +User-agent: MSIECrawler +User-agent: Nutch +Disallow: / diff --git a/extras/appengine/static/rpc_relay.html b/extras/appengine/static/rpc_relay.html new file mode 100644 index 0000000..c602043 --- /dev/null +++ b/extras/appengine/static/rpc_relay.html @@ -0,0 +1 @@ +<html><head><script type="text/javascript" src="http://www.google.com/friendconnect/script/rpc_relay.js"></script></head></html>
\ No newline at end of file diff --git a/extras/appengine/static/script.js b/extras/appengine/static/script.js new file mode 100644 index 0000000..71bbabb --- /dev/null +++ b/extras/appengine/static/script.js @@ -0,0 +1,103 @@ +var initialized = false; + +function update_output() { + data = {} + data.data = $('#id_data').val(); + data.format = 'json'; + if ( $('#id_remove_comments').attr('checked') ) { + data.remove_comments = 1 + } + if ( $('#id_highlight').attr('checked') ) { data.highlight = 1 } + data.keyword_case = $('#id_keyword_case').val(); + data.identifier_case = $('#id_identifier_case').val(); + data.n_indents = $('#id_n_indents').val(); + data.right_margin = $('#id_right_margin').val(); + data.output_format = $('#id_output_format').val(); + form = document.getElementById('form_options'); + $(form.elements).attr('disabled', 'disabled'); + $('#response').addClass('loading'); + $.post('/', data, + function(data) { + if ( data.output ) { + $('#response').html(data.output); + proc_time = 'Processed in '+data.proc_time+' seconds.'; + } else { + $('#response').html('An error occured: '+data.errors); + proc_time = ''; + } + $('#proc_time').html(proc_time); + $(form.elements).each( function(idx) { + obj = $(this); + if ( ! obj.is('.keep-disabled') ) { + obj.removeAttr('disabled'); + } + }); + $('#response').removeClass('loading'); + }, 'json'); + return false; +} + +function toggle_fieldset(event) { + id = $(this).attr('id'); + $('#'+id+'_content').slideDown(); + $('legend').each(function(idx) { + obj = $('#'+this.id+'_content'); + if ( this.id != id ) { + obj.slideUp(); + } + }); +} + + +function textarea_grab_focus(evt) { + evt.stopPropagation(); + evt.preventDefault(); + $('#id_data').focus(); + return false; +} + + +function show_help() { + $('#help').toggle(); + return false; +} + + +function hide_help() { + $('#help').hide(); + return false; +} + +function load_example() { + fname = $('#sel_example').val(); + data = {fname: fname}; + $.post('/load_example', data, + function(data) { + $('#id_data').val(data.answer); + }, 'json'); +} + + +function init() { + if (initialized) { return } + //$('legend').bind('click', toggle_fieldset); + // $('legend').each(function(idx) { + // obj = $('#'+this.id+'_content'); + // if ( this.id != 'general' ) { + // obj.hide(); + // } + // }); + $(document).bind('keydown', {combi:'Ctrl+f'}, + update_output); + $('#btn_format').val('Format SQL [Ctrl+F]'); + $(document).bind('keydown', {combi: 'h', disableInInput: true}, + show_help); + $(document).bind('keydown', 'Esc', hide_help); + $(document).bind('keydown', {combi: 't', disableInInput: true}, + textarea_grab_focus); + initialized = true; + /* jQuery textarea resizer plugin usage */ + $(document).ready(function() { + $('textarea.resizable:not(.processed)').TextAreaResizer(); + }); +}
\ No newline at end of file diff --git a/extras/appengine/static/sqlformat_client_example.py b/extras/appengine/static/sqlformat_client_example.py new file mode 100644 index 0000000..3b3bf0f --- /dev/null +++ b/extras/appengine/static/sqlformat_client_example.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +import urllib +import urllib2 + +payload = ( + ('data', 'select * from foo join bar on val1 = val2 where id = 123;'), + ('format', 'text'), + ('keyword_case', 'upper'), + ('reindent', True), + ('n_indents', 2), + ) + +response = urllib2.urlopen('http://sqlformat.appspot.com/format/', + urllib.urlencode(payload)) +print response.read() + diff --git a/extras/appengine/static/styles.css b/extras/appengine/static/styles.css new file mode 100644 index 0000000..41a540a --- /dev/null +++ b/extras/appengine/static/styles.css @@ -0,0 +1,245 @@ +body { + color: #000000; + background: #eeeeec; + font-family: "Free Sans", Arial, Verdana, sans; + font-size: 10pt; + margin: 0; + padding: 0; +} + +#header { + background: url(/static/bgtop.png) top left repeat-x; + border-bottom: 3px solid #2e3436; +} + +#header-inner, #main-inner, #footer-inner { + width: 70em; + margin-left: auto; + margin-right: auto; +} + + +#header-inner h1 { + margin: 0; + padding: 0; + margin-bottom: .2em; + font-weight: normal; + float: left; + font-size: 2em; + letter-spacing: .07em; +} + +#header-inner .q { + color: #f57900; + padding-right: 3px; +} + +#header-inner .q2 { + font-family: Georgia, "Times New Roman", serif; +} + +#header-inner h1 a { + text-decoration: none; + color: #eeeeec; +} + +#header-inner #slogan { + float: left; + color: #babdb6; + font-size: 1.4em; + margin-left: 1em; + letter-spacing: .18em; + padding-top: .2em; +} + + +#topmenu { + color: #729fcf; + clear: left; + padding-top: .5em; + padding-bottom: .5em; + font-size: 1.1em; +} + +#topmenu a { + color: #eeeeec; + text-decoration: none; +} + +#topmenu a:hover { + color: #ce5c00; +} + + +#main { + padding: 10px; + background: white; + line-height: 1.5em; + text-align: justify; +} + +#main form ul { + margin: 0; + padding: 0; + list-style-type: none; +} + +#main p, #main ol, #main .example, #main dl { + font-size: 12pt; + margin-left: 2em; +} + +#main dt { + font-weight: bold; +} + +#main li { + margin-bottom: .7em; +} + +#main a { + color: #f57900; +} + +#main h1, h2, h3, h4 { + color: #204a87; + font-weight: normal; + letter-spacing: .05em; +} + +#main pre, #main code.pre { + font-size: 10pt; + line-height: 1em; + padding: 4px; + background-color: #eeeeec; + border: 1px solid #babdb6; +} + +#input { + width: 50em; + float: right; + margin-left: 2em; +} + +#options { + width: 18em; + float: left; + color: #2e3436; + margin-top: .75em; + text-align: left; +} + +#options fieldset { + border: 1px solid #dddddd; + margin-bottom: .6em; + background: url(/static/bgfieldset.png) bottom left repeat-x; + -moz-border-radius: 3px; +} + + +#options input, select { + border: 1px solid #dddddd; +} + +#options .help { + font-size: .9em; + color: #888a85; + margin-bottom: .6em; +} + + +#footer { + background: url(/static/bgfooter.png) top left repeat-x; + padding: 10px; + min-height: 80px; + border-top: 4px solid #babdb6; +} + +#footer-inner { + width: 70em; + margin-left: auto; + margin-right: auto; + color: #888a85; +} + +#footer-inner a { + color: #888a85; +} + +#footer-inner a:hover { + color: #555753; +} + +.clearfix { + clear: both; +} + +.skip { + display: none; +} + +textarea { + border: 1px solid #cccccc; + border-bottom: none; + padding: 4px; + font-size: 12pt; 
+ width: 100%; +} + +textarea:focus { + background-color: #eeeeec; +} + +div.grippie { + background: url(/static/resize-grip.png) bottom right no-repeat #eeeeec; + border-color: #cccccc; + border-style: solid; + border-width: 0pt 1px 1px; + cursor: se-resize; + height: 14px; + overflow: hidden; +} + +#help { + display: none; + position: fixed; + right: 10%; + left: 10%; + top: 0; + opacity: 0.85; + -moz-opacity: 0.85; + -khtml-opacity: 0.85; + filter: alpha(opacity=85); + -moz-border-radius: 0px 0px 10px 10px; + + background: #2e3436; + color: white; + font-weight: bold; + + padding: 1em; + z-index: 1; + overflow-x: hidden; + overflow-y: auto; +} + +#help .shortcut { + color: #f57900; + font-weight: bold; + width: 20px; + display: inline; +} + +.loading { + background: url(/static/loading.gif) top left no-repeat; +} + +.dev { + color: #cc0000; + font-size: .9em; + letter-spacing: 1; +} + +#proc_time { + color: #888a85; + font-size: .85em; +}
\ No newline at end of file diff --git a/extras/appengine/templates/about.html b/extras/appengine/templates/about.html new file mode 100644 index 0000000..3222b97 --- /dev/null +++ b/extras/appengine/templates/about.html @@ -0,0 +1,44 @@ +{% extends "master.html" %} + +{% block main %} +<h1>About this Application</h1> +<p> + This application is a online SQL formatting tool. +</p> +<p> + Basically it's a playground for a Python module to parse and format + SQL statements. Sometimes it's easier to combine the available + options and to see the resulting output using a web front-end than + on the command line ;-) +</p> +<p> + To get started, enter a SQL statement in the text box on the top, + choose some options and click on "Format SQL" (Ctrl+F) + to see the result. +</p> +<p> + <em>Note:</em> The SQL formatter and parser is in an early stage + of development. If you're looking for a mature tool, try one of + <a href="http://www.google.com/search?q=online+sql+formatter">these</a>. +</p> +<h2>Using it from the Command Line</h2> +<p> + There are three ways to use this SQL formatter from the command line: +</p> +<ol> + <li>Grab the <a href="/source/">sources</a> and use the module in your + Python scripts.</li> + <li> + Write a little script in your favorite language that sends a POST + request to this application.<br/> + Read the <a href="/api/">API Documentation</a> for more information. + </li> + <li>Use + <a href="/static/lynx_screenshot.png" + alt="Lynx Screenshot" target="_blank" + title="Screenshot: sqlformat.appspot.com on Lynx">Lynx + </a> + </li> +</ol> + +{% endblock %} diff --git a/extras/appengine/templates/api.html b/extras/appengine/templates/api.html new file mode 100644 index 0000000..b9aaae7 --- /dev/null +++ b/extras/appengine/templates/api.html @@ -0,0 +1,50 @@ +{% extends "master.html" %} + +{% block main %} +<h1>API Documentation</h1> + +<p> + Using the API for this application is pretty simple. Just send a + <code>POST</code> request to +</p> +<p> + <code>http://sqlformat.appspot.com/format/</code> +</p> + +<h2>Options</h2> +<p> + The <code>POST</code> request accepts various options to control + formatting. Only the <em>data</em> option is required. All others + are optional. +</p> + +<dl> + <dt>data</dt> + <dd>The SQL statement to format.</dd> + <dt>remove_comments</dt> + <dd>Set to 1 to remove comments.</dd> + <dt>keyword_case</dt> + <dd>How to convert keywords. Allowed values are 'lower', 'upper', + 'capitalize'.</dd> + <dt>identifier_case</dt> + <dd>How to convert identifiers. Allowed values are 'lower', 'upper', + 'capitalize'.</dd> + <dt>n_indents</dt> + <dd>An integer indicating the indendation depth.</dd> + <dt>right_margin</dt> + <dd>An integer indicating the maximum line length.</dd> + <dt>output_format</dt> + <dd>Transfer the statement into another programming language. 
+ Allowed values are 'python', 'php'</dd> +</dl> + +<h2>Example</h2> +<p> + Here's a example in Python: +</p> +{% include "python-client-example.html" %} +<p> + <a href="/static/sqlformat_client_example.py">Download sqlformat_example_client.py</a> +</p> + +{% endblock %} diff --git a/extras/appengine/templates/index.html b/extras/appengine/templates/index.html new file mode 100644 index 0000000..57e7ed3 --- /dev/null +++ b/extras/appengine/templates/index.html @@ -0,0 +1,107 @@ +{% extends "master.html" %} + +{% block main %} + +{% if output %} + <a href="#output" class="skip">Jump to formatted query</a> +{% endif %} + +<form method="post" action="" id="form_options" enctype="multipart/form-data"> + <div id="input"> + {% if form.non_field_errors %}{{form.non_field_errors}}{% endif %} + <div> + <strong>Type your SQL here:</strong><br /> + {{form.data}} + {% if form.data.errors %}{{form.data.errors}}{% endif %} + </div> + <div style="margin-top: .5em;"> + <strong>...or upload a file:</strong> + {{form.datafile}} + </div> + <div id="examples" style="margin-top: .5em;"></div> + <div id="actions" style="margin-top: .5em;"> + <input type="submit" value="Format SQL" id="btn_format" /> + </div> + {% if output %}<a name="output"></a> + <div id="response">{{output|safe}}</div> + {% else %} + <div id="response"></div> + {% endif %} + <div id="proc_time"> + {% if proc_time %}Processed in {{proc_time}} seconds.{% endif %} + </div> + <div style="margin-top: 1em;"> + <script type="text/javascript"> + <!-- + google_ad_client = "pub-8870624642249726"; + /* 468x60, Erstellt 07.03.09 */ + google_ad_slot = "9840041509"; + google_ad_width = 468; + google_ad_height = 60; + //--> + </script> + <script type="text/javascript" + src="http://pagead2.googlesyndication.com/pagead/show_ads.js"> + </script> + </div> + + </div> + <div id="options"> + <h1 class="skip">Options</h1> + <fieldset><legend id="general"><strong>General Options</strong></legend> + <div id="general_content" class="content"> + {{form.remove_comments}} + <label for="id_remove_comments">{{form.remove_comments.label}}</label> + <br /> + {{form.highlight}} + <label for="id_highlight">{{form.highlight.label}}</label> + {% if form.highlight.errors %} + <ul class="errors">{{form.highlight.errors</ul> + {% endif %} + </div> + </fieldset> + <fieldset><legend id="kwcase"> + <strong>Keywords & Identifiers</strong></legend> + <div> + {{form.keyword_case.label}}: {{form.keyword_case}} + </div> + <div> + {{form.identifier_case.label}}: {{form.identifier_case}} + </div> + </fieldset> + <fieldset><legend id="indent"><strong>Indentation & Margins</strong> + </legend> + <div id="indent_content" class="content"> + <label for="id_n_indents">Indentation: </label> + {{form.n_indents}} {{form.n_indents.label}} + <div class="help">Empty field means leave indentation unchanged.</div> +<!-- + <label for="id_right_margin">Right margin: </label> + {{form.right_margin}} {{form.right_margin.label}} + <div class="help">Empty field means don't mind right margin.</div> +--> + </div> + </fieldset> + <fieldset><legend id="output"><strong>Output Format</strong></legend> + <label for="id_output_format">Language: </label> + {{form.output_format}} + </fieldset> + + <div class="dev">This software is in development.</div> + + </div> + <div class="clearfix"></div> +</form> + +<script language="javascript"> +html = '<strong>...or select an example:</strong> '; +html = html + '<select onchange="load_example();" id="sel_example">'; +html = html + '<option value="">-- Choose Example 
--</option>'; +{% for ex in examples %} + html = html + '<option value="{{ex}}">{{ex}}</option>'; +{% endfor %} +html = html + '</select>'; +$('#examples').html(html); +</script> +{% endblock %} + diff --git a/extras/appengine/templates/master.html b/extras/appengine/templates/master.html new file mode 100644 index 0000000..4294cf4 --- /dev/null +++ b/extras/appengine/templates/master.html @@ -0,0 +1,103 @@ +<html> + <head> + <title>SQLFormat - Online SQL Formatting Service</title> + <meta name="keywords" content="SQL, format, parse, python, beautify" /> + <link rel="stylesheet" href="/static/pygments.css" /> + <link rel="stylesheet" href="/static/styles.css" /> + <script src="http://www.google.com/jsapi"></script> + <script> + google.load("jquery", "1.2.6"); + </script> + <script src="/static/hotkeys.js"></script> + <script type="text/javascript" + src="/static/jquery.textarearesizer.compressed.js"></script> + <script src="/static/script.js"></script> + </head> + <body> + + <div id="help"> + <p>Keyboard Shortcuts</p> + <p> + <span class="shortcut">H</span> - Show / hide this help window<br/> + <span class="shortcut">Ctrl+F</span> - Format SQL and display result<br/> + <span class="shortcut">O</span> - Show / hide options<br/> + <span class="shortcut">T</span> - Set focus on SQL input<br/> + </p> + </div> + + <div id="header"> + <div id="header-inner"> + {% if user %}<img src="{{userimg}}" border="0" align="right" style="padding-top:.4em;"/>{% endif %} + <h1> + <a href="/"> + <span class="q">S<span class="q2">Q</span>L</span>Format + </a> + </h1> + <div id="slogan">Online SQL formatting service</div> + <div id="topmenu"> + <a href="/">Home</a> + | + <a href="/about/">About</a> + | + <a href="/source/">Source Code</a> + | + <a href="/api/">API</a> +<!-- + | + {% if user %} + <a href="{{logout_url}}">Sign out</a> + {% else %} + <a href="{{login_url}}">Sign in</a> + <span style="color: #babdb6;">with your Google account + to save preferred settings.</span> + {% endif %} +--> + </div> + </div> + </div> + + <div id="main"> + <div id="main-inner"> + {% block main %}MAIN CONTENT GOES HERE{% endblock %} + </div> + </div> + + <div id="footer"> + <div id="footer-inner"> + <div style="float: left; font-size: .85em;"> + <div>© 2009 Andi Albrecht + <code><albrecht dot andi gmail></code> + </div> + <div> + <a href="/">Home</a> + | + <a href="/about/">About</a> + | + <a href="/source/">Source Code</a> + | + <a href="/api/">API</a> + | + <a href="http://andialbrecht.wordpress.com/">Blog</a> + </div> + </div> + <div style="float: right;"> + <img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif" + alt="Powered by Google App Engine" /> + </div> + <div class="clearfix"></div> + </div> + </div> + + <script type="text/javascript"> + var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." 
: "http://www."); + document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); + </script> + <script type="text/javascript"> + try { + var pageTracker = _gat._getTracker("UA-3535525-2"); + pageTracker._trackPageview(); + } catch(err) {}</script> + <script>init();</script> + + </body> +</html> diff --git a/extras/appengine/templates/python-client-example.html b/extras/appengine/templates/python-client-example.html new file mode 100644 index 0000000..68bf820 --- /dev/null +++ b/extras/appengine/templates/python-client-example.html @@ -0,0 +1,17 @@ +<div class="highlight example"><pre><span class="c">#!/usr/bin/env python</span> + +<span class="k">import</span> <span class="nn">urllib</span> +<span class="k">import</span> <span class="nn">urllib2</span> + +<span class="n">payload</span> <span class="o">=</span> <span class="p">(</span> + <span class="p">(</span><span class="s">'data'</span><span class="p">,</span> <span class="s">'select * from foo join bar on val1 = val2 where id = 123;'</span><span class="p">),</span> + <span class="p">(</span><span class="s">'format'</span><span class="p">,</span> <span class="s">'text'</span><span class="p">),</span> + <span class="p">(</span><span class="s">'keyword_case'</span><span class="p">,</span> <span class="s">'upper'</span><span class="p">),</span> + <span class="p">(</span><span class="s">'reindent'</span><span class="p">,</span> <span class="bp">True</span><span class="p">),</span> + <span class="p">(</span><span class="s">'n_indents'</span><span class="p">,</span> <span class="mf">2</span><span class="p">),</span> + <span class="p">)</span> + +<span class="n">response</span> <span class="o">=</span> <span class="n">urllib2</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="s">'http://sqlformat.appspot.com/format/'</span><span class="p">,</span> + <span class="n">urllib</span><span class="o">.</span><span class="n">urlencode</span><span class="p">(</span><span class="n">payload</span><span class="p">))</span> +<span class="k">print</span> <span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">()</span> +</pre></div> diff --git a/extras/appengine/templates/source.html b/extras/appengine/templates/source.html new file mode 100644 index 0000000..4988f2f --- /dev/null +++ b/extras/appengine/templates/source.html @@ -0,0 +1,56 @@ +{% extends "master.html" %} + +{% block main %} +<div id="response"> + <h1>Source Code</h1> + + <h2>Python Module</h2> + <p> + The sources for the SQL parser and formatter module are currently + hosted on Gitorious. + To clone the repository run: + <p> + <code class="pre">git clone git://github.com/andialbrecht/python-sqlparse.git</code> + </p> + <p> + <a href="http://python-sqlparse.googlecode.com">Visit the project page</a> + | + <a href="http://gitorious.org/projects/python-sqlparse/repos/mainline/trees/master">Browse the sources online</a> + </p> + <p> + Some relevant parts of the Python module contain code from the + <a href="http://pygments.org/">pygments</a> syntax highlighter. + The underlying Python module uses a non-validating SQL parser. + This approach makes it possible to parse even syntactically incorrect + SQL statements. + </p> + + <p> + Currently the parser module is used by + <a href="http://crunchyfrog.googlecode.com/">CrunchyFrog</a> - a + database front-end for Gnome. 
+ </p> + + <p> + The <code>sqlparse</code> module is released under the terms of the + <a href="http://www.opensource.org/licenses/bsd-license.php">New BSD License</a>. + </p> + + <h2>App Engine Application</h2> + <p> + The source code for this App Engine application is available in the + <code>examples</code> directory of the Python module + (but it's really nothing special ;-). + </p> + + <h2>Contributing</h2> + <p> + Please file bug reports and feature requests on the project site at + <a href="http://code.google.com/p/python-sqlparse/issues/entry">http://code.google.com/p/python-sqlparse/issues/entry</a> + or if you have code to contribute upload it to + <a href="http://codereview.appspot.com">http://codereview.appspot.com</a> + and add albrecht.andi@googlemail.com as reviewer. + </p> + +</div> +{% endblock %} diff --git a/extras/sqlformat.png b/extras/sqlformat.png Binary files differnew file mode 100644 index 0000000..4189bc4 --- /dev/null +++ b/extras/sqlformat.png diff --git a/extras/sqlformat.svg b/extras/sqlformat.svg new file mode 100644 index 0000000..59e1183 --- /dev/null +++ b/extras/sqlformat.svg @@ -0,0 +1,115 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="120" + height="60" + id="svg2" + sodipodi:version="0.32" + inkscape:version="0.46" + version="1.0" + sodipodi:docname="sqlformat.svg" + inkscape:output_extension="org.inkscape.output.svg.inkscape"> + <defs + id="defs4"> + <linearGradient + id="linearGradient3163"> + <stop + id="stop3165" + offset="0" + style="stop-color:#888a85;stop-opacity:1;" /> + <stop + id="stop3167" + offset="1" + style="stop-color:#565854;stop-opacity:1;" /> + </linearGradient> + <inkscape:perspective + sodipodi:type="inkscape:persp3d" + inkscape:vp_x="0 : 526.18109 : 1" + inkscape:vp_y="0 : 1000 : 0" + inkscape:vp_z="744.09448 : 526.18109 : 1" + inkscape:persp3d-origin="372.04724 : 350.78739 : 1" + id="perspective10" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3163" + id="linearGradient3161" + x1="57.357143" + y1="1.6964277" + x2="57.464291" + y2="58.125" + gradientUnits="userSpaceOnUse" /> + </defs> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="5.6" + inkscape:cx="58.539628" + inkscape:cy="24.488765" + inkscape:document-units="px" + inkscape:current-layer="layer1" + showgrid="false" + inkscape:window-width="1436" + inkscape:window-height="720" + inkscape:window-x="209" + inkscape:window-y="171" /> + <metadata + id="metadata7"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Ebene 1" + inkscape:groupmode="layer" + id="layer1"> + <rect + style="fill:url(#linearGradient3161);fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.5103448;fill-opacity:1" + id="rect2383" + width="118.21429" + 
height="58.035713" + x="1.0714285" + y="1.0714284" + ry="3.5714285" /> + <text + xml:space="preserve" + style="font-size:41.68727112px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#f57900;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:FreeSans;-inkscape-font-specification:FreeSans" + x="3.8212376" + y="38.731842" + id="text3169" + transform="scale(1.3303696,0.7516708)"><tspan + sodipodi:role="line" + id="tspan3171" + x="3.8212376" + y="38.731842">S<tspan + style="font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Georgia;-inkscape-font-specification:Georgia Italic" + id="tspan3173">Q</tspan>L</tspan></text> + <text + xml:space="preserve" + style="font-size:29.59413719px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#babdb6;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:FreeSans;-inkscape-font-specification:FreeSans" + x="5.0176215" + y="60.359047" + id="text3177" + transform="scale(1.1410205,0.8764084)"><tspan + sodipodi:role="line" + id="tspan3179" + x="5.0176215" + y="60.359047">Format</tspan></text> + </g> +</svg> diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..6ed5178 --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This setup script is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +from distutils.core import setup + +setup( + name='sqlparse', + version='0.1.0', + py_modules=['sqlparse'], + description='Provides simple SQL parsing', + author='Andi Albrecht', + author_email='albrecht.andi@gmail.com', + #long_description=release.long_description, + license='BSD', + url='http://python-sqlparse.googlecode.com/', + classifiers = [ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Database', + 'Topic :: Software Development' + ], + scripts=['bin/sqlformat'], +) diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py new file mode 100644 index 0000000..01b3bd8 --- /dev/null +++ b/sqlparse/__init__.py @@ -0,0 +1,65 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""Parse SQL statements.""" + +__version__ = '0.1.0' + +import logging +import os + + +if 'SQLPARSE_DEBUG' in os.environ: + logging.basicConfig(level=logging.DEBUG) + + +class SQLParseError(Exception): + """Base class for exceptions in this module.""" + + +# Setup namespace +from sqlparse import engine +from sqlparse import filters +from sqlparse import formatter + + +def parse(sql): + """Parse sql and return a list of statements. + + *sql* is a single string containting one or more SQL statements. + + The returned :class:`~sqlparse.parser.Statement` are fully analyzed. + + Returns a list of :class:`~sqlparse.parser.Statement` instances. + """ + stack = engine.FilterStack() + stack.full_analyze() + return tuple(stack.run(sql)) + + +def format(sql, **options): + """Format *sql* according to *options*. 
+ + Returns a list of :class:`~sqlparse.parse.Statement` instances like + :meth:`parse`, but the statements are formatted according to *options*. + + Available options are documented in the :mod:`~sqlparse.format` module. + """ + stack = engine.FilterStack() + options = formatter.validate_options(options) + stack = formatter.build_filter_stack(stack, options) + stack.postprocess.append(filters.SerializerUnicode()) + return ''.join(stack.run(sql)) + + +def split(sql): + """Split *sql* into separate statements. + + Returns a list of strings. + """ + stack = engine.FilterStack() + stack.split_statements = True + return [unicode(stmt) for stmt in stack.run(sql)] + diff --git a/sqlparse/dialects.py b/sqlparse/dialects.py new file mode 100644 index 0000000..cabe503 --- /dev/null +++ b/sqlparse/dialects.py @@ -0,0 +1,88 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""This module contains classes that represent SQL dialects.""" + +from tokens import * + + +class Dialect(object): + """Base class for SQL dialect implementations.""" + + def handle_token(self, tokentype, text): + """Handle a token. + + Arguments: + tokentype: A token type. + text: Text representation of the token. + + Returns: + A tuple of three items: tokentype, text, splitlevel. + splitlevel is either -1, 0 or 1 and describes an identation level. + """ + raise NotImplementedError + + def reset(self): + """Reset Dialect state.""" + pass + + +class DefaultDialect(Dialect): + + def __init__(self): + self._in_declare = False + self._stmt_type = None + + def get_statement_type(self): + return self._stmt_type + + def set_statement_type(self, type_): + self._stmt_type = type_ + + def handle_token(self, tokentype, text): + if not tokentype == Keyword: + return tokentype, text, 0 + unified = text.upper() + if unified == 'DECLARE': + self._in_declare = True + return tokentype, text, 1 + if unified == 'BEGIN': + if self._in_declare: + return tokentype, text, 0 + return tokentype, text, 0 + if unified == 'END': + return tokentype, text, -1 + # TODO: Use a constant here + if unified in ('IF', 'FOR') and self._stmt_type == 6: + return tokentype, text, 1 + return tokentype, text, 0 + + def reset(self): + self._in_declare = False + + +class PSQLDialect(DefaultDialect): + + def __init__(self): + super(PSQLDialect, self).__init__() + self._in_dbldollar = False + + def handle_token(self, tokentype, text): + if (tokentype == Name.Builtin + and text.startswith('$') and text.endswith('$')): + if self._in_dbldollar: + self._in_dbldollar = False + return tokentype, text, -1 + else: + self._in_dbldollar = True + return tokentype, text, 1 + elif self._in_dbldollar: + return tokentype, text, 0 + else: + return super(PSQLDialect, self).handle_token(tokentype, text) + + def reset(self): + self._dollar_started = False + self._in_dbldollar = False diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py new file mode 100644 index 0000000..5cac528 --- /dev/null +++ b/sqlparse/engine/__init__.py @@ -0,0 +1,81 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
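+
+# A minimal usage sketch: this mirrors how sqlparse.split() in
+# sqlparse/__init__.py drives the FilterStack defined below:
+#
+#     stack = FilterStack()
+#     stack.split_statements = True
+#     statements = [unicode(stmt) for stmt in stack.run(sql)]
+#
+# run() applies the filters registered in preprocess, then splits and
+# optionally groups the statements, then applies stmtprocess and finally
+# postprocess.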
+ +"""filter""" + +import logging +import re + +from sqlparse import lexer, SQLParseError +from sqlparse.engine import grouping +from sqlparse.engine.filter import StatementFilter + +# XXX remove this when cleanup is complete +Filter = object + + +class FilterStack(object): + + def __init__(self): + self.preprocess = [] + self.stmtprocess = [] + self.postprocess = [] + self.split_statements = False + self._grouping = False + + def _flatten(self, stream): + for token in stream: + if token.is_group(): + for t in self._flatten(token.tokens): + yield t + else: + yield token + + def enable_grouping(self): + self._grouping = True + + def full_analyze(self): + self.enable_grouping() + + def run(self, sql): + stream = lexer.tokenize(sql) + # Process token stream + if self.preprocess: + for filter_ in self.preprocess: + stream = filter_.process(self, stream) + + if (self.stmtprocess or self.postprocess or self.split_statements + or self._grouping): + splitter = StatementFilter() + stream = splitter.process(self, stream) + + if self._grouping: + def _group(stream): + for stmt in stream: + grouping.group(stmt) + yield stmt + stream = _group(stream) + + if self.stmtprocess: + def _run(stream): + ret = [] + for stmt in stream: + for filter_ in self.stmtprocess: + filter_.process(self, stmt) + ret.append(stmt) + return ret + stream = _run(stream) + + if self.postprocess: + def _run(stream): + for stmt in stream: + stmt.tokens = list(self._flatten(stmt.tokens)) + for filter_ in self.postprocess: + stmt = filter_.process(self, stmt) + yield stmt + stream = _run(stream) + + return stream + diff --git a/sqlparse/engine/_grouping.py b/sqlparse/engine/_grouping.py new file mode 100644 index 0000000..512c590 --- /dev/null +++ b/sqlparse/engine/_grouping.py @@ -0,0 +1,499 @@ +# -*- coding: utf-8 -*- + +import re + +from sqlparse.engine.filter import TokenFilter +from sqlparse import tokens as T + +class _Base(object): + + __slots__ = ('to_unicode', 'to_str', '_get_repr_name') + + def __unicode__(self): + return 'Unkown _Base object' + + def __str__(self): + return unicode(self).encode('latin-1') + + def __repr__(self): + raw = unicode(self) + if len(raw) > 7: + short = raw[:6]+u'...' 
+ else: + short = raw + short = re.sub('\s+', ' ', short) + return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), + short, id(self)) + + def _get_repr_name(self): + return self.__class__.__name__ + + def to_unicode(self): + return unicode(self) + + def to_str(self): + return str(self) + + +class Token(_Base): + + __slots__ = ('value', 'ttype') + + def __init__(self, ttype, value): + self.value = value + self.ttype = ttype + + def __unicode__(self): + return self.value + + def _get_repr_name(self): + return str(self.ttype).split('.')[-1] + + def match(self, ttype, values): + if self.ttype is not ttype: + return False + if isinstance(values, basestring): + values = [values] + if self.ttype is T.Keyword: + return self.value.upper() in [v.upper() for v in values] + else: + return self.value in values + + def is_group(self): + return False + + def is_whitespace(self): + return self.ttype and self.ttype is T.Whitespace + + +class _Group(Token): + + __slots__ = ('value', 'ttype', 'tokens') + + def __init__(self, tokens=None): + super(_Group, self).__init__(None, None) + if tokens is None: + tokens = [] + self._tokens = tokens + + def _set_tokens(self, tokens): + self._tokens = tokens + def _get_tokens(self): + if type(self._tokens) is not types.TupleType: + self._tokens = tuple(self._tokens) + return self._tokens + tokens = property(fget=_get_tokens, fset=_set_tokens) + + def _get_repr_name(self): + return self.__class__.__name__ + + def _pprint_tree(self, depth=0): + """Pretty-print the object tree.""" + indent = ' '*(depth*2) + for token in self.tokens: + print '%s%r' % (indent, token) + if token.is_group(): + token._pprint_tree(depth+1) + + def __unicode__(self): + return u''.join(unicode(t) for t in self.tokens) + + @property + def subgroups(self): + #return [x for x in self.tokens if isinstance(x, _Group)] + for item in self.tokens: + if item.is_group(): + yield item + + def is_group(self): + return True + + +class Statement(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class Parenthesis(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class Where(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class CommentMulti(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class Identifier(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class TypeCast(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + @property + def casted_object(self): + return self.tokens[0] + + @property + def casted_type(self): + return self.tokens[-1] + + +class Alias(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + @property + def aliased_object(self): + return self.tokens[0] + + @property + def alias(self): + return self.tokens[-1] + + + + +# - Filter + +class StatementFilter(TokenFilter): + + def __init__(self): + self._in_declare = False + self._in_dbldollar = False + self._is_create = False + + def _reset(self): + self._in_declare = False + self._in_dbldollar = False + self._is_create = False + + def _change_splitlevel(self, ttype, value): + # PostgreSQL + if (ttype == T.Name.Builtin + and value.startswith('$') and value.endswith('$')): + if self._in_dbldollar: + self._in_dbldollar = False + return -1 + else: + self._in_dbldollar = True + return 1 + elif self._in_dbldollar: + return 0 + + # ANSI + if ttype is not T.Keyword: + return 0 + + unified = value.upper() + + if unified == 'DECLARE': + self._in_declare = True + return 1 + + if unified == 'BEGIN': + if self._in_declare: + return 0 + return 0 + + if unified == 'END': + return -1 + + if ttype is 
T.Keyword.DDL and unified.startswith('CREATE'): + self._is_create = True + + if unified in ('IF', 'FOR') and self._is_create: + return 1 + + # Default + return 0 + + def process(self, stack, stream): + splitlevel = 0 + stmt = None + consume_ws = False + stmt_tokens = [] + for ttype, value in stream: + # Before appending the token + if (consume_ws and ttype is not T.Whitespace + and ttype is not T.Comment.Single): + consume_ws = False + stmt.tokens = stmt_tokens + yield stmt + self._reset() + stmt = None + splitlevel = 0 + if stmt is None: + stmt = Statement() + stmt_tokens = [] + splitlevel += self._change_splitlevel(ttype, value) + # Append the token + stmt_tokens.append(Token(ttype, value)) + # After appending the token + if (not splitlevel and ttype is T.Punctuation + and value == ';'): + consume_ws = True + if stmt is not None: + stmt.tokens = stmt_tokens + yield stmt + + +class GroupFilter(object): + + def process(self, stream): + pass + + +class GroupParenthesis(GroupFilter): + """Group parenthesis groups.""" + + def _finish_group(self, group): + start = group[0] + end = group[-1] + tokens = list(self._process(group[1:-1])) + return [start]+tokens+[end] + + def _process(self, stream): + group = None + depth = 0 + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if token.match(T.Punctuation, '('): + if depth == 0: + group = [] + depth += 1 + if group is not None: + group.append(token) + if token.match(T.Punctuation, ')'): + depth -= 1 + if depth == 0: + yield Parenthesis(self._finish_group(group)) + group = None + continue + if group is None: + yield token + + def process(self, group): + if not isinstance(group, Parenthesis): + group.tokens = self._process(group.tokens) + + +class GroupWhere(GroupFilter): + + def _process(self, stream): + group = None + depth = 0 + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if token.match(T.Keyword, 'WHERE'): + if depth == 0: + group = [] + depth += 1 + # Process conditions here? E.g. "A =|!=|in|is|... B"... 
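+            # The keywords in the elif below ('ORDER', 'GROUP', 'LIMIT',
+            # 'UNION') act as terminators: they close the collected token
+            # group and emit it as a Where() instance.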
+ elif (token.ttype is T.Keyword + and token.value.upper() in ('ORDER', 'GROUP', + 'LIMIT', 'UNION')): + depth -= 1 + if depth == 0: + yield Where(group) + group = None + if depth < 0: + depth = 0 + if group is not None: + group.append(token) + else: + yield token + if group is not None: + yield Where(group) + + def process(self, group): + if not isinstance(group, Where): + group.tokens = self._process(group.tokens) + + +class GroupMultiComments(GroupFilter): + """Groups Comment.Multiline and adds trailing whitespace up to first lb.""" + + def _process(self, stream): + new_tokens = [] + grp = None + consume_ws = False + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if token.ttype is T.Comment.Multiline: + if grp is None: + grp = [] + consume_ws = True + grp.append(token) + elif consume_ws and token.ttype is not T.Whitespace: + yield CommentMulti(grp) + grp = None + consume_ws = False + yield token + elif consume_ws: + lines = token.value.splitlines(True) + grp.append(Token(T.Whitespace, lines[0])) + if lines[0].endswith('\n'): + yield CommentMulti(grp) + grp = None + consume_ws = False + if lines[1:]: + yield Token(T.Whitespace, ''.join(lines[1:])) + else: + yield token + + def process(self, group): + if not isinstance(group, CommentMulti): + group.tokens = self._process(group.tokens) + + +## class GroupIdentifier(GroupFilter): + +## def _process(self, stream): +## buff = [] +## expect_dot = False +## for token in stream: +## if token.is_group(): +## token.tokens = self._process(token.tokens) +## if (token.ttype is T.String.Symbol or token.ttype is T.Name +## and not expect_dot): +## buff.append(token) +## expect_dot = True +## elif expect_dot and token.match(T.Punctuation, '.'): +## buff.append(token) +## expect_dot = False +## else: +## if expect_dot == False: +## # something's wrong, it ends with a dot... 
+## while buff: +## yield buff.pop(0) +## expect_dot = False +## elif buff: +## idt = Identifier() +## idt.tokens = buff +## yield idt +## buff = [] +## yield token +## if buff and expect_dot: +## idt = Identifier() +## idt.tokens = buff +## yield idt +## buff = [] +## while buff: +## yield buff.pop(0) + +## def process(self, group): +## if not isinstance(group, Identifier): +## group.tokens = self._process(group.tokens) + + +class AddTypeCastFilter(GroupFilter): + + def _process(self, stream): + buff = [] + expect_colon = False + has_colons = False + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if ((isinstance(token, Parenthesis) + or isinstance(token, Identifier)) + and not expect_colon): + buff.append(token) + expect_colon = True + elif expect_colon and token.match(T.Punctuation, ':'): + buff.append(token) + has_colons = True + elif (expect_colon + and (token.ttype in T.Name + or isinstance(token, Identifier)) + ): + if not has_colons: + while buff: + yield buff.pop(0) + yield token + else: + buff.append(token) + grp = TypeCast() + grp.tokens = buff + buff = [] + yield grp + expect_colons = has_colons = False + else: + while buff: + yield buff.pop(0) + yield token + while buff: + yield buff.pop(0) + + def process(self, group): + if not isinstance(group, TypeCast): + group.tokens = self._process(group.tokens) + + +class AddAliasFilter(GroupFilter): + + def _process(self, stream): + buff = [] + search_alias = False + lazy = False + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if search_alias and (isinstance(token, Identifier) + or token.ttype in (T.Name, + T.String.Symbol) + or (lazy and not token.is_whitespace())): + buff.append(token) + search_alias = lazy = False + grp = Alias() + grp.tokens = buff + buff = [] + yield grp + elif (isinstance(token, (Identifier, TypeCast)) + or token.ttype in (T.Name, T.String.Symbol)): + buff.append(token) + search_alias = True + elif search_alias and (token.is_whitespace() + or token.match(T.Keyword, 'as')): + buff.append(token) + if token.match(T.Keyword, 'as'): + lazy = True + else: + while buff: + yield buff.pop(0) + yield token + search_alias = False + while buff: + yield buff.pop(0) + + def process(self, group): + if not isinstance(group, Alias): + group.tokens = self._process(group.tokens) + + +GROUP_FILTER = (GroupParenthesis(), + GroupMultiComments(), + GroupWhere(), + GroupIdentifier(), + AddTypeCastFilter(), + AddAliasFilter(), + ) + +import types +def group_tokens(group): + def _materialize(g): + if type(g.tokens) is not types.TupleType: + g.tokens = tuple(g.tokens) + for sg in g.subgroups: + _materialize(sg) + for groupfilter in GROUP_FILTER: + groupfilter.process(group) +# _materialize(group) +# group.tokens = tuple(group.tokens) +# for subgroup in group.subgroups: +# group_tokens(subgroup) diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py new file mode 100644 index 0000000..146690c --- /dev/null +++ b/sqlparse/engine/filter.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +from sqlparse import tokens as T +from sqlparse.engine.grouping import Statement, Token + + +class TokenFilter(object): + + def __init__(self, **options): + self.options = options + + def process(self, stack, stream): + """Process token stream.""" + raise NotImplementedError + + +class StatementFilter(TokenFilter): + + def __init__(self): + self._in_declare = False + self._in_dbldollar = False + self._is_create = False + + def _reset(self): + self._in_declare = 
False + self._in_dbldollar = False + self._is_create = False + + def _change_splitlevel(self, ttype, value): + # PostgreSQL + if (ttype == T.Name.Builtin + and value.startswith('$') and value.endswith('$')): + if self._in_dbldollar: + self._in_dbldollar = False + return -1 + else: + self._in_dbldollar = True + return 1 + elif self._in_dbldollar: + return 0 + + # ANSI + if ttype is not T.Keyword: + return 0 + + unified = value.upper() + + if unified == 'DECLARE': + self._in_declare = True + return 1 + + if unified == 'BEGIN': + if self._in_declare: + return 0 + return 0 + + if unified == 'END': + # Should this respect a preceeding BEGIN? + # In CASE ... WHEN ... END this results in a split level -1. + return -1 + + if ttype is T.Keyword.DDL and unified.startswith('CREATE'): + self._is_create = True + + if unified in ('IF', 'FOR') and self._is_create: + return 1 + + # Default + return 0 + + def process(self, stack, stream): + splitlevel = 0 + stmt = None + consume_ws = False + stmt_tokens = [] + for ttype, value in stream: + # Before appending the token + if (consume_ws and ttype is not T.Whitespace + and ttype is not T.Comment.Single): + consume_ws = False + stmt.tokens = stmt_tokens + yield stmt + self._reset() + stmt = None + splitlevel = 0 + if stmt is None: + stmt = Statement() + stmt_tokens = [] + splitlevel += self._change_splitlevel(ttype, value) + # Append the token + stmt_tokens.append(Token(ttype, value)) + # After appending the token + if (splitlevel <= 0 and ttype is T.Punctuation + and value == ';'): + consume_ws = True + if stmt is not None: + stmt.tokens = stmt_tokens + yield stmt diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py new file mode 100644 index 0000000..433f539 --- /dev/null +++ b/sqlparse/engine/grouping.py @@ -0,0 +1,537 @@ +# -*- coding: utf-8 -*- + +import itertools +import re +import types + +from sqlparse import tokens as T + + +class Token(object): + + __slots__ = ('value', 'ttype') + + def __init__(self, ttype, value): + self.value = value + self.ttype = ttype + + def __str__(self): + return unicode(self).encode('latin-1') + + def __repr__(self): + short = self._get_repr_value() + return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), + short, id(self)) + + def __unicode__(self): + return self.value + + def to_unicode(self): + return unicode(self) + + def _get_repr_name(self): + return str(self.ttype).split('.')[-1] + + def _get_repr_value(self): + raw = unicode(self) + if len(raw) > 7: + short = raw[:6]+u'...' 
+ else: + short = raw + return re.sub('\s+', ' ', short) + + def match(self, ttype, values, regex=False): + if self.ttype is not ttype: + return False + if values is None: + return self.ttype is ttype + if isinstance(values, basestring): + values = [values] + if regex: + if self.ttype is T.Keyword: + values = [re.compile(v, re.IGNORECASE) for v in values] + else: + values = [re.compile(v) for v in values] + for pattern in values: + if pattern.search(self.value): + return True + return False + else: + if self.ttype is T.Keyword: + return self.value.upper() in [v.upper() for v in values] + else: + return self.value in values + + def is_group(self): + return False + + def is_whitespace(self): + return self.ttype and self.ttype in T.Whitespace + + +class TokenList(Token): + + __slots__ = ('value', 'ttype', 'tokens') + + def __init__(self, tokens=None): + if tokens is None: + tokens = [] + self.tokens = tokens + Token.__init__(self, None, None) + + def __unicode__(self): + return ''.join(unicode(x) for x in self.flatten()) + + def __str__(self): + return unicode(self).encode('latin-1') + + def _get_repr_name(self): + return self.__class__.__name__ + + def _pprint_tree(self, max_depth=None, depth=0): + """Pretty-print the object tree.""" + indent = ' '*(depth*2) + for token in self.tokens: + if token.is_group(): + pre = ' | ' + else: + pre = ' | ' + print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(), + token._get_repr_value()) + if (token.is_group() and max_depth is not None + and depth < max_depth): + token._pprint_tree(max_depth, depth+1) + + def flatten(self): + for token in self.tokens: + if isinstance(token, TokenList): + for item in token.flatten(): + yield item + else: + yield token + + def is_group(self): + return True + + def get_sublists(self): + return [x for x in self.tokens if isinstance(x, TokenList)] + + def token_first(self, ignore_whitespace=True): + for token in self.tokens: + if ignore_whitespace and token.is_whitespace(): + continue + return token + return None + + def token_next_by_instance(self, idx, clss): + if type(clss) not in (types.ListType, types.TupleType): + clss = (clss,) + if type(clss) is not types.TupleType: + clss = tuple(clss) + for token in self.tokens[idx:]: + if isinstance(token, clss): + return token + return None + + def token_next_by_type(self, idx, ttypes): + if not isinstance(ttypes, (types.TupleType, types.ListType)): + ttypes = [ttypes] + for token in self.tokens[idx:]: + if token.ttype in ttypes: + return token + return None + + def token_next_match(self, idx, ttype, value, regex=False): + if type(idx) != types.IntType: + idx = self.token_index(idx) + for token in self.tokens[idx:]: + if token.match(ttype, value, regex): + return token + return None + + def token_not_matching(self, idx, funcs): + for token in self.tokens[idx:]: + passed = False + for func in funcs: + if func(token): + passed = True + break + if not passed: + return token + return None + + def token_prev(self, idx, skip_ws=True): + while idx != 0: + idx -= 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_next(self, idx, skip_ws=True): + while idx < len(self.tokens)-1: + idx += 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_index(self, token): + """Return list index of token.""" + return self.tokens.index(token) + + def tokens_between(self, start, end, exclude_end=False): + """Return all tokens between (and including) start and end.""" + if exclude_end: + offset = 
0 + else: + offset = 1 + return self.tokens[self.token_index(start):self.token_index(end)+offset] + + def group_tokens(self, grp_cls, tokens): + """Replace tokens by instance of grp_cls.""" + idx = self.token_index(tokens[0]) + for t in tokens: + self.tokens.remove(t) + grp = grp_cls(tokens) + self.tokens.insert(idx, grp) + return grp + + def insert_before(self, where, token): + self.tokens.insert(self.token_index(where), token) + + +class Statement(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def get_type(self): + first_token = self.token_first() + if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return first_token.value.upper() + else: + return 'UNKNOWN' + + +class Identifier(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def has_alias(self): + return self.get_alias() is not None + + def get_alias(self): + kw = self.token_next_match(0, T.Keyword, 'AS') + if kw is not None: + alias = self.token_next(self.token_index(kw)) + if alias is None: + return None + else: + next_ = self.token_next(0) + if next_ is None or not isinstance(next_, Identifier): + return None + alias = next_ + if isinstance(alias, Identifier): + return alias.get_name() + else: + return alias.to_unicode() + + def get_name(self): + alias = self.get_alias() + if alias is not None: + return alias + return self.get_real_name() + + def get_real_name(self): + return self.token_next_by_type(0, T.Name).value + + def get_typecast(self): + marker = self.token_next_match(0, T.Punctuation, '::') + if marker is None: + return None + next_ = self.token_next(self.token_index(marker), False) + if next_ is None: + return None + return next_.to_unicode() + + +class IdentifierList(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def get_identifiers(self): + return [x for x in self.tokens if isinstance(x, Identifier)] + + +class Parenthesis(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + + +class Assignment(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class If(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class For(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class Comparsion(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class Comment(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class Where(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + + +class Case(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def get_cases(self): + """Returns a list of 2-tuples (condition, value). + + If an ELSE exists condition is None. 
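+
+        The tokens are collected as-is, so each condition list starts with
+        the WHEN token and each value list with the THEN (or ELSE) token.
+        Informally, for ``CASE WHEN foo = 1 THEN 'one' ELSE 'other' END``
+        this returns something like::
+
+            [([WHEN foo = 1], [THEN 'one']), (None, [ELSE 'other'])]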
+ """ + ret = [] + in_condition = in_value = False + for token in self.tokens: + if token.match(T.Keyword, 'WHEN'): + ret.append(([], [])) + in_condition = True + in_value = False + elif token.match(T.Keyword, 'ELSE'): + ret.append((None, [])) + in_condition = False + in_value = True + elif token.match(T.Keyword, 'THEN'): + in_condition = False + in_value = True + elif token.match(T.Keyword, 'END'): + in_condition = False + in_value = False + if in_condition: + ret[-1][0].append(token) + elif in_value: + ret[-1][1].append(token) + return ret + +def _group_left_right(tlist, ttype, value, cls, + check_right=lambda t: True, + include_semicolon=False): +# [_group_left_right(sgroup, ttype, value, cls, check_right, +# include_semicolon) for sgroup in tlist.get_sublists() +# if not isinstance(sgroup, cls)] + idx = 0 + token = tlist.token_next_match(idx, ttype, value) + while token: + right = tlist.token_next(tlist.token_index(token)) + left = tlist.token_prev(tlist.token_index(token)) + if (right is None or not check_right(right) + or left is None): + token = tlist.token_next_match(tlist.token_index(token)+1, + ttype, value) + else: + if include_semicolon: + right = tlist.token_next_match(tlist.token_index(right), + T.Punctuation, ';') + tokens = tlist.tokens_between(left, right)[1:] + if not isinstance(left, cls): + new = cls([left]) + new_idx = tlist.token_index(left) + tlist.tokens.remove(left) + tlist.tokens.insert(new_idx, new) + left = new + left.tokens.extend(tokens) + for t in tokens: + tlist.tokens.remove(t) + token = tlist.token_next_match(tlist.token_index(left)+1, + ttype, value) + +def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, + cls, include_semicolon=False, recurse=False): + def _find_matching(i, tl, stt, sva, ett, eva): + depth = 1 + for t in tl.tokens[i:]: + if t.match(stt, sva): + depth += 1 + elif t.match(ett, eva): + depth -= 1 + if depth == 1: + return t + return None + [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, + cls, include_semicolon) for sgroup in tlist.get_sublists() + if recurse] + if isinstance(tlist, cls): + idx = 1 + else: + idx = 0 + token = tlist.token_next_match(idx, start_ttype, start_value) + while token: + tidx = tlist.token_index(token) + end = _find_matching(tidx, tlist, start_ttype, start_value, + end_ttype, end_value) + if end is None: + idx = tidx+1 + else: + if include_semicolon: + next_ = tlist.token_next(tlist.token_index(end)) + if next_ and next_.match(T.Punctuation, ';'): + end = next_ + group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) + _group_matching(group, start_ttype, start_value, + end_ttype, end_value, cls, include_semicolon) + idx = tlist.token_index(group)+1 + token = tlist.token_next_match(idx, start_ttype, start_value) + +def group_if(tlist): + _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True) + +def group_for(tlist): + _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True) + +def group_as(tlist): + _group_left_right(tlist, T.Keyword, 'AS', Identifier) + +def group_assignment(tlist): + _group_left_right(tlist, T.Assignment, ':=', Assignment, + include_semicolon=True) + +def group_comparsion(tlist): + _group_left_right(tlist, T.Operator, None, Comparsion) + + +def group_case(tlist): + _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case, True) + + +def group_identifier(tlist): + def _consume_cycle(tl, i): + x = itertools.cycle((lambda y: y.match(T.Punctuation, '.'), + lambda y: y.ttype in (T.String.Symbol, 
T.Name))) + for t in tl.tokens[i:]: + if x.next()(t): + yield t + else: + raise StopIteration + + # bottom up approach: group subgroups first + [group_identifier(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Identifier)] + + # real processing + idx = 0 + token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) + while token: + identifier_tokens = [token]+list( + _consume_cycle(tlist, + tlist.token_index(token)+1)) + group = tlist.group_tokens(Identifier, identifier_tokens) + idx = tlist.token_index(group)+1 + token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) + + +def group_identifier_list(tlist): + [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, IdentifierList)] + idx = 0 + token = tlist.token_next_by_instance(idx, Identifier) + while token: + tidx = tlist.token_index(token) + end = tlist.token_not_matching(tidx+1, + [lambda t: isinstance(t, Identifier), + lambda t: t.is_whitespace(), + lambda t: t.match(T.Punctuation, + ',') + ]) + if end is None: + idx = tidx + 1 + else: + grp_tokens = tlist.tokens_between(token, end, exclude_end=True) + while grp_tokens and (grp_tokens[-1].is_whitespace() + or grp_tokens[-1].match(T.Punctuation, ',')): + grp_tokens.pop() + if len(grp_tokens) <= 1: + idx = tidx + 1 + else: + group = tlist.group_tokens(IdentifierList, grp_tokens) + idx = tlist.token_index(group) + token = tlist.token_next_by_instance(idx, Identifier) + + +def group_parenthesis(tlist): + _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis) + +def group_comments(tlist): + [group_comments(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Comment)] + idx = 0 + token = tlist.token_next_by_type(idx, T.Comment) + while token: + tidx = tlist.token_index(token) + end = tlist.token_not_matching(tidx+1, + [lambda t: t.ttype in T.Comment, + lambda t: t.is_whitespace()]) + if end is None: + idx = tidx + 1 + else: + eidx = tlist.token_index(end) + grp_tokens = tlist.tokens_between(token, + tlist.token_prev(eidx, False)) + group = tlist.group_tokens(Comment, grp_tokens) + idx = tlist.token_index(group) + token = tlist.token_next_by_type(idx, T.Comment) + +def group_where(tlist): + [group_where(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Where)] + idx = 0 + token = tlist.token_next_match(idx, T.Keyword, 'WHERE') + stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION') + while token: + tidx = tlist.token_index(token) + end = tlist.token_next_match(tidx+1, T.Keyword, stopwords) + if end is None: + end = tlist.tokens[-1] + else: + end = tlist.tokens[tlist.token_index(end)-1] + group = tlist.group_tokens(Where, tlist.tokens_between(token, end)) + idx = tlist.token_index(group) + token = tlist.token_next_match(idx, T.Keyword, 'WHERE') + +def group_aliased(tlist): + [group_aliased(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Identifier)] + idx = 0 + token = tlist.token_next_by_instance(idx, Identifier) + while token: + next_ = tlist.token_next(tlist.token_index(token)) + if next_ is not None and isinstance(next_, Identifier): + grp = tlist.tokens_between(token, next_)[1:] + token.tokens.extend(grp) + for t in grp: + tlist.tokens.remove(t) + idx = tlist.token_index(token)+1 + token = tlist.token_next_by_instance(idx, Identifier) + + +def group_typecasts(tlist): + _group_left_right(tlist, T.Punctuation, '::', Identifier) + + +def group(tlist): + for func in [group_parenthesis, + group_comments, + group_where, + group_case, + 
group_identifier, + group_typecasts, + group_as, + group_aliased, + group_assignment, + group_comparsion, + group_identifier_list, + group_if, + group_for,]: + func(tlist) diff --git a/sqlparse/filters.py b/sqlparse/filters.py new file mode 100644 index 0000000..695b298 --- /dev/null +++ b/sqlparse/filters.py @@ -0,0 +1,432 @@ +# -*- coding: utf-8 -*- + +import re + +from sqlparse.engine import grouping +from sqlparse import tokens as T + + +class Filter(object): + + def process(self, *args): + raise NotImplementedError + + +class TokenFilter(Filter): + + def process(self, stack, stream): + raise NotImplementedError + + +# FIXME: Should be removed +def rstrip(stream): + buff = [] + for token in stream: + if token.is_whitespace() and '\n' in token.value: + # assuming there's only one \n in value + before, rest = token.value.split('\n', 1) + token.value = '\n%s' % rest + buff = [] + yield token + elif token.is_whitespace(): + buff.append(token) + elif token.is_group(): + token.tokens = list(rstrip(token.tokens)) + # process group and look if it starts with a nl + if token.tokens and token.tokens[0].is_whitespace(): + before, rest = token.tokens[0].value.split('\n', 1) + token.tokens[0].value = '\n%s' % rest + buff = [] + while buff: + yield buff.pop(0) + yield token + else: + while buff: + yield buff.pop(0) + yield token + + +# -------------------------- +# token process + +class _CaseFilter(TokenFilter): + + ttype = None + + def __init__(self, case=None): + if case is None: + case = 'upper' + assert case in ['lower', 'upper', 'capitalize'] + self.convert = getattr(unicode, case) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype in self.ttype: + value = self.convert(value) + yield ttype, value + + +class KeywordCaseFilter(_CaseFilter): + ttype = T.Keyword + + +class IdentifierCaseFilter(_CaseFilter): + ttype = (T.Name, T.String.Symbol) + + +# ---------------------- +# statement process + +class StripCommentsFilter(Filter): + + def _process(self, tlist): + idx = 0 + clss = set([x.__class__ for x in tlist.tokens]) + while grouping.Comment in clss: + token = tlist.token_next_by_instance(0, grouping.Comment) + tidx = tlist.token_index(token) + prev = tlist.token_prev(tidx, False) + next_ = tlist.token_next(tidx, False) + # Replace by whitespace if prev and next exist and if they're not + # whitespaces. This doesn't apply if prev or next is a paranthesis. 
+ if (prev is not None and next_ is not None + and not prev.is_whitespace() and not next_.is_whitespace() + and not (prev.match(T.Punctuation, '(') + or next_.match(T.Punctuation, ')'))): + tlist.tokens[tidx] = grouping.Token(T.Whitespace, ' ') + else: + tlist.tokens.pop(tidx) + clss = set([x.__class__ for x in tlist.tokens]) + + def process(self, stack, stmt): + [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] + self._process(stmt) + + +class StripWhitespaceFilter(Filter): + + def _stripws(self, tlist): + func_name = '_stripws_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._stripws_default) + func(tlist) + + def _stripws_default(self, tlist): + last_was_ws = False + for token in tlist.tokens: + if token.is_whitespace(): + if last_was_ws: + token.value = '' + else: + token.value = ' ' + last_was_ws = token.is_whitespace() + + def _stripws_parenthesis(self, tlist): + if tlist.tokens[1].is_whitespace(): + tlist.tokens.pop(1) + if tlist.tokens[-2].is_whitespace(): + tlist.tokens.pop(-2) + self._stripws_default(tlist) + + def process(self, stack, stmt): + [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] + self._stripws(stmt) + if stmt.tokens[-1].is_whitespace(): + stmt.tokens.pop(-1) + + +class ReindentFilter(Filter): + + def __init__(self, width=2, char=' ', line_width=None): + self.width = width + self.char = char + self.indent = 0 + self.offset = 0 + self.line_width = line_width + self._curr_stmt = None + self._last_stmt = None + + def _get_offset(self, token): + all_ = list(self._curr_stmt.flatten()) + idx = all_.index(token) + raw = ''.join(unicode(x) for x in all_[:idx+1]) + line = raw.splitlines()[-1] + # Now take current offset into account and return relative offset. + full_offset = len(line)-(len(self.char*(self.width*self.indent))) + return full_offset - self.offset + + def nl(self): + # TODO: newline character should be configurable + ws = '\n'+(self.char*((self.indent*self.width)+self.offset)) + return grouping.Token(T.Whitespace, ws) + + def _split_kwds(self, tlist): + split_words = ('FROM', 'JOIN$', 'AND', 'OR', + 'GROUP', 'ORDER', 'UNION', 'VALUES') + idx = 0 + token = tlist.token_next_match(idx, T.Keyword, split_words, + regex=True) + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + offset = 1 + if prev and prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + offset += 1 + nl = self.nl() + tlist.insert_before(token, nl) + token = tlist.token_next_match(tlist.token_index(nl)+offset, + T.Keyword, split_words, regex=True) + + def _split_statements(self, tlist): + idx = 0 + token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + if prev and prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + # only break if it's not the first token + if prev: + nl = self.nl() + tlist.insert_before(token, nl) + token = tlist.token_next_by_type(tlist.token_index(token)+1, + (T.Keyword.DDL, T.Keyword.DML)) + + def _process(self, tlist): + func_name = '_process_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._process_default) + func(tlist) + + def _process_where(self, tlist): + token = tlist.token_next_match(0, T.Keyword, 'WHERE') + tlist.insert_before(token, self.nl()) + self.indent += 1 + self._process_default(tlist) + self.indent -= 1 + + def _process_parenthesis(self, tlist): + first = tlist.token_next(0) + indented = False + if first and first.ttype in 
(T.Keyword.DML, T.Keyword.DDL): + self.indent += 1 + tlist.tokens.insert(0, self.nl()) + indented = True + num_offset = self._get_offset(tlist.token_next_match(0, + T.Punctuation, '(')) + self.offset += num_offset + self._process_default(tlist, stmts=not indented) + if indented: + self.indent -= 1 + self.offset -= num_offset + + def _process_identifierlist(self, tlist): + identifiers = tlist.get_identifiers() + if len(identifiers) > 1: + first = list(identifiers[0].flatten())[0] + num_offset = self._get_offset(first)-len(first.value) + self.offset += num_offset + for token in identifiers[1:]: + tlist.insert_before(token, self.nl()) + self.offset -= num_offset + self._process_default(tlist) + + def _process_case(self, tlist): + cases = tlist.get_cases() + is_first = True + num_offset = None + case = tlist.tokens[0] + outer_offset = self._get_offset(case)-len(case.value) + self.offset += outer_offset + for cond, value in tlist.get_cases(): + if is_first: + is_first = False + num_offset = self._get_offset(cond[0])-len(cond[0].value) + self.offset += num_offset + continue + if cond is None: + token = value[0] + else: + token = cond[0] + tlist.insert_before(token, self.nl()) + # Line breaks on group level are done. Now let's add an offset of + # 5 (=length of "when", "then", "else") and process subgroups. + self.offset += 5 + self._process_default(tlist) + self.offset -= 5 + if num_offset is not None: + self.offset -= num_offset + end = tlist.token_next_match(0, T.Keyword, 'END') + tlist.insert_before(end, self.nl()) + self.offset -= outer_offset + + def _process_default(self, tlist, stmts=True, kwds=True): + if stmts: + self._split_statements(tlist) + if kwds: + self._split_kwds(tlist) + [self._process(sgroup) for sgroup in tlist.get_sublists()] + + def process(self, stack, stmt): + if isinstance(stmt, grouping.Statement): + self._curr_stmt = stmt + self._process(stmt) + if isinstance(stmt, grouping.Statement): + if self._last_stmt is not None: + if self._last_stmt.to_unicode().endswith('\n'): + nl = '\n' + else: + nl = '\n\n' + stmt.tokens.insert(0, + grouping.Token(T.Whitespace, nl)) + if self._last_stmt != stmt: + self._last_stmt = stmt + + +# FIXME: Doesn't work ;) +class RightMarginFilter(Filter): + + keep_together = ( +# grouping.TypeCast, grouping.Identifier, grouping.Alias, + ) + + def __init__(self, width=79): + self.width = width + self.line = '' + + def _process(self, stack, group, stream): + for token in stream: + if token.is_whitespace() and '\n' in token.value: + if token.value.endswith('\n'): + self.line = '' + else: + self.line = token.value.splitlines()[-1] + elif (token.is_group() + and not token.__class__ in self.keep_together): + token.tokens = self._process(stack, token, token.tokens) + else: + val = token.to_unicode() + if len(self.line) + len(val) > self.width: + match = re.search('^ +', self.line) + if match is not None: + indent = match.group() + else: + indent = '' + yield grouping.Token(T.Whitespace, '\n%s' % indent) + self.line = indent + self.line += val + yield token + + def process(self, stack, group): + return + group.tokens = self._process(stack, group, group.tokens) + + +# --------------------------- +# postprocess + +class SerializerUnicode(Filter): + + def process(self, stack, stmt): + raw = stmt.to_unicode() + add_nl = raw.endswith('\n') + res = '\n'.join(line.rstrip() for line in raw.splitlines()) + if add_nl: + res += '\n' + return res + + +class OutputPythonFilter(Filter): + + def __init__(self, varname='sql'): + self.varname = varname + self.cnt = 0 + + 
def _process(self, stream, varname, count, has_nl): + if count > 1: + yield grouping.Token(T.Whitespace, '\n') + yield grouping.Token(T.Name, varname) + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Operator, '=') + yield grouping.Token(T.Whitespace, ' ') + if has_nl: + yield grouping.Token(T.Operator, '(') + yield grouping.Token(T.Text, "'") + cnt = 0 + for token in stream: + cnt += 1 + if token.is_whitespace() and '\n' in token.value: + if cnt == 1: + continue + after_lb = token.value.split('\n', 1)[1] + yield grouping.Token(T.Text, "'") + yield grouping.Token(T.Whitespace, '\n') + for i in range(len(varname)+4): + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Text, "'") + if after_lb: # it's the indendation + yield grouping.Token(T.Whitespace, after_lb) + continue + elif token.value and "'" in token.value: + token.value = token.value.replace("'", "\\'") + yield grouping.Token(T.Text, token.value or '') + yield grouping.Token(T.Text, "'") + if has_nl: + yield grouping.Token(T.Operator, ')') + + def process(self, stack, stmt): + self.cnt += 1 + if self.cnt > 1: + varname = '%s%d' % (self.varname, self.cnt) + else: + varname = self.varname + has_nl = len(stmt.to_unicode().strip().splitlines()) > 1 + stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl) + return stmt + + +class OutputPHPFilter(Filter): + + def __init__(self, varname='sql'): + self.varname = '$%s' % varname + self.count = 0 + + def _process(self, stream, varname): + if self.count > 1: + yield grouping.Token(T.Whitespace, '\n') + yield grouping.Token(T.Name, varname) + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Operator, '=') + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Text, '"') + cnt = 0 + for token in stream: + if token.is_whitespace() and '\n' in token.value: + cnt += 1 + if cnt == 1: + continue + after_lb = token.value.split('\n', 1)[1] + yield grouping.Token(T.Text, '"') + yield grouping.Token(T.Operator, ';') + yield grouping.Token(T.Whitespace, '\n') + yield grouping.Token(T.Name, varname) + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Punctuation, '.') + yield grouping.Token(T.Operator, '=') + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Text, '"') + if after_lb: + yield grouping.Token(T.Text, after_lb) + continue + elif '"' in token.value: + token.value = token.value.replace('"', '\\"') + yield grouping.Token(T.Text, token.value) + yield grouping.Token(T.Text, '"') + yield grouping.Token(T.Punctuation, ';') + + def process(self, stack, stmt): + self.count += 1 + if self.count > 1: + varname = '%s%d' % (self.varname, self.count) + else: + varname = self.varname + stmt.tokens = tuple(self._process(stmt.tokens, varname)) + return stmt + diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py new file mode 100644 index 0000000..9d443ca --- /dev/null +++ b/sqlparse/formatter.py @@ -0,0 +1,163 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
+ +"""SQL formatter""" + +import logging + +from sqlparse import SQLParseError +from sqlparse import filters + + +def validate_options(options): + """Validates options.""" + kwcase = options.get('keyword_case', None) + if kwcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) + + idcase = options.get('identifier_case', None) + if idcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for identifier_case: %r' % idcase) + + ofrmt = options.get('output_format', None) + if ofrmt not in [None, 'sql', 'python', 'php']: + raise SQLParseError('Unknown output format: %r' % ofrmt) + + strip_comments = options.get('strip_comments', False) + if strip_comments not in [True, False]: + raise SQLParseError('Invalid value for strip_comments: %r' + % strip_comments) + + strip_ws = options.get('strip_whitespace', False) + if strip_ws not in [True, False]: + raise SQLParseError('Invalid value for strip_whitespace: %r' + % strip_ws) + + reindent = options.get('reindent', False) + if reindent not in [True, False]: + raise SQLParseError('Invalid value for reindent: %r' + % reindent) + elif reindent: + options['strip_whitespace'] = True + indent_tabs = options.get('indent_tabs', False) + if indent_tabs not in [True, False]: + raise SQLParserError('Invalid value for indent_tabs: %r' % indent_tabs) + elif indent_tabs: + options['indent_char'] = '\t' + else: + options['indent_char'] = ' ' + indent_width = options.get('indent_width', 2) + try: + indent_width = int(indent_width) + except (TypeError, ValueError): + raise SQLParseError('indent_width requires an integer') + if indent_width < 1: + raise SQLParseError('indent_width requires an positive integer') + options['indent_width'] = indent_width + + right_margin = options.get('right_margin', None) + if right_margin is not None: + try: + right_margin = int(right_margin) + except (TypeError, ValueError): + raise SQLParseError('right_margin requires an integer') + if right_margin < 10: + raise SQLParseError('right_margin requires an integer > 10') + options['right_margin'] = right_margin + + return options + + +def build_filter_stack(stack, options): + """Setup and return a filter stack. + + Args: + stack: :class:`~sqlparse.filters.FilterStack` instance + options: Dictionary with options validated by validate_options. 
+ """ + # Token filter + if 'keyword_case' in options: + stack.preprocess.append( + filters.KeywordCaseFilter(options['keyword_case'])) + + if 'identifier_case' in options: + stack.preprocess.append( + filters.IdentifierCaseFilter(options['identifier_case'])) + + # After grouping + if options.get('strip_comments', False): + stack.enable_grouping() + stack.stmtprocess.append(filters.StripCommentsFilter()) + + if (options.get('strip_whitespace', False) + or options.get('reindent', False)): + stack.enable_grouping() + stack.stmtprocess.append(filters.StripWhitespaceFilter()) + + if options.get('reindent', False): + stack.enable_grouping() + stack.stmtprocess.append( + filters.ReindentFilter(char=options['indent_char'], + width=options['indent_width'])) + + if options.get('right_margin', False): + stack.enable_grouping() + stack.stmtprocess.append( + filters.RightMarginFilter(width=options['right_margin'])) + + # Serializer + if options.get('output_format'): + frmt = options['output_format'] + if frmt.lower() == 'php': + fltr = filters.OutputPHPFilter() + elif frmt.lower() == 'python': + fltr = filters.OutputPythonFilter() + else: + fltr = None + if fltr is not None: + stack.postprocess.append(fltr) + + return stack + + +def format(statement, **options): + import filters + logging.info('OPTIONS %r', options) + lexer = Lexer() +# lexer.add_filter('whitespace') + lexer.add_filter(filters.GroupFilter()) + if options.get('reindent', False): + lexer.add_filter(filters.StripWhitespaceFilter()) + lexer.add_filter(filters.IndentFilter( + n_indents=options.get('n_indents', 2))) + if options.get('ltrim', False): + lexer.add_filter(filters.LTrimFilter()) + keyword_case = options.get('keyword_case', None) + if keyword_case is not None: + assert keyword_case in ('lower', 'upper', 'capitalize') + lexer.add_filter(filters.KeywordCaseFilter(case=keyword_case)) + identifier_case = options.get('identifier_case', None) + if identifier_case is not None: + assert identifier_case in ('lower', 'upper', 'capitalize') + lexer.add_filter(filters.IdentifierCaseFilter(case=identifier_case)) + if options.get('strip_comments', False): + lexer.add_filter(filters.StripCommentsFilter()) + right_margin = options.get('right_margin', None) + if right_margin is not None: + right_margin = int(right_margin) + assert right_margin > 0 + lexer.add_filter(filters.RightMarginFilter(margin=right_margin)) + lexer.add_filter(filters.UngroupFilter()) + if options.get('output_format', None): + ofrmt = options['output_format'] + assert ofrmt in ('sql', 'python', 'php') + if ofrmt == 'python': + lexer.add_filter(filters.OutputPythonFilter()) + elif ofrmt == 'php': + lexer.add_filter(filters.OutputPHPFilter()) + tokens = [] + for ttype, value in lexer.get_tokens(unicode(statement)): + tokens.append((ttype, value)) + return statement.__class__(tokens) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py new file mode 100644 index 0000000..3f0632e --- /dev/null +++ b/sqlparse/keywords.py @@ -0,0 +1,589 @@ +from sqlparse.tokens import * + +KEYWORDS = { + 'ABORT': Keyword, + 'ABS': Keyword, + 'ABSOLUTE': Keyword, + 'ACCESS': Keyword, + 'ADA': Keyword, + 'ADD': Keyword, + 'ADMIN': Keyword, + 'AFTER': Keyword, + 'AGGREGATE': Keyword, + 'ALIAS': Keyword, + 'ALL': Keyword, + 'ALLOCATE': Keyword, + 'ANALYSE': Keyword, + 'ANALYZE': Keyword, + 'AND': Keyword, + 'ANY': Keyword, + 'ARE': Keyword, + 'AS': Keyword, + 'ASC': Keyword, + 'ASENSITIVE': Keyword, + 'ASSERTION': Keyword, + 'ASSIGNMENT': Keyword, + 'ASYMMETRIC': Keyword, + 'AT': Keyword, + 
'ATOMIC': Keyword, + 'AUTHORIZATION': Keyword, + 'AVG': Keyword, + + 'BACKWARD': Keyword, + 'BEFORE': Keyword, + 'BEGIN': Keyword, + 'BETWEEN': Keyword, + 'BITVAR': Keyword, + 'BIT_LENGTH': Keyword, + 'BOTH': Keyword, + 'BREADTH': Keyword, + 'BY': Keyword, + + 'C': Keyword, + 'CACHE': Keyword, + 'CALL': Keyword, + 'CALLED': Keyword, + 'CARDINALITY': Keyword, + 'CASCADE': Keyword, + 'CASCADED': Keyword, + 'CASE': Keyword, + 'CAST': Keyword, + 'CATALOG': Keyword, + 'CATALOG_NAME': Keyword, + 'CHAIN': Keyword, + 'CHARACTERISTICS': Keyword, + 'CHARACTER_LENGTH': Keyword, + 'CHARACTER_SET_CATALOG': Keyword, + 'CHARACTER_SET_NAME': Keyword, + 'CHARACTER_SET_SCHEMA': Keyword, + 'CHAR_LENGTH': Keyword, + 'CHECK': Keyword, + 'CHECKED': Keyword, + 'CHECKPOINT': Keyword, + 'CLASS': Keyword, + 'CLASS_ORIGIN': Keyword, + 'CLOB': Keyword, + 'CLOSE': Keyword, + 'CLUSTER': Keyword, + 'COALSECE': Keyword, + 'COBOL': Keyword, + 'COLLATE': Keyword, + 'COLLATION': Keyword, + 'COLLATION_CATALOG': Keyword, + 'COLLATION_NAME': Keyword, + 'COLLATION_SCHEMA': Keyword, + 'COLUMN': Keyword, + 'COLUMN_NAME': Keyword, + 'COMMAND_FUNCTION': Keyword, + 'COMMAND_FUNCTION_CODE': Keyword, + 'COMMENT': Keyword, + 'COMMIT': Keyword, + 'COMMITTED': Keyword, + 'COMPLETION': Keyword, + 'CONDITION_NUMBER': Keyword, + 'CONNECT': Keyword, + 'CONNECTION': Keyword, + 'CONNECTION_NAME': Keyword, + 'CONSTRAINT': Keyword, + 'CONSTRAINTS': Keyword, + 'CONSTRAINT_CATALOG': Keyword, + 'CONSTRAINT_NAME': Keyword, + 'CONSTRAINT_SCHEMA': Keyword, + 'CONSTRUCTOR': Keyword, + 'CONTAINS': Keyword, + 'CONTINUE': Keyword, + 'CONVERSION': Keyword, + 'CONVERT': Keyword, + 'COPY': Keyword, + 'CORRESPONTING': Keyword, + 'COUNT': Keyword, + 'CREATEDB': Keyword, + 'CREATEUSER': Keyword, + 'CROSS': Keyword, + 'CUBE': Keyword, + 'CURRENT': Keyword, + 'CURRENT_DATE': Keyword, + 'CURRENT_PATH': Keyword, + 'CURRENT_ROLE': Keyword, + 'CURRENT_TIME': Keyword, + 'CURRENT_TIMESTAMP': Keyword, + 'CURRENT_USER': Keyword, + 'CURSOR': Keyword, + 'CURSOR_NAME': Keyword, + 'CYCLE': Keyword, + + 'DATA': Keyword, + 'DATABASE': Keyword, + 'DATETIME_INTERVAL_CODE': Keyword, + 'DATETIME_INTERVAL_PRECISION': Keyword, + 'DAY': Keyword, + 'DEALLOCATE': Keyword, + 'DECLARE': Keyword, + 'DEFAULT': Keyword, + 'DEFAULTS': Keyword, + 'DEFERRABLE': Keyword, + 'DEFERRED': Keyword, + 'DEFINED': Keyword, + 'DEFINER': Keyword, + 'DELIMITER': Keyword, + 'DELIMITERS': Keyword, + 'DEREF': Keyword, + 'DESC': Keyword, + 'DESCRIBE': Keyword, + 'DESCRIPTOR': Keyword, + 'DESTROY': Keyword, + 'DESTRUCTOR': Keyword, + 'DETERMINISTIC': Keyword, + 'DIAGNOSTICS': Keyword, + 'DICTIONARY': Keyword, + 'DISCONNECT': Keyword, + 'DISPATCH': Keyword, + 'DISTINCT': Keyword, + 'DO': Keyword, + 'DOMAIN': Keyword, + 'DYNAMIC': Keyword, + 'DYNAMIC_FUNCTION': Keyword, + 'DYNAMIC_FUNCTION_CODE': Keyword, + + 'EACH': Keyword, + 'ELSE': Keyword, + 'ENCODING': Keyword, + 'ENCRYPTED': Keyword, + 'END': Keyword, + 'END-EXEC': Keyword, + 'EQUALS': Keyword, + 'ESCAPE': Keyword, + 'EVERY': Keyword, + 'EXCEPT': Keyword, + 'ESCEPTION': Keyword, + 'EXCLUDING': Keyword, + 'EXCLUSIVE': Keyword, + 'EXEC': Keyword, + 'EXECUTE': Keyword, + 'EXISTING': Keyword, + 'EXISTS': Keyword, + 'EXTERNAL': Keyword, + 'EXTRACT': Keyword, + + 'FALSE': Keyword, + 'FETCH': Keyword, + 'FINAL': Keyword, + 'FIRST': Keyword, + 'FOR': Keyword, + 'FORCE': Keyword, + 'FOREIGN': Keyword, + 'FORTRAN': Keyword, + 'FORWARD': Keyword, + 'FOUND': Keyword, + 'FREE': Keyword, + 'FREEZE': Keyword, + 'FROM': Keyword, + 'FULL': Keyword, + 'FUNCTION': Keyword, 
+ + 'G': Keyword, + 'GENERAL': Keyword, + 'GENERATED': Keyword, + 'GET': Keyword, + 'GLOBAL': Keyword, + 'GO': Keyword, + 'GOTO': Keyword, + 'GRANT': Keyword, + 'GRANTED': Keyword, + 'GROUP': Keyword, + 'GROUPING': Keyword, + + 'HANDLER': Keyword, + 'HAVING': Keyword, + 'HIERARCHY': Keyword, + 'HOLD': Keyword, + 'HOST': Keyword, + + 'IDENTITY': Keyword, + 'IF': Keyword, + 'IGNORE': Keyword, + 'ILIKE': Keyword, + 'IMMEDIATE': Keyword, + 'IMMUTABLE': Keyword, + + 'IMPLEMENTATION': Keyword, + 'IMPLICIT': Keyword, + 'IN': Keyword, + 'INCLUDING': Keyword, + 'INCREMENT': Keyword, + 'INDEX': Keyword, + + 'INDITCATOR': Keyword, + 'INFIX': Keyword, + 'INHERITS': Keyword, + 'INITIALIZE': Keyword, + 'INITIALLY': Keyword, + 'INNER': Keyword, + 'INOUT': Keyword, + 'INPUT': Keyword, + 'INSENSITIVE': Keyword, + 'INSTANTIABLE': Keyword, + 'INSTEAD': Keyword, + 'INTERSECT': Keyword, + 'INTO': Keyword, + 'INVOKER': Keyword, + 'IS': Keyword, + 'ISNULL': Keyword, + 'ISOLATION': Keyword, + 'ITERATE': Keyword, + + 'JOIN': Keyword, + + 'K': Keyword, + 'KEY': Keyword, + 'KEY_MEMBER': Keyword, + 'KEY_TYPE': Keyword, + + 'LANCOMPILER': Keyword, + 'LANGUAGE': Keyword, + 'LARGE': Keyword, + 'LAST': Keyword, + 'LATERAL': Keyword, + 'LEADING': Keyword, + 'LEFT': Keyword, + 'LENGTH': Keyword, + 'LESS': Keyword, + 'LEVEL': Keyword, + 'LIKE': Keyword, + 'LILMIT': Keyword, + 'LISTEN': Keyword, + 'LOAD': Keyword, + 'LOCAL': Keyword, + 'LOCALTIME': Keyword, + 'LOCALTIMESTAMP': Keyword, + 'LOCATION': Keyword, + 'LOCATOR': Keyword, + 'LOCK': Keyword, + 'LOWER': Keyword, + + 'M': Keyword, + 'MAP': Keyword, + 'MATCH': Keyword, + 'MAX': Keyword, + 'MAXVALUE': Keyword, + 'MESSAGE_LENGTH': Keyword, + 'MESSAGE_OCTET_LENGTH': Keyword, + 'MESSAGE_TEXT': Keyword, + 'METHOD': Keyword, + 'MIN': Keyword, + 'MINUTE': Keyword, + 'MINVALUE': Keyword, + 'MOD': Keyword, + 'MODE': Keyword, + 'MODIFIES': Keyword, + 'MODIFY': Keyword, + 'MONTH': Keyword, + 'MORE': Keyword, + 'MOVE': Keyword, + 'MUMPS': Keyword, + + 'NAMES': Keyword, + 'NATIONAL': Keyword, + 'NATURAL': Keyword, + 'NCHAR': Keyword, + 'NCLOB': Keyword, + 'NEW': Keyword, + 'NEXT': Keyword, + 'NO': Keyword, + 'NOCREATEDB': Keyword, + 'NOCREATEUSER': Keyword, + 'NONE': Keyword, + 'NOT': Keyword, + 'NOTHING': Keyword, + 'NOTIFY': Keyword, + 'NOTNULL': Keyword, + 'NULL': Keyword, + 'NULLABLE': Keyword, + 'NULLIF': Keyword, + + 'OBJECT': Keyword, + 'OCTET_LENGTH': Keyword, + 'OF': Keyword, + 'OFF': Keyword, + 'OFFSET': Keyword, + 'OIDS': Keyword, + 'OLD': Keyword, + 'ON': Keyword, + 'ONLY': Keyword, + 'OPEN': Keyword, + 'OPERATION': Keyword, + 'OPERATOR': Keyword, + 'OPTION': Keyword, + 'OPTIONS': Keyword, + 'OR': Keyword, + 'ORDER': Keyword, + 'ORDINALITY': Keyword, + 'OUT': Keyword, + 'OUTER': Keyword, + 'OUTPUT': Keyword, + 'OVERLAPS': Keyword, + 'OVERLAY': Keyword, + 'OVERRIDING': Keyword, + 'OWNER': Keyword, + + 'PAD': Keyword, + 'PARAMETER': Keyword, + 'PARAMETERS': Keyword, + 'PARAMETER_MODE': Keyword, + 'PARAMATER_NAME': Keyword, + 'PARAMATER_ORDINAL_POSITION': Keyword, + 'PARAMETER_SPECIFIC_CATALOG': Keyword, + 'PARAMETER_SPECIFIC_NAME': Keyword, + 'PARAMATER_SPECIFIC_SCHEMA': Keyword, + 'PARTIAL': Keyword, + 'PASCAL': Keyword, + 'PENDANT': Keyword, + 'PLACING': Keyword, + 'PLI': Keyword, + 'POSITION': Keyword, + 'POSTFIX': Keyword, + 'PRECISION': Keyword, + 'PREFIX': Keyword, + 'PREORDER': Keyword, + 'PREPARE': Keyword, + 'PRESERVE': Keyword, + 'PRIMARY': Keyword, + 'PRIOR': Keyword, + 'PRIVILEGES': Keyword, + 'PROCEDURAL': Keyword, + 'PROCEDURE': Keyword, + 'PUBLIC': Keyword, + 
+ 'RAISE': Keyword, + 'READ': Keyword, + 'READS': Keyword, + 'RECHECK': Keyword, + 'RECURSIVE': Keyword, + 'REF': Keyword, + 'REFERENCES': Keyword, + 'REFERENCING': Keyword, + 'REINDEX': Keyword, + 'RELATIVE': Keyword, + 'RENAME': Keyword, + 'REPEATABLE': Keyword, + 'REPLACE': Keyword, + 'RESET': Keyword, + 'RESTART': Keyword, + 'RESTRICT': Keyword, + 'RESULT': Keyword, + 'RETURN': Keyword, + 'RETURNED_LENGTH': Keyword, + 'RETURNED_OCTET_LENGTH': Keyword, + 'RETURNED_SQLSTATE': Keyword, + 'RETURNS': Keyword, + 'REVOKE': Keyword, + 'RIGHT': Keyword, + 'ROLE': Keyword, + 'ROLLBACK': Keyword, + 'ROLLUP': Keyword, + 'ROUTINE': Keyword, + 'ROUTINE_CATALOG': Keyword, + 'ROUTINE_NAME': Keyword, + 'ROUTINE_SCHEMA': Keyword, + 'ROW': Keyword, + 'ROWS': Keyword, + 'ROW_COUNT': Keyword, + 'RULE': Keyword, + + 'SAVE_POINT': Keyword, + 'SCALE': Keyword, + 'SCHEMA': Keyword, + 'SCHEMA_NAME': Keyword, + 'SCOPE': Keyword, + 'SCROLL': Keyword, + 'SEARCH': Keyword, + 'SECOND': Keyword, + 'SECURITY': Keyword, + 'SELF': Keyword, + 'SENSITIVE': Keyword, + 'SERIALIZABLE': Keyword, + 'SERVER_NAME': Keyword, + 'SESSION': Keyword, + 'SESSION_USER': Keyword, + 'SETOF': Keyword, + 'SETS': Keyword, + 'SHARE': Keyword, + 'SHOW': Keyword, + 'SIMILAR': Keyword, + 'SIMPLE': Keyword, + 'SIZE': Keyword, + 'SOME': Keyword, + 'SOURCE': Keyword, + 'SPACE': Keyword, + 'SPECIFIC': Keyword, + 'SPECIFICTYPE': Keyword, + 'SPECIFIC_NAME': Keyword, + 'SQL': Keyword, + 'SQLCODE': Keyword, + 'SQLERROR': Keyword, + 'SQLEXCEPTION': Keyword, + 'SQLSTATE': Keyword, + 'SQLWARNINIG': Keyword, + 'STABLE': Keyword, + 'START': Keyword, + 'STATE': Keyword, + 'STATEMENT': Keyword, + 'STATIC': Keyword, + 'STATISTICS': Keyword, + 'STDIN': Keyword, + 'STDOUT': Keyword, + 'STORAGE': Keyword, + 'STRICT': Keyword, + 'STRUCTURE': Keyword, + 'STYPE': Keyword, + 'SUBCLASS_ORIGIN': Keyword, + 'SUBLIST': Keyword, + 'SUBSTRING': Keyword, + 'SUM': Keyword, + 'SYMMETRIC': Keyword, + 'SYSID': Keyword, + 'SYSTEM': Keyword, + 'SYSTEM_USER': Keyword, + + 'TABLE': Keyword, + 'TABLE_NAME': Keyword, + ' TEMP': Keyword, + 'TEMPLATE': Keyword, + 'TEMPORARY': Keyword, + 'TERMINATE': Keyword, + 'THAN': Keyword, + 'THEN': Keyword, + 'TIMESTAMP': Keyword, + 'TIMEZONE_HOUR': Keyword, + 'TIMEZONE_MINUTE': Keyword, + 'TO': Keyword, + 'TOAST': Keyword, + 'TRAILING': Keyword, + 'TRANSATION': Keyword, + 'TRANSACTIONS_COMMITTED': Keyword, + 'TRANSACTIONS_ROLLED_BACK': Keyword, + 'TRANSATION_ACTIVE': Keyword, + 'TRANSFORM': Keyword, + 'TRANSFORMS': Keyword, + 'TRANSLATE': Keyword, + 'TRANSLATION': Keyword, + 'TREAT': Keyword, + 'TRIGGER': Keyword, + 'TRIGGER_CATALOG': Keyword, + 'TRIGGER_NAME': Keyword, + 'TRIGGER_SCHEMA': Keyword, + 'TRIM': Keyword, + 'TRUE': Keyword, + 'TRUNCATE': Keyword, + 'TRUSTED': Keyword, + 'TYPE': Keyword, + + 'UNCOMMITTED': Keyword, + 'UNDER': Keyword, + 'UNENCRYPTED': Keyword, + 'UNION': Keyword, + 'UNIQUE': Keyword, + 'UNKNOWN': Keyword, + 'UNLISTEN': Keyword, + 'UNNAMED': Keyword, + 'UNNEST': Keyword, + 'UNTIL': Keyword, + 'UPPER': Keyword, + 'USAGE': Keyword, + 'USER': Keyword, + 'USER_DEFINED_TYPE_CATALOG': Keyword, + 'USER_DEFINED_TYPE_NAME': Keyword, + 'USER_DEFINED_TYPE_SCHEMA': Keyword, + 'USING': Keyword, + + 'VACUUM': Keyword, + 'VALID': Keyword, + 'VALIDATOR': Keyword, + 'VALUES': Keyword, + 'VARIABLE': Keyword, + 'VERBOSE': Keyword, + 'VERSION': Keyword, + 'VIEW': Keyword, + 'VOLATILE': Keyword, + + 'WHEN': Keyword, + 'WHENEVER': Keyword, + 'WHERE': Keyword, + 'WITH': Keyword, + 'WITHOUT': Keyword, + 'WORK': Keyword, + 'WRITE': Keyword, + + 
'YEAR': Keyword, + + 'ZONE': Keyword, + + + 'ARRAY': Name.Builtin, + 'BIGINT': Name.Builtin, + 'BINARY': Name.Builtin, + 'BIT': Name.Builtin, + 'BLOB': Name.Builtin, + 'BOOLEAN': Name.Builtin, + 'CHAR': Name.Builtin, + 'CHARACTER': Name.Builtin, + 'DATE': Name.Builtin, + 'DEC': Name.Builtin, + 'DECIMAL': Name.Builtin, + 'FLOAT': Name.Builtin, + 'INT': Name.Builtin, + 'INTEGER': Name.Builtin, + 'INTERVAL': Name.Builtin, + 'NUMBER': Name.Builtin, + 'NUMERIC': Name.Builtin, + 'REAL': Name.Builtin, + 'SERIAL': Name.Builtin, + 'SMALLINT': Name.Builtin, + 'VARCHAR': Name.Builtin, + 'VARYING': Name.Builtin, + 'INT8': Name.Builtin, + 'SERIAL8': Name.Builtin, + 'TEXT': Name.Builtin, + } + + +KEYWORDS_COMMON = { + 'SELECT': Keyword.DML, + 'INSERT': Keyword.DML, + 'DELETE': Keyword.DML, + 'UPDATE': Keyword.DML, + 'DROP': Keyword.DDL, + 'CREATE': Keyword.DDL, + 'ALTER': Keyword.DDL, + + 'WHERE': Keyword, + 'FROM': Keyword, + 'INNER': Keyword, + 'JOIN': Keyword, + 'AND': Keyword, + 'OR': Keyword, + 'LIKE': Keyword, + 'ON': Keyword, + 'IN': Keyword, + + 'BY': Keyword, + 'GROUP': Keyword, + 'ORDER': Keyword, + 'LEFT': Keyword, + 'OUTER': Keyword, + + 'IF': Keyword, + 'END': Keyword, + 'THEN': Keyword, + 'LOOP': Keyword, + 'AS': Keyword, + 'ELSE': Keyword, + 'FOR': Keyword, + + 'CASE': Keyword, + 'WHEN': Keyword, + 'MIN': Keyword, + 'MAX': Keyword, + 'DISTINCT': Keyword, + + } diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py new file mode 100644 index 0000000..b635fc6 --- /dev/null +++ b/sqlparse/lexer.py @@ -0,0 +1,310 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""SQL Lexer""" + +# This code is based on the SqlLexer in pygments. +# http://pygments.org/ +# It's separated from the rest of pygments to increase performance +# and to allow some customizations. + +import re + +from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON +from sqlparse.tokens import * +from sqlparse.tokens import _TokenType + + +class include(str): + pass + +class combined(tuple): + """Indicates a state combined from multiple states.""" + + def __new__(cls, *args): + return tuple.__new__(cls, args) + + def __init__(self, *args): + # tuple.__init__ doesn't do anything + pass + +def is_keyword(value): + test = value.upper() + return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, Name)), value + + +def apply_filters(stream, filters, lexer=None): + """ + Use this method to apply an iterable of filters to + a stream. If lexer is given it's forwarded to the + filter, otherwise the filter receives `None`. + """ + def _apply(filter_, stream): + for token in filter_.filter(lexer, stream): + yield token + for filter_ in filters: + stream = _apply(filter_, stream) + return stream + + +class LexerMeta(type): + """ + Metaclass for Lexer, creates the self._tokens attribute from + self.tokens on the first instantiation. 
+ """ + + def _process_state(cls, unprocessed, processed, state): + assert type(state) is str, "wrong state name %r" % state + assert state[0] != '#', "invalid state name %r" % state + if state in processed: + return processed[state] + tokens = processed[state] = [] + rflags = cls.flags + for tdef in unprocessed[state]: + if isinstance(tdef, include): + # it's a state reference + assert tdef != state, "circular state reference %r" % state + tokens.extend(cls._process_state(unprocessed, processed, str(tdef))) + continue + + assert type(tdef) is tuple, "wrong rule def %r" % tdef + + try: + rex = re.compile(tdef[0], rflags).match + except Exception, err: + raise ValueError("uncompilable regex %r in state %r of %r: %s" % + (tdef[0], state, cls, err)) + + assert type(tdef[1]) is _TokenType or callable(tdef[1]), \ + 'token type must be simple type or callable, not %r' % (tdef[1],) + + if len(tdef) == 2: + new_state = None + else: + tdef2 = tdef[2] + if isinstance(tdef2, str): + # an existing state + if tdef2 == '#pop': + new_state = -1 + elif tdef2 in unprocessed: + new_state = (tdef2,) + elif tdef2 == '#push': + new_state = tdef2 + elif tdef2[:5] == '#pop:': + new_state = -int(tdef2[5:]) + else: + assert False, 'unknown new state %r' % tdef2 + elif isinstance(tdef2, combined): + # combine a new state from existing ones + new_state = '_tmp_%d' % cls._tmpname + cls._tmpname += 1 + itokens = [] + for istate in tdef2: + assert istate != state, 'circular state ref %r' % istate + itokens.extend(cls._process_state(unprocessed, + processed, istate)) + processed[new_state] = itokens + new_state = (new_state,) + elif isinstance(tdef2, tuple): + # push more than one state + for state in tdef2: + assert (state in unprocessed or + state in ('#pop', '#push')), \ + 'unknown new state ' + state + new_state = tdef2 + else: + assert False, 'unknown new state def %r' % tdef2 + tokens.append((rex, tdef[1], new_state)) + return tokens + + def process_tokendef(cls): + cls._all_tokens = {} + cls._tmpname = 0 + processed = cls._all_tokens[cls.__name__] = {} + #tokendefs = tokendefs or cls.tokens[name] + for state in cls.tokens.keys(): + cls._process_state(cls.tokens, processed, state) + return processed + + def __call__(cls, *args, **kwds): + if not hasattr(cls, '_tokens'): + cls._all_tokens = {} + cls._tmpname = 0 + if hasattr(cls, 'token_variants') and cls.token_variants: + # don't process yet + pass + else: + cls._tokens = cls.process_tokendef() + + return type.__call__(cls, *args, **kwds) + + + + +class Lexer: + + __metaclass__ = LexerMeta + + encoding = 'utf-8' + stripall = False + stripnl = False + tabsize = 0 + flags = re.IGNORECASE + + tokens = { + 'root': [ + (r'--.*?(\r|\n|\r\n)', Comment.Single), + (r'(\r|\n|\r\n)', Newline), + (r'\s+', Whitespace), + (r'/\*', Comment.Multiline, 'multiline-comments'), + (r':=', Assignment), + (r'::', Punctuation), + (r'[*]', Wildcard), + (r'[+/<>=~!@#%^&|`?^-]', Operator), + (r'[0-9]+', Number.Integer), + # TODO: Backslash escapes? 
+ (r"'(''|[^'])*'", String.Single), + (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL + (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN', Keyword), + (r'END( IF| LOOP)?', Keyword), + (r'CREATE( OR REPLACE)?', Keyword.DDL), + (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword), + (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', Name.Builtin), + (r'[;:()\[\],\.]', Punctuation), + ], + 'multiline-comments': [ + (r'/\*', Comment.Multiline, 'multiline-comments'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[^/\*]+', Comment.Multiline), + (r'[/*]', Comment.Multiline) + ] + } + + def __init__(self): + self.filters = [] + + def add_filter(self, filter_, **options): + from sqlparse.filters import Filter + if not isinstance(filter_, Filter): + filter_ = filter_(**options) + self.filters.append(filter_) + + def get_tokens(self, text, unfiltered=False): + """ + Return an iterable of (tokentype, value) pairs generated from + `text`. If `unfiltered` is set to `True`, the filtering mechanism + is bypassed even if filters are defined. + + Also preprocess the text, i.e. expand tabs and strip it if + wanted and applies registered filters. + """ + if not isinstance(text, unicode): + if self.encoding == 'guess': + try: + text = text.decode('utf-8') + if text.startswith(u'\ufeff'): + text = text[len(u'\ufeff'):] + except UnicodeDecodeError: + text = text.decode('latin1') + elif self.encoding == 'chardet': + try: + import chardet + except ImportError: + raise ImportError('To enable chardet encoding guessing, ' + 'please install the chardet library ' + 'from http://chardet.feedparser.org/') + enc = chardet.detect(text) + text = text.decode(enc['encoding']) + else: + text = text.decode(self.encoding) + if self.stripall: + text = text.strip() + elif self.stripnl: + text = text.strip('\n') + if self.tabsize > 0: + text = text.expandtabs(self.tabsize) +# if not text.endswith('\n'): +# text += '\n' + + def streamer(): + for i, t, v in self.get_tokens_unprocessed(text): + yield t, v + stream = streamer() + if not unfiltered: + stream = apply_filters(stream, self.filters, self) + return stream + + + def get_tokens_unprocessed(self, text, stack=('root',)): + """ + Split ``text`` into (tokentype, text) pairs. 
+ + ``stack`` is the inital stack (default: ``['root']``) + """ + pos = 0 + tokendefs = self._tokens + statestack = list(stack) + statetokens = tokendefs[statestack[-1]] + known_names = {} + while 1: + for rexmatch, action, new_state in statetokens: + m = rexmatch(text, pos) + if m: + # print rex.pattern + value = m.group() + if value in known_names: + yield pos, known_names[value], value + elif type(action) is _TokenType: + yield pos, action, value + elif hasattr(action, '__call__'): + ttype, value = action(value) + known_names[value] = ttype + yield pos, ttype, value + else: + for item in action(self, m): + yield item + pos = m.end() + if new_state is not None: + # state transition + if isinstance(new_state, tuple): + for state in new_state: + if state == '#pop': + statestack.pop() + elif state == '#push': + statestack.append(statestack[-1]) + else: + statestack.append(state) + elif isinstance(new_state, int): + # pop + del statestack[new_state:] + elif new_state == '#push': + statestack.append(statestack[-1]) + else: + assert False, "wrong state def: %r" % new_state + statetokens = tokendefs[statestack[-1]] + break + else: + try: + if text[pos] == '\n': + # at EOL, reset state to "root" + pos += 1 + statestack = ['root'] + statetokens = tokendefs['root'] + yield pos, Text, u'\n' + continue + yield pos, Error, text[pos] + pos += 1 + except IndexError: + break + + +def tokenize(sql): + """Tokenize sql. + + Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream + of ``(token type, value)`` items. + """ + lexer = Lexer() + return lexer.get_tokens(sql) diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py new file mode 100644 index 0000000..2c63c41 --- /dev/null +++ b/sqlparse/tokens.py @@ -0,0 +1,131 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +# The Token implementation is based on pygment's token system written +# by Georg Brandl. +# http://pygments.org/ + +"""Tokens""" + +try: + set +except NameError: + from sets import Set as set + + +class _TokenType(tuple): + parent = None + + def split(self): + buf = [] + node = self + while node is not None: + buf.append(node) + node = node.parent + buf.reverse() + return buf + + def __init__(self, *args): + # no need to call super.__init__ + self.subtypes = set() + + def __contains__(self, val): + return self is val or ( + type(val) is self.__class__ and + val[:len(self)] == self + ) + + def __getattr__(self, val): + if not val or not val[0].isupper(): + return tuple.__getattribute__(self, val) + new = _TokenType(self + (val,)) + setattr(self, val, new) + self.subtypes.add(new) + new.parent = self + return new + + def __hash__(self): + return hash(tuple(self)) + + def __repr__(self): + return 'Token' + (self and '.' or '') + '.'.join(self) + + +Token = _TokenType() + +# Special token types +Text = Token.Text +Whitespace = Text.Whitespace +Newline = Whitespace.Newline +Error = Token.Error +# Text that doesn't belong to this lexer (e.g. 
HTML in PHP) +Other = Token.Other + +# Common token types for source code +Keyword = Token.Keyword +Name = Token.Name +Literal = Token.Literal +String = Literal.String +Number = Literal.Number +Punctuation = Token.Punctuation +Operator = Token.Operator +Wildcard = Token.Wildcard +Comment = Token.Comment +Assignment = Token.Assignement + +# Generic types for non-source code +Generic = Token.Generic + +# String and some others are not direct childs of Token. +# alias them: +Token.Token = Token +Token.String = String +Token.Number = Number + +# SQL specific tokens +DML = Keyword.DML +DDL = Keyword.DDL +Command = Keyword.Command + +Group = Token.Group +Group.Parenthesis = Token.Group.Parenthesis +Group.Comment = Token.Group.Comment +Group.Where = Token.Group.Where + + +def is_token_subtype(ttype, other): + """ + Return True if ``ttype`` is a subtype of ``other``. + + exists for backwards compatibility. use ``ttype in other`` now. + """ + return ttype in other + + +def string_to_tokentype(s): + """ + Convert a string into a token type:: + + >>> string_to_token('String.Double') + Token.Literal.String.Double + >>> string_to_token('Token.Literal.Number') + Token.Literal.Number + >>> string_to_token('') + Token + + Tokens that are already tokens are returned unchanged: + + >>> string_to_token(String) + Token.Literal.String + """ + if isinstance(s, _TokenType): + return s + if not s: + return Token + node = Token + for item in s.split('.'): + node = getattr(node, item) + return node + diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/__init__.py diff --git a/tests/files/begintag.sql b/tests/files/begintag.sql new file mode 100644 index 0000000..699b365 --- /dev/null +++ b/tests/files/begintag.sql @@ -0,0 +1,4 @@ +begin; +update foo + set bar = 1; +commit;
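The token type hierarchy defined in sqlparse/tokens.py above is built lazily: attribute access on a _TokenType creates and caches a subtype, and containment compares tuple prefixes. A small illustrative sketch, not part of the diff itself; it only assumes the package is importable from the source tree:

    from sqlparse import tokens as T

    # Attribute access creates a subtype on first use and caches it.
    assert T.Keyword.DML is T.Keyword.DML
    # Containment walks the tuple prefix: a subtype is "in" its parent,
    # but not the other way around.
    assert T.Keyword.DML in T.Keyword
    assert T.Keyword not in T.Keyword.DML
    assert repr(T.Name.Builtin) == 'Token.Name.Builtin'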
\ No newline at end of file diff --git a/tests/files/dashcomment.sql b/tests/files/dashcomment.sql new file mode 100644 index 0000000..0d5ac62 --- /dev/null +++ b/tests/files/dashcomment.sql @@ -0,0 +1,5 @@ +select * from user; +--select * from host; +select * from user; +select * -- foo; +from foo;
\ No newline at end of file diff --git a/tests/files/function.sql b/tests/files/function.sql new file mode 100644 index 0000000..d19227f --- /dev/null +++ b/tests/files/function.sql @@ -0,0 +1,13 @@ +CREATE OR REPLACE FUNCTION foo( + p_in1 VARCHAR + , p_in2 INTEGER +) RETURNS INTEGER AS + + DECLARE + v_foo INTEGER; + BEGIN + SELECT * + FROM foo + INTO v_foo; + RETURN v_foo.id; + END;
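The lexer shown earlier (sqlparse/lexer.py) can also be driven directly; its tokenize() helper returns a generator of (tokentype, value) pairs. A minimal sketch, assuming the source tree is on sys.path (Python 2, matching the code above):

    from sqlparse import lexer

    for ttype, value in lexer.tokenize('select * from foo;'):
        print ttype, repr(value)
    # the first pair is roughly: Token.Keyword.DML 'select'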
\ No newline at end of file diff --git a/tests/files/function_psql.sql b/tests/files/function_psql.sql new file mode 100644 index 0000000..e485f7a --- /dev/null +++ b/tests/files/function_psql.sql @@ -0,0 +1,72 @@ +CREATE OR REPLACE FUNCTION public.delete_data ( + p_tabelle VARCHAR + , p_key VARCHAR + , p_value INTEGER +) RETURNS INTEGER AS +$$ +DECLARE + p_retval INTEGER; + v_constraint RECORD; + v_count INTEGER; + v_data RECORD; + v_fieldname VARCHAR; + v_sql VARCHAR; + v_key VARCHAR; + v_value INTEGER; +BEGIN + v_sql := 'SELECT COUNT(*) FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value; + --RAISE NOTICE '%', v_sql; + EXECUTE v_sql INTO v_count; + IF v_count::integer != 0 THEN + SELECT att.attname + INTO v_key + FROM pg_attribute att + LEFT JOIN pg_constraint con ON con.conrelid = att.attrelid + AND con.conkey[1] = att.attnum + AND con.contype = 'p', pg_type typ, pg_class rel, pg_namespace ns + WHERE att.attrelid = rel.oid + AND att.attnum > 0 + AND typ.oid = att.atttypid + AND att.attisdropped = false + AND rel.relname = p_tabelle + AND con.conkey[1] = 1 + AND ns.oid = rel.relnamespace + AND ns.nspname = 'public' + ORDER BY att.attnum; + v_sql := 'SELECT ' || v_key || ' AS id FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value; + FOR v_data IN EXECUTE v_sql + LOOP + --RAISE NOTICE ' -> % %', p_tabelle, v_data.id; + FOR v_constraint IN SELECT t.constraint_name + , t.constraint_type + , t.table_name + , c.column_name + FROM public.v_table_constraints t + , public.v_constraint_columns c + WHERE t.constraint_name = c.constraint_name + AND t.constraint_type = 'FOREIGN KEY' + AND c.table_name = p_tabelle + AND t.table_schema = 'public' + AND c.table_schema = 'public' + LOOP + v_fieldname := substring(v_constraint.constraint_name from 1 for length(v_constraint.constraint_name) - length(v_constraint.column_name) - 1); + IF (v_constraint.table_name = p_tabelle) AND (p_value = v_data.id) THEN + --RAISE NOTICE 'Skip (Selbstverweis)'; + CONTINUE; + ELSE + PERFORM delete_data(v_constraint.table_name::varchar, v_fieldname::varchar, v_data.id::integer); + END IF; + END LOOP; + END LOOP; + v_sql := 'DELETE FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value; + --RAISE NOTICE '%', v_sql; + EXECUTE v_sql; + p_retval := 1; + ELSE + --RAISE NOTICE ' -> Keine Sätze gefunden'; + p_retval := 0; + END IF; + RETURN p_retval; +END; +$$ +LANGUAGE plpgsql;
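The reindent and case filters from sqlparse/filters.py, wired up in sqlparse/formatter.py, are reached through sqlparse.format(). A hedged sketch based on the options validated above and the expectations in tests/test_format.py; the exact whitespace may differ:

    import sqlparse

    sql = 'select * from foo where bar = 1 and baz = 2;'
    print sqlparse.format(sql, reindent=True, keyword_case='upper')
    # roughly:
    # SELECT *
    # FROM foo
    # WHERE bar = 1
    #   AND baz = 2;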
\ No newline at end of file diff --git a/tests/files/function_psql2.sql b/tests/files/function_psql2.sql new file mode 100644 index 0000000..b5d494c --- /dev/null +++ b/tests/files/function_psql2.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION update_something() RETURNS void AS +$body$ +BEGIN + raise notice 'foo'; +END; +$body$ +LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER;
\ No newline at end of file diff --git a/tests/files/function_psql3.sql b/tests/files/function_psql3.sql new file mode 100644 index 0000000..b25d818 --- /dev/null +++ b/tests/files/function_psql3.sql @@ -0,0 +1,8 @@ +CREATE OR REPLACE FUNCTION foo() RETURNS integer AS +$body$ +DECLARE +BEGIN + select * from foo; +END; +$body$ +LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER;
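The fixture files above are exercised by the test modules that follow, mostly by round-tripping them through sqlparse.parse(). A short sketch of that pattern, assuming it is run from the repository root so the relative path resolves:

    import sqlparse

    sql = open('tests/files/begintag.sql').read()
    statements = sqlparse.parse(sql)
    assert len(statements) == 3      # begin; update ...; commit;
    assert ''.join(unicode(s) for s in statements) == sql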
\ No newline at end of file diff --git a/tests/run_tests.py b/tests/run_tests.py new file mode 100755 index 0000000..1c7960e --- /dev/null +++ b/tests/run_tests.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Test runner for sqlparse.""" + +import os +import sys +import unittest + +sys.path.insert(1, os.path.join(os.path.dirname(__file__), '../')) + + +def main(): + """Create a TestSuite and run it.""" + loader = unittest.TestLoader() + suite = unittest.TestSuite() + fnames = [os.path.split(f)[-1] for f in sys.argv[1:]] + for fname in os.listdir(os.path.dirname(__file__)): + if (not fname.startswith('test_') or not fname.endswith('.py') + or (fnames and fname not in fnames)): + continue + modname = os.path.splitext(fname)[0] + mod = __import__(os.path.splitext(fname)[0]) + suite.addTests(loader.loadTestsFromModule(mod)) + unittest.TextTestRunner(verbosity=2).run(suite) + + + + +if __name__ == '__main__': + main() diff --git a/tests/test_format.py b/tests/test_format.py new file mode 100644 index 0000000..5748704 --- /dev/null +++ b/tests/test_format.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- + +from tests.utils import TestCaseBase + +import sqlparse + + +class TestFormat(TestCaseBase): + + def test_keywordcase(self): + sql = 'select * from bar; -- select foo\n' + res = sqlparse.format(sql, keyword_case='upper') + self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- select foo\n') + res = sqlparse.format(sql, keyword_case='capitalize') + self.ndiffAssertEqual(res, 'Select * From bar; -- select foo\n') + res = sqlparse.format(sql.upper(), keyword_case='lower') + self.ndiffAssertEqual(res, 'select * from BAR; -- SELECT FOO\n') + self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql, + keyword_case='foo') + + def test_identifiercase(self): + sql = 'select * from bar; -- select foo\n' + res = sqlparse.format(sql, identifier_case='upper') + self.ndiffAssertEqual(res, 'select * from BAR; -- select foo\n') + res = sqlparse.format(sql, identifier_case='capitalize') + self.ndiffAssertEqual(res, 'select * from Bar; -- select foo\n') + res = sqlparse.format(sql.upper(), identifier_case='lower') + self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- SELECT FOO\n') + self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql, + identifier_case='foo') + sql = 'select * from "foo"."bar"' + res = sqlparse.format(sql, identifier_case="upper") + self.ndiffAssertEqual(res, 'select * from "FOO"."BAR"') + + def test_strip_comments_single(self): + sql = 'select *-- statement starts here\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select * from foo') + sql = 'select * -- statement starts here\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select * from foo') + sql = 'select-- foo\nfrom -- bar\nwhere' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select from where') + + def test_strip_comments_multi(self): + sql = '/* sql starts here */\nselect' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select') + sql = '/* sql starts here */ select' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select') + sql = '/*\n * sql starts here\n */\nselect' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select') + sql = 'select (/* sql starts here */ select 2)' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select (select 2)') + + def 
test_strip_ws(self): + f = lambda sql: sqlparse.format(sql, strip_whitespace=True) + s = 'select\n* from foo\n\twhere ( 1 = 2 )\n' + self.ndiffAssertEqual(f(s), 'select * from foo where (1 = 2)') + s = 'select -- foo\nfrom bar\n' + self.ndiffAssertEqual(f(s), 'select -- foo\nfrom bar') + + +class TestFormatReindent(TestCaseBase): + + def test_stmts(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select foo; select bar' + self.ndiffAssertEqual(f(s), 'select foo;\n\nselect bar') + s = 'select foo' + self.ndiffAssertEqual(f(s), 'select foo') + s = 'select foo; -- test\n select bar' + self.ndiffAssertEqual(f(s), 'select foo; -- test\n\nselect bar') + + def test_keywords(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select * from foo union select * from bar;' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'union', + 'select *', + 'from bar;'])) + + def test_parenthesis(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select count(*) from (select * from foo);' + self.ndiffAssertEqual(f(s), + '\n'.join(['select count(*)', + 'from', + ' (select *', + ' from foo);', + ]) + ) + + def test_where(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select * from foo where bar = 1 and baz = 2 or bzz = 3;' + self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n' + 'where bar = 1\n' + ' and baz = 2\n' + ' or bzz = 3;')) + s = 'select * from foo where bar = 1 and (baz = 2 or bzz = 3);' + self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n' + 'where bar = 1\n' + ' and (baz = 2\n' + ' or bzz = 3);')) + + def test_join(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select * from foo join bar on 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'join bar on 1 = 2'])) + s = 'select * from foo inner join bar on 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'inner join bar on 1 = 2'])) + s = 'select * from foo left outer join bar on 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'left outer join bar on 1 = 2'] + )) + + def test_identifier_list(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select foo, bar, baz from table1, table2 where 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select foo,', + ' bar,', + ' baz', + 'from table1,', + ' table2', + 'where 1 = 2'])) + + def test_case(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'case when foo = 1 then 2 when foo = 3 then 4 else 5 end' + self.ndiffAssertEqual(f(s), '\n'.join(['case when foo = 1 then 2', + ' when foo = 3 then 4', + ' else 5', + 'end'])) + + diff --git a/tests/test_grouping.py b/tests/test_grouping.py new file mode 100644 index 0000000..fc3bea5 --- /dev/null +++ b/tests/test_grouping.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +import sqlparse +from sqlparse import tokens as T +from sqlparse.engine.grouping import * + +from tests.utils import TestCaseBase + + +class TestGrouping(TestCaseBase): + + def test_parenthesis(self): + s ='x1 (x2 (x3) x2) foo (y2) bar' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, str(parsed)) + self.assertEqual(len(parsed.tokens), 9) + self.assert_(isinstance(parsed.tokens[2], Parenthesis)) + self.assert_(isinstance(parsed.tokens[-3], Parenthesis)) + self.assertEqual(len(parsed.tokens[2].tokens), 7) + self.assert_(isinstance(parsed.tokens[2].tokens[3], Parenthesis)) + self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3) + + def test_comments(self): + s = '/*\n 
* foo\n */ \n bar' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, unicode(parsed)) + self.assertEqual(len(parsed.tokens), 2) + + def test_identifiers(self): + s = 'select foo.bar from "myscheme"."table" where fail. order' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, parsed.to_unicode()) + self.assert_(isinstance(parsed.tokens[2], Identifier)) + self.assert_(isinstance(parsed.tokens[6], Identifier)) + self.assert_(isinstance(parsed.tokens[8], Where)) + s = 'select * from foo where foo.id = 1' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, parsed.to_unicode()) + self.assert_(isinstance(parsed.tokens[-1].tokens[2], Identifier)) + s = 'select * from (select "foo"."id" from foo)' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, parsed.to_unicode()) + self.assert_(isinstance(parsed.tokens[-1].tokens[3], Identifier)) + + def test_where(self): + s = 'select * from foo where bar = 1 order by id desc' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertTrue(len(p.tokens), 16) + s = 'select x from (select y from foo where bar = 1) z' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertTrue(isinstance(p.tokens[-3].tokens[-1], Where)) + + def test_typecast(self): + s = 'select foo::integer from bar' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_typecast(), 'integer') + self.assertEqual(p.tokens[2].get_name(), 'foo') + s = 'select (current_database())::information_schema.sql_identifier' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_typecast(), + 'information_schema.sql_identifier') + + def test_alias(self): + s = 'select foo as bar from mytable' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_real_name(), 'foo') + self.assertEqual(p.tokens[2].get_alias(), 'bar') + s = 'select foo from mytable t1' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[6].get_real_name(), 'mytable') + self.assertEqual(p.tokens[6].get_alias(), 't1') + s = 'select foo::integer as bar from mytable' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_alias(), 'bar') + s = ('SELECT DISTINCT ' + '(current_database())::information_schema.sql_identifier AS view') + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[4].get_alias(), 'view') diff --git a/tests/test_parse.py b/tests/test_parse.py new file mode 100644 index 0000000..536b6f6 --- /dev/null +++ b/tests/test_parse.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +"""Tests sqlparse function.""" + +from utils import TestCaseBase + +import sqlparse + + +class SQLParseTest(TestCaseBase): + """Tests sqlparse.parse().""" + + def test_tokenize(self): + sql = 'select * from foo;' + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.assertEqual(str(stmts[0]), sql) + + def test_multistatement(self): + sql1 = 'select * from foo;' + sql2 = 'select * from bar;' + stmts = sqlparse.parse(sql1+sql2) + self.assertEqual(len(stmts), 2) + self.assertEqual(str(stmts[0]), sql1) + self.assertEqual(str(stmts[1]), sql2) + + def test_newlines(self): + sql = u'select\n*from foo;' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) + sql = u'select\r\n*from foo' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) + sql = u'select\r*from 
foo' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) + sql = u'select\r\n*from foo\n' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) diff --git a/tests/test_split.py b/tests/test_split.py new file mode 100644 index 0000000..782b226 --- /dev/null +++ b/tests/test_split.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +# Tests splitting functions. + +import unittest + +from utils import load_file, TestCaseBase + +import sqlparse + + +class SQLSplitTest(TestCaseBase): + """Tests sqlparse.sqlsplit().""" + + _sql1 = 'select * from foo;' + _sql2 = 'select * from bar;' + + def test_split_semicolon(self): + sql2 = 'select * from foo where bar = \'foo;bar\';' + stmts = sqlparse.parse(''.join([self._sql1, sql2])) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(unicode(stmts[0]), self._sql1) + self.ndiffAssertEqual(unicode(stmts[1]), sql2) + + def test_create_function(self): + sql = load_file('function.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_create_function_psql(self): + sql = load_file('function_psql.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_create_function_psql3(self): + sql = load_file('function_psql3.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_create_function_psql2(self): + sql = load_file('function_psql2.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_dashcomments(self): + sql = load_file('dashcomment.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 3) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_begintag(self): + sql = load_file('begintag.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 3) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_dropif(self): + sql = 'DROP TABLE IF EXISTS FOO;\n\nSELECT * FROM BAR;' + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_comment_with_umlaut(self): + sql = (u'select * from foo;\n' + u'-- Testing an umlaut: ä\n' + u'select * from bar;') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_comment_end_of_line(self): + sql = ('select * from foo; -- foo\n' + 'select * from bar;') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + # make sure the comment belongs to first query + self.ndiffAssertEqual(unicode(stmts[0]), 'select * from foo; -- foo\n') + + def test_casewhen(self): + sql = ('SELECT case when val = 1 then 2 else null end as foo;\n' + 'comment on table actor is \'The actor table.\';') + stmts = sqlparse.split(sql) + self.assertEqual(len(stmts), 2) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py new file mode 100644 index 0000000..7106b3c --- /dev/null +++ b/tests/test_tokenize.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +import unittest +import types + +from sqlparse import lexer +from sqlparse.tokens import * + + +class TestTokenize(unittest.TestCase): + + def test_simple(self): + sql = 'select * from foo;' + stream = lexer.tokenize(sql) + self.assert_(type(stream) is types.GeneratorType) + tokens = 
list(stream)
+        self.assertEqual(len(tokens), 8)
+        self.assertEqual(len(tokens[0]), 2)
+        self.assertEqual(tokens[0], (Keyword.DML, u'select'))
+        self.assertEqual(tokens[-1], (Punctuation, u';'))
+
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..a78b460
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+"""Helpers for testing."""
+
+import codecs
+import difflib
+import os
+import unittest
+from StringIO import StringIO
+
+NL = '\n'
+DIR_PATH = os.path.abspath(os.path.dirname(__file__))
+PARENT_DIR = os.path.dirname(DIR_PATH)
+FILES_DIR = os.path.join(DIR_PATH, 'files')
+
+
+def load_file(filename, encoding='utf-8'):
+    """Open the file with the given encoding and return its contents."""
+    f = codecs.open(os.path.join(FILES_DIR, filename), 'r', encoding)
+    data = f.read()
+    f.close()
+    return data
+
+
+class TestCaseBase(unittest.TestCase):
+    """Base class for test cases with some additional checks."""
+
+    # Adapted from Python's tests.
+    def ndiffAssertEqual(self, first, second):
+        """Like failUnlessEqual, but uses ndiff for readable output."""
+        if first != second:
+            sfirst = unicode(first)
+            ssecond = unicode(second)
+            diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines())
+            fp = StringIO()
+            print >> fp, NL, NL.join(diff)
+            print fp.getvalue()
+            raise self.failureException, fp.getvalue()
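
Taken together, the test modules in this commit pin down the behaviour of the package's public entry points: sqlparse.split(), sqlparse.parse() and sqlparse.format(). For orientation, here is a minimal usage sketch; it is not part of the diff above, it only uses calls and keyword arguments that the tests themselves exercise, and it assumes the same Python 2 environment as the test suite:

    # -*- coding: utf-8 -*-
    # Illustrative sketch only; drives the entry points covered by the
    # test suite above.
    import sqlparse

    sql = 'select * from foo; select * from bar;'

    # split() returns one string per statement.
    statements = sqlparse.split(sql)
    assert len(statements) == 2

    # parse() returns Statement objects; joining their string forms
    # round-trips the original input, as the split tests assert.
    parsed = sqlparse.parse(sql)
    assert ''.join(unicode(p) for p in parsed) == sql

    # format() rewrites a statement; reindent and strip_whitespace are the
    # options exercised by the formatter tests.
    print sqlparse.format('select * from foo where bar = 1 and baz = 2;',
                          reindent=True)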