From 8ddc98b8000cd88aa3fd53881cd3d3df8ee1a9b3 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Fri, 6 May 2016 20:48:42 -0700
Subject: fix test to run locally

---
 tests/test_functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_functions.py b/tests/test_functions.py
index 425ab7f..9207815 100644
--- a/tests/test_functions.py
+++ b/tests/test_functions.py
@@ -13,6 +13,7 @@ sys.path.insert(0, '..')
 
 from sqlparse.filters import compact
 from sqlparse.functions import getcolumns, getlimit, IsType
+from tests.utils import FILES_DIR
 
 
 class Test_IncludeStatement(TestCase):
@@ -27,7 +28,7 @@ class Test_IncludeStatement(TestCase):
 
     def test_includeStatement(self):
         stream = tokenize(self.sql)
-        includeStatement = IncludeStatement('tests/files',
+        includeStatement = IncludeStatement(FILES_DIR,
                                             raiseexceptions=True)
         stream = includeStatement.process(None, stream)
         stream = compact(stream)
-- 
cgit v1.2.1


From 43c14e081cadb8ac386b0895266a5b57a8329587 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Sat, 23 Apr 2016 18:00:47 -0700
Subject: Add editorconfig, update gitignore, filename, authors

---
 .editorconfig |  23 +++++
 .gitignore    |   3 +
 AUTHORS       |   1 +
 CHANGELOG     | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 CHANGES       | 324 ----------------------------------------------------------
 COPYING       |  25 -----
 LICENSE       |  25 +++++
 7 files changed, 376 insertions(+), 349 deletions(-)
 create mode 100644 .editorconfig
 create mode 100644 CHANGELOG
 delete mode 100644 CHANGES
 delete mode 100644 COPYING
 create mode 100644 LICENSE

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..6fa8b7b
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,23 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+end_of_line = crlf
+charset = utf-8
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.{py,ini,yaml,yml,rst}]
+indent_style = space
+indent_size = 4
+continuation_indent_size = 4
+trim_trailing_whitespace = true
+
+[{Makefile,*.bat}]
+indent_style = tab
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/.gitignore b/.gitignore
index 6dde1c3..438de5f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# PyCharm
+.idea/
+
 *.pyc
 docs/build
 dist
diff --git a/AUTHORS b/AUTHORS
index 0f34f06..9831fa1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -33,6 +33,7 @@ Alphabetical list of contributors:
 * spigwitmer <itgpmc@gmail.com>
 * Tim Graham <timograham@gmail.com>
 * Victor Hahn <info@victor-hahn.de>
+* Victor Uriarte <vmuriart@gmail.com>
 * vthriller <farreva232@yandex.ru>
 * wayne.wuw <wayne.wuw@alibaba-inc.com>
 * Yago Riveiro <yago.riveiro@gmail.com>
diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644
index 0000000..374c060
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,324 @@
+Development Version
+-------------------
+
+IMPORTANT: The supported Python versions have changed with this release.
+sqlparse 0.2.x supports Python 2.7 and Python >= 3.3.
+
+Internal Changes
+* sqlparse.SQLParseError was removed from top-level module and moved to
+  sqlparse.exceptions.
+* sqlparse.sql.Token.to_unicode was removed.
+
+Enhancements
+* Support WHILE loops (issue215, by shenlongxing).
+* Better support for CTEs (issue217, by Andrew Tipton).
+
+Bug Fixes
+* Leading whitespaces are now removed when format() is called with
+  strip_whitespace=True (issue213, by shenlongxing).
+* Fix typo in keywords list (issue229, by cbeloni).
+* Fix parsing of functions in comparisons (issue230, by saaj).
+* Minor bug fixes (issue101).
+
+
+Release 0.1.19 (Mar 07, 2016)
+-----------------------------
+Bug Fixes
+* Fix IndexError when statement contains WITH clauses (issue205).
+
+
+Release 0.1.18 (Oct 25, 2015)
+-----------------------------
+
+Bug Fixes
+* Remove universal wheel support, added in 0.1.17 by mistake.
+
+
+Release 0.1.17 (Oct 24, 2015)
+-----------------------------
+
+Enhancements
+* Speed up parsing of large SQL statements (pull request: issue201, fixes the
+  following issues: issue199, issue135, issue62, issue41, by Ryan Wooden).
+
+Bug Fixes
+* Fix another splitter bug regarding DECLARE (issue194).
+
+Misc
+* Packages on PyPI are signed from now on.
+
+
+Release 0.1.16 (Jul 26, 2015)
+-----------------------------
+
+Bug Fixes
+* Fix a regression in get_alias() introduced in 0.1.15 (issue185).
+* Fix a bug in the splitter regarding DECLARE (issue193).
+* sqlformat command line tool doesn't duplicate newlines anymore (issue191).
+* Don't mix up MySQL comments starting with hash and MSSQL
+  temp tables (issue192).
+* Statement.get_type() now ignores comments at the beginning of
+  a statement (issue186).
+
+
+Release 0.1.15 (Apr 15, 2015)
+-----------------------------
+
+Bug Fixes
+* Fix a regression for identifiers with square brackets
+  notation (issue153, by darikg).
+* Add missing SQL types (issue154, issue155, issue156, by jukebox).
+* Fix parsing of multi-line comments (issue172, by JacekPliszka).
+* Fix parsing of escaped backslashes (issue174, by caseyching).
+* Fix parsing of identifiers starting with underscore (issue175).
+* Fix misinterpretation of IN keyword (issue183).
+
+Enhancements
+* Improve formatting of HAVING statements.
+* Improve parsing of inline comments (issue163).
+* Group comments to parent object (issue128, issue160).
+* Add double precision builtin (issue169, by darikg).
+* Add support for square bracket array indexing (issue170, issue176,
+  issue177 by darikg).
+* Improve grouping of aliased elements (issue167, by darikg).
+* Support comments starting with '#' character (issue178).
+
+
+Release 0.1.14 (Nov 30, 2014)
+-----------------------------
+
+Bug Fixes
+* Floats in UPDATE statements are now handled correctly (issue145).
+* Properly handle string literals in comparisons (issue148, change proposed
+  by aadis).
+* Fix indentation when using tabs (issue146).
+
+Enhancements
+* Improved formatting in list when newlines precede commas (issue140).
+
+
+Release 0.1.13 (Oct 09, 2014)
+-----------------------------
+
+Bug Fixes
+* Fix a regression in handling of NULL keywords introduced in 0.1.12.
+
+
+Release 0.1.12 (Sep 20, 2014)
+-----------------------------
+
+Bug Fixes
+* Fix handling of NULL keywords in aliased identifiers.
+* Fix SerializerUnicode to split unquoted newlines (issue131, by Michael Schuller).
+* Fix handling of modulo operators without spaces (by gavinwahl).
+
+Enhancements
+* Improve parsing of identifier lists containing placeholders.
+* Speed up query parsing of unquoted lines (by Michael Schuller).
+
+
+Release 0.1.11 (Feb 07, 2014)
+-----------------------------
+
+Bug Fixes
+* Fix incorrect parsing of string literals containing line breaks (issue118).
+* Fix typo in keywords, add MERGE, COLLECT keywords (issue122/124,
+  by Cristian Orellana).
+* Improve parsing of string literals in columns.
+* Fix parsing and formatting of statements containing EXCEPT keyword.
+* Fix Function.get_parameters() (issue126/127, by spigwitmer).
+
+Enhancements
+* Classify DML keywords (issue116, by Victor Hahn).
+* Add missing FOREACH keyword.
+* Grouping of BEGIN/END blocks.
+
+Other
+* Python 2.5 isn't automatically tested anymore, neither Travis nor Tox
+  still support it out of the box.
+
+
+Release 0.1.10 (Nov 02, 2013)
+-----------------------------
+
+Bug Fixes
+* Removed buffered reading again, it obviously causes wrong parsing in some rare
+  cases (issue114).
+* Fix regression in setup.py introduced 10 months ago (issue115).
+
+Enhancements
+* Improved support for JOINs, by Alexander Beedie.
+
+
+Release 0.1.9 (Sep 28, 2013)
+----------------------------
+
+Bug Fixes
+* Fix a regression introduced in 0.1.5 where sqlparse didn't properly
+  distinguish between single and double quoted strings when tagging
+  identifier (issue111).
+
+Enhancements
+* New option to truncate long string literals when formatting.
+* Scientific numbers are parsed correctly (issue107).
+* Support for arithmetic expressions (issue109, issue106; by prudhvi).
+
+
+Release 0.1.8 (Jun 29, 2013)
+----------------------------
+
+Bug Fixes
+* Whitespaces within certain keywords are now allowed (issue97, patch proposed
+  by xcombelle).
+
+Enhancements
+* Improve parsing of assignments in UPDATE statements (issue90).
+* Add STRAIGHT_JOIN statement (by Yago Riveiro).
+* Function.get_parameters() now returns the parameter if only one parameter is
+  given (issue94, by wayne.wuw).
+* sqlparse.split() now removes leading and trailing whitespaces from split
+  statements.
+* Add USE as keyword token (by mulos).
+* Improve parsing of PEP249-style placeholders (issue103).
+
+
+Release 0.1.7 (Apr 06, 2013)
+----------------------------
+
+Bug Fixes
+ * Fix Python 3 compatibility of sqlformat script (by Piet Delport).
+ * Fix parsing of SQL statements that contain binary data (by Alexey
+   Malyshev).
+ * Fix a bug where keywords were identified as aliased identifiers in
+   invalid SQL statements.
+ * Fix parsing of identifier lists where identifiers are keywords too
+   (issue10).
+
+Enhancements
+ * Top-level API functions now accept encoding keyword to parse
+   statements in certain encodings more reliably (issue20).
+ * Improve parsing speed when SQL contains CLOBs or BLOBs (issue86).
+ * Improve formatting of ORDER BY clauses (issue89).
+ * Formatter now tries to detect runaway indentations caused by
+   parsing errors or invalid SQL statements. When re-indenting such
+   statements the formatter flips back to column 0 before going crazy.
+
+Other
+ * Documentation updates.
+
+
+Release 0.1.6 (Jan 01, 2013)
+----------------------------
+
+sqlparse is now compatible with Python 3 without any patches. The
+Python 3 version is generated during install by 2to3. You'll need
+distribute to install sqlparse for Python 3.
+
+Bug Fixes
+ * Fix parsing error with dollar-quoted procedure bodies (issue83).
+
+Other
+ * Documentation updates.
+ * Test suite now uses tox and py.test.
+ * py3k fixes (by vthriller).
+ * py3k fixes in setup.py (by Florian Bauer).
+ * setup.py now requires distribute (by Florian Bauer).
+
+
+Release 0.1.5 (Nov 13, 2012)
+----------------------------
+
+Bug Fixes
+ * Improve handling of quoted identifiers (issue78).
+ * Improve grouping and formatting of identifiers with operators (issue53).
+ * Improve grouping and formatting of concatenated strings (issue53).
+ * Improve handling of varchar() (by Mike Amy).
+ * Clean up handling of various SQL elements.
+ * Switch to py.test and clean up tests.
+ * Several minor fixes.
+
+Other
+ * Deprecate sqlparse.SQLParseError. Please use
+   sqlparse.exceptions.SQLParseError instead.
+ * Add caching to speed up processing.
+ * Add experimental filters for token processing.
+ * Add sqlformat.parsestream (by quest).
+
+
+Release 0.1.4 (Apr 20, 2012)
+----------------------------
+
+Bug Fixes
+ * Avoid "stair case" effects when identifiers, functions,
+   placeholders or keywords are mixed in identifier lists (issue45,
+   issue49, issue52) and when asterisks are used as operators
+   (issue58).
+ * Make keyword detection more restrictive (issue47).
+ * Improve handling of CASE statements (issue46).
+ * Fix statement splitting when parsing recursive statements (issue57,
+   thanks to piranna).
+ * Fix for negative numbers (issue56, thanks to kevinjqiu).
+ * Pretty format comments in identifier lists (issue59).
+ * Several minor bug fixes and improvements.
+
+
+Release 0.1.3 (Jul 29, 2011)
+----------------------------
+
+Bug Fixes
+ * Improve parsing of floats (thanks to Kris).
+ * When formatting a statement a space before LIMIT was removed (issue35).
+ * Fix strip_comments flag (issue38, reported by ooberm...@gmail.com).
+ * Avoid parsing names as keywords (issue39, reported by djo...@taket.org).
+ * Make sure identifier lists in subselects are grouped (issue40,
+   reported by djo...@taket.org).
+ * Split statements with IF as functions correctly (issue33 and
+   issue29, reported by charles....@unige.ch).
+ * Relax detection of keywords, esp. when used as function names
+   (issue36, nyuhu...@gmail.com).
+ * Don't treat single characters as keywords (issue32).
+ * Improve parsing of stand-alone comments (issue26).
+ * Detection of placeholders in parameterized queries (issue22,
+   reported by Glyph Lefkowitz).
+ * Add parsing of MS Access column names with braces (issue27,
+   reported by frankz...@gmail.com).
+
+Other
+ * Replace Django by Flask in App Engine frontend (issue11).
+
+
+Release 0.1.2 (Nov 23, 2010)
+----------------------------
+
+Bug Fixes
+ * Fixed incorrect detection of keyword fragments embedded in names (issue7,
+   reported and initial patch by andyboyko).
+ * Stricter detection of identifier aliases (issue8, reported by estama).
+ * WHERE grouping consumed closing parenthesis (issue9, reported by estama).
+ * Fixed an issue with trailing whitespaces (reported by Kris).
+ * Better detection of escaped single quotes (issue13, reported by
+   Martin Brochhaus, patch by bluemaro with test case by Dan Carley).
+ * Ignore identifier in double-quotes when changing cases (issue 21).
+ * Lots of minor fixes targeting encoding, indentation, statement
+   parsing and more (issues 12, 14, 15, 16, 18, 19).
+ * Code cleanup with a pinch of refactoring.
+
+
+Release 0.1.1 (May 6, 2009)
+---------------------------
+
+Bug Fixes
+ * Lexer preserves original line breaks (issue1).
+ * Improved identifier parsing: backtick quotes, wildcards, T-SQL variables
+   prefixed with @.
+ * Improved parsing of identifier lists (issue2).
+ * Recursive recognition of AS (issue4) and CASE.
+ * Improved support for UPDATE statements.
+
+Other
+ * Code cleanup and better test coverage.
+
+
+Release 0.1.0 (Apr 8, 2009)
+---------------------------
+ * Initial release.
diff --git a/CHANGES b/CHANGES
deleted file mode 100644
index 374c060..0000000
--- a/CHANGES
+++ /dev/null
@@ -1,324 +0,0 @@
-Development Version
--------------------
-
-IMPORTANT: The supported Python versions have changed with this release.
-sqlparse 0.2.x supports Python 2.7 and Python >= 3.3.
-
-Internal Changes
-* sqlparse.SQLParseError was removed from top-level module and moved to
-  sqlparse.exceptions.
-* sqlparse.sql.Token.to_unicode was removed.
-
-Enhancements
-* Support WHILE loops (issue215, by shenlongxing).
-* Better support for CTEs (issue217, by Andrew Tipton).
-
-Bug Fixes
-* Leading whitespaces are now removed when format() is called with
-  strip_whitespace=True (issue213, by shenlongxing).
-* Fix typo in keywords list (issue229, by cbeloni).
-* Fix parsing of functions in comparisons (issue230, by saaj).
-* Minor bug fixes (issue101).
-
-
-Release 0.1.19 (Mar 07, 2015)
------------------------------
-Bug Fixes
-* Fix IndexError when statement contains WITH clauses (issue205).
-
-
-Release 0.1.18 (Oct 25, 2015)
------------------------------
-
-Bug Fixes
-* Remove universal wheel support, added in 0.1.17 by mistake.
-
-
-Release 0.1.17 (Oct 24, 2015)
------------------------------
-
-Enhancements
-* Speed up parsing of large SQL statements (pull request: issue201, fixes the
-  following issues: issue199, issue135, issue62, issue41, by Ryan Wooden).
-
-Bug Fixes
-* Fix another splitter bug regarding DECLARE (issue194).
-
-Misc
-* Packages on PyPI are signed from now on.
-
-
-Release 0.1.16 (Jul 26, 2015)
------------------------------
-
-Bug Fixes
-* Fix a regression in get_alias() introduced in 0.1.15 (issue185).
-* Fix a bug in the splitter regarding DECLARE (issue193).
-* sqlformat command line tool doesn't duplicat newlines anymore (issue191).
-* Don't mix up MySQL comments starting with hash and MSSQL
-  temp tables (issue192).
-* Statement.get_type() now ignores comments at the beginning of
-  a statement (issue186).
-
-
-Release 0.1.15 (Apr 15, 2015)
------------------------------
-
-Bug Fixes
-* Fix a regression for identifiers with square bracktes
-  notation (issue153, by darikg).
-* Add missing SQL types (issue154, issue155, issue156, by jukebox).
-* Fix parsing of multi-line comments (issue172, by JacekPliszka).
-* Fix parsing of escaped backslashes (issue174, by caseyching).
-* Fix parsing of identifiers starting with underscore (issue175).
-* Fix misinterpretation of IN keyword (issue183).
-
-Enhancements
-* Improve formatting of HAVING statements.
-* Improve parsing of inline comments (issue163).
-* Group comments to parent object (issue128, issue160).
-* Add double precision builtin (issue169, by darikg).
-* Add support for square bracket array indexing (issue170, issue176,
-  issue177 by darikg).
-* Improve grouping of aliased elements (issue167, by darikg).
-* Support comments starting with '#' character (issue178).
-
-
-Release 0.1.14 (Nov 30, 2014)
------------------------------
-
-Bug Fixes
-* Floats in UPDATE statements are now handled correctly (issue145).
-* Properly handle string literals in comparisons (issue148, change proposed
-  by aadis).
-* Fix indentation when using tabs (issue146).
-
-Enhancements
-* Improved formatting in list when newlines precede commas (issue140).
-
-
-Release 0.1.13 (Oct 09, 2014)
------------------------------
-
-Bug Fixes
-* Fix a regression in handling of NULL keywords introduced in 0.1.12.
-
-
-Release 0.1.12 (Sep 20, 2014)
------------------------------
-
-Bug Fixes
-* Fix handling of NULL keywords in aliased identifiers.
-* Fix SerializerUnicode to split unquoted newlines (issue131, by Michael Schuller).
-* Fix handling of modulo operators without spaces (by gavinwahl).
-
-Enhancements
-* Improve parsing of identifier lists containing placeholders.
-* Speed up query parsing of unquoted lines (by Michael Schuller).
-
-
-Release 0.1.11 (Feb 07, 2014)
------------------------------
-
-Bug Fixes
-* Fix incorrect parsing of string literals containing line breaks (issue118).
-* Fix typo in keywords, add MERGE, COLLECT keywords (issue122/124,
-  by Cristian Orellana).
-* Improve parsing of string literals in columns.
-* Fix parsing and formatting of statements containing EXCEPT keyword.
-* Fix Function.get_parameters() (issue126/127, by spigwitmer).
-
-Enhancements
-* Classify DML keywords (issue116, by Victor Hahn).
-* Add missing FOREACH keyword.
-* Grouping of BEGIN/END blocks.
-
-Other
-* Python 2.5 isn't automatically tested anymore, neither Travis nor Tox
-  still support it out of the box.
-
-
-Release 0.1.10 (Nov 02, 2013)
------------------------------
-
-Bug Fixes
-* Removed buffered reading again, it obviously causes wrong parsing in some rare
-  cases (issue114).
-* Fix regression in setup.py introduced 10 months ago (issue115).
-
-Enhancements
-* Improved support for JOINs, by Alexander Beedie.
-
-
-Release 0.1.9 (Sep 28, 2013)
-----------------------------
-
-Bug Fixes
-* Fix an regression introduced in 0.1.5 where sqlparse didn't properly
-  distinguished between single and double quoted strings when tagging
-  identifier (issue111).
-
-Enhancements
-* New option to truncate long string literals when formatting.
-* Scientific numbers are pares correctly (issue107).
-* Support for arithmetic expressions (issue109, issue106; by prudhvi).
-
-
-Release 0.1.8 (Jun 29, 2013)
-----------------------------
-
-Bug Fixes
-* Whitespaces within certain keywords are now allowed (issue97, patch proposed
-  by xcombelle).
-
-Enhancements
-* Improve parsing of assignments in UPDATE statements (issue90).
-* Add STRAIGHT_JOIN statement (by Yago Riveiro).
-* Function.get_parameters() now returns the parameter if only one parameter is
-  given (issue94, by wayne.wuw).
-* sqlparse.split() now removes leading and trailing whitespaces from splitted
-  statements.
-* Add USE as keyword token (by mulos).
-* Improve parsing of PEP249-style placeholders (issue103).
-
-
-Release 0.1.7 (Apr 06, 2013)
-----------------------------
-
-Bug Fixes
- * Fix Python 3 compatibility of sqlformat script (by Piet Delport).
- * Fix parsing of SQL statements that contain binary data (by Alexey
-   Malyshev).
- * Fix a bug where keywords were identified as aliased identifiers in
-   invalid SQL statements.
- * Fix parsing of identifier lists where identifiers are keywords too
-   (issue10).
-
-Enhancements
- * Top-level API functions now accept encoding keyword to parse
-   statements in certain encodings more reliable (issue20).
- * Improve parsing speed when SQL contains CLOBs or BLOBs (issue86).
- * Improve formatting of ORDER BY clauses (issue89).
- * Formatter now tries to detect runaway indentations caused by
-   parsing errors or invalid SQL statements. When re-indenting such
-   statements the formatter flips back to column 0 before going crazy.
-
-Other
- * Documentation updates.
-
-
-Release 0.1.6 (Jan 01, 2013)
-----------------------------
-
-sqlparse is now compatible with Python 3 without any patches. The
-Python 3 version is generated during install by 2to3. You'll need
-distribute to install sqlparse for Python 3.
-
-Bug Fixes
- * Fix parsing error with dollar-quoted procedure bodies (issue83).
-
-Other
- * Documentation updates.
- * Test suite now uses tox and py.test.
- * py3k fixes (by vthriller).
- * py3k fixes in setup.py (by Florian Bauer).
- * setup.py now requires distribute (by Florian Bauer).
-
-
-Release 0.1.5 (Nov 13, 2012)
-----------------------------
-
-Bug Fixes
- * Improve handling of quoted identifiers (issue78).
- * Improve grouping and formatting of identifiers with operators (issue53).
- * Improve grouping and formatting of concatenated strings (issue53).
- * Improve handling of varchar() (by Mike Amy).
- * Clean up handling of various SQL elements.
- * Switch to py.test and clean up tests.
- * Several minor fixes.
-
-Other
- * Deprecate sqlparse.SQLParseError. Please use
-   sqlparse.exceptions.SQLParseError instead.
- * Add caching to speed up processing.
- * Add experimental filters for token processing.
- * Add sqlformat.parsestream (by quest).
-
-
-Release 0.1.4 (Apr 20, 2012)
-----------------------------
-
-Bug Fixes
- * Avoid "stair case" effects when identifiers, functions,
-   placeholders or keywords are mixed in identifier lists (issue45,
-   issue49, issue52) and when asterisks are used as operators
-   (issue58).
- * Make keyword detection more restrict (issue47).
- * Improve handling of CASE statements (issue46).
- * Fix statement splitting when parsing recursive statements (issue57,
-   thanks to piranna).
- * Fix for negative numbers (issue56, thanks to kevinjqiu).
- * Pretty format comments in identifier lists (issue59).
- * Several minor bug fixes and improvements.
-
-
-Release 0.1.3 (Jul 29, 2011)
-----------------------------
-
-Bug Fixes
- * Improve parsing of floats (thanks to Kris).
- * When formatting a statement a space before LIMIT was removed (issue35).
- * Fix strip_comments flag (issue38, reported by ooberm...@gmail.com).
- * Avoid parsing names as keywords (issue39, reported by djo...@taket.org).
- * Make sure identifier lists in subselects are grouped (issue40,
-   reported by djo...@taket.org).
- * Split statements with IF as functions correctly (issue33 and
-   issue29, reported by charles....@unige.ch).
- * Relax detection of keywords, esp. when used as function names
-   (issue36, nyuhu...@gmail.com).
- * Don't treat single characters as keywords (issue32).
- * Improve parsing of stand-alone comments (issue26).
- * Detection of placeholders in paramterized queries (issue22,
-   reported by Glyph Lefkowitz).
- * Add parsing of MS Access column names with braces (issue27,
-   reported by frankz...@gmail.com).
-
-Other
- * Replace Django by Flask in App Engine frontend (issue11).
-
-
-Release 0.1.2 (Nov 23, 2010)
-----------------------------
-
-Bug Fixes
- * Fixed incorrect detection of keyword fragments embed in names (issue7,
-   reported and initial patch by andyboyko).
- * Stricter detection of identfier aliases (issue8, reported by estama).
- * WHERE grouping consumed closing parenthesis (issue9, reported by estama).
- * Fixed an issue with trailing whitespaces (reported by Kris).
- * Better detection of escaped single quotes (issue13, reported by
-   Martin Brochhaus, patch by bluemaro with test case by Dan Carley).
- * Ignore identifier in double-quotes when changing cases (issue 21).
- * Lots of minor fixes targeting encoding, indentation, statement
-   parsing and more (issues 12, 14, 15, 16, 18, 19).
- * Code cleanup with a pinch of refactoring.
-
-
-Release 0.1.1 (May 6, 2009)
----------------------------
-
-Bug Fixes
- * Lexers preserves original line breaks (issue1).
- * Improved identifier parsing: backtick quotes, wildcards, T-SQL variables
-   prefixed with @.
- * Improved parsing of identifier lists (issue2).
- * Recursive recognition of AS (issue4) and CASE.
- * Improved support for UPDATE statements.
-
-Other
- * Code cleanup and better test coverage.
-
-
-Release 0.1.0 (Apr 8, 2009)
----------------------------
- * Initial release.
diff --git a/COPYING b/COPYING
deleted file mode 100644
index 7b158da..0000000
--- a/COPYING
+++ /dev/null
@@ -1,25 +0,0 @@
-Copyright (c) 2009, Andi Albrecht <albrecht.andi@gmail.com>
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of the authors nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..7b158da
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,25 @@
+Copyright (c) 2009, Andi Albrecht <albrecht.andi@gmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the name of the authors nor the names of its contributors may be
+      used to endorse or promote products derived from this software without
+      specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
-- 
cgit v1.2.1


From e006e1689d5bb9ad614b5155e2ff17f7e51b83d8 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 18:45:41 -0700
Subject: Update compat and utils

---
 sqlparse/compat.py | 23 ++++++++++----
 sqlparse/utils.py  | 88 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 98 insertions(+), 13 deletions(-)

diff --git a/sqlparse/compat.py b/sqlparse/compat.py
index 6b26384..334883b 100644
--- a/sqlparse/compat.py
+++ b/sqlparse/compat.py
@@ -14,29 +14,40 @@ PY2 = sys.version_info[0] == 2
 PY3 = sys.version_info[0] == 3
 
 if PY3:
+    def u(s):
+        return str(s)
+
+
+    range = range
     text_type = str
     string_types = (str,)
     from io import StringIO
 
-    def u(s):
-        return str(s)
 
 elif PY2:
+    def u(s, encoding=None):
+        encoding = encoding or 'unicode-escape'
+        try:
+            return unicode(s)
+        except UnicodeDecodeError:
+            return unicode(s, encoding)
+
+
+    range = xrange
     text_type = unicode
     string_types = (basestring,)
-    from StringIO import StringIO  # flake8: noqa
-
-    def u(s):
-        return unicode(s)
+    from StringIO import StringIO
 
 
 # Directly copied from six:
 def with_metaclass(meta, *bases):
     """Create a base class with a metaclass."""
+
     # This requires a bit of explanation: the basic idea is to make a dummy
     # metaclass for one level of class instantiation that replaces itself with
     # the actual metaclass.
     class metaclass(meta):
         def __new__(cls, name, this_bases, d):
             return meta(name, bases, d)
+
     return type.__new__(metaclass, 'temporary_class', (), {})
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index 7db9a96..f2372c2 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -1,16 +1,13 @@
-'''
-Created on 17/05/2012
-
-@author: piranna
-'''
-
+import itertools
 import re
-from collections import OrderedDict
+from collections import OrderedDict, deque
+from contextlib import contextmanager
 
 
 class Cache(OrderedDict):
     """Cache with LRU algorithm using an OrderedDict as basis
     """
+
     def __init__(self, maxsize=100):
         OrderedDict.__init__(self)
 
@@ -113,3 +110,80 @@ def split_unquoted_newlines(text):
         else:
             outputlines[-1] += line
     return outputlines
+
+
+def remove_quotes(val):
+    """Helper that removes surrounding quotes from strings."""
+    if val is None:
+        return
+    if val[0] in ('"', "'") and val[0] == val[-1]:
+        val = val[1:-1]
+    return val
+
+
+def recurse(*cls):
+    def wrap(f):
+        def wrapped_f(tlist):
+            for sgroup in tlist.get_sublists():
+                if not isinstance(sgroup, cls):
+                    wrapped_f(sgroup)
+            f(tlist)
+
+        return wrapped_f
+
+    return wrap
+
+
+def imt(token, i=None, m=None, t=None):
+    """Aid function to refactor comparisons for Instance, Match and TokenType
+    Aid fun
+    :param token:
+    :param i: Class or Tuple/List of Classes
+    :param m: Tuple of TokenType & Value. Can be list of Tuple for multiple
+    :param t: TokenType or Tuple/List of TokenTypes
+    :return:  bool
+    """
+    t = (t,) if t and not isinstance(t, (list, tuple)) else t
+    m = (m,) if m and not isinstance(m, (list,)) else m
+
+    if token is None:
+        return False
+    elif i is not None and isinstance(token, i):
+        return True
+    elif m is not None and any((token.match(*x) for x in m)):
+        return True
+    elif t is not None and token.ttype in t:
+        return True
+    else:
+        return False
+
+
+def find_matching(tlist, token, M1, M2):
+    idx = tlist.token_index(token)
+    depth = 0
+    for token in tlist[idx:]:
+        if token.match(*M1):
+            depth += 1
+        elif token.match(*M2):
+            depth -= 1
+            if depth == 0:
+                return token
+
+
+def consume(iterator, n):
+    """Advance the iterator n-steps ahead. If n is none, consume entirely."""
+    deque(itertools.islice(iterator, n), maxlen=0)
+
+
+@contextmanager
+def offset(filter_, n=0):
+    filter_.offset += n
+    yield
+    filter_.offset -= n
+
+
+@contextmanager
+def indent(filter_, n=1):
+    filter_.indent += n
+    yield
+    filter_.indent -= n
-- 
cgit v1.2.1


From d725e0c81afc6907ad5fec71a53f724fa0e3f5c3 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 19:04:21 -0700
Subject: update sql

---
 sqlparse/sql.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index f357572..8a485f3 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -77,7 +77,7 @@ class Token(object):
 
         if regex:
             if isinstance(values, string_types):
-                values = set([values])
+                values = {values}
 
             if self.ttype is T.Keyword:
                 values = set(re.compile(v, re.IGNORECASE) for v in values)
@@ -150,7 +150,7 @@ class TokenList(Token):
         if tokens is None:
             tokens = []
         self.tokens = tokens
-        Token.__init__(self, None, self._to_string())
+        super(TokenList, self).__init__(None, self.__str__())
 
     def __unicode__(self):
         return self._to_string()
@@ -213,12 +213,12 @@ class TokenList(Token):
             else:
                 yield token
 
-#    def __iter__(self):
-#        return self
-#
-#    def next(self):
-#        for token in self.tokens:
-#            yield token
+    # def __iter__(self):
+    #     return self
+    #
+    # def next(self):
+    #     for token in self.tokens:
+    #         yield token
 
     def is_group(self):
         return True
-- 
cgit v1.2.1


From dbf8a624e091e1da24a7a90c4ff59d88ce816b8f Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 19:17:47 -0700
Subject: adding powerful _token_matching and imt helper

---
 sqlparse/sql.py | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 8a485f3..ccb6924 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -7,6 +7,7 @@ import sys
 
 from sqlparse import tokens as T
 from sqlparse.compat import string_types, u
+from sqlparse.utils import imt
 
 
 class Token(object):
@@ -232,6 +233,27 @@ class TokenList(Token):
     def _groupable_tokens(self):
         return self.tokens
 
+    def _token_matching(self, funcs, start=0, end=None, reverse=False):
+        """next token that match functions"""
+        if start is None:
+            return None
+
+        if not isinstance(start, int):
+            start = self.token_index(start) + 1
+
+        if not isinstance(funcs, (list, tuple)):
+            funcs = (funcs,)
+
+        if reverse:
+            iterable = iter(reversed(self.tokens[end:start - 1]))
+        else:
+            iterable = self.tokens[start:end]
+
+        for token in iterable:
+            for func in funcs:
+                if func(token):
+                    return token
+
     def token_first(self, ignore_whitespace=True, ignore_comments=False):
         """Returns the first child token.
 
@@ -244,10 +266,14 @@ class TokenList(Token):
         for token in self.tokens:
             if ignore_whitespace and token.is_whitespace():
                 continue
-            if ignore_comments and isinstance(token, Comment):
+            if ignore_comments and imt(token, i=Comment):
                 continue
             return token
 
+    def token_next_by(self, i=None, m=None, t=None, idx=0, end=None):
+        funcs = lambda tk: imt(tk, i, m, t)
+        return self._token_matching(funcs, idx, end)
+
     def token_next_by_instance(self, idx, clss, end=None):
         """Returns the next token matching a class.
 
@@ -403,7 +429,7 @@ class TokenList(Token):
 
         # "name alias" or "complicated column expression alias"
         if len(self.tokens) > 2 \
-           and self.token_next_by_type(0, T.Whitespace) is not None:
+            and self.token_next_by_type(0, T.Whitespace) is not None:
             return self._get_first_name(reverse=True)
 
         return None
@@ -677,11 +703,9 @@ class Function(TokenList):
         """Return a list of parameters."""
         parenthesis = self.tokens[-1]
         for t in parenthesis.tokens:
-            if isinstance(t, IdentifierList):
+            if imt(t, i=IdentifierList):
                 return t.get_identifiers()
-            elif (isinstance(t, Identifier) or
-                  isinstance(t, Function) or
-                  t.ttype in T.Literal):
+            elif imt(t, i=(Function, Identifier), t=T.Literal):
                 return [t, ]
         return []
 
-- 
cgit v1.2.1


From 5a1830554f9c1d6b626f57fd88c19c6f7063b434 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 19:42:08 -0700
Subject: generalize group_tokens for more use cases

---
 sqlparse/engine/grouping.py | 14 ++++----------
 sqlparse/sql.py             | 34 +++++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 982488b..ab519f0 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -422,19 +422,13 @@ def group_order(tlist):
 
 def align_comments(tlist):
     [align_comments(sgroup) for sgroup in tlist.get_sublists()]
-    idx = 0
-    token = tlist.token_next_by_instance(idx, sql.Comment)
+    token = tlist.token_next_by(i=sql.Comment)
     while token:
         before = tlist.token_prev(tlist.token_index(token))
         if isinstance(before, sql.TokenList):
-            grp = tlist.tokens_between(before, token)[1:]
-            before.tokens.extend(grp)
-            for t in grp:
-                tlist.tokens.remove(t)
-            idx = tlist.token_index(before) + 1
-        else:
-            idx = tlist.token_index(token) + 1
-        token = tlist.token_next_by_instance(idx, sql.Comment)
+            tokens = tlist.tokens_between(before, token)
+            token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
+        token = tlist.token_next_by(i=sql.Comment, idx=token)
 
 
 def group(tlist):
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index ccb6924..a9884a5 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -388,20 +388,36 @@ class TokenList(Token):
         start_idx = self.token_index(start)
         return self.tokens[start_idx:end_idx]
 
-    def group_tokens(self, grp_cls, tokens, ignore_ws=False):
+    def group_tokens(self, grp_cls, tokens, ignore_ws=False, extend=False):
         """Replace tokens by an instance of *grp_cls*."""
-        idx = self.token_index(tokens[0])
         if ignore_ws:
             while tokens and tokens[-1].is_whitespace():
                 tokens = tokens[:-1]
-        for t in tokens:
-            self.tokens.remove(t)
-        grp = grp_cls(tokens)
+
+        left = tokens[0]
+        idx = self.token_index(left)
+
+        if extend:
+            if not isinstance(left, grp_cls):
+                grp = grp_cls([left])
+                self.tokens.remove(left)
+                self.tokens.insert(idx, grp)
+                left = grp
+                left.parent = self
+            tokens = tokens[1:]
+            left.tokens.extend(tokens)
+            left.value = left.__str__()
+
+        else:
+            left = grp_cls(tokens)
+            left.parent = self
+            self.tokens.insert(idx, left)
+
         for token in tokens:
-            token.parent = grp
-        grp.parent = self
-        self.tokens.insert(idx, grp)
-        return grp
+            token.parent = left
+            self.tokens.remove(token)
+
+        return left
 
     def insert_before(self, where, token):
         """Inserts *token* before *where*."""
-- 
cgit v1.2.1


From f26719dc8d2c9cf4bf85501bb68cc4ed3f4da86d Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 19:53:54 -0700
Subject: Introduce @recurse to refactor recursion

---
 sqlparse/engine/grouping.py | 18 +++++++-----------
 sqlparse/utils.py           |  5 +++++
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index ab519f0..df967c3 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -4,6 +4,7 @@ import itertools
 
 from sqlparse import sql
 from sqlparse import tokens as T
+from sqlparse.utils import recurse
 
 
 def _group_left_right(tlist, ttype, value, cls,
@@ -229,9 +230,8 @@ def group_identifier(tlist):
         token = _next_token(tlist, idx)
 
 
+@recurse(sql.IdentifierList)
 def group_identifier_list(tlist):
-    [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, sql.IdentifierList)]
     # Allowed list items
     fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function,
                                             sql.Case)),
@@ -327,9 +327,8 @@ def group_brackets(tlist):
         token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
 
 
+@recurse(sql.Comment)
 def group_comments(tlist):
-    [group_comments(sgroup) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, sql.Comment)]
     idx = 0
     token = tlist.token_next_by_type(idx, T.Comment)
     while token:
@@ -348,9 +347,8 @@ def group_comments(tlist):
         token = tlist.token_next_by_type(idx, T.Comment)
 
 
+@recurse(sql.Where)
 def group_where(tlist):
-    [group_where(sgroup) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, sql.Where)]
     idx = 0
     token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
     stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING')
@@ -368,10 +366,9 @@ def group_where(tlist):
         token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
 
 
+@recurse(sql.Identifier, sql.Function, sql.Case)
 def group_aliased(tlist):
     clss = (sql.Identifier, sql.Function, sql.Case)
-    [group_aliased(sgroup) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, clss)]
     idx = 0
     token = tlist.token_next_by_instance(idx, clss)
     while token:
@@ -390,9 +387,8 @@ def group_typecasts(tlist):
     _group_left_right(tlist, T.Punctuation, '::', sql.Identifier)
 
 
+@recurse(sql.Function)
 def group_functions(tlist):
-    [group_functions(sgroup) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, sql.Function)]
     idx = 0
     token = tlist.token_next_by_type(idx, T.Name)
     while token:
@@ -420,8 +416,8 @@ def group_order(tlist):
         token = tlist.token_next_by_type(idx, T.Keyword.Order)
 
 
+@recurse()
 def align_comments(tlist):
-    [align_comments(sgroup) for sgroup in tlist.get_sublists()]
     token = tlist.token_next_by(i=sql.Comment)
     while token:
         before = tlist.token_prev(tlist.token_index(token))
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index f2372c2..66dd8bc 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -122,6 +122,11 @@ def remove_quotes(val):
 
 
 def recurse(*cls):
+    """Function decorator to help with recursion
+
+    :param cls: Classes to not recurse over
+    :return: function
+    """
     def wrap(f):
         def wrapped_f(tlist):
             for sgroup in tlist.get_sublists():
-- 
cgit v1.2.1


From 4364b250c6b0632e7a39f8153e864a7f3d63833e Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 20:03:20 -0700
Subject: Add group matching M_tokens and refactor group matching

remove slots in subclasses
---
 sqlparse/engine/grouping.py | 112 +++++++++-----------------------------------
 sqlparse/sql.py             |  34 ++++++--------
 sqlparse/utils.py           |   2 +-
 3 files changed, 38 insertions(+), 110 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index df967c3..a34706f 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -4,7 +4,7 @@ import itertools
 
 from sqlparse import sql
 from sqlparse import tokens as T
-from sqlparse.utils import recurse
+from sqlparse.utils import recurse, imt, find_matching
 
 
 def _group_left_right(tlist, ttype, value, cls,
@@ -47,68 +47,36 @@ def _group_left_right(tlist, ttype, value, cls,
                                            ttype, value)
 
 
-def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value):
-    depth = 1
-    for tok in tlist.tokens[idx:]:
-        if tok.match(start_ttype, start_value):
-            depth += 1
-        elif tok.match(end_ttype, end_value):
-            depth -= 1
-            if depth == 1:
-                return tok
-    return None
-
-
-def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
-                    cls, include_semicolon=False, recurse=False):
-
-    [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
-                     cls, include_semicolon) for sgroup in tlist.get_sublists()
-     if recurse]
-    if isinstance(tlist, cls):
-        idx = 1
-    else:
-        idx = 0
-    token = tlist.token_next_match(idx, start_ttype, start_value)
+def _group_matching(tlist, cls):
+    """Groups Tokens that have beginning and end. ie. parenthesis, brackets.."""
+    idx = 1 if imt(tlist, i=cls) else 0
+
+    token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
     while token:
-        tidx = tlist.token_index(token)
-        end = _find_matching(tidx, tlist, start_ttype, start_value,
-                             end_ttype, end_value)
-        if end is None:
-            idx = tidx + 1
-        else:
-            if include_semicolon:
-                next_ = tlist.token_next(tlist.token_index(end))
-                if next_ and next_.match(T.Punctuation, ';'):
-                    end = next_
-            group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
-            _group_matching(group, start_ttype, start_value,
-                            end_ttype, end_value, cls, include_semicolon)
-            idx = tlist.token_index(group) + 1
-        token = tlist.token_next_match(idx, start_ttype, start_value)
+        end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
+        if end is not None:
+            token = tlist.group_tokens(cls, tlist.tokens_between(token, end))
+            _group_matching(token, cls)
+        token = tlist.token_next_by(m=cls.M_OPEN, idx=token)
 
 
 def group_if(tlist):
-    _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True)
+    _group_matching(tlist, sql.If)
 
 
 def group_for(tlist):
-    _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP',
-                    sql.For, True)
+    _group_matching(tlist, sql.For)
 
 
 def group_foreach(tlist):
-    _group_matching(tlist, T.Keyword, 'FOREACH', T.Keyword, 'END LOOP',
-                    sql.For, True)
+    _group_matching(tlist, sql.For)
 
 
 def group_begin(tlist):
-    _group_matching(tlist, T.Keyword, 'BEGIN', T.Keyword, 'END',
-                    sql.Begin, True)
+    _group_matching(tlist, sql.Begin)
 
 
 def group_as(tlist):
-
     def _right_valid(token):
         # Currently limited to DML/DDL. Maybe additional more non SQL reserved
         # keywords should appear here (see issue8).
@@ -130,7 +98,6 @@ def group_assignment(tlist):
 
 
 def group_comparison(tlist):
-
     def _parts_valid(token):
         return (token.ttype in (T.String.Symbol, T.String.Single,
                                 T.Name, T.Number, T.Number.Float,
@@ -140,13 +107,13 @@ def group_comparison(tlist):
                                       sql.Function))
                 or (token.ttype is T.Keyword
                     and token.value.upper() in ['NULL', ]))
+
     _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison,
                       check_left=_parts_valid, check_right=_parts_valid)
 
 
 def group_case(tlist):
-    _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case,
-                    include_semicolon=True, recurse=True)
+    _group_matching(tlist, sql.Case)
 
 
 def group_identifier(tlist):
@@ -222,7 +189,7 @@ def group_identifier(tlist):
                 and (isinstance(identifier_tokens[0], (sql.Function,
                                                        sql.Parenthesis))
                      or identifier_tokens[0].ttype in (
-                     T.Literal.Number.Integer, T.Literal.Number.Float))):
+                    T.Literal.Number.Integer, T.Literal.Number.Float))):
             group = tlist.group_tokens(sql.Identifier, identifier_tokens)
             idx = tlist.token_index(group, start=idx) + 1
         else:
@@ -284,47 +251,11 @@ def group_identifier_list(tlist):
 
 
 def group_brackets(tlist):
-    """Group parentheses () or square brackets []
-
-        This is just like _group_matching, but complicated by the fact that
-        round brackets can contain square bracket groups and vice versa
-    """
-
-    if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)):
-        idx = 1
-    else:
-        idx = 0
-
-    # Find the first opening bracket
-    token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
-
-    while token:
-        start_val = token.value  # either '(' or '['
-        if start_val == '(':
-            end_val = ')'
-            group_class = sql.Parenthesis
-        else:
-            end_val = ']'
-            group_class = sql.SquareBrackets
-
-        tidx = tlist.token_index(token)
-
-        # Find the corresponding closing bracket
-        end = _find_matching(tidx, tlist, T.Punctuation, start_val,
-                             T.Punctuation, end_val)
-
-        if end is None:
-            idx = tidx + 1
-        else:
-            group = tlist.group_tokens(group_class,
-                                       tlist.tokens_between(token, end))
+    _group_matching(tlist, sql.SquareBrackets)
 
-            # Check for nested bracket groups within this group
-            group_brackets(group)
-            idx = tlist.token_index(group) + 1
 
-        # Find the next opening bracket
-        token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
+def group_parenthesis(tlist):
+    _group_matching(tlist, sql.Parenthesis)
 
 
 @recurse(sql.Comment)
@@ -431,6 +362,7 @@ def group(tlist):
     for func in [
         group_comments,
         group_brackets,
+        group_parenthesis,
         group_functions,
         group_where,
         group_case,
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index a9884a5..777d582 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -552,8 +552,6 @@ class Identifier(TokenList):
     Identifiers may have aliases or typecasts.
     """
 
-    __slots__ = ('value', 'ttype', 'tokens')
-
     def is_wildcard(self):
         """Return ``True`` if this identifier contains a wildcard."""
         token = self.token_next_by_type(0, T.Wildcard)
@@ -588,8 +586,6 @@ class Identifier(TokenList):
 class IdentifierList(TokenList):
     """A list of :class:`~sqlparse.sql.Identifier`\'s."""
 
-    __slots__ = ('value', 'ttype', 'tokens')
-
     def get_identifiers(self):
         """Returns the identifiers.
 
@@ -602,7 +598,8 @@ class IdentifierList(TokenList):
 
 class Parenthesis(TokenList):
     """Tokens between parenthesis."""
-    __slots__ = ('value', 'ttype', 'tokens')
+    M_OPEN = (T.Punctuation, '(')
+    M_CLOSE = (T.Punctuation, ')')
 
     @property
     def _groupable_tokens(self):
@@ -611,8 +608,8 @@ class Parenthesis(TokenList):
 
 class SquareBrackets(TokenList):
     """Tokens between square brackets"""
-
-    __slots__ = ('value', 'ttype', 'tokens')
+    M_OPEN = (T.Punctuation, '[')
+    M_CLOSE = (T.Punctuation, ']')
 
     @property
     def _groupable_tokens(self):
@@ -621,22 +618,22 @@ class SquareBrackets(TokenList):
 
 class Assignment(TokenList):
     """An assignment like 'var := val;'"""
-    __slots__ = ('value', 'ttype', 'tokens')
 
 
 class If(TokenList):
     """An 'if' clause with possible 'else if' or 'else' parts."""
-    __slots__ = ('value', 'ttype', 'tokens')
+    M_OPEN = (T.Keyword, 'IF')
+    M_CLOSE = (T.Keyword, 'END IF')
 
 
 class For(TokenList):
     """A 'FOR' loop."""
-    __slots__ = ('value', 'ttype', 'tokens')
+    M_OPEN = (T.Keyword, ('FOR', 'FOREACH'))
+    M_CLOSE = (T.Keyword, 'END LOOP')
 
 
 class Comparison(TokenList):
     """A comparison used for example in WHERE clauses."""
-    __slots__ = ('value', 'ttype', 'tokens')
 
     @property
     def left(self):
@@ -649,7 +646,6 @@ class Comparison(TokenList):
 
 class Comment(TokenList):
     """A comment."""
-    __slots__ = ('value', 'ttype', 'tokens')
 
     def is_multiline(self):
         return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
@@ -657,13 +653,15 @@ class Comment(TokenList):
 
 class Where(TokenList):
     """A WHERE clause."""
-    __slots__ = ('value', 'ttype', 'tokens')
+    M_OPEN = (T.Keyword, 'WHERE')
+    M_CLOSE = (T.Keyword,
+               ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING'))
 
 
 class Case(TokenList):
     """A CASE statement with one or more WHEN and possibly an ELSE part."""
-
-    __slots__ = ('value', 'ttype', 'tokens')
+    M_OPEN = (T.Keyword, 'CASE')
+    M_CLOSE = (T.Keyword, 'END')
 
     def get_cases(self):
         """Returns a list of 2-tuples (condition, value).
@@ -713,8 +711,6 @@ class Case(TokenList):
 class Function(TokenList):
     """A function or procedure call."""
 
-    __slots__ = ('value', 'ttype', 'tokens')
-
     def get_parameters(self):
         """Return a list of parameters."""
         parenthesis = self.tokens[-1]
@@ -728,5 +724,5 @@ class Function(TokenList):
 
 class Begin(TokenList):
     """A BEGIN/END block."""
-
-    __slots__ = ('value', 'ttype', 'tokens')
+    M_OPEN = (T.Keyword, 'BEGIN')
+    M_CLOSE = (T.Keyword, 'END')
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index 66dd8bc..90acb5c 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -166,7 +166,7 @@ def imt(token, i=None, m=None, t=None):
 def find_matching(tlist, token, M1, M2):
     idx = tlist.token_index(token)
     depth = 0
-    for token in tlist[idx:]:
+    for token in tlist.tokens[idx:]:
         if token.match(*M1):
             depth += 1
         elif token.match(*M2):
-- 
cgit v1.2.1


From b5176ccebcb5f16913aa87e514e5605515ce1471 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 20:38:04 -0700
Subject: group all the one-offs

---
 sqlparse/engine/grouping.py | 52 ++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 31 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index a34706f..6e0ae91 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -280,21 +280,18 @@ def group_comments(tlist):
 
 @recurse(sql.Where)
 def group_where(tlist):
-    idx = 0
-    token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
-    stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING')
+    token = tlist.token_next_by(m=sql.Where.M_OPEN)
     while token:
-        tidx = tlist.token_index(token)
-        end = tlist.token_next_match(tidx + 1, T.Keyword, stopwords)
+        end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token)
+
         if end is None:
-            end = tlist._groupable_tokens[-1]
+            tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1])
         else:
-            end = tlist.tokens[tlist.token_index(end) - 1]
-        group = tlist.group_tokens(sql.Where,
-                                   tlist.tokens_between(token, end),
-                                   ignore_ws=True)
-        idx = tlist.token_index(group)
-        token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+            tokens = tlist.tokens_between(
+                token, tlist.tokens[tlist.token_index(end) - 1])
+
+        token = tlist.group_tokens(sql.Where, tokens)
+        token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token)
 
 
 @recurse(sql.Identifier, sql.Function, sql.Case)
@@ -320,38 +317,31 @@ def group_typecasts(tlist):
 
 @recurse(sql.Function)
 def group_functions(tlist):
-    idx = 0
-    token = tlist.token_next_by_type(idx, T.Name)
+    token = tlist.token_next_by(t=T.Name)
     while token:
         next_ = tlist.token_next(token)
-        if not isinstance(next_, sql.Parenthesis):
-            idx = tlist.token_index(token) + 1
-        else:
-            func = tlist.group_tokens(sql.Function,
-                                      tlist.tokens_between(token, next_))
-            idx = tlist.token_index(func) + 1
-        token = tlist.token_next_by_type(idx, T.Name)
+        if imt(next_, i=sql.Parenthesis):
+            tokens = tlist.tokens_between(token, next_)
+            token = tlist.group_tokens(sql.Function, tokens)
+        token = tlist.token_next_by(t=T.Name, idx=token)
 
 
 def group_order(tlist):
-    idx = 0
-    token = tlist.token_next_by_type(idx, T.Keyword.Order)
+    """Group together Identifier and Asc/Desc token"""
+    token = tlist.token_next_by(t=T.Keyword.Order)
     while token:
         prev = tlist.token_prev(token)
-        if isinstance(prev, sql.Identifier):
-            ido = tlist.group_tokens(sql.Identifier,
-                                     tlist.tokens_between(prev, token))
-            idx = tlist.token_index(ido) + 1
-        else:
-            idx = tlist.token_index(token) + 1
-        token = tlist.token_next_by_type(idx, T.Keyword.Order)
+        if imt(prev, i=sql.Identifier, t=T.Number):
+            tokens = tlist.tokens_between(prev, token)
+            token = tlist.group_tokens(sql.Identifier, tokens)
+        token = tlist.token_next_by(t=T.Keyword.Order, idx=token)
 
 
 @recurse()
 def align_comments(tlist):
     token = tlist.token_next_by(i=sql.Comment)
     while token:
-        before = tlist.token_prev(tlist.token_index(token))
+        before = tlist.token_prev(token)
         if isinstance(before, sql.TokenList):
             tokens = tlist.tokens_between(before, token)
             token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
-- 
cgit v1.2.1


From 015bf8393286301ca0d4dfd087b4ed305fa71d4a Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 21:09:09 -0700
Subject: refactor _group_left_right

---
 sqlparse/engine/grouping.py | 103 +++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 64 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 6e0ae91..ec3cd86 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -6,45 +6,33 @@ from sqlparse import sql
 from sqlparse import tokens as T
 from sqlparse.utils import recurse, imt, find_matching
 
+M_ROLE = (T.Keyword, ('null', 'role'))
+M_SEMICOLON = (T.Punctuation, ';')
 
-def _group_left_right(tlist, ttype, value, cls,
-                      check_right=lambda t: True,
-                      check_left=lambda t: True,
-                      include_semicolon=False):
-    [_group_left_right(sgroup, ttype, value, cls, check_right, check_left,
-                       include_semicolon) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, cls)]
-    idx = 0
-    token = tlist.token_next_match(idx, ttype, value)
+T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
+T_STRING = (T.String, T.String.Single, T.String.Symbol)
+T_NAME = (T.Name, T.Name.Placeholder)
+
+
+def _group_left_right(tlist, m, cls,
+                      valid_left=lambda t: t is not None,
+                      valid_right=lambda t: t is not None,
+                      semicolon=False):
+    """Groups together tokens that are joined by a middle token. ie. x < y"""
+    [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon)
+     for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)]
+
+    token = tlist.token_next_by(m=m)
     while token:
-        right = tlist.token_next(tlist.token_index(token))
-        left = tlist.token_prev(tlist.token_index(token))
-        if right is None or not check_right(right):
-            token = tlist.token_next_match(tlist.token_index(token) + 1,
-                                           ttype, value)
-        elif left is None or not check_left(left):
-            token = tlist.token_next_match(tlist.token_index(token) + 1,
-                                           ttype, value)
-        else:
-            if include_semicolon:
-                sright = tlist.token_next_match(tlist.token_index(right),
-                                                T.Punctuation, ';')
-                if sright is not None:
-                    # only overwrite "right" if a semicolon is actually
-                    # present.
-                    right = sright
-            tokens = tlist.tokens_between(left, right)[1:]
-            if not isinstance(left, cls):
-                new = cls([left])
-                new_idx = tlist.token_index(left)
-                tlist.tokens.remove(left)
-                tlist.tokens.insert(new_idx, new)
-                left = new
-            left.tokens.extend(tokens)
-            for t in tokens:
-                tlist.tokens.remove(t)
-            token = tlist.token_next_match(tlist.token_index(left) + 1,
-                                           ttype, value)
+        left, right = tlist.token_prev(token), tlist.token_next(token)
+
+        if valid_left(left) and valid_right(right):
+            if semicolon:
+                sright = tlist.token_next_by(m=M_SEMICOLON, idx=right)
+                right = sright or right  # only overwrite if a semicolon present.
+            tokens = tlist.tokens_between(left, right)
+            token = tlist.group_tokens(cls, tokens, extend=True)
+        token = tlist.token_next_by(m=m, idx=token)
 
 
 def _group_matching(tlist, cls):
@@ -77,39 +65,26 @@ def group_begin(tlist):
 
 
 def group_as(tlist):
-    def _right_valid(token):
-        # Currently limited to DML/DDL. Maybe additional more non SQL reserved
-        # keywords should appear here (see issue8).
-        return token.ttype not in (T.DML, T.DDL)
-
-    def _left_valid(token):
-        if token.ttype is T.Keyword and token.value in ('NULL',):
-            return True
-        return token.ttype is not T.Keyword
-
-    _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier,
-                      check_right=_right_valid,
-                      check_left=_left_valid)
+    lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.value == 'NULL'
+    rfunc = lambda tk: not imt(tk, t=(T.DML, T.DDL))
+    _group_left_right(tlist, (T.Keyword, 'AS'), sql.Identifier,
+                      valid_left=lfunc, valid_right=rfunc)
 
 
 def group_assignment(tlist):
-    _group_left_right(tlist, T.Assignment, ':=', sql.Assignment,
-                      include_semicolon=True)
+    _group_left_right(tlist, (T.Assignment, ':='), sql.Assignment,
+                      semicolon=True)
 
 
 def group_comparison(tlist):
-    def _parts_valid(token):
-        return (token.ttype in (T.String.Symbol, T.String.Single,
-                                T.Name, T.Number, T.Number.Float,
-                                T.Number.Integer, T.Literal,
-                                T.Literal.Number.Integer, T.Name.Placeholder)
-                or isinstance(token, (sql.Identifier, sql.Parenthesis,
-                                      sql.Function))
-                or (token.ttype is T.Keyword
-                    and token.value.upper() in ['NULL', ]))
+    I_COMPERABLE = (sql.Parenthesis, sql.Function, sql.Identifier)
+    T_COMPERABLE = T_NUMERICAL + T_STRING + T_NAME
+
+    func = lambda tk: imt(tk, t=T_COMPERABLE, i=I_COMPERABLE) or (
+        imt(tk, t=T.Keyword) and tk.value.upper() == 'NULL')
 
-    _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison,
-                      check_left=_parts_valid, check_right=_parts_valid)
+    _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison,
+                      valid_left=func, valid_right=func)
 
 
 def group_case(tlist):
@@ -312,7 +287,7 @@ def group_aliased(tlist):
 
 
 def group_typecasts(tlist):
-    _group_left_right(tlist, T.Punctuation, '::', sql.Identifier)
+    _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier)
 
 
 @recurse(sql.Function)
-- 
cgit v1.2.1


From e4bf805cad1a41b989b59ac58f526c6bc9b0e338 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 21:39:54 -0700
Subject: refactor remove quotes

---
 sqlparse/sql.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 777d582..673e452 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -7,7 +7,7 @@ import sys
 
 from sqlparse import tokens as T
 from sqlparse.compat import string_types, u
-from sqlparse.utils import imt
+from sqlparse.utils import imt, remove_quotes
 
 
 class Token(object):
@@ -185,14 +185,6 @@ class TokenList(Token):
             if (token.is_group() and (max_depth is None or depth < max_depth)):
                 token._pprint_tree(max_depth, depth + 1)
 
-    def _remove_quotes(self, val):
-        """Helper that removes surrounding quotes from strings."""
-        if not val:
-            return val
-        if val[0] in ('"', '\'') and val[-1] == val[0]:
-            val = val[1:-1]
-        return val
-
     def get_token_at_offset(self, offset):
         """Returns the token that is on position offset."""
         idx = 0
@@ -482,7 +474,7 @@ class TokenList(Token):
         prev_ = self.token_prev(self.token_index(dot))
         if prev_ is None:  # something must be verry wrong here..
             return None
-        return self._remove_quotes(prev_.value)
+        return remove_quotes(prev_.value)
 
     def _get_first_name(self, idx=None, reverse=False, keywords=False):
         """Returns the name of the first token with a name"""
@@ -499,7 +491,7 @@ class TokenList(Token):
 
         for tok in tokens:
             if tok.ttype in types:
-                return self._remove_quotes(tok.value)
+                return remove_quotes(tok.value)
             elif isinstance(tok, Identifier) or isinstance(tok, Function):
                 return tok.get_name()
         return None
-- 
cgit v1.2.1


From 2d0c6d6d33e4bb974037f0eeb68d6a05262e4373 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 21:40:25 -0700
Subject: refactor identifier list and alias grouping

---
 sqlparse/engine/grouping.py | 88 ++++++++++++++-------------------------------
 1 file changed, 26 insertions(+), 62 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index ec3cd86..11c2b38 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -8,6 +8,7 @@ from sqlparse.utils import recurse, imt, find_matching
 
 M_ROLE = (T.Keyword, ('null', 'role'))
 M_SEMICOLON = (T.Punctuation, ';')
+M_COMMA = (T.Punctuation, ',')
 
 T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
 T_STRING = (T.String, T.String.Single, T.String.Symbol)
@@ -174,55 +175,21 @@ def group_identifier(tlist):
 
 @recurse(sql.IdentifierList)
 def group_identifier_list(tlist):
-    # Allowed list items
-    fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function,
-                                            sql.Case)),
-                   lambda t: t.is_whitespace(),
-                   lambda t: t.ttype == T.Name,
-                   lambda t: t.ttype == T.Wildcard,
-                   lambda t: t.match(T.Keyword, 'null'),
-                   lambda t: t.match(T.Keyword, 'role'),
-                   lambda t: t.ttype == T.Number.Integer,
-                   lambda t: t.ttype == T.String.Single,
-                   lambda t: t.ttype == T.Name.Placeholder,
-                   lambda t: t.ttype == T.Keyword,
-                   lambda t: isinstance(t, sql.Comparison),
-                   lambda t: isinstance(t, sql.Comment),
-                   lambda t: t.ttype == T.Comment.Multiline,
-                   ]
-    tcomma = tlist.token_next_match(0, T.Punctuation, ',')
-    start = None
-    while tcomma is not None:
-        # Go back one idx to make sure to find the correct tcomma
-        idx = tlist.token_index(tcomma)
-        before = tlist.token_prev(idx)
-        after = tlist.token_next(idx)
-        # Check if the tokens around tcomma belong to a list
-        bpassed = apassed = False
-        for func in fend1_funcs:
-            if before is not None and func(before):
-                bpassed = True
-            if after is not None and func(after):
-                apassed = True
-        if not bpassed or not apassed:
-            # Something's wrong here, skip ahead to next ","
-            start = None
-            tcomma = tlist.token_next_match(idx + 1,
-                                            T.Punctuation, ',')
-        else:
-            if start is None:
-                start = before
-            after_idx = tlist.token_index(after, start=idx)
-            next_ = tlist.token_next(after_idx)
-            if next_ is None or not next_.match(T.Punctuation, ','):
-                # Reached the end of the list
-                tokens = tlist.tokens_between(start, after)
-                group = tlist.group_tokens(sql.IdentifierList, tokens)
-                start = None
-                tcomma = tlist.token_next_match(tlist.token_index(group) + 1,
-                                                T.Punctuation, ',')
-            else:
-                tcomma = next_
+    I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
+                    sql.IdentifierList)  # sql.Operation
+    T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME +
+                    (T.Keyword, T.Comment, T.Wildcard))
+
+    func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST)
+    token = tlist.token_next_by(m=M_COMMA)
+
+    while token:
+        before, after = tlist.token_prev(token), tlist.token_next(token)
+
+        if func(before) and func(after):
+            tokens = tlist.tokens_between(before, after)
+            token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True)
+        token = tlist.token_next_by(m=M_COMMA, idx=token)
 
 
 def group_brackets(tlist):
@@ -269,21 +236,18 @@ def group_where(tlist):
         token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token)
 
 
-@recurse(sql.Identifier, sql.Function, sql.Case)
+@recurse()
 def group_aliased(tlist):
-    clss = (sql.Identifier, sql.Function, sql.Case)
-    idx = 0
-    token = tlist.token_next_by_instance(idx, clss)
+    I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
+               )  # sql.Operation)
+
+    token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
     while token:
-        next_ = tlist.token_next(tlist.token_index(token))
-        if next_ is not None and isinstance(next_, clss):
-            if not next_.value.upper().startswith('VARCHAR'):
-                grp = tlist.tokens_between(token, next_)[1:]
-                token.tokens.extend(grp)
-                for t in grp:
-                    tlist.tokens.remove(t)
-        idx = tlist.token_index(token) + 1
-        token = tlist.token_next_by_instance(idx, clss)
+        next_ = tlist.token_next(token)
+        if imt(next_, i=sql.Identifier):
+            tokens = tlist.tokens_between(token, next_)
+            token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
+        token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token)
 
 
 def group_typecasts(tlist):
-- 
cgit v1.2.1


From 1e770d9b99983fa82375b71c5b3f5d8c6bb1a72e Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Tue, 10 May 2016 21:50:46 -0700
Subject: separate identifier grouping into its components

Leaving the commented-out sql.Operation references in for a future PR.
Small behavior change for invalid identifiers (e.g. 'a.'); tests updated.
---
 sqlparse/engine/grouping.py | 127 +++++++++++++++++---------------------------
 tests/test_grouping.py      |   6 +--
 2 files changed, 53 insertions(+), 80 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 11c2b38..8fb4af1 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import itertools
-
 from sqlparse import sql
 from sqlparse import tokens as T
 from sqlparse.utils import recurse, imt, find_matching
@@ -92,85 +90,57 @@ def group_case(tlist):
     _group_matching(tlist, sql.Case)
 
 
+@recurse(sql.Identifier)
 def group_identifier(tlist):
-    def _consume_cycle(tl, i):
-        # TODO: Usage of Wildcard token is ambivalent here.
-        x = itertools.cycle((
-            lambda y: (y.match(T.Punctuation, '.')
-                       or y.ttype in (T.Operator,
-                                      T.Wildcard,
-                                      T.Name)
-                       or isinstance(y, sql.SquareBrackets)),
-            lambda y: (y.ttype in (T.String.Symbol,
-                                   T.Name,
-                                   T.Wildcard,
-                                   T.Literal.String.Single,
-                                   T.Literal.Number.Integer,
-                                   T.Literal.Number.Float)
-                       or isinstance(y, (sql.Parenthesis,
-                                         sql.SquareBrackets,
-                                         sql.Function)))))
-        for t in tl.tokens[i:]:
-            # Don't take whitespaces into account.
-            if t.ttype is T.Whitespace:
-                yield t
-                continue
-            if next(x)(t):
-                yield t
-            else:
-                if isinstance(t, sql.Comment) and t.is_multiline():
-                    yield t
-                if t.ttype is T.Keyword.Order:
-                    yield t
-                return
-
-    def _next_token(tl, i):
-        # chooses the next token. if two tokens are found then the
-        # first is returned.
-        t1 = tl.token_next_by_type(
-            i, (T.String.Symbol, T.Name, T.Literal.Number.Integer,
-                T.Literal.Number.Float))
-
-        i1 = tl.token_index(t1, start=i) if t1 else None
-        t2_end = None if i1 is None else i1 + 1
-        t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis),
-                                       end=t2_end)
-
-        if t1 and t2:
-            i2 = tl.token_index(t2, start=i)
-            if i1 > i2:
-                return t2
-            else:
-                return t1
-        elif t1:
-            return t1
-        else:
-            return t2
+    T_IDENT = (T.String.Symbol, T.Name)
+
+    token = tlist.token_next_by(t=T_IDENT)
+    while token:
+        token = tlist.group_tokens(sql.Identifier, [token, ])
+        token = tlist.token_next_by(t=T_IDENT, idx=token)
 
-    # bottom up approach: group subgroups first
-    [group_identifier(sgroup) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, sql.Identifier)]
 
-    # real processing
-    idx = 0
-    token = _next_token(tlist, idx)
+def group_period(tlist):
+    lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier),
+                           t=(T.Name, T.String.Symbol,))
+
+    rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function),
+                           t=(T.Name, T.String.Symbol, T.Wildcard))
+
+    _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier,
+                      valid_left=lfunc, valid_right=rfunc)
+
+
+def group_arrays(tlist):
+    token = tlist.token_next_by(i=sql.SquareBrackets)
     while token:
-        identifier_tokens = [token] + list(
-            _consume_cycle(tlist,
-                           tlist.token_index(token, start=idx) + 1))
-        # remove trailing whitespace
-        if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace:
-            identifier_tokens = identifier_tokens[:-1]
-        if not (len(identifier_tokens) == 1
-                and (isinstance(identifier_tokens[0], (sql.Function,
-                                                       sql.Parenthesis))
-                     or identifier_tokens[0].ttype in (
-                    T.Literal.Number.Integer, T.Literal.Number.Float))):
-            group = tlist.group_tokens(sql.Identifier, identifier_tokens)
-            idx = tlist.token_index(group, start=idx) + 1
-        else:
-            idx += 1
-        token = _next_token(tlist, idx)
+        prev = tlist.token_prev(idx=token)
+        if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
+               t=(T.Name, T.String.Symbol,)):
+            tokens = tlist.tokens_between(prev, token)
+            token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
+        token = tlist.token_next_by(i=sql.SquareBrackets, idx=token)
+
+
+@recurse(sql.Identifier)
+def group_operator(tlist):
+    I_CYCLE = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
+               sql.Identifier,)  # sql.Operation)
+    # wilcards wouldn't have operations next to them
+    T_CYCLE = T_NUMERICAL + T_STRING + T_NAME  # + T.Wildcard
+    func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE)
+
+    token = tlist.token_next_by(t=(T.Operator, T.Wildcard))
+    while token:
+        left, right = tlist.token_prev(token), tlist.token_next(token)
+
+        if func(left) and func(right):
+            token.ttype = T.Operator
+            tokens = tlist.tokens_between(left, right)
+            # token = tlist.group_tokens(sql.Operation, tokens)
+            token = tlist.group_tokens(sql.Identifier, tokens)
+
+        token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token)
 
 
 @recurse(sql.IdentifierList)
@@ -295,7 +265,10 @@ def group(tlist):
         group_functions,
         group_where,
         group_case,
+        group_period,
+        group_arrays,
         group_identifier,
+        group_operator,
         group_order,
         group_typecasts,
         group_as,
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
index 7dc1269..daaec9b 100644
--- a/tests/test_grouping.py
+++ b/tests/test_grouping.py
@@ -89,9 +89,9 @@ class TestGrouping(TestCaseBase):
         p = sqlparse.parse('a.')[0]
         self.assert_(isinstance(p.tokens[0], sql.Identifier))
         self.assertEqual(p.tokens[0].has_alias(), False)
-        self.assertEqual(p.tokens[0].get_name(), None)
-        self.assertEqual(p.tokens[0].get_real_name(), None)
-        self.assertEqual(p.tokens[0].get_parent_name(), 'a')
+        self.assertEqual(p.tokens[0].get_name(), 'a')
+        self.assertEqual(p.tokens[0].get_real_name(), 'a')
+        self.assertEqual(p.tokens[0].get_parent_name(), None)
 
     def test_identifier_as_invalid(self):  # issue8
         p = sqlparse.parse('foo as select *')[0]
-- 
cgit v1.2.1


From 6748b48adc76491d3cdef5794ddd0731df0d3418 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Wed, 11 May 2016 04:22:10 -0700
Subject: refactor sql.py functions

---
 examples/column_defs_lowlevel.py |   2 +-
 sqlparse/filters.py              |   4 +-
 sqlparse/sql.py                  | 112 +++++++++++----------------------------
 3 files changed, 32 insertions(+), 86 deletions(-)

diff --git a/examples/column_defs_lowlevel.py b/examples/column_defs_lowlevel.py
index 9e945d4..e804bb2 100644
--- a/examples/column_defs_lowlevel.py
+++ b/examples/column_defs_lowlevel.py
@@ -15,7 +15,7 @@ SQL = """CREATE TABLE foo (
 parsed = sqlparse.parse(SQL)[0]
 
 # extract the parenthesis which holds column definitions
-par = parsed.token_next_by_instance(0, sqlparse.sql.Parenthesis)
+par = parsed.token_next_by(i=sqlparse.sql.Parenthesis)
 
 
 def extract_definitions(token_list):
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
index 68e9b1a..72f17d0 100644
--- a/sqlparse/filters.py
+++ b/sqlparse/filters.py
@@ -200,9 +200,7 @@ class StripCommentsFilter:
 
     def _get_next_comment(self, tlist):
         # TODO(andi) Comment types should be unified, see related issue38
-        token = tlist.token_next_by_instance(0, sql.Comment)
-        if token is None:
-            token = tlist.token_next_by_type(0, T.Comment)
+        token = tlist.token_next_by(i=sql.Comment, t=T.Comment)
         return token
 
     def _process(self, tlist):
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 673e452..9afdac3 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -255,12 +255,9 @@ class TokenList(Token):
         if *ignore_comments* is ``True`` (default: ``False``), comments are
         ignored too.
         """
-        for token in self.tokens:
-            if ignore_whitespace and token.is_whitespace():
-                continue
-            if ignore_comments and imt(token, i=Comment):
-                continue
-            return token
+        funcs = lambda tk: not ((ignore_whitespace and tk.is_whitespace()) or
+                                (ignore_comments and imt(tk, i=Comment)))
+        return self._token_matching(funcs)
 
     def token_next_by(self, i=None, m=None, t=None, idx=0, end=None):
         funcs = lambda tk: imt(tk, i, m, t)
@@ -274,48 +271,26 @@ class TokenList(Token):
 
         If no matching token can be found ``None`` is returned.
         """
-        if not isinstance(clss, (list, tuple)):
-            clss = (clss,)
-
-        for token in self.tokens[idx:end]:
-            if isinstance(token, clss):
-                return token
+        funcs = lambda tk: imt(tk, i=clss)
+        return self._token_matching(funcs, idx, end)
 
     def token_next_by_type(self, idx, ttypes):
         """Returns next matching token by it's token type."""
-        if not isinstance(ttypes, (list, tuple)):
-            ttypes = [ttypes]
-
-        for token in self.tokens[idx:]:
-            if token.ttype in ttypes:
-                return token
+        funcs = lambda tk: imt(tk, t=ttypes)
+        return self._token_matching(funcs, idx)
 
     def token_next_match(self, idx, ttype, value, regex=False):
         """Returns next token where it's ``match`` method returns ``True``."""
-        if not isinstance(idx, int):
-            idx = self.token_index(idx)
-
-        for n in range(idx, len(self.tokens)):
-            token = self.tokens[n]
-            if token.match(ttype, value, regex):
-                return token
+        funcs = lambda tk: imt(tk, m=(ttype, value, regex))
+        return self._token_matching(funcs, idx)
 
     def token_not_matching(self, idx, funcs):
-        for token in self.tokens[idx:]:
-            passed = False
-            for func in funcs:
-                if func(token):
-                    passed = True
-                    break
-
-            if not passed:
-                return token
+        funcs = funcs if isinstance(funcs, (list, tuple)) else (funcs,)
+        # a token qualifies only when *none* of the funcs accepts it
+        return self._token_matching(lambda t: not any(f(t) for f in funcs), idx)
 
     def token_matching(self, idx, funcs):
-        for token in self.tokens[idx:]:
-            for func in funcs:
-                if func(token):
-                    return token
+        return self._token_matching(funcs, idx)
 
     def token_prev(self, idx, skip_ws=True):
         """Returns the previous token relative to *idx*.
@@ -323,17 +298,10 @@ class TokenList(Token):
         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
         ``None`` is returned if there's no previous token.
         """
-        if idx is None:
-            return None
-
-        if not isinstance(idx, int):
-            idx = self.token_index(idx)
-
-        while idx:
-            idx -= 1
-            if self.tokens[idx].is_whitespace() and skip_ws:
-                continue
-            return self.tokens[idx]
+        if isinstance(idx, int):
+            idx += 1  # alot of code usage current pre-compensates for this
+        funcs = lambda tk: not (tk.is_whitespace() and skip_ws)
+        return self._token_matching(funcs, idx, reverse=True)
 
     def token_next(self, idx, skip_ws=True):
         """Returns the next token relative to *idx*.
@@ -341,43 +309,24 @@ class TokenList(Token):
         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
         ``None`` is returned if there's no next token.
         """
-        if idx is None:
-            return None
-
-        if not isinstance(idx, int):
-            idx = self.token_index(idx)
-
-        while idx < len(self.tokens) - 1:
-            idx += 1
-            if self.tokens[idx].is_whitespace() and skip_ws:
-                continue
-            return self.tokens[idx]
+        if isinstance(idx, int):
+            idx += 1  # alot of code usage current pre-compensates for this
+        funcs = lambda tk: not (tk.is_whitespace() and skip_ws)
+        return self._token_matching(funcs, idx)
 
     def token_index(self, token, start=0):
         """Return list index of token."""
-        if start > 0:
-            # Performing `index` manually is much faster when starting
-            # in the middle of the list of tokens and expecting to find
-            # the token near to the starting index.
-            for i in range(start, len(self.tokens)):
-                if self.tokens[i] == token:
-                    return i
-            return -1
-        return self.tokens.index(token)
-
-    def tokens_between(self, start, end, exclude_end=False):
+        start = self.token_index(start) if not isinstance(start, int) else start
+        return start + self.tokens[start:].index(token)
+
+    def tokens_between(self, start, end, include_end=True):
         """Return all tokens between (and including) start and end.
 
-        If *exclude_end* is ``True`` (default is ``False``) the end token
-        is included too.
+        If *include_end* is ``False`` (default is ``True``) the end token
+        is excluded.
         """
-        # FIXME(andi): rename exclude_end to inlcude_end
-        if exclude_end:
-            offset = 0
-        else:
-            offset = 1
-        end_idx = self.token_index(end) + offset
         start_idx = self.token_index(start)
+        end_idx = include_end + self.token_index(end)
         return self.tokens[start_idx:end_idx]
 
     def group_tokens(self, grp_cls, tokens, ignore_ws=False, extend=False):
@@ -431,13 +380,12 @@ class TokenList(Token):
         """Returns the alias for this identifier or ``None``."""
 
         # "name AS alias"
-        kw = self.token_next_match(0, T.Keyword, 'AS')
+        kw = self.token_next_by(m=(T.Keyword, 'AS'))
         if kw is not None:
             return self._get_first_name(kw, keywords=True)
 
         # "name alias" or "complicated column expression alias"
-        if len(self.tokens) > 2 \
-            and self.token_next_by_type(0, T.Whitespace) is not None:
+        if len(self.tokens) > 2 and self.token_next_by(t=T.Whitespace):
             return self._get_first_name(reverse=True)
 
         return None
-- 
cgit v1.2.1


From 955996e3e5c49fb6b7f200ceecee2f8082656ac4 Mon Sep 17 00:00:00 2001
From: Victor Uriarte <victor.m.uriarte@intel.com>
Date: Wed, 11 May 2016 04:53:12 -0700
Subject: refactor group_comments

---
 sqlparse/engine/grouping.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 8fb4af1..e30abab 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -172,22 +172,16 @@ def group_parenthesis(tlist):
 
 @recurse(sql.Comment)
 def group_comments(tlist):
-    idx = 0
-    token = tlist.token_next_by_type(idx, T.Comment)
+    token = tlist.token_next_by(t=T.Comment)
     while token:
-        tidx = tlist.token_index(token)
-        end = tlist.token_not_matching(tidx + 1,
-                                       [lambda t: t.ttype in T.Comment,
-                                        lambda t: t.is_whitespace()])
-        if end is None:
-            idx = tidx + 1
-        else:
-            eidx = tlist.token_index(end)
-            grp_tokens = tlist.tokens_between(token,
-                                              tlist.token_prev(eidx, False))
-            group = tlist.group_tokens(sql.Comment, grp_tokens)
-            idx = tlist.token_index(group)
-        token = tlist.token_next_by_type(idx, T.Comment)
+        end = tlist.token_not_matching(
+            token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
+        if end is not None:
+            end = tlist.token_prev(end, False)
+            tokens = tlist.tokens_between(token, end)
+            token = tlist.group_tokens(sql.Comment, tokens)
+
+        token = tlist.token_next_by(t=T.Comment, idx=token)
 
 
 @recurse(sql.Where)
-- 
cgit v1.2.1