From 8ddc98b8000cd88aa3fd53881cd3d3df8ee1a9b3 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Fri, 6 May 2016 20:48:42 -0700 Subject: fix test to run locally --- tests/test_functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_functions.py b/tests/test_functions.py index 425ab7f..9207815 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -13,6 +13,7 @@ sys.path.insert(0, '..') from sqlparse.filters import compact from sqlparse.functions import getcolumns, getlimit, IsType +from tests.utils import FILES_DIR class Test_IncludeStatement(TestCase): @@ -27,7 +28,7 @@ class Test_IncludeStatement(TestCase): def test_includeStatement(self): stream = tokenize(self.sql) - includeStatement = IncludeStatement('tests/files', + includeStatement = IncludeStatement(FILES_DIR, raiseexceptions=True) stream = includeStatement.process(None, stream) stream = compact(stream) -- cgit v1.2.1 From 43c14e081cadb8ac386b0895266a5b57a8329587 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sat, 23 Apr 2016 18:00:47 -0700 Subject: Add editorconfig, update gitignore, filename, authors --- .editorconfig | 23 +++++ .gitignore | 3 + AUTHORS | 1 + CHANGELOG | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ CHANGES | 324 ---------------------------------------------------------- COPYING | 25 ----- LICENSE | 25 +++++ 7 files changed, 376 insertions(+), 349 deletions(-) create mode 100644 .editorconfig create mode 100644 CHANGELOG delete mode 100644 CHANGES delete mode 100644 COPYING create mode 100644 LICENSE diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..6fa8b7b --- /dev/null +++ b/.editorconfig @@ -0,0 +1,23 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = crlf +charset = utf-8 +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{py,ini,yaml,yml,rst}] +indent_style = space +indent_size = 4 +continuation_indent_size = 4 +trim_trailing_whitespace = true + +[{Makefile,*.bat}] +indent_style = tab + +[*.md] +trim_trailing_whitespace = false diff --git a/.gitignore b/.gitignore index 6dde1c3..438de5f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# PyCharm +.idea/ + *.pyc docs/build dist diff --git a/AUTHORS b/AUTHORS index 0f34f06..9831fa1 100644 --- a/AUTHORS +++ b/AUTHORS @@ -33,6 +33,7 @@ Alphabetical list of contributors: * spigwitmer * Tim Graham * Victor Hahn +* Victor Uriarte * vthriller * wayne.wuw * Yago Riveiro diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..374c060 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,324 @@ +Development Version +------------------- + +IMPORTANT: The supported Python versions have changed with this release. +sqlparse 0.2.x supports Python 2.7 and Python >= 3.3. + +Internal Changes +* sqlparse.SQLParseError was removed from top-level module and moved to + sqlparse.exceptions. +* sqlparse.sql.Token.to_unicode was removed. + +Enhancements +* Support WHILE loops (issue215, by shenlongxing). +* Better support for CTEs (issue217, by Andrew Tipton). + +Bug Fixes +* Leading whitespaces are now removed when format() is called with + strip_whitespace=True (issue213, by shenlongxing). +* Fix typo in keywords list (issue229, by cbeloni). +* Fix parsing of functions in comparisons (issue230, by saaj). +* Minor bug fixes (issue101). + + +Release 0.1.19 (Mar 07, 2015) +----------------------------- +Bug Fixes +* Fix IndexError when statement contains WITH clauses (issue205). + + +Release 0.1.18 (Oct 25, 2015) +----------------------------- + +Bug Fixes +* Remove universal wheel support, added in 0.1.17 by mistake. + + +Release 0.1.17 (Oct 24, 2015) +----------------------------- + +Enhancements +* Speed up parsing of large SQL statements (pull request: issue201, fixes the + following issues: issue199, issue135, issue62, issue41, by Ryan Wooden). + +Bug Fixes +* Fix another splitter bug regarding DECLARE (issue194). + +Misc +* Packages on PyPI are signed from now on. + + +Release 0.1.16 (Jul 26, 2015) +----------------------------- + +Bug Fixes +* Fix a regression in get_alias() introduced in 0.1.15 (issue185). +* Fix a bug in the splitter regarding DECLARE (issue193). +* sqlformat command line tool doesn't duplicat newlines anymore (issue191). +* Don't mix up MySQL comments starting with hash and MSSQL + temp tables (issue192). +* Statement.get_type() now ignores comments at the beginning of + a statement (issue186). + + +Release 0.1.15 (Apr 15, 2015) +----------------------------- + +Bug Fixes +* Fix a regression for identifiers with square bracktes + notation (issue153, by darikg). +* Add missing SQL types (issue154, issue155, issue156, by jukebox). +* Fix parsing of multi-line comments (issue172, by JacekPliszka). +* Fix parsing of escaped backslashes (issue174, by caseyching). +* Fix parsing of identifiers starting with underscore (issue175). +* Fix misinterpretation of IN keyword (issue183). + +Enhancements +* Improve formatting of HAVING statements. +* Improve parsing of inline comments (issue163). +* Group comments to parent object (issue128, issue160). +* Add double precision builtin (issue169, by darikg). +* Add support for square bracket array indexing (issue170, issue176, + issue177 by darikg). +* Improve grouping of aliased elements (issue167, by darikg). +* Support comments starting with '#' character (issue178). + + +Release 0.1.14 (Nov 30, 2014) +----------------------------- + +Bug Fixes +* Floats in UPDATE statements are now handled correctly (issue145). +* Properly handle string literals in comparisons (issue148, change proposed + by aadis). +* Fix indentation when using tabs (issue146). + +Enhancements +* Improved formatting in list when newlines precede commas (issue140). + + +Release 0.1.13 (Oct 09, 2014) +----------------------------- + +Bug Fixes +* Fix a regression in handling of NULL keywords introduced in 0.1.12. + + +Release 0.1.12 (Sep 20, 2014) +----------------------------- + +Bug Fixes +* Fix handling of NULL keywords in aliased identifiers. +* Fix SerializerUnicode to split unquoted newlines (issue131, by Michael Schuller). +* Fix handling of modulo operators without spaces (by gavinwahl). + +Enhancements +* Improve parsing of identifier lists containing placeholders. +* Speed up query parsing of unquoted lines (by Michael Schuller). + + +Release 0.1.11 (Feb 07, 2014) +----------------------------- + +Bug Fixes +* Fix incorrect parsing of string literals containing line breaks (issue118). +* Fix typo in keywords, add MERGE, COLLECT keywords (issue122/124, + by Cristian Orellana). +* Improve parsing of string literals in columns. +* Fix parsing and formatting of statements containing EXCEPT keyword. +* Fix Function.get_parameters() (issue126/127, by spigwitmer). + +Enhancements +* Classify DML keywords (issue116, by Victor Hahn). +* Add missing FOREACH keyword. +* Grouping of BEGIN/END blocks. + +Other +* Python 2.5 isn't automatically tested anymore, neither Travis nor Tox + still support it out of the box. + + +Release 0.1.10 (Nov 02, 2013) +----------------------------- + +Bug Fixes +* Removed buffered reading again, it obviously causes wrong parsing in some rare + cases (issue114). +* Fix regression in setup.py introduced 10 months ago (issue115). + +Enhancements +* Improved support for JOINs, by Alexander Beedie. + + +Release 0.1.9 (Sep 28, 2013) +---------------------------- + +Bug Fixes +* Fix an regression introduced in 0.1.5 where sqlparse didn't properly + distinguished between single and double quoted strings when tagging + identifier (issue111). + +Enhancements +* New option to truncate long string literals when formatting. +* Scientific numbers are pares correctly (issue107). +* Support for arithmetic expressions (issue109, issue106; by prudhvi). + + +Release 0.1.8 (Jun 29, 2013) +---------------------------- + +Bug Fixes +* Whitespaces within certain keywords are now allowed (issue97, patch proposed + by xcombelle). + +Enhancements +* Improve parsing of assignments in UPDATE statements (issue90). +* Add STRAIGHT_JOIN statement (by Yago Riveiro). +* Function.get_parameters() now returns the parameter if only one parameter is + given (issue94, by wayne.wuw). +* sqlparse.split() now removes leading and trailing whitespaces from splitted + statements. +* Add USE as keyword token (by mulos). +* Improve parsing of PEP249-style placeholders (issue103). + + +Release 0.1.7 (Apr 06, 2013) +---------------------------- + +Bug Fixes + * Fix Python 3 compatibility of sqlformat script (by Piet Delport). + * Fix parsing of SQL statements that contain binary data (by Alexey + Malyshev). + * Fix a bug where keywords were identified as aliased identifiers in + invalid SQL statements. + * Fix parsing of identifier lists where identifiers are keywords too + (issue10). + +Enhancements + * Top-level API functions now accept encoding keyword to parse + statements in certain encodings more reliable (issue20). + * Improve parsing speed when SQL contains CLOBs or BLOBs (issue86). + * Improve formatting of ORDER BY clauses (issue89). + * Formatter now tries to detect runaway indentations caused by + parsing errors or invalid SQL statements. When re-indenting such + statements the formatter flips back to column 0 before going crazy. + +Other + * Documentation updates. + + +Release 0.1.6 (Jan 01, 2013) +---------------------------- + +sqlparse is now compatible with Python 3 without any patches. The +Python 3 version is generated during install by 2to3. You'll need +distribute to install sqlparse for Python 3. + +Bug Fixes + * Fix parsing error with dollar-quoted procedure bodies (issue83). + +Other + * Documentation updates. + * Test suite now uses tox and py.test. + * py3k fixes (by vthriller). + * py3k fixes in setup.py (by Florian Bauer). + * setup.py now requires distribute (by Florian Bauer). + + +Release 0.1.5 (Nov 13, 2012) +---------------------------- + +Bug Fixes + * Improve handling of quoted identifiers (issue78). + * Improve grouping and formatting of identifiers with operators (issue53). + * Improve grouping and formatting of concatenated strings (issue53). + * Improve handling of varchar() (by Mike Amy). + * Clean up handling of various SQL elements. + * Switch to py.test and clean up tests. + * Several minor fixes. + +Other + * Deprecate sqlparse.SQLParseError. Please use + sqlparse.exceptions.SQLParseError instead. + * Add caching to speed up processing. + * Add experimental filters for token processing. + * Add sqlformat.parsestream (by quest). + + +Release 0.1.4 (Apr 20, 2012) +---------------------------- + +Bug Fixes + * Avoid "stair case" effects when identifiers, functions, + placeholders or keywords are mixed in identifier lists (issue45, + issue49, issue52) and when asterisks are used as operators + (issue58). + * Make keyword detection more restrict (issue47). + * Improve handling of CASE statements (issue46). + * Fix statement splitting when parsing recursive statements (issue57, + thanks to piranna). + * Fix for negative numbers (issue56, thanks to kevinjqiu). + * Pretty format comments in identifier lists (issue59). + * Several minor bug fixes and improvements. + + +Release 0.1.3 (Jul 29, 2011) +---------------------------- + +Bug Fixes + * Improve parsing of floats (thanks to Kris). + * When formatting a statement a space before LIMIT was removed (issue35). + * Fix strip_comments flag (issue38, reported by ooberm...@gmail.com). + * Avoid parsing names as keywords (issue39, reported by djo...@taket.org). + * Make sure identifier lists in subselects are grouped (issue40, + reported by djo...@taket.org). + * Split statements with IF as functions correctly (issue33 and + issue29, reported by charles....@unige.ch). + * Relax detection of keywords, esp. when used as function names + (issue36, nyuhu...@gmail.com). + * Don't treat single characters as keywords (issue32). + * Improve parsing of stand-alone comments (issue26). + * Detection of placeholders in paramterized queries (issue22, + reported by Glyph Lefkowitz). + * Add parsing of MS Access column names with braces (issue27, + reported by frankz...@gmail.com). + +Other + * Replace Django by Flask in App Engine frontend (issue11). + + +Release 0.1.2 (Nov 23, 2010) +---------------------------- + +Bug Fixes + * Fixed incorrect detection of keyword fragments embed in names (issue7, + reported and initial patch by andyboyko). + * Stricter detection of identfier aliases (issue8, reported by estama). + * WHERE grouping consumed closing parenthesis (issue9, reported by estama). + * Fixed an issue with trailing whitespaces (reported by Kris). + * Better detection of escaped single quotes (issue13, reported by + Martin Brochhaus, patch by bluemaro with test case by Dan Carley). + * Ignore identifier in double-quotes when changing cases (issue 21). + * Lots of minor fixes targeting encoding, indentation, statement + parsing and more (issues 12, 14, 15, 16, 18, 19). + * Code cleanup with a pinch of refactoring. + + +Release 0.1.1 (May 6, 2009) +--------------------------- + +Bug Fixes + * Lexers preserves original line breaks (issue1). + * Improved identifier parsing: backtick quotes, wildcards, T-SQL variables + prefixed with @. + * Improved parsing of identifier lists (issue2). + * Recursive recognition of AS (issue4) and CASE. + * Improved support for UPDATE statements. + +Other + * Code cleanup and better test coverage. + + +Release 0.1.0 (Apr 8, 2009) +--------------------------- + * Initial release. diff --git a/CHANGES b/CHANGES deleted file mode 100644 index 374c060..0000000 --- a/CHANGES +++ /dev/null @@ -1,324 +0,0 @@ -Development Version -------------------- - -IMPORTANT: The supported Python versions have changed with this release. -sqlparse 0.2.x supports Python 2.7 and Python >= 3.3. - -Internal Changes -* sqlparse.SQLParseError was removed from top-level module and moved to - sqlparse.exceptions. -* sqlparse.sql.Token.to_unicode was removed. - -Enhancements -* Support WHILE loops (issue215, by shenlongxing). -* Better support for CTEs (issue217, by Andrew Tipton). - -Bug Fixes -* Leading whitespaces are now removed when format() is called with - strip_whitespace=True (issue213, by shenlongxing). -* Fix typo in keywords list (issue229, by cbeloni). -* Fix parsing of functions in comparisons (issue230, by saaj). -* Minor bug fixes (issue101). - - -Release 0.1.19 (Mar 07, 2015) ------------------------------ -Bug Fixes -* Fix IndexError when statement contains WITH clauses (issue205). - - -Release 0.1.18 (Oct 25, 2015) ------------------------------ - -Bug Fixes -* Remove universal wheel support, added in 0.1.17 by mistake. - - -Release 0.1.17 (Oct 24, 2015) ------------------------------ - -Enhancements -* Speed up parsing of large SQL statements (pull request: issue201, fixes the - following issues: issue199, issue135, issue62, issue41, by Ryan Wooden). - -Bug Fixes -* Fix another splitter bug regarding DECLARE (issue194). - -Misc -* Packages on PyPI are signed from now on. - - -Release 0.1.16 (Jul 26, 2015) ------------------------------ - -Bug Fixes -* Fix a regression in get_alias() introduced in 0.1.15 (issue185). -* Fix a bug in the splitter regarding DECLARE (issue193). -* sqlformat command line tool doesn't duplicat newlines anymore (issue191). -* Don't mix up MySQL comments starting with hash and MSSQL - temp tables (issue192). -* Statement.get_type() now ignores comments at the beginning of - a statement (issue186). - - -Release 0.1.15 (Apr 15, 2015) ------------------------------ - -Bug Fixes -* Fix a regression for identifiers with square bracktes - notation (issue153, by darikg). -* Add missing SQL types (issue154, issue155, issue156, by jukebox). -* Fix parsing of multi-line comments (issue172, by JacekPliszka). -* Fix parsing of escaped backslashes (issue174, by caseyching). -* Fix parsing of identifiers starting with underscore (issue175). -* Fix misinterpretation of IN keyword (issue183). - -Enhancements -* Improve formatting of HAVING statements. -* Improve parsing of inline comments (issue163). -* Group comments to parent object (issue128, issue160). -* Add double precision builtin (issue169, by darikg). -* Add support for square bracket array indexing (issue170, issue176, - issue177 by darikg). -* Improve grouping of aliased elements (issue167, by darikg). -* Support comments starting with '#' character (issue178). - - -Release 0.1.14 (Nov 30, 2014) ------------------------------ - -Bug Fixes -* Floats in UPDATE statements are now handled correctly (issue145). -* Properly handle string literals in comparisons (issue148, change proposed - by aadis). -* Fix indentation when using tabs (issue146). - -Enhancements -* Improved formatting in list when newlines precede commas (issue140). - - -Release 0.1.13 (Oct 09, 2014) ------------------------------ - -Bug Fixes -* Fix a regression in handling of NULL keywords introduced in 0.1.12. - - -Release 0.1.12 (Sep 20, 2014) ------------------------------ - -Bug Fixes -* Fix handling of NULL keywords in aliased identifiers. -* Fix SerializerUnicode to split unquoted newlines (issue131, by Michael Schuller). -* Fix handling of modulo operators without spaces (by gavinwahl). - -Enhancements -* Improve parsing of identifier lists containing placeholders. -* Speed up query parsing of unquoted lines (by Michael Schuller). - - -Release 0.1.11 (Feb 07, 2014) ------------------------------ - -Bug Fixes -* Fix incorrect parsing of string literals containing line breaks (issue118). -* Fix typo in keywords, add MERGE, COLLECT keywords (issue122/124, - by Cristian Orellana). -* Improve parsing of string literals in columns. -* Fix parsing and formatting of statements containing EXCEPT keyword. -* Fix Function.get_parameters() (issue126/127, by spigwitmer). - -Enhancements -* Classify DML keywords (issue116, by Victor Hahn). -* Add missing FOREACH keyword. -* Grouping of BEGIN/END blocks. - -Other -* Python 2.5 isn't automatically tested anymore, neither Travis nor Tox - still support it out of the box. - - -Release 0.1.10 (Nov 02, 2013) ------------------------------ - -Bug Fixes -* Removed buffered reading again, it obviously causes wrong parsing in some rare - cases (issue114). -* Fix regression in setup.py introduced 10 months ago (issue115). - -Enhancements -* Improved support for JOINs, by Alexander Beedie. - - -Release 0.1.9 (Sep 28, 2013) ----------------------------- - -Bug Fixes -* Fix an regression introduced in 0.1.5 where sqlparse didn't properly - distinguished between single and double quoted strings when tagging - identifier (issue111). - -Enhancements -* New option to truncate long string literals when formatting. -* Scientific numbers are pares correctly (issue107). -* Support for arithmetic expressions (issue109, issue106; by prudhvi). - - -Release 0.1.8 (Jun 29, 2013) ----------------------------- - -Bug Fixes -* Whitespaces within certain keywords are now allowed (issue97, patch proposed - by xcombelle). - -Enhancements -* Improve parsing of assignments in UPDATE statements (issue90). -* Add STRAIGHT_JOIN statement (by Yago Riveiro). -* Function.get_parameters() now returns the parameter if only one parameter is - given (issue94, by wayne.wuw). -* sqlparse.split() now removes leading and trailing whitespaces from splitted - statements. -* Add USE as keyword token (by mulos). -* Improve parsing of PEP249-style placeholders (issue103). - - -Release 0.1.7 (Apr 06, 2013) ----------------------------- - -Bug Fixes - * Fix Python 3 compatibility of sqlformat script (by Piet Delport). - * Fix parsing of SQL statements that contain binary data (by Alexey - Malyshev). - * Fix a bug where keywords were identified as aliased identifiers in - invalid SQL statements. - * Fix parsing of identifier lists where identifiers are keywords too - (issue10). - -Enhancements - * Top-level API functions now accept encoding keyword to parse - statements in certain encodings more reliable (issue20). - * Improve parsing speed when SQL contains CLOBs or BLOBs (issue86). - * Improve formatting of ORDER BY clauses (issue89). - * Formatter now tries to detect runaway indentations caused by - parsing errors or invalid SQL statements. When re-indenting such - statements the formatter flips back to column 0 before going crazy. - -Other - * Documentation updates. - - -Release 0.1.6 (Jan 01, 2013) ----------------------------- - -sqlparse is now compatible with Python 3 without any patches. The -Python 3 version is generated during install by 2to3. You'll need -distribute to install sqlparse for Python 3. - -Bug Fixes - * Fix parsing error with dollar-quoted procedure bodies (issue83). - -Other - * Documentation updates. - * Test suite now uses tox and py.test. - * py3k fixes (by vthriller). - * py3k fixes in setup.py (by Florian Bauer). - * setup.py now requires distribute (by Florian Bauer). - - -Release 0.1.5 (Nov 13, 2012) ----------------------------- - -Bug Fixes - * Improve handling of quoted identifiers (issue78). - * Improve grouping and formatting of identifiers with operators (issue53). - * Improve grouping and formatting of concatenated strings (issue53). - * Improve handling of varchar() (by Mike Amy). - * Clean up handling of various SQL elements. - * Switch to py.test and clean up tests. - * Several minor fixes. - -Other - * Deprecate sqlparse.SQLParseError. Please use - sqlparse.exceptions.SQLParseError instead. - * Add caching to speed up processing. - * Add experimental filters for token processing. - * Add sqlformat.parsestream (by quest). - - -Release 0.1.4 (Apr 20, 2012) ----------------------------- - -Bug Fixes - * Avoid "stair case" effects when identifiers, functions, - placeholders or keywords are mixed in identifier lists (issue45, - issue49, issue52) and when asterisks are used as operators - (issue58). - * Make keyword detection more restrict (issue47). - * Improve handling of CASE statements (issue46). - * Fix statement splitting when parsing recursive statements (issue57, - thanks to piranna). - * Fix for negative numbers (issue56, thanks to kevinjqiu). - * Pretty format comments in identifier lists (issue59). - * Several minor bug fixes and improvements. - - -Release 0.1.3 (Jul 29, 2011) ----------------------------- - -Bug Fixes - * Improve parsing of floats (thanks to Kris). - * When formatting a statement a space before LIMIT was removed (issue35). - * Fix strip_comments flag (issue38, reported by ooberm...@gmail.com). - * Avoid parsing names as keywords (issue39, reported by djo...@taket.org). - * Make sure identifier lists in subselects are grouped (issue40, - reported by djo...@taket.org). - * Split statements with IF as functions correctly (issue33 and - issue29, reported by charles....@unige.ch). - * Relax detection of keywords, esp. when used as function names - (issue36, nyuhu...@gmail.com). - * Don't treat single characters as keywords (issue32). - * Improve parsing of stand-alone comments (issue26). - * Detection of placeholders in paramterized queries (issue22, - reported by Glyph Lefkowitz). - * Add parsing of MS Access column names with braces (issue27, - reported by frankz...@gmail.com). - -Other - * Replace Django by Flask in App Engine frontend (issue11). - - -Release 0.1.2 (Nov 23, 2010) ----------------------------- - -Bug Fixes - * Fixed incorrect detection of keyword fragments embed in names (issue7, - reported and initial patch by andyboyko). - * Stricter detection of identfier aliases (issue8, reported by estama). - * WHERE grouping consumed closing parenthesis (issue9, reported by estama). - * Fixed an issue with trailing whitespaces (reported by Kris). - * Better detection of escaped single quotes (issue13, reported by - Martin Brochhaus, patch by bluemaro with test case by Dan Carley). - * Ignore identifier in double-quotes when changing cases (issue 21). - * Lots of minor fixes targeting encoding, indentation, statement - parsing and more (issues 12, 14, 15, 16, 18, 19). - * Code cleanup with a pinch of refactoring. - - -Release 0.1.1 (May 6, 2009) ---------------------------- - -Bug Fixes - * Lexers preserves original line breaks (issue1). - * Improved identifier parsing: backtick quotes, wildcards, T-SQL variables - prefixed with @. - * Improved parsing of identifier lists (issue2). - * Recursive recognition of AS (issue4) and CASE. - * Improved support for UPDATE statements. - -Other - * Code cleanup and better test coverage. - - -Release 0.1.0 (Apr 8, 2009) ---------------------------- - * Initial release. diff --git a/COPYING b/COPYING deleted file mode 100644 index 7b158da..0000000 --- a/COPYING +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2009, Andi Albrecht -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the authors nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7b158da --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +Copyright (c) 2009, Andi Albrecht +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the authors nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file -- cgit v1.2.1 From e006e1689d5bb9ad614b5155e2ff17f7e51b83d8 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 18:45:41 -0700 Subject: Update compat and utils --- sqlparse/compat.py | 23 ++++++++++---- sqlparse/utils.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/sqlparse/compat.py b/sqlparse/compat.py index 6b26384..334883b 100644 --- a/sqlparse/compat.py +++ b/sqlparse/compat.py @@ -14,29 +14,40 @@ PY2 = sys.version_info[0] == 2 PY3 = sys.version_info[0] == 3 if PY3: + def u(s): + return str(s) + + + range = range text_type = str string_types = (str,) from io import StringIO - def u(s): - return str(s) elif PY2: + def u(s, encoding=None): + encoding = encoding or 'unicode-escape' + try: + return unicode(s) + except UnicodeDecodeError: + return unicode(s, encoding) + + + range = xrange text_type = unicode string_types = (basestring,) - from StringIO import StringIO # flake8: noqa - - def u(s): - return unicode(s) + from StringIO import StringIO # Directly copied from six: def with_metaclass(meta, *bases): """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy # metaclass for one level of class instantiation that replaces itself with # the actual metaclass. class metaclass(meta): def __new__(cls, name, this_bases, d): return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 7db9a96..f2372c2 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -1,16 +1,13 @@ -''' -Created on 17/05/2012 - -@author: piranna -''' - +import itertools import re -from collections import OrderedDict +from collections import OrderedDict, deque +from contextlib import contextmanager class Cache(OrderedDict): """Cache with LRU algorithm using an OrderedDict as basis """ + def __init__(self, maxsize=100): OrderedDict.__init__(self) @@ -113,3 +110,80 @@ def split_unquoted_newlines(text): else: outputlines[-1] += line return outputlines + + +def remove_quotes(val): + """Helper that removes surrounding quotes from strings.""" + if val is None: + return + if val[0] in ('"', "'") and val[0] == val[-1]: + val = val[1:-1] + return val + + +def recurse(*cls): + def wrap(f): + def wrapped_f(tlist): + for sgroup in tlist.get_sublists(): + if not isinstance(sgroup, cls): + wrapped_f(sgroup) + f(tlist) + + return wrapped_f + + return wrap + + +def imt(token, i=None, m=None, t=None): + """Aid function to refactor comparisons for Instance, Match and TokenType + Aid fun + :param token: + :param i: Class or Tuple/List of Classes + :param m: Tuple of TokenType & Value. Can be list of Tuple for multiple + :param t: TokenType or Tuple/List of TokenTypes + :return: bool + """ + t = (t,) if t and not isinstance(t, (list, tuple)) else t + m = (m,) if m and not isinstance(m, (list,)) else m + + if token is None: + return False + elif i is not None and isinstance(token, i): + return True + elif m is not None and any((token.match(*x) for x in m)): + return True + elif t is not None and token.ttype in t: + return True + else: + return False + + +def find_matching(tlist, token, M1, M2): + idx = tlist.token_index(token) + depth = 0 + for token in tlist[idx:]: + if token.match(*M1): + depth += 1 + elif token.match(*M2): + depth -= 1 + if depth == 0: + return token + + +def consume(iterator, n): + """Advance the iterator n-steps ahead. If n is none, consume entirely.""" + deque(itertools.islice(iterator, n), maxlen=0) + + +@contextmanager +def offset(filter_, n=0): + filter_.offset += n + yield + filter_.offset -= n + + +@contextmanager +def indent(filter_, n=1): + filter_.indent += n + yield + filter_.indent -= n -- cgit v1.2.1 From d725e0c81afc6907ad5fec71a53f724fa0e3f5c3 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 19:04:21 -0700 Subject: update sql --- sqlparse/sql.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index f357572..8a485f3 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -77,7 +77,7 @@ class Token(object): if regex: if isinstance(values, string_types): - values = set([values]) + values = {values} if self.ttype is T.Keyword: values = set(re.compile(v, re.IGNORECASE) for v in values) @@ -150,7 +150,7 @@ class TokenList(Token): if tokens is None: tokens = [] self.tokens = tokens - Token.__init__(self, None, self._to_string()) + super(TokenList, self).__init__(None, self.__str__()) def __unicode__(self): return self._to_string() @@ -213,12 +213,12 @@ class TokenList(Token): else: yield token -# def __iter__(self): -# return self -# -# def next(self): -# for token in self.tokens: -# yield token + # def __iter__(self): + # return self + # + # def next(self): + # for token in self.tokens: + # yield token def is_group(self): return True -- cgit v1.2.1 From dbf8a624e091e1da24a7a90c4ff59d88ce816b8f Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 19:17:47 -0700 Subject: adding powerful _token_matching and imt helper --- sqlparse/sql.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 8a485f3..ccb6924 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -7,6 +7,7 @@ import sys from sqlparse import tokens as T from sqlparse.compat import string_types, u +from sqlparse.utils import imt class Token(object): @@ -232,6 +233,27 @@ class TokenList(Token): def _groupable_tokens(self): return self.tokens + def _token_matching(self, funcs, start=0, end=None, reverse=False): + """next token that match functions""" + if start is None: + return None + + if not isinstance(start, int): + start = self.token_index(start) + 1 + + if not isinstance(funcs, (list, tuple)): + funcs = (funcs,) + + if reverse: + iterable = iter(reversed(self.tokens[end:start - 1])) + else: + iterable = self.tokens[start:end] + + for token in iterable: + for func in funcs: + if func(token): + return token + def token_first(self, ignore_whitespace=True, ignore_comments=False): """Returns the first child token. @@ -244,10 +266,14 @@ class TokenList(Token): for token in self.tokens: if ignore_whitespace and token.is_whitespace(): continue - if ignore_comments and isinstance(token, Comment): + if ignore_comments and imt(token, i=Comment): continue return token + def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): + funcs = lambda tk: imt(tk, i, m, t) + return self._token_matching(funcs, idx, end) + def token_next_by_instance(self, idx, clss, end=None): """Returns the next token matching a class. @@ -403,7 +429,7 @@ class TokenList(Token): # "name alias" or "complicated column expression alias" if len(self.tokens) > 2 \ - and self.token_next_by_type(0, T.Whitespace) is not None: + and self.token_next_by_type(0, T.Whitespace) is not None: return self._get_first_name(reverse=True) return None @@ -677,11 +703,9 @@ class Function(TokenList): """Return a list of parameters.""" parenthesis = self.tokens[-1] for t in parenthesis.tokens: - if isinstance(t, IdentifierList): + if imt(t, i=IdentifierList): return t.get_identifiers() - elif (isinstance(t, Identifier) or - isinstance(t, Function) or - t.ttype in T.Literal): + elif imt(t, i=(Function, Identifier), t=T.Literal): return [t, ] return [] -- cgit v1.2.1 From 5a1830554f9c1d6b626f57fd88c19c6f7063b434 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 19:42:08 -0700 Subject: generalize group_tokens for more use cases --- sqlparse/engine/grouping.py | 14 ++++---------- sqlparse/sql.py | 34 +++++++++++++++++++++++++--------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 982488b..ab519f0 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -422,19 +422,13 @@ def group_order(tlist): def align_comments(tlist): [align_comments(sgroup) for sgroup in tlist.get_sublists()] - idx = 0 - token = tlist.token_next_by_instance(idx, sql.Comment) + token = tlist.token_next_by(i=sql.Comment) while token: before = tlist.token_prev(tlist.token_index(token)) if isinstance(before, sql.TokenList): - grp = tlist.tokens_between(before, token)[1:] - before.tokens.extend(grp) - for t in grp: - tlist.tokens.remove(t) - idx = tlist.token_index(before) + 1 - else: - idx = tlist.token_index(token) + 1 - token = tlist.token_next_by_instance(idx, sql.Comment) + tokens = tlist.tokens_between(before, token) + token = tlist.group_tokens(sql.TokenList, tokens, extend=True) + token = tlist.token_next_by(i=sql.Comment, idx=token) def group(tlist): diff --git a/sqlparse/sql.py b/sqlparse/sql.py index ccb6924..a9884a5 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -388,20 +388,36 @@ class TokenList(Token): start_idx = self.token_index(start) return self.tokens[start_idx:end_idx] - def group_tokens(self, grp_cls, tokens, ignore_ws=False): + def group_tokens(self, grp_cls, tokens, ignore_ws=False, extend=False): """Replace tokens by an instance of *grp_cls*.""" - idx = self.token_index(tokens[0]) if ignore_ws: while tokens and tokens[-1].is_whitespace(): tokens = tokens[:-1] - for t in tokens: - self.tokens.remove(t) - grp = grp_cls(tokens) + + left = tokens[0] + idx = self.token_index(left) + + if extend: + if not isinstance(left, grp_cls): + grp = grp_cls([left]) + self.tokens.remove(left) + self.tokens.insert(idx, grp) + left = grp + left.parent = self + tokens = tokens[1:] + left.tokens.extend(tokens) + left.value = left.__str__() + + else: + left = grp_cls(tokens) + left.parent = self + self.tokens.insert(idx, left) + for token in tokens: - token.parent = grp - grp.parent = self - self.tokens.insert(idx, grp) - return grp + token.parent = left + self.tokens.remove(token) + + return left def insert_before(self, where, token): """Inserts *token* before *where*.""" -- cgit v1.2.1 From f26719dc8d2c9cf4bf85501bb68cc4ed3f4da86d Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 19:53:54 -0700 Subject: Introduce @recurse to refactor recursion --- sqlparse/engine/grouping.py | 18 +++++++----------- sqlparse/utils.py | 5 +++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index ab519f0..df967c3 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -4,6 +4,7 @@ import itertools from sqlparse import sql from sqlparse import tokens as T +from sqlparse.utils import recurse def _group_left_right(tlist, ttype, value, cls, @@ -229,9 +230,8 @@ def group_identifier(tlist): token = _next_token(tlist, idx) +@recurse(sql.IdentifierList) def group_identifier_list(tlist): - [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, sql.IdentifierList)] # Allowed list items fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function, sql.Case)), @@ -327,9 +327,8 @@ def group_brackets(tlist): token = tlist.token_next_match(idx, T.Punctuation, ['(', '[']) +@recurse(sql.Comment) def group_comments(tlist): - [group_comments(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, sql.Comment)] idx = 0 token = tlist.token_next_by_type(idx, T.Comment) while token: @@ -348,9 +347,8 @@ def group_comments(tlist): token = tlist.token_next_by_type(idx, T.Comment) +@recurse(sql.Where) def group_where(tlist): - [group_where(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, sql.Where)] idx = 0 token = tlist.token_next_match(idx, T.Keyword, 'WHERE') stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING') @@ -368,10 +366,9 @@ def group_where(tlist): token = tlist.token_next_match(idx, T.Keyword, 'WHERE') +@recurse(sql.Identifier, sql.Function, sql.Case) def group_aliased(tlist): clss = (sql.Identifier, sql.Function, sql.Case) - [group_aliased(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, clss)] idx = 0 token = tlist.token_next_by_instance(idx, clss) while token: @@ -390,9 +387,8 @@ def group_typecasts(tlist): _group_left_right(tlist, T.Punctuation, '::', sql.Identifier) +@recurse(sql.Function) def group_functions(tlist): - [group_functions(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, sql.Function)] idx = 0 token = tlist.token_next_by_type(idx, T.Name) while token: @@ -420,8 +416,8 @@ def group_order(tlist): token = tlist.token_next_by_type(idx, T.Keyword.Order) +@recurse() def align_comments(tlist): - [align_comments(sgroup) for sgroup in tlist.get_sublists()] token = tlist.token_next_by(i=sql.Comment) while token: before = tlist.token_prev(tlist.token_index(token)) diff --git a/sqlparse/utils.py b/sqlparse/utils.py index f2372c2..66dd8bc 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -122,6 +122,11 @@ def remove_quotes(val): def recurse(*cls): + """Function decorator to help with recursion + + :param cls: Classes to not recurse over + :return: function + """ def wrap(f): def wrapped_f(tlist): for sgroup in tlist.get_sublists(): -- cgit v1.2.1 From 4364b250c6b0632e7a39f8153e864a7f3d63833e Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 20:03:20 -0700 Subject: Add group matching M_tokens and refactor group matching remove slots in subclasses --- sqlparse/engine/grouping.py | 112 +++++++++----------------------------------- sqlparse/sql.py | 34 ++++++-------- sqlparse/utils.py | 2 +- 3 files changed, 38 insertions(+), 110 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index df967c3..a34706f 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -4,7 +4,7 @@ import itertools from sqlparse import sql from sqlparse import tokens as T -from sqlparse.utils import recurse +from sqlparse.utils import recurse, imt, find_matching def _group_left_right(tlist, ttype, value, cls, @@ -47,68 +47,36 @@ def _group_left_right(tlist, ttype, value, cls, ttype, value) -def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value): - depth = 1 - for tok in tlist.tokens[idx:]: - if tok.match(start_ttype, start_value): - depth += 1 - elif tok.match(end_ttype, end_value): - depth -= 1 - if depth == 1: - return tok - return None - - -def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, - cls, include_semicolon=False, recurse=False): - - [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, - cls, include_semicolon) for sgroup in tlist.get_sublists() - if recurse] - if isinstance(tlist, cls): - idx = 1 - else: - idx = 0 - token = tlist.token_next_match(idx, start_ttype, start_value) +def _group_matching(tlist, cls): + """Groups Tokens that have beginning and end. ie. parenthesis, brackets..""" + idx = 1 if imt(tlist, i=cls) else 0 + + token = tlist.token_next_by(m=cls.M_OPEN, idx=idx) while token: - tidx = tlist.token_index(token) - end = _find_matching(tidx, tlist, start_ttype, start_value, - end_ttype, end_value) - if end is None: - idx = tidx + 1 - else: - if include_semicolon: - next_ = tlist.token_next(tlist.token_index(end)) - if next_ and next_.match(T.Punctuation, ';'): - end = next_ - group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) - _group_matching(group, start_ttype, start_value, - end_ttype, end_value, cls, include_semicolon) - idx = tlist.token_index(group) + 1 - token = tlist.token_next_match(idx, start_ttype, start_value) + end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE) + if end is not None: + token = tlist.group_tokens(cls, tlist.tokens_between(token, end)) + _group_matching(token, cls) + token = tlist.token_next_by(m=cls.M_OPEN, idx=token) def group_if(tlist): - _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True) + _group_matching(tlist, sql.If) def group_for(tlist): - _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', - sql.For, True) + _group_matching(tlist, sql.For) def group_foreach(tlist): - _group_matching(tlist, T.Keyword, 'FOREACH', T.Keyword, 'END LOOP', - sql.For, True) + _group_matching(tlist, sql.For) def group_begin(tlist): - _group_matching(tlist, T.Keyword, 'BEGIN', T.Keyword, 'END', - sql.Begin, True) + _group_matching(tlist, sql.Begin) def group_as(tlist): - def _right_valid(token): # Currently limited to DML/DDL. Maybe additional more non SQL reserved # keywords should appear here (see issue8). @@ -130,7 +98,6 @@ def group_assignment(tlist): def group_comparison(tlist): - def _parts_valid(token): return (token.ttype in (T.String.Symbol, T.String.Single, T.Name, T.Number, T.Number.Float, @@ -140,13 +107,13 @@ def group_comparison(tlist): sql.Function)) or (token.ttype is T.Keyword and token.value.upper() in ['NULL', ])) + _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison, check_left=_parts_valid, check_right=_parts_valid) def group_case(tlist): - _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case, - include_semicolon=True, recurse=True) + _group_matching(tlist, sql.Case) def group_identifier(tlist): @@ -222,7 +189,7 @@ def group_identifier(tlist): and (isinstance(identifier_tokens[0], (sql.Function, sql.Parenthesis)) or identifier_tokens[0].ttype in ( - T.Literal.Number.Integer, T.Literal.Number.Float))): + T.Literal.Number.Integer, T.Literal.Number.Float))): group = tlist.group_tokens(sql.Identifier, identifier_tokens) idx = tlist.token_index(group, start=idx) + 1 else: @@ -284,47 +251,11 @@ def group_identifier_list(tlist): def group_brackets(tlist): - """Group parentheses () or square brackets [] - - This is just like _group_matching, but complicated by the fact that - round brackets can contain square bracket groups and vice versa - """ - - if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)): - idx = 1 - else: - idx = 0 - - # Find the first opening bracket - token = tlist.token_next_match(idx, T.Punctuation, ['(', '[']) - - while token: - start_val = token.value # either '(' or '[' - if start_val == '(': - end_val = ')' - group_class = sql.Parenthesis - else: - end_val = ']' - group_class = sql.SquareBrackets - - tidx = tlist.token_index(token) - - # Find the corresponding closing bracket - end = _find_matching(tidx, tlist, T.Punctuation, start_val, - T.Punctuation, end_val) - - if end is None: - idx = tidx + 1 - else: - group = tlist.group_tokens(group_class, - tlist.tokens_between(token, end)) + _group_matching(tlist, sql.SquareBrackets) - # Check for nested bracket groups within this group - group_brackets(group) - idx = tlist.token_index(group) + 1 - # Find the next opening bracket - token = tlist.token_next_match(idx, T.Punctuation, ['(', '[']) +def group_parenthesis(tlist): + _group_matching(tlist, sql.Parenthesis) @recurse(sql.Comment) @@ -431,6 +362,7 @@ def group(tlist): for func in [ group_comments, group_brackets, + group_parenthesis, group_functions, group_where, group_case, diff --git a/sqlparse/sql.py b/sqlparse/sql.py index a9884a5..777d582 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -552,8 +552,6 @@ class Identifier(TokenList): Identifiers may have aliases or typecasts. """ - __slots__ = ('value', 'ttype', 'tokens') - def is_wildcard(self): """Return ``True`` if this identifier contains a wildcard.""" token = self.token_next_by_type(0, T.Wildcard) @@ -588,8 +586,6 @@ class Identifier(TokenList): class IdentifierList(TokenList): """A list of :class:`~sqlparse.sql.Identifier`\'s.""" - __slots__ = ('value', 'ttype', 'tokens') - def get_identifiers(self): """Returns the identifiers. @@ -602,7 +598,8 @@ class IdentifierList(TokenList): class Parenthesis(TokenList): """Tokens between parenthesis.""" - __slots__ = ('value', 'ttype', 'tokens') + M_OPEN = (T.Punctuation, '(') + M_CLOSE = (T.Punctuation, ')') @property def _groupable_tokens(self): @@ -611,8 +608,8 @@ class Parenthesis(TokenList): class SquareBrackets(TokenList): """Tokens between square brackets""" - - __slots__ = ('value', 'ttype', 'tokens') + M_OPEN = (T.Punctuation, '[') + M_CLOSE = (T.Punctuation, ']') @property def _groupable_tokens(self): @@ -621,22 +618,22 @@ class SquareBrackets(TokenList): class Assignment(TokenList): """An assignment like 'var := val;'""" - __slots__ = ('value', 'ttype', 'tokens') class If(TokenList): """An 'if' clause with possible 'else if' or 'else' parts.""" - __slots__ = ('value', 'ttype', 'tokens') + M_OPEN = (T.Keyword, 'IF') + M_CLOSE = (T.Keyword, 'END IF') class For(TokenList): """A 'FOR' loop.""" - __slots__ = ('value', 'ttype', 'tokens') + M_OPEN = (T.Keyword, ('FOR', 'FOREACH')) + M_CLOSE = (T.Keyword, 'END LOOP') class Comparison(TokenList): """A comparison used for example in WHERE clauses.""" - __slots__ = ('value', 'ttype', 'tokens') @property def left(self): @@ -649,7 +646,6 @@ class Comparison(TokenList): class Comment(TokenList): """A comment.""" - __slots__ = ('value', 'ttype', 'tokens') def is_multiline(self): return self.tokens and self.tokens[0].ttype == T.Comment.Multiline @@ -657,13 +653,15 @@ class Comment(TokenList): class Where(TokenList): """A WHERE clause.""" - __slots__ = ('value', 'ttype', 'tokens') + M_OPEN = (T.Keyword, 'WHERE') + M_CLOSE = (T.Keyword, + ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING')) class Case(TokenList): """A CASE statement with one or more WHEN and possibly an ELSE part.""" - - __slots__ = ('value', 'ttype', 'tokens') + M_OPEN = (T.Keyword, 'CASE') + M_CLOSE = (T.Keyword, 'END') def get_cases(self): """Returns a list of 2-tuples (condition, value). @@ -713,8 +711,6 @@ class Case(TokenList): class Function(TokenList): """A function or procedure call.""" - __slots__ = ('value', 'ttype', 'tokens') - def get_parameters(self): """Return a list of parameters.""" parenthesis = self.tokens[-1] @@ -728,5 +724,5 @@ class Function(TokenList): class Begin(TokenList): """A BEGIN/END block.""" - - __slots__ = ('value', 'ttype', 'tokens') + M_OPEN = (T.Keyword, 'BEGIN') + M_CLOSE = (T.Keyword, 'END') diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 66dd8bc..90acb5c 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -166,7 +166,7 @@ def imt(token, i=None, m=None, t=None): def find_matching(tlist, token, M1, M2): idx = tlist.token_index(token) depth = 0 - for token in tlist[idx:]: + for token in tlist.tokens[idx:]: if token.match(*M1): depth += 1 elif token.match(*M2): -- cgit v1.2.1 From b5176ccebcb5f16913aa87e514e5605515ce1471 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 20:38:04 -0700 Subject: group all the one-offs --- sqlparse/engine/grouping.py | 52 ++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index a34706f..6e0ae91 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -280,21 +280,18 @@ def group_comments(tlist): @recurse(sql.Where) def group_where(tlist): - idx = 0 - token = tlist.token_next_match(idx, T.Keyword, 'WHERE') - stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING') + token = tlist.token_next_by(m=sql.Where.M_OPEN) while token: - tidx = tlist.token_index(token) - end = tlist.token_next_match(tidx + 1, T.Keyword, stopwords) + end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token) + if end is None: - end = tlist._groupable_tokens[-1] + tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1]) else: - end = tlist.tokens[tlist.token_index(end) - 1] - group = tlist.group_tokens(sql.Where, - tlist.tokens_between(token, end), - ignore_ws=True) - idx = tlist.token_index(group) - token = tlist.token_next_match(idx, T.Keyword, 'WHERE') + tokens = tlist.tokens_between( + token, tlist.tokens[tlist.token_index(end) - 1]) + + token = tlist.group_tokens(sql.Where, tokens) + token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token) @recurse(sql.Identifier, sql.Function, sql.Case) @@ -320,38 +317,31 @@ def group_typecasts(tlist): @recurse(sql.Function) def group_functions(tlist): - idx = 0 - token = tlist.token_next_by_type(idx, T.Name) + token = tlist.token_next_by(t=T.Name) while token: next_ = tlist.token_next(token) - if not isinstance(next_, sql.Parenthesis): - idx = tlist.token_index(token) + 1 - else: - func = tlist.group_tokens(sql.Function, - tlist.tokens_between(token, next_)) - idx = tlist.token_index(func) + 1 - token = tlist.token_next_by_type(idx, T.Name) + if imt(next_, i=sql.Parenthesis): + tokens = tlist.tokens_between(token, next_) + token = tlist.group_tokens(sql.Function, tokens) + token = tlist.token_next_by(t=T.Name, idx=token) def group_order(tlist): - idx = 0 - token = tlist.token_next_by_type(idx, T.Keyword.Order) + """Group together Identifier and Asc/Desc token""" + token = tlist.token_next_by(t=T.Keyword.Order) while token: prev = tlist.token_prev(token) - if isinstance(prev, sql.Identifier): - ido = tlist.group_tokens(sql.Identifier, - tlist.tokens_between(prev, token)) - idx = tlist.token_index(ido) + 1 - else: - idx = tlist.token_index(token) + 1 - token = tlist.token_next_by_type(idx, T.Keyword.Order) + if imt(prev, i=sql.Identifier, t=T.Number): + tokens = tlist.tokens_between(prev, token) + token = tlist.group_tokens(sql.Identifier, tokens) + token = tlist.token_next_by(t=T.Keyword.Order, idx=token) @recurse() def align_comments(tlist): token = tlist.token_next_by(i=sql.Comment) while token: - before = tlist.token_prev(tlist.token_index(token)) + before = tlist.token_prev(token) if isinstance(before, sql.TokenList): tokens = tlist.tokens_between(before, token) token = tlist.group_tokens(sql.TokenList, tokens, extend=True) -- cgit v1.2.1 From 015bf8393286301ca0d4dfd087b4ed305fa71d4a Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 21:09:09 -0700 Subject: refactor _group_left_right --- sqlparse/engine/grouping.py | 103 +++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 64 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 6e0ae91..ec3cd86 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -6,45 +6,33 @@ from sqlparse import sql from sqlparse import tokens as T from sqlparse.utils import recurse, imt, find_matching +M_ROLE = (T.Keyword, ('null', 'role')) +M_SEMICOLON = (T.Punctuation, ';') -def _group_left_right(tlist, ttype, value, cls, - check_right=lambda t: True, - check_left=lambda t: True, - include_semicolon=False): - [_group_left_right(sgroup, ttype, value, cls, check_right, check_left, - include_semicolon) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, cls)] - idx = 0 - token = tlist.token_next_match(idx, ttype, value) +T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float) +T_STRING = (T.String, T.String.Single, T.String.Symbol) +T_NAME = (T.Name, T.Name.Placeholder) + + +def _group_left_right(tlist, m, cls, + valid_left=lambda t: t is not None, + valid_right=lambda t: t is not None, + semicolon=False): + """Groups together tokens that are joined by a middle token. ie. x < y""" + [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon) + for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)] + + token = tlist.token_next_by(m=m) while token: - right = tlist.token_next(tlist.token_index(token)) - left = tlist.token_prev(tlist.token_index(token)) - if right is None or not check_right(right): - token = tlist.token_next_match(tlist.token_index(token) + 1, - ttype, value) - elif left is None or not check_left(left): - token = tlist.token_next_match(tlist.token_index(token) + 1, - ttype, value) - else: - if include_semicolon: - sright = tlist.token_next_match(tlist.token_index(right), - T.Punctuation, ';') - if sright is not None: - # only overwrite "right" if a semicolon is actually - # present. - right = sright - tokens = tlist.tokens_between(left, right)[1:] - if not isinstance(left, cls): - new = cls([left]) - new_idx = tlist.token_index(left) - tlist.tokens.remove(left) - tlist.tokens.insert(new_idx, new) - left = new - left.tokens.extend(tokens) - for t in tokens: - tlist.tokens.remove(t) - token = tlist.token_next_match(tlist.token_index(left) + 1, - ttype, value) + left, right = tlist.token_prev(token), tlist.token_next(token) + + if valid_left(left) and valid_right(right): + if semicolon: + sright = tlist.token_next_by(m=M_SEMICOLON, idx=right) + right = sright or right # only overwrite if a semicolon present. + tokens = tlist.tokens_between(left, right) + token = tlist.group_tokens(cls, tokens, extend=True) + token = tlist.token_next_by(m=m, idx=token) def _group_matching(tlist, cls): @@ -77,39 +65,26 @@ def group_begin(tlist): def group_as(tlist): - def _right_valid(token): - # Currently limited to DML/DDL. Maybe additional more non SQL reserved - # keywords should appear here (see issue8). - return token.ttype not in (T.DML, T.DDL) - - def _left_valid(token): - if token.ttype is T.Keyword and token.value in ('NULL',): - return True - return token.ttype is not T.Keyword - - _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier, - check_right=_right_valid, - check_left=_left_valid) + lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.value == 'NULL' + rfunc = lambda tk: not imt(tk, t=(T.DML, T.DDL)) + _group_left_right(tlist, (T.Keyword, 'AS'), sql.Identifier, + valid_left=lfunc, valid_right=rfunc) def group_assignment(tlist): - _group_left_right(tlist, T.Assignment, ':=', sql.Assignment, - include_semicolon=True) + _group_left_right(tlist, (T.Assignment, ':='), sql.Assignment, + semicolon=True) def group_comparison(tlist): - def _parts_valid(token): - return (token.ttype in (T.String.Symbol, T.String.Single, - T.Name, T.Number, T.Number.Float, - T.Number.Integer, T.Literal, - T.Literal.Number.Integer, T.Name.Placeholder) - or isinstance(token, (sql.Identifier, sql.Parenthesis, - sql.Function)) - or (token.ttype is T.Keyword - and token.value.upper() in ['NULL', ])) + I_COMPERABLE = (sql.Parenthesis, sql.Function, sql.Identifier) + T_COMPERABLE = T_NUMERICAL + T_STRING + T_NAME + + func = lambda tk: imt(tk, t=T_COMPERABLE, i=I_COMPERABLE) or ( + imt(tk, t=T.Keyword) and tk.value.upper() == 'NULL') - _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison, - check_left=_parts_valid, check_right=_parts_valid) + _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison, + valid_left=func, valid_right=func) def group_case(tlist): @@ -312,7 +287,7 @@ def group_aliased(tlist): def group_typecasts(tlist): - _group_left_right(tlist, T.Punctuation, '::', sql.Identifier) + _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier) @recurse(sql.Function) -- cgit v1.2.1 From e4bf805cad1a41b989b59ac58f526c6bc9b0e338 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 21:39:54 -0700 Subject: refactor remove quotes --- sqlparse/sql.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 777d582..673e452 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -7,7 +7,7 @@ import sys from sqlparse import tokens as T from sqlparse.compat import string_types, u -from sqlparse.utils import imt +from sqlparse.utils import imt, remove_quotes class Token(object): @@ -185,14 +185,6 @@ class TokenList(Token): if (token.is_group() and (max_depth is None or depth < max_depth)): token._pprint_tree(max_depth, depth + 1) - def _remove_quotes(self, val): - """Helper that removes surrounding quotes from strings.""" - if not val: - return val - if val[0] in ('"', '\'') and val[-1] == val[0]: - val = val[1:-1] - return val - def get_token_at_offset(self, offset): """Returns the token that is on position offset.""" idx = 0 @@ -482,7 +474,7 @@ class TokenList(Token): prev_ = self.token_prev(self.token_index(dot)) if prev_ is None: # something must be verry wrong here.. return None - return self._remove_quotes(prev_.value) + return remove_quotes(prev_.value) def _get_first_name(self, idx=None, reverse=False, keywords=False): """Returns the name of the first token with a name""" @@ -499,7 +491,7 @@ class TokenList(Token): for tok in tokens: if tok.ttype in types: - return self._remove_quotes(tok.value) + return remove_quotes(tok.value) elif isinstance(tok, Identifier) or isinstance(tok, Function): return tok.get_name() return None -- cgit v1.2.1 From 2d0c6d6d33e4bb974037f0eeb68d6a05262e4373 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 21:40:25 -0700 Subject: refactor identifier list and alias grouping --- sqlparse/engine/grouping.py | 88 ++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 62 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index ec3cd86..11c2b38 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -8,6 +8,7 @@ from sqlparse.utils import recurse, imt, find_matching M_ROLE = (T.Keyword, ('null', 'role')) M_SEMICOLON = (T.Punctuation, ';') +M_COMMA = (T.Punctuation, ',') T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float) T_STRING = (T.String, T.String.Single, T.String.Symbol) @@ -174,55 +175,21 @@ def group_identifier(tlist): @recurse(sql.IdentifierList) def group_identifier_list(tlist): - # Allowed list items - fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function, - sql.Case)), - lambda t: t.is_whitespace(), - lambda t: t.ttype == T.Name, - lambda t: t.ttype == T.Wildcard, - lambda t: t.match(T.Keyword, 'null'), - lambda t: t.match(T.Keyword, 'role'), - lambda t: t.ttype == T.Number.Integer, - lambda t: t.ttype == T.String.Single, - lambda t: t.ttype == T.Name.Placeholder, - lambda t: t.ttype == T.Keyword, - lambda t: isinstance(t, sql.Comparison), - lambda t: isinstance(t, sql.Comment), - lambda t: t.ttype == T.Comment.Multiline, - ] - tcomma = tlist.token_next_match(0, T.Punctuation, ',') - start = None - while tcomma is not None: - # Go back one idx to make sure to find the correct tcomma - idx = tlist.token_index(tcomma) - before = tlist.token_prev(idx) - after = tlist.token_next(idx) - # Check if the tokens around tcomma belong to a list - bpassed = apassed = False - for func in fend1_funcs: - if before is not None and func(before): - bpassed = True - if after is not None and func(after): - apassed = True - if not bpassed or not apassed: - # Something's wrong here, skip ahead to next "," - start = None - tcomma = tlist.token_next_match(idx + 1, - T.Punctuation, ',') - else: - if start is None: - start = before - after_idx = tlist.token_index(after, start=idx) - next_ = tlist.token_next(after_idx) - if next_ is None or not next_.match(T.Punctuation, ','): - # Reached the end of the list - tokens = tlist.tokens_between(start, after) - group = tlist.group_tokens(sql.IdentifierList, tokens) - start = None - tcomma = tlist.token_next_match(tlist.token_index(group) + 1, - T.Punctuation, ',') - else: - tcomma = next_ + I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, + sql.IdentifierList) # sql.Operation + T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME + + (T.Keyword, T.Comment, T.Wildcard)) + + func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST) + token = tlist.token_next_by(m=M_COMMA) + + while token: + before, after = tlist.token_prev(token), tlist.token_next(token) + + if func(before) and func(after): + tokens = tlist.tokens_between(before, after) + token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True) + token = tlist.token_next_by(m=M_COMMA, idx=token) def group_brackets(tlist): @@ -269,21 +236,18 @@ def group_where(tlist): token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token) -@recurse(sql.Identifier, sql.Function, sql.Case) +@recurse() def group_aliased(tlist): - clss = (sql.Identifier, sql.Function, sql.Case) - idx = 0 - token = tlist.token_next_by_instance(idx, clss) + I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier, + ) # sql.Operation) + + token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: - next_ = tlist.token_next(tlist.token_index(token)) - if next_ is not None and isinstance(next_, clss): - if not next_.value.upper().startswith('VARCHAR'): - grp = tlist.tokens_between(token, next_)[1:] - token.tokens.extend(grp) - for t in grp: - tlist.tokens.remove(t) - idx = tlist.token_index(token) + 1 - token = tlist.token_next_by_instance(idx, clss) + next_ = tlist.token_next(token) + if imt(next_, i=sql.Identifier): + tokens = tlist.tokens_between(token, next_) + token = tlist.group_tokens(sql.Identifier, tokens, extend=True) + token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token) def group_typecasts(tlist): -- cgit v1.2.1 From 1e770d9b99983fa82375b71c5b3f5d8c6bb1a72e Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 10 May 2016 21:50:46 -0700 Subject: separate identifier grouping into its components leaving sql.Operations in for future PR small behavior changed for invalid identifier --- sqlparse/engine/grouping.py | 127 +++++++++++++++++--------------------------- tests/test_grouping.py | 6 +-- 2 files changed, 53 insertions(+), 80 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 11c2b38..8fb4af1 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -import itertools - from sqlparse import sql from sqlparse import tokens as T from sqlparse.utils import recurse, imt, find_matching @@ -92,85 +90,57 @@ def group_case(tlist): _group_matching(tlist, sql.Case) +@recurse(sql.Identifier) def group_identifier(tlist): - def _consume_cycle(tl, i): - # TODO: Usage of Wildcard token is ambivalent here. - x = itertools.cycle(( - lambda y: (y.match(T.Punctuation, '.') - or y.ttype in (T.Operator, - T.Wildcard, - T.Name) - or isinstance(y, sql.SquareBrackets)), - lambda y: (y.ttype in (T.String.Symbol, - T.Name, - T.Wildcard, - T.Literal.String.Single, - T.Literal.Number.Integer, - T.Literal.Number.Float) - or isinstance(y, (sql.Parenthesis, - sql.SquareBrackets, - sql.Function))))) - for t in tl.tokens[i:]: - # Don't take whitespaces into account. - if t.ttype is T.Whitespace: - yield t - continue - if next(x)(t): - yield t - else: - if isinstance(t, sql.Comment) and t.is_multiline(): - yield t - if t.ttype is T.Keyword.Order: - yield t - return - - def _next_token(tl, i): - # chooses the next token. if two tokens are found then the - # first is returned. - t1 = tl.token_next_by_type( - i, (T.String.Symbol, T.Name, T.Literal.Number.Integer, - T.Literal.Number.Float)) - - i1 = tl.token_index(t1, start=i) if t1 else None - t2_end = None if i1 is None else i1 + 1 - t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis), - end=t2_end) - - if t1 and t2: - i2 = tl.token_index(t2, start=i) - if i1 > i2: - return t2 - else: - return t1 - elif t1: - return t1 - else: - return t2 + T_IDENT = (T.String.Symbol, T.Name) + + token = tlist.token_next_by(t=T_IDENT) + while token: + token = tlist.group_tokens(sql.Identifier, [token, ]) + token = tlist.token_next_by(t=T_IDENT, idx=token) - # bottom up approach: group subgroups first - [group_identifier(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, sql.Identifier)] - # real processing - idx = 0 - token = _next_token(tlist, idx) +def group_period(tlist): + lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier), + t=(T.Name, T.String.Symbol,)) + + rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function), + t=(T.Name, T.String.Symbol, T.Wildcard)) + + _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier, + valid_left=lfunc, valid_right=rfunc) + + +def group_arrays(tlist): + token = tlist.token_next_by(i=sql.SquareBrackets) while token: - identifier_tokens = [token] + list( - _consume_cycle(tlist, - tlist.token_index(token, start=idx) + 1)) - # remove trailing whitespace - if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace: - identifier_tokens = identifier_tokens[:-1] - if not (len(identifier_tokens) == 1 - and (isinstance(identifier_tokens[0], (sql.Function, - sql.Parenthesis)) - or identifier_tokens[0].ttype in ( - T.Literal.Number.Integer, T.Literal.Number.Float))): - group = tlist.group_tokens(sql.Identifier, identifier_tokens) - idx = tlist.token_index(group, start=idx) + 1 - else: - idx += 1 - token = _next_token(tlist, idx) + prev = tlist.token_prev(idx=token) + if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), + t=(T.Name, T.String.Symbol,)): + tokens = tlist.tokens_between(prev, token) + token = tlist.group_tokens(sql.Identifier, tokens, extend=True) + token = tlist.token_next_by(i=sql.SquareBrackets, idx=token) + + +@recurse(sql.Identifier) +def group_operator(tlist): + I_CYCLE = (sql.SquareBrackets, sql.Parenthesis, sql.Function, + sql.Identifier,) # sql.Operation) + # wilcards wouldn't have operations next to them + T_CYCLE = T_NUMERICAL + T_STRING + T_NAME # + T.Wildcard + func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) + + token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) + while token: + left, right = tlist.token_prev(token), tlist.token_next(token) + + if func(left) and func(right): + token.ttype = T.Operator + tokens = tlist.tokens_between(left, right) + # token = tlist.group_tokens(sql.Operation, tokens) + token = tlist.group_tokens(sql.Identifier, tokens) + + token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token) @recurse(sql.IdentifierList) @@ -295,7 +265,10 @@ def group(tlist): group_functions, group_where, group_case, + group_period, + group_arrays, group_identifier, + group_operator, group_order, group_typecasts, group_as, diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 7dc1269..daaec9b 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -89,9 +89,9 @@ class TestGrouping(TestCaseBase): p = sqlparse.parse('a.')[0] self.assert_(isinstance(p.tokens[0], sql.Identifier)) self.assertEqual(p.tokens[0].has_alias(), False) - self.assertEqual(p.tokens[0].get_name(), None) - self.assertEqual(p.tokens[0].get_real_name(), None) - self.assertEqual(p.tokens[0].get_parent_name(), 'a') + self.assertEqual(p.tokens[0].get_name(), 'a') + self.assertEqual(p.tokens[0].get_real_name(), 'a') + self.assertEqual(p.tokens[0].get_parent_name(), None) def test_identifier_as_invalid(self): # issue8 p = sqlparse.parse('foo as select *')[0] -- cgit v1.2.1 From 6748b48adc76491d3cdef5794ddd0731df0d3418 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 11 May 2016 04:22:10 -0700 Subject: refactor sql.py functions --- examples/column_defs_lowlevel.py | 2 +- sqlparse/filters.py | 4 +- sqlparse/sql.py | 112 +++++++++++---------------------------- 3 files changed, 32 insertions(+), 86 deletions(-) diff --git a/examples/column_defs_lowlevel.py b/examples/column_defs_lowlevel.py index 9e945d4..e804bb2 100644 --- a/examples/column_defs_lowlevel.py +++ b/examples/column_defs_lowlevel.py @@ -15,7 +15,7 @@ SQL = """CREATE TABLE foo ( parsed = sqlparse.parse(SQL)[0] # extract the parenthesis which holds column definitions -par = parsed.token_next_by_instance(0, sqlparse.sql.Parenthesis) +par = parsed.token_next_by(i=sqlparse.sql.Parenthesis) def extract_definitions(token_list): diff --git a/sqlparse/filters.py b/sqlparse/filters.py index 68e9b1a..72f17d0 100644 --- a/sqlparse/filters.py +++ b/sqlparse/filters.py @@ -200,9 +200,7 @@ class StripCommentsFilter: def _get_next_comment(self, tlist): # TODO(andi) Comment types should be unified, see related issue38 - token = tlist.token_next_by_instance(0, sql.Comment) - if token is None: - token = tlist.token_next_by_type(0, T.Comment) + token = tlist.token_next_by(i=sql.Comment, t=T.Comment) return token def _process(self, tlist): diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 673e452..9afdac3 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -255,12 +255,9 @@ class TokenList(Token): if *ignore_comments* is ``True`` (default: ``False``), comments are ignored too. """ - for token in self.tokens: - if ignore_whitespace and token.is_whitespace(): - continue - if ignore_comments and imt(token, i=Comment): - continue - return token + funcs = lambda tk: not ((ignore_whitespace and tk.is_whitespace()) or + (ignore_comments and imt(tk, i=Comment))) + return self._token_matching(funcs) def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): funcs = lambda tk: imt(tk, i, m, t) @@ -274,48 +271,26 @@ class TokenList(Token): If no matching token can be found ``None`` is returned. """ - if not isinstance(clss, (list, tuple)): - clss = (clss,) - - for token in self.tokens[idx:end]: - if isinstance(token, clss): - return token + funcs = lambda tk: imt(tk, i=clss) + return self._token_matching(funcs, idx, end) def token_next_by_type(self, idx, ttypes): """Returns next matching token by it's token type.""" - if not isinstance(ttypes, (list, tuple)): - ttypes = [ttypes] - - for token in self.tokens[idx:]: - if token.ttype in ttypes: - return token + funcs = lambda tk: imt(tk, t=ttypes) + return self._token_matching(funcs, idx) def token_next_match(self, idx, ttype, value, regex=False): """Returns next token where it's ``match`` method returns ``True``.""" - if not isinstance(idx, int): - idx = self.token_index(idx) - - for n in range(idx, len(self.tokens)): - token = self.tokens[n] - if token.match(ttype, value, regex): - return token + funcs = lambda tk: imt(tk, m=(ttype, value, regex)) + return self._token_matching(funcs, idx) def token_not_matching(self, idx, funcs): - for token in self.tokens[idx:]: - passed = False - for func in funcs: - if func(token): - passed = True - break - - if not passed: - return token + funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs + funcs = [lambda tk: not func(tk) for func in funcs] + return self._token_matching(funcs, idx) def token_matching(self, idx, funcs): - for token in self.tokens[idx:]: - for func in funcs: - if func(token): - return token + return self._token_matching(funcs, idx) def token_prev(self, idx, skip_ws=True): """Returns the previous token relative to *idx*. @@ -323,17 +298,10 @@ class TokenList(Token): If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. ``None`` is returned if there's no previous token. """ - if idx is None: - return None - - if not isinstance(idx, int): - idx = self.token_index(idx) - - while idx: - idx -= 1 - if self.tokens[idx].is_whitespace() and skip_ws: - continue - return self.tokens[idx] + if isinstance(idx, int): + idx += 1 # alot of code usage current pre-compensates for this + funcs = lambda tk: not (tk.is_whitespace() and skip_ws) + return self._token_matching(funcs, idx, reverse=True) def token_next(self, idx, skip_ws=True): """Returns the next token relative to *idx*. @@ -341,43 +309,24 @@ class TokenList(Token): If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. ``None`` is returned if there's no next token. """ - if idx is None: - return None - - if not isinstance(idx, int): - idx = self.token_index(idx) - - while idx < len(self.tokens) - 1: - idx += 1 - if self.tokens[idx].is_whitespace() and skip_ws: - continue - return self.tokens[idx] + if isinstance(idx, int): + idx += 1 # alot of code usage current pre-compensates for this + funcs = lambda tk: not (tk.is_whitespace() and skip_ws) + return self._token_matching(funcs, idx) def token_index(self, token, start=0): """Return list index of token.""" - if start > 0: - # Performing `index` manually is much faster when starting - # in the middle of the list of tokens and expecting to find - # the token near to the starting index. - for i in range(start, len(self.tokens)): - if self.tokens[i] == token: - return i - return -1 - return self.tokens.index(token) - - def tokens_between(self, start, end, exclude_end=False): + start = self.token_index(start) if not isinstance(start, int) else start + return start + self.tokens[start:].index(token) + + def tokens_between(self, start, end, include_end=True): """Return all tokens between (and including) start and end. - If *exclude_end* is ``True`` (default is ``False``) the end token - is included too. + If *include_end* is ``False`` (default is ``True``) the end token + is excluded. """ - # FIXME(andi): rename exclude_end to inlcude_end - if exclude_end: - offset = 0 - else: - offset = 1 - end_idx = self.token_index(end) + offset start_idx = self.token_index(start) + end_idx = include_end + self.token_index(end) return self.tokens[start_idx:end_idx] def group_tokens(self, grp_cls, tokens, ignore_ws=False, extend=False): @@ -431,13 +380,12 @@ class TokenList(Token): """Returns the alias for this identifier or ``None``.""" # "name AS alias" - kw = self.token_next_match(0, T.Keyword, 'AS') + kw = self.token_next_by(m=(T.Keyword, 'AS')) if kw is not None: return self._get_first_name(kw, keywords=True) # "name alias" or "complicated column expression alias" - if len(self.tokens) > 2 \ - and self.token_next_by_type(0, T.Whitespace) is not None: + if len(self.tokens) > 2 and self.token_next_by(t=T.Whitespace): return self._get_first_name(reverse=True) return None -- cgit v1.2.1 From 955996e3e5c49fb6b7f200ceecee2f8082656ac4 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 11 May 2016 04:53:12 -0700 Subject: refactor group_comments --- sqlparse/engine/grouping.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 8fb4af1..e30abab 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -172,22 +172,16 @@ def group_parenthesis(tlist): @recurse(sql.Comment) def group_comments(tlist): - idx = 0 - token = tlist.token_next_by_type(idx, T.Comment) + token = tlist.token_next_by(t=T.Comment) while token: - tidx = tlist.token_index(token) - end = tlist.token_not_matching(tidx + 1, - [lambda t: t.ttype in T.Comment, - lambda t: t.is_whitespace()]) - if end is None: - idx = tidx + 1 - else: - eidx = tlist.token_index(end) - grp_tokens = tlist.tokens_between(token, - tlist.token_prev(eidx, False)) - group = tlist.group_tokens(sql.Comment, grp_tokens) - idx = tlist.token_index(group) - token = tlist.token_next_by_type(idx, T.Comment) + end = tlist.token_not_matching( + token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) + if end is not None: + end = tlist.token_prev(end, False) + tokens = tlist.tokens_between(token, end) + token = tlist.group_tokens(sql.Comment, tokens) + + token = tlist.token_next_by(t=T.Comment, idx=token) @recurse(sql.Where) -- cgit v1.2.1