From 6145070d6590f1e8f7fc4d86fb0a1061bc1a47d9 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Wed, 1 Jun 2016 20:23:19 +0200 Subject: Call `Token`-methods index based. A lot of methods have token-to-idx magic due to `Token._find_matching` converting tokens to indexes. Unknowingly, this turns innocent looking algorithms into O(n^2) (or worse). This does not solve the problem, but makes it more clear by moving the call to `Token.token_index` obvious at the call-site, at the cost of repeating it over-and-over. --- sqlparse/engine/grouping.py | 50 +++++++++++++++++++++++---------------------- sqlparse/filters.py | 2 +- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index f7953e7..39bcf8e 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -23,15 +23,17 @@ def _group_left_right(tlist, m, cls, token = tlist.token_next_by(m=m) while token: - left, right = tlist.token_prev(token), tlist.token_next(token) + tidx = tlist.token_index(token) + left, right = tlist.token_prev(tidx), tlist.token_next(tidx) if valid_left(left) and valid_right(right): if semicolon: - sright = tlist.token_next_by(m=M_SEMICOLON, idx=right) + sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) right = sright or right # only overwrite if a semicolon present. tokens = tlist.tokens_between(left, right) + # Luckily, this leaves the position of `token` intact. token = tlist.group_tokens(cls, tokens, extend=True) - token = tlist.token_next_by(m=m, idx=token) + token = tlist.token_next_by(m=m, idx=tidx + 1) def _group_matching(tlist, cls): @@ -44,7 +46,7 @@ def _group_matching(tlist, cls): if end is not None: token = tlist.group_tokens(cls, tlist.tokens_between(token, end)) _group_matching(token, cls) - token = tlist.token_next_by(m=cls.M_OPEN, idx=token) + token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1) def group_if(tlist): @@ -97,7 +99,7 @@ def group_identifier(tlist): token = tlist.token_next_by(t=T_IDENT) while token: token = tlist.group_tokens(sql.Identifier, [token, ]) - token = tlist.token_next_by(t=T_IDENT, idx=token) + token = tlist.token_next_by(t=T_IDENT, idx=tlist.token_index(token) + 1) def group_period(tlist): @@ -114,12 +116,12 @@ def group_period(tlist): def group_arrays(tlist): token = tlist.token_next_by(i=sql.SquareBrackets) while token: - prev = tlist.token_prev(idx=token) + prev = tlist.token_prev(idx=tlist.token_index(token)) if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): tokens = tlist.tokens_between(prev, token) token = tlist.group_tokens(sql.Identifier, tokens, extend=True) - token = tlist.token_next_by(i=sql.SquareBrackets, idx=token) + token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1) @recurse(sql.Identifier) @@ -132,7 +134,7 @@ def group_operator(tlist): token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) while token: - left, right = tlist.token_prev(token), tlist.token_next(token) + left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) if func(left) and func(right): token.ttype = T.Operator @@ -140,7 +142,7 @@ def group_operator(tlist): # token = tlist.group_tokens(sql.Operation, tokens) token = tlist.group_tokens(sql.Identifier, tokens) - token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token) + token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1) @recurse(sql.IdentifierList) @@ 
-154,12 +156,12 @@ def group_identifier_list(tlist): token = tlist.token_next_by(m=M_COMMA) while token: - before, after = tlist.token_prev(token), tlist.token_next(token) + before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) if func(before) and func(after): tokens = tlist.tokens_between(before, after) token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True) - token = tlist.token_next_by(m=M_COMMA, idx=token) + token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1) def group_brackets(tlist): @@ -175,20 +177,20 @@ def group_comments(tlist): token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) + tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) if end is not None: - end = tlist.token_prev(end, False) + end = tlist.token_prev(tlist.token_index(end), False) tokens = tlist.tokens_between(token, end) token = tlist.group_tokens(sql.Comment, tokens) - token = tlist.token_next_by(t=T.Comment, idx=token) + token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1) @recurse(sql.Where) def group_where(tlist): token = tlist.token_next_by(m=sql.Where.M_OPEN) while token: - end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token) + end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1) if end is None: tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1]) @@ -197,7 +199,7 @@ def group_where(tlist): token, tlist.tokens[tlist.token_index(end) - 1]) token = tlist.group_tokens(sql.Where, tokens) - token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token) + token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1) @recurse() @@ -207,11 +209,11 @@ def group_aliased(tlist): token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: - next_ = tlist.token_next(token) + next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Identifier): tokens = tlist.tokens_between(token, next_) token = tlist.group_tokens(sql.Identifier, tokens, extend=True) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token) + token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1) def group_typecasts(tlist): @@ -231,33 +233,33 @@ def group_functions(tlist): return token = tlist.token_next_by(t=T.Name) while token: - next_ = tlist.token_next(token) + next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Parenthesis): tokens = tlist.tokens_between(token, next_) token = tlist.group_tokens(sql.Function, tokens) - token = tlist.token_next_by(t=T.Name, idx=token) + token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1) def group_order(tlist): """Group together Identifier and Asc/Desc token""" token = tlist.token_next_by(t=T.Keyword.Order) while token: - prev = tlist.token_prev(token) + prev = tlist.token_prev(tlist.token_index(token)) if imt(prev, i=sql.Identifier, t=T.Number): tokens = tlist.tokens_between(prev, token) token = tlist.group_tokens(sql.Identifier, tokens) - token = tlist.token_next_by(t=T.Keyword.Order, idx=token) + token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1) @recurse() def align_comments(tlist): token = tlist.token_next_by(i=sql.Comment) while token: - before = tlist.token_prev(token) + before = tlist.token_prev(tlist.token_index(token)) if isinstance(before, sql.TokenList): tokens = tlist.tokens_between(before, token) 
token = tlist.group_tokens(sql.TokenList, tokens, extend=True) - token = tlist.token_next_by(i=sql.Comment, idx=token) + token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1) def group(tlist): diff --git a/sqlparse/filters.py b/sqlparse/filters.py index 72f17d0..095ee85 100644 --- a/sqlparse/filters.py +++ b/sqlparse/filters.py @@ -340,7 +340,7 @@ class ReindentFilter: offset += 1 uprev = u(prev) if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))): - nl = tlist.token_next(token) + nl = tlist.token_next(tlist.token_index(token)) else: nl = self.nl() added.add(nl) -- cgit v1.2.1 From 896774cb5298924abbcea81b9b90f1c7c10b3d6a Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 07:38:27 +0200 Subject: Special-case group_tokens(..., tokens_between()) When having been guaranteed that the tokens form a range, it is possible to get rid of a lot of calls to `Token.tokens.remove(...)` which are expensive. --- sqlparse/engine/grouping.py | 38 ++++++++++++++------------------------ sqlparse/sql.py | 23 +++++++++++++++++++++++ 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 39bcf8e..ad7da9f 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -30,9 +30,8 @@ def _group_left_right(tlist, m, cls, if semicolon: sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) right = sright or right # only overwrite if a semicolon present. - tokens = tlist.tokens_between(left, right) # Luckily, this leaves the position of `token` intact. - token = tlist.group_tokens(cls, tokens, extend=True) + token = tlist.group_tokens_between(cls, left, right, extend=True) token = tlist.token_next_by(m=m, idx=tidx + 1) @@ -44,7 +43,7 @@ def _group_matching(tlist, cls): while token: end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE) if end is not None: - token = tlist.group_tokens(cls, tlist.tokens_between(token, end)) + token = tlist.group_tokens_between(cls, token, end) _group_matching(token, cls) token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1) @@ -119,8 +118,7 @@ def group_arrays(tlist): prev = tlist.token_prev(idx=tlist.token_index(token)) if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): - tokens = tlist.tokens_between(prev, token) - token = tlist.group_tokens(sql.Identifier, tokens, extend=True) + token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True) token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1) @@ -138,9 +136,8 @@ def group_operator(tlist): if func(left) and func(right): token.ttype = T.Operator - tokens = tlist.tokens_between(left, right) - # token = tlist.group_tokens(sql.Operation, tokens) - token = tlist.group_tokens(sql.Identifier, tokens) + # token = tlist.group_tokens_between(sql.Operation, left, right) + token = tlist.group_tokens_between(sql.Identifier, left, right) token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1) @@ -159,8 +156,7 @@ def group_identifier_list(tlist): before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) if func(before) and func(after): - tokens = tlist.tokens_between(before, after) - token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True) + token = tlist.group_tokens_between(sql.IdentifierList, before, after, extend=True) token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1) @@ 
-180,8 +176,7 @@ def group_comments(tlist): tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) if end is not None: end = tlist.token_prev(tlist.token_index(end), False) - tokens = tlist.tokens_between(token, end) - token = tlist.group_tokens(sql.Comment, tokens) + token = tlist.group_tokens_between(sql.Comment, token, end) token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1) @@ -193,12 +188,11 @@ def group_where(tlist): end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1) if end is None: - tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1]) + end = tlist._groupable_tokens[-1] else: - tokens = tlist.tokens_between( - token, tlist.tokens[tlist.token_index(end) - 1]) + end = tlist.tokens[tlist.token_index(end) - 1] - token = tlist.group_tokens(sql.Where, tokens) + token = tlist.group_tokens_between(sql.Where, token, end) token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1) @@ -211,8 +205,7 @@ def group_aliased(tlist): while token: next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Identifier): - tokens = tlist.tokens_between(token, next_) - token = tlist.group_tokens(sql.Identifier, tokens, extend=True) + token = tlist.group_tokens_between(sql.Identifier, token, next_, extend=True) token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1) @@ -235,8 +228,7 @@ def group_functions(tlist): while token: next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Parenthesis): - tokens = tlist.tokens_between(token, next_) - token = tlist.group_tokens(sql.Function, tokens) + token = tlist.group_tokens_between(sql.Function, token, next_) token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1) @@ -246,8 +238,7 @@ def group_order(tlist): while token: prev = tlist.token_prev(tlist.token_index(token)) if imt(prev, i=sql.Identifier, t=T.Number): - tokens = tlist.tokens_between(prev, token) - token = tlist.group_tokens(sql.Identifier, tokens) + token = tlist.group_tokens_between(sql.Identifier, prev, token) token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1) @@ -257,8 +248,7 @@ def align_comments(tlist): while token: before = tlist.token_prev(tlist.token_index(token)) if isinstance(before, sql.TokenList): - tokens = tlist.tokens_between(before, token) - token = tlist.group_tokens(sql.TokenList, tokens, extend=True) + token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True) token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 9afdac3..81cd8e9 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -329,6 +329,29 @@ class TokenList(Token): end_idx = include_end + self.token_index(end) return self.tokens[start_idx:end_idx] + def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=False): + """Replace tokens by an instance of *grp_cls*.""" + start_idx = self.token_index(start) + end_idx = self.token_index(end) + include_end + tokens = self.tokens[start_idx:end_idx] + + if extend and isinstance(start, grp_cls): + subtokens = self.tokens[start_idx+1:end_idx] + + grp = start + grp.tokens.extend(subtokens) + del self.tokens[start_idx+1:end_idx] + grp.value = start.__str__() + else: + subtokens = self.tokens[start_idx:end_idx] + grp = grp_cls(tokens) + self.tokens[start_idx:end_idx] = [grp] + grp.parent = self + + for token in subtokens: + token.parent = grp + + return grp 
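# Illustrative aside, not part of the patch above: group_tokens_between() can
# replace the grouped range with a single slice assignment because the tokens
# are guaranteed to be contiguous. The older group_tokens() path removed each
# grouped token with list.remove(), which rescans and shifts the list on every
# call, so grouping k tokens out of n cost roughly O(k * n). A minimal sketch
# of the two shapes, using plain lists and hypothetical names (and assuming the
# tokens in the range are distinct objects):
def replace_range_slow(tokens, start_idx, end_idx, grp):
    grouped = tokens[start_idx:end_idx]
    for tok in grouped:                 # each remove() is a linear scan-and-shift
        tokens.remove(tok)
    tokens.insert(start_idx, grp)

def replace_range_fast(tokens, start_idx, end_idx, grp):
    tokens[start_idx:end_idx] = [grp]   # one slice assignment, one shift of the tail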
def group_tokens(self, grp_cls, tokens, ignore_ws=False, extend=False): """Replace tokens by an instance of *grp_cls*.""" if ignore_ws: -- cgit v1.2.1 From d4cc0644c8348da5e49c58df5e26a3e969045249 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 08:30:27 +0200 Subject: Replace _group_matching with an inward-out grouping algorithm All the matching between open/close was done all the time, first finding the matching closing token, and then grouping the tokens in between, and recurse over the newly created list. Instead, it is more efficient to look for the previous open-token on finding a closing-token, group these two together, and then continue on. squashed: Handle token indices in group_tokens_between and find_matching. --- sqlparse/engine/grouping.py | 28 ++++++++++++++++++++-------- sqlparse/sql.py | 13 +++++++++---- sqlparse/utils.py | 2 +- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index ad7da9f..e004eae 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -2,7 +2,7 @@ from sqlparse import sql from sqlparse import tokens as T -from sqlparse.utils import recurse, imt, find_matching +from sqlparse.utils import recurse, imt M_ROLE = (T.Keyword, ('null', 'role')) M_SEMICOLON = (T.Punctuation, ';') @@ -39,13 +39,25 @@ def _group_matching(tlist, cls): """Groups Tokens that have beginning and end. ie. parenthesis, brackets..""" idx = 1 if imt(tlist, i=cls) else 0 - token = tlist.token_next_by(m=cls.M_OPEN, idx=idx) - while token: - end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE) - if end is not None: - token = tlist.group_tokens_between(cls, token, end) - _group_matching(token, cls) - token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1) + opens = [] + + while True: + try: + token = tlist.tokens[idx] + except IndexError: + break + + if token.match(*cls.M_OPEN): + opens.append(idx) + elif token.match(*cls.M_CLOSE): + try: + open_idx = opens.pop() + except IndexError: + break + tlist.group_tokens_between(cls, open_idx, idx) + idx = open_idx + + idx += 1 def group_if(tlist): diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 81cd8e9..dfe0430 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -331,9 +331,14 @@ class TokenList(Token): def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=False): """Replace tokens by an instance of *grp_cls*.""" - start_idx = self.token_index(start) - end_idx = self.token_index(end) + include_end - tokens = self.tokens[start_idx:end_idx] + if isinstance(start, int): + start_idx = start + start = self.tokens[start_idx] + else: + start_idx = self.token_index(start) + + end_idx = self.token_index(end) if not isinstance(end, int) else end + end_idx += include_end if extend and isinstance(start, grp_cls): subtokens = self.tokens[start_idx+1:end_idx] @@ -344,7 +349,7 @@ class TokenList(Token): grp.value = start.__str__() else: subtokens = self.tokens[start_idx:end_idx] - grp = grp_cls(tokens) + grp = grp_cls(subtokens) self.tokens[start_idx:end_idx] = [grp] grp.parent = self diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 90acb5c..5e01f58 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -164,7 +164,7 @@ def imt(token, i=None, m=None, t=None): def find_matching(tlist, token, M1, M2): - idx = tlist.token_index(token) + idx = tlist.token_index(token) if not isinstance(token, int) else token depth = 0 for token in tlist.tokens[idx:]: if token.match(*M1): -- cgit v1.2.1 
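The inward-out approach in the commit above is the classic single-pass bracket matcher: remember the positions of open tokens on a stack and, when a close token shows up, pop the most recent open and group the pair, so nesting falls out naturally and no separate search for the matching close token is needed. A rough, self-contained sketch of the idea using plain strings instead of sqlparse tokens (the names are illustrative, not the library's API):

def group_matching(items, open_sym='(', close_sym=')'):
    """Group balanced open/close pairs in a single left-to-right pass."""
    opens = []                      # stack of indices of unmatched opens
    idx = 0
    while idx < len(items):
        item = items[idx]
        if item == open_sym:
            opens.append(idx)
        elif item == close_sym:
            if not opens:           # unbalanced close: leave it and move on
                idx += 1
                continue
            open_idx = opens.pop()
            # replace the open..close range with a single nested group
            items[open_idx:idx + 1] = [items[open_idx:idx + 1]]
            idx = open_idx
        idx += 1
    return items

print(group_matching(list('a(b(c))d')))
# ['a', ['(', 'b', ['(', 'c', ')'], ')'], 'd']

Each position is pushed and popped at most once, so the pass stays linear in the number of tokens (apart from the slice replacements), instead of re-scanning for the matching close token for every open token.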
From 237575ef726e4232b60a5043177c43a72f370238 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 09:21:05 +0200 Subject: Re-use token index in group_identifier. --- sqlparse/engine/grouping.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index e004eae..77a53ad 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -109,8 +109,9 @@ def group_identifier(tlist): token = tlist.token_next_by(t=T_IDENT) while token: - token = tlist.group_tokens(sql.Identifier, [token, ]) - token = tlist.token_next_by(t=T_IDENT, idx=tlist.token_index(token) + 1) + tidx = tlist.token_index(token) + token = tlist.group_tokens_between(sql.Identifier, tidx, tidx) + token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1) def group_period(tlist): @@ -165,11 +166,14 @@ def group_identifier_list(tlist): token = tlist.token_next_by(m=M_COMMA) while token: - before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) + tidx = tlist.token_index(token) + before, after = tlist.token_prev(tidx), tlist.token_next(tidx) if func(before) and func(after): - token = tlist.group_tokens_between(sql.IdentifierList, before, after, extend=True) - token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1) + tidx = tlist.token_index(before) + token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) + + token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) def group_brackets(tlist): @@ -215,10 +219,11 @@ def group_aliased(tlist): token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: - next_ = tlist.token_next(tlist.token_index(token)) + tidx = tlist.token_index(token) + next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): - token = tlist.group_tokens_between(sql.Identifier, token, next_, extend=True) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1) + token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True) + token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) def group_typecasts(tlist): -- cgit v1.2.1 From 67dc823e1174eee9ea2159674c8eb016b2f95b54 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 10:08:00 +0200 Subject: Use specialized token_idx_next_by in group_aliased. The method group_aliased was making a lot of calls to token_index. By specializing token_next_by to token_idx_next_by, the calls to token_index became superfluous. Also use token_idx_next_by in group_identifier_list. It was making a lot of calls, which is now more than reduced in half. 
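Returning the index together with the token is what makes the specialization pay off: the grouping loops can resume scanning from tidx + 1 instead of calling token_index() (itself a linear search) on every iteration. A simplified model of the pattern, with stand-in names and plain strings rather than sqlparse's exact signatures:

def token_idx_next_by(tokens, pred, idx=0):
    """Return (index, token) of the first token at or after idx matching pred."""
    for i, tok in enumerate(tokens[idx:], start=idx):
        if pred(tok):
            return i, tok
    return None, None

# Typical caller shape: the loop carries the index along and never looks it up.
tokens = ['a', ',', 'b', ',', 'c']
is_comma = lambda tok: tok == ','
tidx, token = token_idx_next_by(tokens, is_comma)
while token is not None:
    print('comma at', tidx)
    tidx, token = token_idx_next_by(tokens, is_comma, idx=tidx + 1)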
--- sqlparse/engine/grouping.py | 10 ++++------ sqlparse/sql.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 77a53ad..fddee0f 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -163,17 +163,16 @@ def group_identifier_list(tlist): (T.Keyword, T.Comment, T.Wildcard)) func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST) - token = tlist.token_next_by(m=M_COMMA) + tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: - tidx = tlist.token_index(token) before, after = tlist.token_prev(tidx), tlist.token_next(tidx) if func(before) and func(after): tidx = tlist.token_index(before) token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) - token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) + tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) def group_brackets(tlist): @@ -217,13 +216,12 @@ def group_aliased(tlist): I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier, ) # sql.Operation) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number) + tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) while token: - tidx = tlist.token_index(token) next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) + tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) def group_typecasts(tlist): diff --git a/sqlparse/sql.py b/sqlparse/sql.py index dfe0430..928b784 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -225,6 +225,22 @@ class TokenList(Token): def _groupable_tokens(self): return self.tokens + def _token_idx_matching(self, funcs, start=0, end=None, reverse=False): + """next token that match functions""" + if start is None: + return None + + if not isinstance(funcs, (list, tuple)): + funcs = (funcs,) + + iterable = enumerate(self.tokens[start:end], start=start) + + for idx, token in iterable: + for func in funcs: + if func(token): + return idx, token + return None, None + def _token_matching(self, funcs, start=0, end=None, reverse=False): """next token that match functions""" if start is None: @@ -259,6 +275,10 @@ class TokenList(Token): (ignore_comments and imt(tk, i=Comment))) return self._token_matching(funcs) + def token_idx_next_by(self, i=None, m=None, t=None, idx=0, end=None): + funcs = lambda tk: imt(tk, i, m, t) + return self._token_idx_matching(funcs, idx, end) + def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): funcs = lambda tk: imt(tk, i, m, t) return self._token_matching(funcs, idx, end) -- cgit v1.2.1 From 8f7968ed5c649e5227e605ee272f59dd5ca75adb Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 10:28:54 +0200 Subject: Index-based token_idx_prev Prevent some more calls to token_index in group_identifier_list. They are now all gone. 
--- sqlparse/engine/grouping.py | 5 +++-- sqlparse/sql.py | 28 ++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index fddee0f..6bdba2f 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -166,10 +166,11 @@ def group_identifier_list(tlist): tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: - before, after = tlist.token_prev(tidx), tlist.token_next(tidx) + before_idx, before = tlist.token_idx_prev(tidx) + after = tlist.token_next(tidx) if func(before) and func(after): - tidx = tlist.token_index(before) + tidx = before_idx token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 928b784..9782c33 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -233,12 +233,18 @@ class TokenList(Token): if not isinstance(funcs, (list, tuple)): funcs = (funcs,) - iterable = enumerate(self.tokens[start:end], start=start) - - for idx, token in iterable: - for func in funcs: - if func(token): - return idx, token + if reverse: + assert end is None + for idx in range(start - 2, -1, -1): + token = self.tokens[idx] + for func in funcs: + if func(token): + return idx, token + else: + for idx, token in enumerate(self.tokens[start:end], start=start): + for func in funcs: + if func(token): + return idx, token return None, None def _token_matching(self, funcs, start=0, end=None, reverse=False): @@ -312,6 +318,16 @@ class TokenList(Token): def token_matching(self, idx, funcs): return self._token_matching(funcs, idx) + def token_idx_prev(self, idx, skip_ws=True): + """Returns the previous token relative to *idx*. + + If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. + ``None`` is returned if there's no previous token. + """ + idx += 1 # alot of code usage current pre-compensates for this + funcs = lambda tk: not (tk.is_whitespace() and skip_ws) + return self._token_idx_matching(funcs, idx, reverse=True) + def token_prev(self, idx, skip_ws=True): """Returns the previous token relative to *idx*. -- cgit v1.2.1 From 89d4f68ba5bbe78a9dd89257cbe4a9f3cfa76433 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 11:58:19 +0200 Subject: Use a specialized token_idx_next. Prevent calling token_index. 
--- sqlparse/engine/grouping.py | 8 ++++---- sqlparse/sql.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 6bdba2f..0169830 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -167,11 +167,11 @@ def group_identifier_list(tlist): tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: before_idx, before = tlist.token_idx_prev(tidx) - after = tlist.token_next(tidx) + after_idx, after = tlist.token_idx_next(tidx) if func(before) and func(after): tidx = before_idx - token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) + token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True) tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) @@ -219,9 +219,9 @@ def group_aliased(tlist): tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) while token: - next_ = tlist.token_next(tidx) + next_index_, next_ = tlist.token_idx_next(tidx) if imt(next_, i=sql.Identifier): - token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True) + token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True) tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 9782c33..f3ef642 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -350,6 +350,26 @@ class TokenList(Token): funcs = lambda tk: not (tk.is_whitespace() and skip_ws) return self._token_matching(funcs, idx) + def token_idx_next(self, idx, skip_ws=True): + """Returns the next token relative to *idx*. + + If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. + ``None`` is returned if there's no next token. + """ + if isinstance(idx, int): + idx += 1 # alot of code usage current pre-compensates for this + try: + if not skip_ws: + return idx, self.tokens[idx] + else: + while True: + token = self.tokens[idx] + if not token.is_whitespace(): + return idx, token + idx += 1 + except IndexError: + return None, None + def token_index(self, token, start=0): """Return list index of token.""" start = self.token_index(start) if not isinstance(start, int) else start -- cgit v1.2.1 From 0bcb34cc1514d77446a29c2c636a3f9a653588f2 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 12 Jun 2016 21:02:11 -0700 Subject: Remove unused code from sql.py and style up some changes --- sqlparse/sql.py | 45 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 54f7d4f..027228d 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -338,16 +338,6 @@ class TokenList(Token): start = start if isinstance(start, int) else self.token_index(start) return start + self.tokens[start:].index(token) - def tokens_between(self, start, end, include_end=True): - """Return all tokens between (and including) start and end. - - If *include_end* is ``False`` (default is ``True``) the end token - is excluded. 
- """ - start_idx = self.token_index(start) - end_idx = include_end + self.token_index(end) - return self.tokens[start_idx:end_idx] - def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=False): """Replace tokens by an instance of *grp_cls*.""" @@ -357,8 +347,12 @@ class TokenList(Token): else: start_idx = self.token_index(start) - end_idx = self.token_index(end) if not isinstance(end, int) else end - end_idx += include_end + end = end if isinstance(end, int) else self.token_index(end, start_idx) + end_idx = end + include_end + + # will be needed later for new group_clauses + # while skip_ws and tokens and tokens[-1].is_whitespace(): + # tokens = tokens[:-1] if extend and isinstance(start, grp_cls): subtokens = self.tokens[start_idx + 1:end_idx] @@ -366,7 +360,7 @@ class TokenList(Token): grp = start grp.tokens.extend(subtokens) del self.tokens[start_idx + 1:end_idx] - grp.value = start.__str__() + grp.value = text_type(start) else: subtokens = self.tokens[start_idx:end_idx] grp = grp_cls(subtokens) @@ -378,31 +372,6 @@ class TokenList(Token): return grp - def group_tokens(self, grp_cls, tokens, skip_ws=False, extend=False): - """Replace tokens by an instance of *grp_cls*.""" - - while skip_ws and tokens and tokens[-1].is_whitespace(): - tokens = tokens[:-1] - - left = tokens[0] - idx = self.token_index(left) - - if extend and isinstance(left, grp_cls): - grp = left - grp.tokens.extend(tokens[1:]) - else: - grp = grp_cls(tokens) - - for token in tokens: - token.parent = grp - - # Improve performance. LOOP(list.remove()) is O(n**2) operation - self.tokens = [token for token in self.tokens if token not in tokens] - - self.tokens.insert(idx, grp) - grp.parent = self - return grp - def insert_before(self, where, token): """Inserts *token* before *where*.""" token.parent = self -- cgit v1.2.1 From 405a66817f1b0789901adf9c81b96658a04e6950 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 12 Jun 2016 21:22:07 -0700 Subject: Reapply fix for case within paranthesis --- sqlparse/engine/grouping.py | 4 +++- tests/test_regressions.py | 8 -------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index cae5d23..f9ca6b4 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -43,7 +43,9 @@ def _group_left_right(tlist, m, cls, def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" - idx = 1 if imt(tlist, i=cls) else 0 + [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, cls)] + idx = 1 if isinstance(tlist, cls) else 0 opens = [] diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 3a3406b..b55939a 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -312,11 +312,3 @@ def test_issue207_runaway_format(): " 2 as two,", " 3", " from dual) t0"]) - - -@pytest.mark.xfail(reason="broke with new indexing") -def test_case_within_parenthesis(): - # see issue #164 - s = '(case when 1=1 then 2 else 5 end)' - p = sqlparse.parse(s)[0] - assert isinstance(p[0][1], sql.Case) -- cgit v1.2.1 From c601435bde6afd32f93b7e19b17287ca9d3b02f9 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 12 Jun 2016 21:39:14 -0700 Subject: Apply alt style for grouping left/right --- sqlparse/engine/grouping.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index f9ca6b4..240ce5e 100644 --- 
a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -23,11 +23,14 @@ def _group_left_right(tlist, m, cls, valid_right=lambda t: t is not None, semicolon=False): """Groups together tokens that are joined by a middle token. ie. x < y""" - [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon) - for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)] + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + _group_left_right(token, m, cls, valid_left, valid_right, + semicolon) + continue + if not token.match(*m): + continue - token = tlist.token_next_by(m=m) - while token: tidx = tlist.token_index(token) left, right = tlist.token_prev(tidx), tlist.token_next(tidx) @@ -37,15 +40,14 @@ def _group_left_right(tlist, m, cls, sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) right = sright or right # Luckily, this leaves the position of `token` intact. - token = tlist.group_tokens_between(cls, left, right, extend=True) - token = tlist.token_next_by(m=m, idx=tidx + 1) + tlist.group_tokens_between(cls, left, right, extend=True) def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)] - idx = 1 if isinstance(tlist, cls) else 0 + idx = 0 # check no longer needed since not recursing. opens = [] -- cgit v1.2.1 From 954ba46e16af4e3c9b1302bbae95ebf2a4be2a8b Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 12 Jun 2016 22:07:07 -0700 Subject: Refactor _group_matching --- sqlparse/engine/grouping.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 240ce5e..bf76119 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -45,29 +45,25 @@ def _group_left_right(tlist, m, cls, def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" - [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, cls)] - idx = 0 # check no longer needed since not recursing. - opens = [] - - while True: - try: - token = tlist.tokens[idx] - except IndexError: - break + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + # Check inside previously grouped (ie. parenthesis) if group + # of differnt type is inside (ie, case). though ideally should + # should check for all open/close tokens at once to avoid recursion + _group_matching(token, cls) + continue if token.match(*cls.M_OPEN): - opens.append(idx) + opens.append(token) elif token.match(*cls.M_CLOSE): try: - open_idx = opens.pop() + open_token = opens.pop() except IndexError: - break - tlist.group_tokens_between(cls, open_idx, idx) - idx = open_idx - - idx += 1 + # this indicates invalid sql and unbalanced tokens. 
+ # instead of break, continue in case other "valid" groups exist + continue + tlist.group_tokens_between(cls, open_token, token) def group_if(tlist): -- cgit v1.2.1 From 997f95b8b6ec5129362dcfe5deedaf50800e3afc Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 10:50:58 -0700 Subject: Change argument order to match order of all other functions --- sqlparse/engine/grouping.py | 3 ++- sqlparse/sql.py | 4 ++-- tests/test_tokenize.py | 12 +++++------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index bf76119..86c4bf2 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -195,7 +195,8 @@ def group_comments(tlist): token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) + lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), + idx=tlist.token_index(token) + 1) if end is not None: end = tlist.token_prev(tlist.token_index(end), False) token = tlist.group_tokens_between(sql.Comment, token, end) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 027228d..e0ac81d 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -269,12 +269,12 @@ class TokenList(Token): funcs = lambda tk: imt(tk, i, m, t) return self._token_matching(funcs, idx, end) - def token_not_matching(self, idx, funcs): + def token_not_matching(self, funcs, idx): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs funcs = [lambda tk: not func(tk) for func in funcs] return self._token_matching(funcs, idx) - def token_matching(self, idx, funcs): + def token_matching(self, funcs, idx): return self._token_matching(funcs, idx) def token_idx_prev(self, idx, skip_ws=True): diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index adfd1ea..61eaa3e 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -113,14 +113,12 @@ class TestTokenList(unittest.TestCase): t1 = sql.Token(T.Keyword, 'foo') t2 = sql.Token(T.Punctuation, ',') x = sql.TokenList([t1, t2]) - self.assertEqual(x.token_matching(0, [lambda t: t.ttype is T.Keyword]), - t1) self.assertEqual(x.token_matching( - 0, - [lambda t: t.ttype is T.Punctuation]), - t2) - self.assertEqual(x.token_matching(1, [lambda t: t.ttype is T.Keyword]), - None) + [lambda t: t.ttype is T.Keyword], 0), t1) + self.assertEqual(x.token_matching( + [lambda t: t.ttype is T.Punctuation], 0), t2) + self.assertEqual(x.token_matching( + [lambda t: t.ttype is T.Keyword], 1), None) class TestStream(unittest.TestCase): -- cgit v1.2.1 From a795be1a70a241e177227b742269fb2df88af962 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 13:21:20 -0700 Subject: Change token_ funcs to token_idx funcs --- examples/column_defs_lowlevel.py | 8 +-- sqlparse/engine/grouping.py | 111 ++++++++++++++++++++----------------- sqlparse/filters/aligned_indent.py | 25 +++++---- sqlparse/filters/others.py | 36 ++++++------ sqlparse/filters/reindent.py | 54 ++++++++++-------- sqlparse/sql.py | 64 ++++++++++++--------- tests/test_grouping.py | 6 +- 7 files changed, 167 insertions(+), 137 deletions(-) diff --git a/examples/column_defs_lowlevel.py b/examples/column_defs_lowlevel.py index 5e98be3..1ebd065 100644 --- a/examples/column_defs_lowlevel.py +++ b/examples/column_defs_lowlevel.py @@ -17,16 +17,16 @@ def extract_definitions(token_list): definitions = [] tmp = [] # grab the first token, ignoring whitespace. 
idx=1 to skip open ( - token = token_list.token_next(1) + tidx, token = token_list.token_idx_next(1) while token and not token.match(sqlparse.tokens.Punctuation, ')'): tmp.append(token) # grab the next token, this times including whitespace - token = token_list.token_next(token, skip_ws=False) + tidx, token = token_list.token_idx_next(tidx, skip_ws=False) # split on ",", except when on end of statement if token and token.match(sqlparse.tokens.Punctuation, ','): definitions.append(tmp) tmp = [] - token = token_list.token_next(token) + tidx, token = token_list.token_idx_next(tidx) if tmp and isinstance(tmp[0], sqlparse.sql.Identifier): definitions.append(tmp) return definitions @@ -41,7 +41,7 @@ if __name__ == '__main__': parsed = sqlparse.parse(SQL)[0] # extract the parenthesis which holds column definitions - par = parsed.token_next_by(i=sqlparse.sql.Parenthesis) + _, par = parsed.token_idx_next_by(i=sqlparse.sql.Parenthesis) columns = extract_definitions(par) for column in columns: diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 86c4bf2..88064cb 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -32,15 +32,16 @@ def _group_left_right(tlist, m, cls, continue tidx = tlist.token_index(token) - left, right = tlist.token_prev(tidx), tlist.token_next(tidx) + pidx, prev_ = tlist.token_idx_prev(tidx) + nidx, next_ = tlist.token_idx_next(tidx) - if valid_left(left) and valid_right(right): + if valid_left(prev_) and valid_right(next_): if semicolon: # only overwrite if a semicolon present. - sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) - right = sright or right + snidx, _ = tlist.token_idx_next_by(m=M_SEMICOLON, idx=nidx) + nidx = snidx or nidx # Luckily, this leaves the position of `token` intact. 
- tlist.group_tokens_between(cls, left, right, extend=True) + tlist.group_tokens_between(cls, pidx, nidx, extend=True) def _group_matching(tlist, cls): @@ -114,11 +115,10 @@ def group_case(tlist): def group_identifier(tlist): T_IDENT = (T.String.Symbol, T.Name) - token = tlist.token_next_by(t=T_IDENT) + tidx, token = tlist.token_idx_next_by(t=T_IDENT) while token: - tidx = tlist.token_index(token) - token = tlist.group_tokens_between(sql.Identifier, tidx, tidx) - token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1) + tlist.group_tokens_between(sql.Identifier, tidx, tidx) + tidx, token = tlist.token_idx_next_by(t=T_IDENT, idx=tidx + 1) def group_period(tlist): @@ -133,13 +133,14 @@ def group_period(tlist): def group_arrays(tlist): - token = tlist.token_next_by(i=sql.SquareBrackets) + tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets) while token: - prev = tlist.token_prev(tlist.token_index(token)) + pidx, prev = tlist.token_idx_prev(tidx) if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): - token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True) - token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1) + tlist.group_tokens_between(sql.Identifier, pidx, tidx, extend=True) + tidx = pidx + tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets, idx=tidx + 1) @recurse(sql.Identifier) @@ -150,15 +151,18 @@ def group_operator(tlist): T_CYCLE = T_NUMERICAL + T_STRING + T_NAME func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) - token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) + tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard)) while token: - left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) + pidx, prev_ = tlist.token_idx_prev(tidx) + nidx, next_ = tlist.token_idx_next(tidx) - if func(left) and func(right): + if func(prev_) and func(next_): token.ttype = T.Operator - token = tlist.group_tokens_between(sql.Operation, left, right) + tlist.group_tokens_between(sql.Operation, pidx, nidx) + tidx = pidx - token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1) + tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard), + idx=tidx + 1) @recurse(sql.IdentifierList) @@ -172,13 +176,12 @@ def group_identifier_list(tlist): tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: - before_idx, before = tlist.token_idx_prev(tidx) - after_idx, after = tlist.token_idx_next(tidx) - - if func(before) and func(after): - tidx = before_idx - token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True) + pidx, prev_ = tlist.token_idx_prev(tidx) + nidx, next_ = tlist.token_idx_next(tidx) + if func(prev_) and func(next_): + tlist.group_tokens_between(sql.IdentifierList, pidx, nidx, extend=True) + tidx = pidx tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) @@ -192,31 +195,32 @@ def group_parenthesis(tlist): @recurse(sql.Comment) def group_comments(tlist): - token = tlist.token_next_by(t=T.Comment) + tidx, token = tlist.token_idx_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), - idx=tlist.token_index(token) + 1) + lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx + 1) if end is not None: - end = tlist.token_prev(tlist.token_index(end), False) - token = tlist.group_tokens_between(sql.Comment, token, end) + eidx = tlist.token_index(end) + eidx, end = 
tlist.token_idx_prev(eidx, skip_ws=False) + tlist.group_tokens_between(sql.Comment, tidx, eidx) - token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1) + tidx, token = tlist.token_idx_next_by(t=T.Comment, idx=tidx + 1) @recurse(sql.Where) def group_where(tlist): - token = tlist.token_next_by(m=sql.Where.M_OPEN) + tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN) while token: - end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1) + eidx, end = tlist.token_idx_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) if end is None: end = tlist._groupable_tokens[-1] else: - end = tlist.tokens[tlist.token_index(end) - 1] - - token = tlist.group_tokens_between(sql.Where, token, end) - token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1) + end = tlist.tokens[eidx - 1] + # TODO: convert this to eidx instead of end token. + # i think above values are len(tlist) and eidx-1 + tlist.group_tokens_between(sql.Where, tidx, end) + tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) @recurse() @@ -226,9 +230,9 @@ def group_aliased(tlist): tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) while token: - next_index_, next_ = tlist.token_idx_next(tidx) + nidx, next_ = tlist.token_idx_next(tidx) if imt(next_, i=sql.Identifier): - token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True) + tlist.group_tokens_between(sql.Identifier, tidx, nidx, extend=True) tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) @@ -247,32 +251,35 @@ def group_functions(tlist): has_table = True if has_create and has_table: return - token = tlist.token_next_by(t=T.Name) + + tidx, token = tlist.token_idx_next_by(t=T.Name) while token: - next_ = tlist.token_next(tlist.token_index(token)) - if imt(next_, i=sql.Parenthesis): - token = tlist.group_tokens_between(sql.Function, token, next_) - token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1) + nidx, next_ = tlist.token_idx_next(tidx) + if isinstance(next_, sql.Parenthesis): + tlist.group_tokens_between(sql.Function, tidx, nidx) + tidx, token = tlist.token_idx_next_by(t=T.Name, idx=tidx + 1) def group_order(tlist): """Group together Identifier and Asc/Desc token""" - token = tlist.token_next_by(t=T.Keyword.Order) + tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order) while token: - prev = tlist.token_prev(tlist.token_index(token)) + pidx, prev = tlist.token_idx_prev(tidx) if imt(prev, i=sql.Identifier, t=T.Number): - token = tlist.group_tokens_between(sql.Identifier, prev, token) - token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1) + tlist.group_tokens_between(sql.Identifier, pidx, tidx) + tidx = pidx + tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order, idx=tidx + 1) @recurse() def align_comments(tlist): - token = tlist.token_next_by(i=sql.Comment) + tidx, token = tlist.token_idx_next_by(i=sql.Comment) while token: - before = tlist.token_prev(tlist.token_index(token)) - if isinstance(before, sql.TokenList): - token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True) - token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1) + pidx, prev = tlist.token_idx_prev(tidx) + if isinstance(prev, sql.TokenList): + tlist.group_tokens_between(sql.TokenList, pidx, tidx, extend=True) + tidx = pidx + tidx, token = tlist.token_idx_next_by(i=sql.Comment, idx=tidx + 1) def group(stmt): diff --git a/sqlparse/filters/aligned_indent.py 
b/sqlparse/filters/aligned_indent.py index ea749e9..719b450 100644 --- a/sqlparse/filters/aligned_indent.py +++ b/sqlparse/filters/aligned_indent.py @@ -46,7 +46,8 @@ class AlignedIndentFilter(object): def _process_parenthesis(self, tlist): # if this isn't a subquery, don't re-indent - if tlist.token_next_by(m=(T.DML, 'SELECT')): + _, token = tlist.token_idx_next_by(m=(T.DML, 'SELECT')) + if token is not None: with indent(self): tlist.insert_after(tlist[0], self.nl('SELECT')) # process the inside of the parantheses @@ -66,7 +67,7 @@ class AlignedIndentFilter(object): offset_ = len('case ') + len('when ') cases = tlist.get_cases(skip_ws=True) # align the end as well - end_token = tlist.token_next_by(m=(T.Keyword, 'END')) + _, end_token = tlist.token_idx_next_by(m=(T.Keyword, 'END')) cases.append((None, [end_token])) condition_width = [len(' '.join(map(text_type, cond))) if cond else 0 @@ -87,16 +88,16 @@ class AlignedIndentFilter(object): def _next_token(self, tlist, idx=0): split_words = T.Keyword, self.split_words, True - token = tlist.token_next_by(m=split_words, idx=idx) + tidx, token = tlist.token_idx_next_by(m=split_words, idx=idx) # treat "BETWEEN x and y" as a single statement - if token and token.value.upper() == 'BETWEEN': - token = self._next_token(tlist, token) - if token and token.value.upper() == 'AND': - token = self._next_token(tlist, token) - return token + if token and token.normalized == 'BETWEEN': + tidx, token = self._next_token(tlist, tidx + 1) + if token and token.normalized == 'AND': + tidx, token = self._next_token(tlist, tidx + 1) + return tidx, token def _split_kwds(self, tlist): - token = self._next_token(tlist) + tidx, token = self._next_token(tlist) while token: # joins are special case. only consider the first word as aligner if token.match(T.Keyword, self.join_words, regex=True): @@ -104,13 +105,15 @@ class AlignedIndentFilter(object): else: token_indent = text_type(token) tlist.insert_before(token, self.nl(token_indent)) - token = self._next_token(tlist, token) + tidx += 1 + tidx, token = self._next_token(tlist, tidx + 1) def _process_default(self, tlist): self._split_kwds(tlist) # process any sub-sub statements for sgroup in tlist.get_sublists(): - prev = tlist.token_prev(sgroup) + idx = tlist.token_index(sgroup) + pidx, prev = tlist.token_idx_prev(idx) # HACK: make "group/order by" work. Longer than max_len. offset_ = 3 if (prev and prev.match(T.Keyword, 'BY')) else 0 with offset(self, offset_): diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index 71b1f8e..ecde2fe 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -14,23 +14,22 @@ class StripCommentsFilter(object): def _process(tlist): def get_next_comment(): # TODO(andi) Comment types should be unified, see related issue38 - return tlist.token_next_by(i=sql.Comment, t=T.Comment) + return tlist.token_idx_next_by(i=sql.Comment, t=T.Comment) - token = get_next_comment() + tidx, token = get_next_comment() while token: - prev = tlist.token_prev(token, skip_ws=False) - next_ = tlist.token_next(token, skip_ws=False) + pidx, prev_ = tlist.token_idx_prev(tidx, skip_ws=False) + nidx, next_ = tlist.token_idx_next(tidx, skip_ws=False) # Replace by whitespace if prev and next exist and if they're not # whitespaces. This doesn't apply if prev or next is a paranthesis. 
- if (prev is None or next_ is None or - prev.is_whitespace() or prev.match(T.Punctuation, '(') or + if (prev_ is None or next_ is None or + prev_.is_whitespace() or prev_.match(T.Punctuation, '(') or next_.is_whitespace() or next_.match(T.Punctuation, ')')): tlist.tokens.remove(token) else: - tidx = tlist.token_index(token) tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') - token = get_next_comment() + tidx, token = get_next_comment() def process(self, stmt): [self.process(sgroup) for sgroup in stmt.get_sublists()] @@ -86,20 +85,21 @@ class StripWhitespaceFilter(object): class SpacesAroundOperatorsFilter(object): @staticmethod def _process(tlist): - def next_token(idx=0): - return tlist.token_next_by(t=(T.Operator, T.Comparison), idx=idx) - token = next_token() + ttypes = (T.Operator, T.Comparison) + tidx, token = tlist.token_idx_next_by(t=ttypes) while token: - prev_ = tlist.token_prev(token, skip_ws=False) - if prev_ and prev_.ttype != T.Whitespace: - tlist.insert_before(token, sql.Token(T.Whitespace, ' ')) - - next_ = tlist.token_next(token, skip_ws=False) + nidx, next_ = tlist.token_idx_next(tidx, skip_ws=False) if next_ and next_.ttype != T.Whitespace: - tlist.insert_after(token, sql.Token(T.Whitespace, ' ')) + tlist.insert_after(tidx, sql.Token(T.Whitespace, ' ')) + + pidx, prev_ = tlist.token_idx_prev(tidx, skip_ws=False) + if prev_ and prev_.ttype != T.Whitespace: + tlist.insert_before(tidx, sql.Token(T.Whitespace, ' ')) + tidx += 1 # has to shift since token inserted before it - token = next_token(idx=token) + # assert tlist.token_index(token) == tidx + tidx, token = tlist.token_idx_next_by(t=ttypes, idx=tidx + 1) def process(self, stmt): [self.process(sgroup) for sgroup in stmt.get_sublists()] diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py index b490631..d13fdf3 100644 --- a/sqlparse/filters/reindent.py +++ b/sqlparse/filters/reindent.py @@ -48,40 +48,47 @@ class ReindentFilter(object): split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR', 'GROUP', 'ORDER', 'UNION', 'VALUES', 'SET', 'BETWEEN', 'EXCEPT', 'HAVING') - token = tlist.token_next_by(m=(T.Keyword, split_words, True), idx=idx) + m_split = T.Keyword, split_words, True + tidx, token = tlist.token_idx_next_by(m=m_split, idx=idx) - if token and token.value.upper() == 'BETWEEN': - token = self._next_token(tlist, token) + if token and token.normalized == 'BETWEEN': + tidx, token = self._next_token(tlist, tidx + 1) - if token and token.value.upper() == 'AND': - token = self._next_token(tlist, token) + if token and token.normalized == 'AND': + tidx, token = self._next_token(tlist, tidx + 1) - return token + return tidx, token def _split_kwds(self, tlist): - token = self._next_token(tlist) + tidx, token = self._next_token(tlist) while token: - prev = tlist.token_prev(token, skip_ws=False) + tidx = tlist.token_index(token) + pidx, prev = tlist.token_idx_prev(tidx, skip_ws=False) uprev = text_type(prev) if prev and prev.is_whitespace(): - tlist.tokens.remove(prev) + del tlist.tokens[pidx] + tidx -= 1 if not (uprev.endswith('\n') or uprev.endswith('\r')): - tlist.insert_before(token, self.nl()) + tlist.insert_before(tidx, self.nl()) + tidx += 1 - token = self._next_token(tlist, token) + tidx, token = self._next_token(tlist, tidx + 1) def _split_statements(self, tlist): - token = tlist.token_next_by(t=(T.Keyword.DDL, T.Keyword.DML)) + tidx, token = tlist.token_idx_next_by(t=(T.Keyword.DDL, T.Keyword.DML)) while token: - prev = tlist.token_prev(token, skip_ws=False) + pidx, prev = 
tlist.token_idx_prev(tidx, skip_ws=False) if prev and prev.is_whitespace(): - tlist.tokens.remove(prev) + del tlist.tokens[pidx] + tidx -= 1 # only break if it's not the first token - tlist.insert_before(token, self.nl()) if prev else None - token = tlist.token_next_by(t=(T.Keyword.DDL, T.Keyword.DML), - idx=token) + if prev: + tlist.insert_before(tidx, self.nl()) + tidx += 1 + tidx, token = tlist.token_idx_next_by( + t=(T.Keyword.DDL, T.Keyword.DML), idx=tidx + 1) def _process(self, tlist): func_name = '_process_{cls}'.format(cls=type(tlist).__name__) @@ -89,16 +96,17 @@ class ReindentFilter(object): func(tlist) def _process_where(self, tlist): - token = tlist.token_next_by(m=(T.Keyword, 'WHERE')) + tidx, token = tlist.token_idx_next_by(m=(T.Keyword, 'WHERE')) # issue121, errors in statement fixed?? - tlist.insert_before(token, self.nl()) + tlist.insert_before(tidx, self.nl()) with indent(self): self._process_default(tlist) def _process_parenthesis(self, tlist): - is_dml_dll = tlist.token_next_by(t=(T.Keyword.DML, T.Keyword.DDL)) - first = tlist.token_next_by(m=sql.Parenthesis.M_OPEN) + ttypes = T.Keyword.DML, T.Keyword.DDL + _, is_dml_dll = tlist.token_idx_next_by(t=ttypes) + fidx, first = tlist.token_idx_next_by(m=sql.Parenthesis.M_OPEN) with indent(self, 1 if is_dml_dll else 0): tlist.tokens.insert(0, self.nl()) if is_dml_dll else None @@ -135,8 +143,8 @@ class ReindentFilter(object): # len "when ", "then ", "else " with offset(self, len("WHEN ")): self._process_default(tlist) - end = tlist.token_next_by(m=sql.Case.M_CLOSE) - tlist.insert_before(end, self.nl()) + end_idx, end = tlist.token_idx_next_by(m=sql.Case.M_CLOSE) + tlist.insert_before(end_idx, self.nl()) def _process_default(self, tlist, stmts=True): self._split_statements(tlist) if stmts else None diff --git a/sqlparse/sql.py b/sqlparse/sql.py index e0ac81d..d1d8e3e 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -259,7 +259,7 @@ class TokenList(Token): # this on is inconsistent, using Comment instead of T.Comment... funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or (skip_cm and imt(tk, t=T.Comment, i=Comment))) - return self._token_matching(funcs) + return self._token_idx_matching(funcs)[1] def token_idx_next_by(self, i=None, m=None, t=None, idx=0, end=None): funcs = lambda tk: imt(tk, i, m, t) @@ -272,19 +272,22 @@ class TokenList(Token): def token_not_matching(self, funcs, idx): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs funcs = [lambda tk: not func(tk) for func in funcs] - return self._token_matching(funcs, idx) + return self._token_idx_matching(funcs, idx)[1] def token_matching(self, funcs, idx): - return self._token_matching(funcs, idx) + return self._token_idx_matching(funcs, idx)[1] - def token_idx_prev(self, idx, skip_ws=True): + def token_idx_prev(self, idx, skip_ws=True, skip_cm=False): """Returns the previous token relative to *idx*. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. ``None`` is returned if there's no previous token. 
""" + if idx is None: + return None, None idx += 1 # alot of code usage current pre-compensates for this - funcs = lambda tk: not (tk.is_whitespace() and skip_ws) + funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or + (skip_cm and imt(tk, t=T.Comment, i=Comment))) return self._token_idx_matching(funcs, idx, reverse=True) def token_prev(self, idx=0, skip_ws=True, skip_cm=False): @@ -313,14 +316,17 @@ class TokenList(Token): (skip_cm and imt(tk, t=T.Comment, i=Comment))) return self._token_matching(funcs, idx) - def token_idx_next(self, idx, skip_ws=True): + # TODO: May need to implement skip_cm for upstream changes. + # TODO: May need to re-add default value to idx + def token_idx_next(self, idx, skip_ws=True, skip_cm=False): """Returns the next token relative to *idx*. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. ``None`` is returned if there's no next token. """ - if isinstance(idx, int): - idx += 1 # alot of code usage current pre-compensates for this + if idx is None: + return None, None + idx += 1 # alot of code usage current pre-compensates for this try: if not skip_ws: return idx, self.tokens[idx] @@ -374,17 +380,21 @@ class TokenList(Token): def insert_before(self, where, token): """Inserts *token* before *where*.""" + if not isinstance(where, int): + where = self.token_index(where) token.parent = self - self.tokens.insert(self.token_index(where), token) + self.tokens.insert(where, token) def insert_after(self, where, token, skip_ws=True): """Inserts *token* after *where*.""" - next_token = self.token_next(where, skip_ws=skip_ws) + if not isinstance(where, int): + where = self.token_index(where) + nidx, next_ = self.token_idx_next(where, skip_ws=skip_ws) token.parent = self - if next_token is None: + if next_ is None: self.tokens.append(token) else: - self.insert_before(next_token, token) + self.tokens.insert(nidx, token) def has_alias(self): """Returns ``True`` if an alias is present.""" @@ -394,12 +404,13 @@ class TokenList(Token): """Returns the alias for this identifier or ``None``.""" # "name AS alias" - kw = self.token_next_by(m=(T.Keyword, 'AS')) + kw_idx, kw = self.token_idx_next_by(m=(T.Keyword, 'AS')) if kw is not None: - return self._get_first_name(kw, keywords=True) + return self._get_first_name(kw_idx + 1, keywords=True) # "name alias" or "complicated column expression alias" - if len(self.tokens) > 2 and self.token_next_by(t=T.Whitespace): + _, ws = self.token_idx_next_by(t=T.Whitespace) + if len(self.tokens) > 2 and ws is not None: return self._get_first_name(reverse=True) def get_name(self): @@ -414,16 +425,16 @@ class TokenList(Token): def get_real_name(self): """Returns the real name (object name) of this identifier.""" # a.b - dot = self.token_next_by(m=(T.Punctuation, '.')) - return self._get_first_name(dot) + dot_idx, _ = self.token_idx_next_by(m=(T.Punctuation, '.')) + return self._get_first_name(dot_idx) def get_parent_name(self): """Return name of the parent object if any. A parent object is identified by the first occuring dot. 
""" - dot = self.token_next_by(m=(T.Punctuation, '.')) - prev_ = self.token_prev(dot) + dot_idx, _ = self.token_idx_next_by(m=(T.Punctuation, '.')) + _, prev_ = self.token_idx_prev(dot_idx) return remove_quotes(prev_.value) if prev_ is not None else None def _get_first_name(self, idx=None, reverse=False, keywords=False): @@ -472,9 +483,10 @@ class Statement(TokenList): # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. - token = self.token_next(first_token, skip_ws=True) + fidx = self.token_index(first_token) + tidx, token = self.token_idx_next(fidx, skip_ws=True) if isinstance(token, (Identifier, IdentifierList)): - dml_keyword = self.token_next(token, skip_ws=True) + _, dml_keyword = self.token_idx_next(tidx, skip_ws=True) if dml_keyword.ttype == T.Keyword.DML: return dml_keyword.normalized @@ -491,18 +503,18 @@ class Identifier(TokenList): def is_wildcard(self): """Return ``True`` if this identifier contains a wildcard.""" - token = self.token_next_by(t=T.Wildcard) + _, token = self.token_idx_next_by(t=T.Wildcard) return token is not None def get_typecast(self): """Returns the typecast or ``None`` of this object as a string.""" - marker = self.token_next_by(m=(T.Punctuation, '::')) - next_ = self.token_next(marker, skip_ws=False) + midx, marker = self.token_idx_next_by(m=(T.Punctuation, '::')) + nidx, next_ = self.token_idx_next(midx, skip_ws=False) return next_.value if next_ else None def get_ordering(self): """Returns the ordering or ``None`` as uppercase string.""" - ordering = self.token_next_by(t=T.Keyword.Order) + _, ordering = self.token_idx_next_by(t=T.Keyword.Order) return ordering.normalized if ordering else None def get_array_indices(self): @@ -649,7 +661,7 @@ class Function(TokenList): """Return a list of parameters.""" parenthesis = self.tokens[-1] for token in parenthesis.tokens: - if imt(token, i=IdentifierList): + if isinstance(token, IdentifierList): return token.get_identifiers() elif imt(token, i=(Function, Identifier), t=T.Literal): return [token, ] diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 147162f..4f904cf 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -128,11 +128,11 @@ class TestGrouping(TestCaseBase): p = sqlparse.parse("select * from (" "select a, b + c as d from table) sub")[0] subquery = p.tokens[-1].tokens[0] - iden_list = subquery.token_next_by(i=sql.IdentifierList) + idx, iden_list = subquery.token_idx_next_by(i=sql.IdentifierList) self.assert_(iden_list is not None) # all the identifiers should be within the IdentifierList - self.assert_(subquery.token_next_by(i=sql.Identifier, - idx=iden_list) is None) + _, ilist = subquery.token_idx_next_by(i=sql.Identifier, idx=idx) + self.assert_(ilist is None) def test_identifier_list_case(self): p = sqlparse.parse('a, case when 1 then 2 else 3 end as b, c')[0] -- cgit v1.2.1 From 711744d1664f8244d8ab0b090cbf12e923101cce Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 21:50:17 -0700 Subject: Remove functions no-longer used --- sqlparse/sql.py | 51 --------------------------------------------------- sqlparse/utils.py | 12 ------------ 2 files changed, 63 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index d1d8e3e..af282a3 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -226,27 +226,6 @@ class TokenList(Token): return idx, token return None, None - def _token_matching(self, funcs, start=0, end=None, 
reverse=False): - """next token that match functions""" - if start is None: - return None - - if not isinstance(start, int): - start = self.token_index(start) + 1 - - if not isinstance(funcs, (list, tuple)): - funcs = (funcs,) - - if reverse: - iterable = reversed(self.tokens[end:start - 1]) - else: - iterable = self.tokens[start:end] - - for token in iterable: - for func in funcs: - if func(token): - return token - def token_first(self, skip_ws=True, skip_cm=False): """Returns the first child token. @@ -265,10 +244,6 @@ class TokenList(Token): funcs = lambda tk: imt(tk, i, m, t) return self._token_idx_matching(funcs, idx, end) - def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): - funcs = lambda tk: imt(tk, i, m, t) - return self._token_matching(funcs, idx, end) - def token_not_matching(self, funcs, idx): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs funcs = [lambda tk: not func(tk) for func in funcs] @@ -290,32 +265,6 @@ class TokenList(Token): (skip_cm and imt(tk, t=T.Comment, i=Comment))) return self._token_idx_matching(funcs, idx, reverse=True) - def token_prev(self, idx=0, skip_ws=True, skip_cm=False): - """Returns the previous token relative to *idx*. - - If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. - ``None`` is returned if there's no previous token. - """ - if isinstance(idx, int): - idx += 1 # alot of code usage current pre-compensates for this - funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or - (skip_cm and imt(tk, t=T.Comment, i=Comment))) - return self._token_matching(funcs, idx, reverse=True) - - def token_next(self, idx=0, skip_ws=True, skip_cm=False): - """Returns the next token relative to *idx*. - - If called with idx = 0. Returns the first child token. - If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. - If *skip_cm* is ``True`` (default: ``False``), comments are ignored. - ``None`` is returned if there's no next token. - """ - if isinstance(idx, int): - idx += 1 # alot of code usage current pre-compensates for this - funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or - (skip_cm and imt(tk, t=T.Comment, i=Comment))) - return self._token_matching(funcs, idx) - # TODO: May need to implement skip_cm for upstream changes. # TODO: May need to re-add default value to idx def token_idx_next(self, idx, skip_ws=True, skip_cm=False): diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 59301ff..c3542b8 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -103,18 +103,6 @@ def imt(token, i=None, m=None, t=None): return False -def find_matching(tlist, token, open_pattern, close_pattern): - idx = tlist.token_index(token) if not isinstance(token, int) else token - depth = 0 - for token in tlist.tokens[idx:]: - if token.match(*open_pattern): - depth += 1 - elif token.match(*close_pattern): - depth -= 1 - if depth == 0: - return token - - def consume(iterator, n): """Advance the iterator n-steps ahead. 
If n is none, consume entirely.""" deque(itertools.islice(iterator, n), maxlen=0) -- cgit v1.2.1 From 4f922d9b6fb68b8281c6b3d93a57a4c84860e06a Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 22:01:53 -0700 Subject: Rename token_idx_ funcs to simply token_ funcs --- examples/column_defs_lowlevel.py | 8 +-- sqlparse/engine/grouping.py | 100 ++++++++++++++++++------------------- sqlparse/filters/aligned_indent.py | 10 ++-- sqlparse/filters/others.py | 14 +++--- sqlparse/filters/reindent.py | 26 +++++----- sqlparse/sql.py | 46 ++++++++--------- tests/test_grouping.py | 4 +- 7 files changed, 104 insertions(+), 104 deletions(-) diff --git a/examples/column_defs_lowlevel.py b/examples/column_defs_lowlevel.py index 1ebd065..584b3f3 100644 --- a/examples/column_defs_lowlevel.py +++ b/examples/column_defs_lowlevel.py @@ -17,16 +17,16 @@ def extract_definitions(token_list): definitions = [] tmp = [] # grab the first token, ignoring whitespace. idx=1 to skip open ( - tidx, token = token_list.token_idx_next(1) + tidx, token = token_list.token_next(1) while token and not token.match(sqlparse.tokens.Punctuation, ')'): tmp.append(token) # grab the next token, this times including whitespace - tidx, token = token_list.token_idx_next(tidx, skip_ws=False) + tidx, token = token_list.token_next(tidx, skip_ws=False) # split on ",", except when on end of statement if token and token.match(sqlparse.tokens.Punctuation, ','): definitions.append(tmp) tmp = [] - tidx, token = token_list.token_idx_next(tidx) + tidx, token = token_list.token_next(tidx) if tmp and isinstance(tmp[0], sqlparse.sql.Identifier): definitions.append(tmp) return definitions @@ -41,7 +41,7 @@ if __name__ == '__main__': parsed = sqlparse.parse(SQL)[0] # extract the parenthesis which holds column definitions - _, par = parsed.token_idx_next_by(i=sqlparse.sql.Parenthesis) + _, par = parsed.token_next_by(i=sqlparse.sql.Parenthesis) columns = extract_definitions(par) for column in columns: diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 88064cb..a229e3d 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -32,16 +32,16 @@ def _group_left_right(tlist, m, cls, continue tidx = tlist.token_index(token) - pidx, prev_ = tlist.token_idx_prev(tidx) - nidx, next_ = tlist.token_idx_next(tidx) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) if valid_left(prev_) and valid_right(next_): if semicolon: # only overwrite if a semicolon present. - snidx, _ = tlist.token_idx_next_by(m=M_SEMICOLON, idx=nidx) + snidx, _ = tlist.token_next_by(m=M_SEMICOLON, idx=nidx) nidx = snidx or nidx # Luckily, this leaves the position of `token` intact. - tlist.group_tokens_between(cls, pidx, nidx, extend=True) + tlist.group_tokens(cls, pidx, nidx, extend=True) def _group_matching(tlist, cls): @@ -64,7 +64,7 @@ def _group_matching(tlist, cls): # this indicates invalid sql and unbalanced tokens. 
# instead of break, continue in case other "valid" groups exist continue - tlist.group_tokens_between(cls, open_token, token) + tlist.group_tokens(cls, open_token, token) def group_if(tlist): @@ -115,10 +115,10 @@ def group_case(tlist): def group_identifier(tlist): T_IDENT = (T.String.Symbol, T.Name) - tidx, token = tlist.token_idx_next_by(t=T_IDENT) + tidx, token = tlist.token_next_by(t=T_IDENT) while token: - tlist.group_tokens_between(sql.Identifier, tidx, tidx) - tidx, token = tlist.token_idx_next_by(t=T_IDENT, idx=tidx + 1) + tlist.group_tokens(sql.Identifier, tidx, tidx) + tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1) def group_period(tlist): @@ -133,14 +133,14 @@ def group_period(tlist): def group_arrays(tlist): - tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets) + tidx, token = tlist.token_next_by(i=sql.SquareBrackets) while token: - pidx, prev = tlist.token_idx_prev(tidx) - if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), + pidx, prev_ = tlist.token_prev(tidx) + if imt(prev_, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): - tlist.group_tokens_between(sql.Identifier, pidx, tidx, extend=True) + tlist.group_tokens(sql.Identifier, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx + 1) @recurse(sql.Identifier) @@ -151,18 +151,18 @@ def group_operator(tlist): T_CYCLE = T_NUMERICAL + T_STRING + T_NAME func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) - tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard)) + tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) while token: - pidx, prev_ = tlist.token_idx_prev(tidx) - nidx, next_ = tlist.token_idx_next(tidx) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) if func(prev_) and func(next_): token.ttype = T.Operator - tlist.group_tokens_between(sql.Operation, pidx, nidx) + tlist.group_tokens(sql.Operation, pidx, nidx) tidx = pidx - tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard), - idx=tidx + 1) + tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard), + idx=tidx + 1) @recurse(sql.IdentifierList) @@ -174,15 +174,15 @@ def group_identifier_list(tlist): func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST) - tidx, token = tlist.token_idx_next_by(m=M_COMMA) + tidx, token = tlist.token_next_by(m=M_COMMA) while token: - pidx, prev_ = tlist.token_idx_prev(tidx) - nidx, next_ = tlist.token_idx_next(tidx) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) if func(prev_) and func(next_): - tlist.group_tokens_between(sql.IdentifierList, pidx, nidx, extend=True) + tlist.group_tokens(sql.IdentifierList, pidx, nidx, extend=True) tidx = pidx - tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) + tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) def group_brackets(tlist): @@ -195,23 +195,23 @@ def group_parenthesis(tlist): @recurse(sql.Comment) def group_comments(tlist): - tidx, token = tlist.token_idx_next_by(t=T.Comment) + tidx, token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx + 1) if end is not None: eidx = tlist.token_index(end) - eidx, end = tlist.token_idx_prev(eidx, skip_ws=False) - tlist.group_tokens_between(sql.Comment, tidx, eidx) + eidx, end = tlist.token_prev(eidx, skip_ws=False) + tlist.group_tokens(sql.Comment, 
tidx, eidx) - tidx, token = tlist.token_idx_next_by(t=T.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx + 1) @recurse(sql.Where) def group_where(tlist): - tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN) + tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN) while token: - eidx, end = tlist.token_idx_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) + eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) if end is None: end = tlist._groupable_tokens[-1] @@ -219,8 +219,8 @@ def group_where(tlist): end = tlist.tokens[eidx - 1] # TODO: convert this to eidx instead of end token. # i think above values are len(tlist) and eidx-1 - tlist.group_tokens_between(sql.Where, tidx, end) - tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) + tlist.group_tokens(sql.Where, tidx, end) + tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) @recurse() @@ -228,12 +228,12 @@ def group_aliased(tlist): I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier, sql.Operation) - tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) + tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: - nidx, next_ = tlist.token_idx_next(tidx) + nidx, next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): - tlist.group_tokens_between(sql.Identifier, tidx, nidx, extend=True) - tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) + tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True) + tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) def group_typecasts(tlist): @@ -252,34 +252,34 @@ def group_functions(tlist): if has_create and has_table: return - tidx, token = tlist.token_idx_next_by(t=T.Name) + tidx, token = tlist.token_next_by(t=T.Name) while token: - nidx, next_ = tlist.token_idx_next(tidx) + nidx, next_ = tlist.token_next(tidx) if isinstance(next_, sql.Parenthesis): - tlist.group_tokens_between(sql.Function, tidx, nidx) - tidx, token = tlist.token_idx_next_by(t=T.Name, idx=tidx + 1) + tlist.group_tokens(sql.Function, tidx, nidx) + tidx, token = tlist.token_next_by(t=T.Name, idx=tidx + 1) def group_order(tlist): """Group together Identifier and Asc/Desc token""" - tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order) + tidx, token = tlist.token_next_by(t=T.Keyword.Order) while token: - pidx, prev = tlist.token_idx_prev(tidx) - if imt(prev, i=sql.Identifier, t=T.Number): - tlist.group_tokens_between(sql.Identifier, pidx, tidx) + pidx, prev_ = tlist.token_prev(tidx) + if imt(prev_, i=sql.Identifier, t=T.Number): + tlist.group_tokens(sql.Identifier, pidx, tidx) tidx = pidx - tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx + 1) @recurse() def align_comments(tlist): - tidx, token = tlist.token_idx_next_by(i=sql.Comment) + tidx, token = tlist.token_next_by(i=sql.Comment) while token: - pidx, prev = tlist.token_idx_prev(tidx) - if isinstance(prev, sql.TokenList): - tlist.group_tokens_between(sql.TokenList, pidx, tidx, extend=True) + pidx, prev_ = tlist.token_prev(tidx) + if isinstance(prev_, sql.TokenList): + tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_idx_next_by(i=sql.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx + 1) def group(stmt): diff --git a/sqlparse/filters/aligned_indent.py b/sqlparse/filters/aligned_indent.py index 719b450..ed5e15e 100644 --- 
a/sqlparse/filters/aligned_indent.py +++ b/sqlparse/filters/aligned_indent.py @@ -46,7 +46,7 @@ class AlignedIndentFilter(object): def _process_parenthesis(self, tlist): # if this isn't a subquery, don't re-indent - _, token = tlist.token_idx_next_by(m=(T.DML, 'SELECT')) + _, token = tlist.token_next_by(m=(T.DML, 'SELECT')) if token is not None: with indent(self): tlist.insert_after(tlist[0], self.nl('SELECT')) @@ -67,7 +67,7 @@ class AlignedIndentFilter(object): offset_ = len('case ') + len('when ') cases = tlist.get_cases(skip_ws=True) # align the end as well - _, end_token = tlist.token_idx_next_by(m=(T.Keyword, 'END')) + _, end_token = tlist.token_next_by(m=(T.Keyword, 'END')) cases.append((None, [end_token])) condition_width = [len(' '.join(map(text_type, cond))) if cond else 0 @@ -88,7 +88,7 @@ class AlignedIndentFilter(object): def _next_token(self, tlist, idx=0): split_words = T.Keyword, self.split_words, True - tidx, token = tlist.token_idx_next_by(m=split_words, idx=idx) + tidx, token = tlist.token_next_by(m=split_words, idx=idx) # treat "BETWEEN x and y" as a single statement if token and token.normalized == 'BETWEEN': tidx, token = self._next_token(tlist, tidx + 1) @@ -113,9 +113,9 @@ class AlignedIndentFilter(object): # process any sub-sub statements for sgroup in tlist.get_sublists(): idx = tlist.token_index(sgroup) - pidx, prev = tlist.token_idx_prev(idx) + pidx, prev_ = tlist.token_prev(idx) # HACK: make "group/order by" work. Longer than max_len. - offset_ = 3 if (prev and prev.match(T.Keyword, 'BY')) else 0 + offset_ = 3 if (prev_ and prev_.match(T.Keyword, 'BY')) else 0 with offset(self, offset_): self._process(sgroup) diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index ecde2fe..a23a6c6 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -14,12 +14,12 @@ class StripCommentsFilter(object): def _process(tlist): def get_next_comment(): # TODO(andi) Comment types should be unified, see related issue38 - return tlist.token_idx_next_by(i=sql.Comment, t=T.Comment) + return tlist.token_next_by(i=sql.Comment, t=T.Comment) tidx, token = get_next_comment() while token: - pidx, prev_ = tlist.token_idx_prev(tidx, skip_ws=False) - nidx, next_ = tlist.token_idx_next(tidx, skip_ws=False) + pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) + nidx, next_ = tlist.token_next(tidx, skip_ws=False) # Replace by whitespace if prev and next exist and if they're not # whitespaces. This doesn't apply if prev or next is a paranthesis. 
if (prev_ is None or next_ is None or @@ -87,19 +87,19 @@ class SpacesAroundOperatorsFilter(object): def _process(tlist): ttypes = (T.Operator, T.Comparison) - tidx, token = tlist.token_idx_next_by(t=ttypes) + tidx, token = tlist.token_next_by(t=ttypes) while token: - nidx, next_ = tlist.token_idx_next(tidx, skip_ws=False) + nidx, next_ = tlist.token_next(tidx, skip_ws=False) if next_ and next_.ttype != T.Whitespace: tlist.insert_after(tidx, sql.Token(T.Whitespace, ' ')) - pidx, prev_ = tlist.token_idx_prev(tidx, skip_ws=False) + pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) if prev_ and prev_.ttype != T.Whitespace: tlist.insert_before(tidx, sql.Token(T.Whitespace, ' ')) tidx += 1 # has to shift since token inserted before it # assert tlist.token_index(token) == tidx - tidx, token = tlist.token_idx_next_by(t=ttypes, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=ttypes, idx=tidx + 1) def process(self, stmt): [self.process(sgroup) for sgroup in stmt.get_sublists()] diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py index d13fdf3..d23a8d5 100644 --- a/sqlparse/filters/reindent.py +++ b/sqlparse/filters/reindent.py @@ -49,7 +49,7 @@ class ReindentFilter(object): 'GROUP', 'ORDER', 'UNION', 'VALUES', 'SET', 'BETWEEN', 'EXCEPT', 'HAVING') m_split = T.Keyword, split_words, True - tidx, token = tlist.token_idx_next_by(m=m_split, idx=idx) + tidx, token = tlist.token_next_by(m=m_split, idx=idx) if token and token.normalized == 'BETWEEN': tidx, token = self._next_token(tlist, tidx + 1) @@ -63,10 +63,10 @@ class ReindentFilter(object): tidx, token = self._next_token(tlist) while token: tidx = tlist.token_index(token) - pidx, prev = tlist.token_idx_prev(tidx, skip_ws=False) - uprev = text_type(prev) + pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) + uprev = text_type(prev_) - if prev and prev.is_whitespace(): + if prev_ and prev_.is_whitespace(): del tlist.tokens[pidx] tidx -= 1 @@ -77,17 +77,17 @@ class ReindentFilter(object): tidx, token = self._next_token(tlist, tidx + 1) def _split_statements(self, tlist): - tidx, token = tlist.token_idx_next_by(t=(T.Keyword.DDL, T.Keyword.DML)) + tidx, token = tlist.token_next_by(t=(T.Keyword.DDL, T.Keyword.DML)) while token: - pidx, prev = tlist.token_idx_prev(tidx, skip_ws=False) - if prev and prev.is_whitespace(): + pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) + if prev_ and prev_.is_whitespace(): del tlist.tokens[pidx] tidx -= 1 # only break if it's not the first token - if prev: + if prev_: tlist.insert_before(tidx, self.nl()) tidx += 1 - tidx, token = tlist.token_idx_next_by( + tidx, token = tlist.token_next_by( t=(T.Keyword.DDL, T.Keyword.DML), idx=tidx + 1) def _process(self, tlist): @@ -96,7 +96,7 @@ class ReindentFilter(object): func(tlist) def _process_where(self, tlist): - tidx, token = tlist.token_idx_next_by(m=(T.Keyword, 'WHERE')) + tidx, token = tlist.token_next_by(m=(T.Keyword, 'WHERE')) # issue121, errors in statement fixed?? 
tlist.insert_before(tidx, self.nl()) @@ -105,8 +105,8 @@ class ReindentFilter(object): def _process_parenthesis(self, tlist): ttypes = T.Keyword.DML, T.Keyword.DDL - _, is_dml_dll = tlist.token_idx_next_by(t=ttypes) - fidx, first = tlist.token_idx_next_by(m=sql.Parenthesis.M_OPEN) + _, is_dml_dll = tlist.token_next_by(t=ttypes) + fidx, first = tlist.token_next_by(m=sql.Parenthesis.M_OPEN) with indent(self, 1 if is_dml_dll else 0): tlist.tokens.insert(0, self.nl()) if is_dml_dll else None @@ -143,7 +143,7 @@ class ReindentFilter(object): # len "when ", "then ", "else " with offset(self, len("WHEN ")): self._process_default(tlist) - end_idx, end = tlist.token_idx_next_by(m=sql.Case.M_CLOSE) + end_idx, end = tlist.token_next_by(m=sql.Case.M_CLOSE) tlist.insert_before(end_idx, self.nl()) def _process_default(self, tlist, stmts=True): diff --git a/sqlparse/sql.py b/sqlparse/sql.py index af282a3..ed56793 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -204,7 +204,7 @@ class TokenList(Token): def _groupable_tokens(self): return self.tokens - def _token_idx_matching(self, funcs, start=0, end=None, reverse=False): + def _token_matching(self, funcs, start=0, end=None, reverse=False): """next token that match functions""" if start is None: return None @@ -238,21 +238,21 @@ class TokenList(Token): # this on is inconsistent, using Comment instead of T.Comment... funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or (skip_cm and imt(tk, t=T.Comment, i=Comment))) - return self._token_idx_matching(funcs)[1] + return self._token_matching(funcs)[1] - def token_idx_next_by(self, i=None, m=None, t=None, idx=0, end=None): + def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): funcs = lambda tk: imt(tk, i, m, t) - return self._token_idx_matching(funcs, idx, end) + return self._token_matching(funcs, idx, end) def token_not_matching(self, funcs, idx): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs funcs = [lambda tk: not func(tk) for func in funcs] - return self._token_idx_matching(funcs, idx)[1] + return self._token_matching(funcs, idx)[1] def token_matching(self, funcs, idx): - return self._token_idx_matching(funcs, idx)[1] + return self._token_matching(funcs, idx)[1] - def token_idx_prev(self, idx, skip_ws=True, skip_cm=False): + def token_prev(self, idx, skip_ws=True, skip_cm=False): """Returns the previous token relative to *idx*. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. @@ -263,11 +263,11 @@ class TokenList(Token): idx += 1 # alot of code usage current pre-compensates for this funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or (skip_cm and imt(tk, t=T.Comment, i=Comment))) - return self._token_idx_matching(funcs, idx, reverse=True) + return self._token_matching(funcs, idx, reverse=True) # TODO: May need to implement skip_cm for upstream changes. # TODO: May need to re-add default value to idx - def token_idx_next(self, idx, skip_ws=True, skip_cm=False): + def token_next(self, idx, skip_ws=True, skip_cm=False): """Returns the next token relative to *idx*. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. 
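As a rough orientation for readers of this series (not taken from the patch itself; the sample SQL is made up), the renamed helpers keep the index-returning behaviour of their token_idx_ predecessors and are consumed as (index, token) pairs:

    import sqlparse
    from sqlparse import tokens as T

    parsed = sqlparse.parse('select foo, bar from baz')[0]

    # token_next_by() returns an (index, token) pair instead of a bare token.
    tidx, token = parsed.token_next_by(t=T.Keyword)
    while token:
        print(tidx, token.ttype, token.value)
        # At this point in the series the caller still pre-compensates with
        # +1; a later patch in the series moves that adjustment into
        # token_next_by() itself.
        tidx, token = parsed.token_next_by(t=T.Keyword, idx=tidx + 1)
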
@@ -293,8 +293,8 @@ class TokenList(Token): start = start if isinstance(start, int) else self.token_index(start) return start + self.tokens[start:].index(token) - def group_tokens_between(self, grp_cls, start, end, include_end=True, - extend=False): + def group_tokens(self, grp_cls, start, end, include_end=True, + extend=False): """Replace tokens by an instance of *grp_cls*.""" if isinstance(start, int): start_idx = start @@ -338,7 +338,7 @@ class TokenList(Token): """Inserts *token* after *where*.""" if not isinstance(where, int): where = self.token_index(where) - nidx, next_ = self.token_idx_next(where, skip_ws=skip_ws) + nidx, next_ = self.token_next(where, skip_ws=skip_ws) token.parent = self if next_ is None: self.tokens.append(token) @@ -353,12 +353,12 @@ class TokenList(Token): """Returns the alias for this identifier or ``None``.""" # "name AS alias" - kw_idx, kw = self.token_idx_next_by(m=(T.Keyword, 'AS')) + kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS')) if kw is not None: return self._get_first_name(kw_idx + 1, keywords=True) # "name alias" or "complicated column expression alias" - _, ws = self.token_idx_next_by(t=T.Whitespace) + _, ws = self.token_next_by(t=T.Whitespace) if len(self.tokens) > 2 and ws is not None: return self._get_first_name(reverse=True) @@ -374,7 +374,7 @@ class TokenList(Token): def get_real_name(self): """Returns the real name (object name) of this identifier.""" # a.b - dot_idx, _ = self.token_idx_next_by(m=(T.Punctuation, '.')) + dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.')) return self._get_first_name(dot_idx) def get_parent_name(self): @@ -382,8 +382,8 @@ class TokenList(Token): A parent object is identified by the first occuring dot. """ - dot_idx, _ = self.token_idx_next_by(m=(T.Punctuation, '.')) - _, prev_ = self.token_idx_prev(dot_idx) + dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.')) + _, prev_ = self.token_prev(dot_idx) return remove_quotes(prev_.value) if prev_ is not None else None def _get_first_name(self, idx=None, reverse=False, keywords=False): @@ -433,9 +433,9 @@ class Statement(TokenList): # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. 
fidx = self.token_index(first_token) - tidx, token = self.token_idx_next(fidx, skip_ws=True) + tidx, token = self.token_next(fidx, skip_ws=True) if isinstance(token, (Identifier, IdentifierList)): - _, dml_keyword = self.token_idx_next(tidx, skip_ws=True) + _, dml_keyword = self.token_next(tidx, skip_ws=True) if dml_keyword.ttype == T.Keyword.DML: return dml_keyword.normalized @@ -452,18 +452,18 @@ class Identifier(TokenList): def is_wildcard(self): """Return ``True`` if this identifier contains a wildcard.""" - _, token = self.token_idx_next_by(t=T.Wildcard) + _, token = self.token_next_by(t=T.Wildcard) return token is not None def get_typecast(self): """Returns the typecast or ``None`` of this object as a string.""" - midx, marker = self.token_idx_next_by(m=(T.Punctuation, '::')) - nidx, next_ = self.token_idx_next(midx, skip_ws=False) + midx, marker = self.token_next_by(m=(T.Punctuation, '::')) + nidx, next_ = self.token_next(midx, skip_ws=False) return next_.value if next_ else None def get_ordering(self): """Returns the ordering or ``None`` as uppercase string.""" - _, ordering = self.token_idx_next_by(t=T.Keyword.Order) + _, ordering = self.token_next_by(t=T.Keyword.Order) return ordering.normalized if ordering else None def get_array_indices(self): diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 4f904cf..272d266 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -128,10 +128,10 @@ class TestGrouping(TestCaseBase): p = sqlparse.parse("select * from (" "select a, b + c as d from table) sub")[0] subquery = p.tokens[-1].tokens[0] - idx, iden_list = subquery.token_idx_next_by(i=sql.IdentifierList) + idx, iden_list = subquery.token_next_by(i=sql.IdentifierList) self.assert_(iden_list is not None) # all the identifiers should be within the IdentifierList - _, ilist = subquery.token_idx_next_by(i=sql.Identifier, idx=idx) + _, ilist = subquery.token_next_by(i=sql.Identifier, idx=idx) self.assert_(ilist is None) def test_identifier_list_case(self): -- cgit v1.2.1 From 5002bfa36c4fa2ee72eff18648b6ddc616b718f0 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 22:20:29 -0700 Subject: Normalize behavior between token_next and token_next_by both will now return the "next" token and not itself when passing own index --- sqlparse/engine/grouping.py | 25 ++++++++++++------------- sqlparse/filters/aligned_indent.py | 8 ++++---- sqlparse/filters/others.py | 2 +- sqlparse/filters/reindent.py | 14 +++++++------- sqlparse/sql.py | 3 ++- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index a229e3d..e7072d0 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -118,7 +118,7 @@ def group_identifier(tlist): tidx, token = tlist.token_next_by(t=T_IDENT) while token: tlist.group_tokens(sql.Identifier, tidx, tidx) - tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx) def group_period(tlist): @@ -140,7 +140,7 @@ def group_arrays(tlist): t=(T.Name, T.String.Symbol,)): tlist.group_tokens(sql.Identifier, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx) @recurse(sql.Identifier) @@ -161,8 +161,7 @@ def group_operator(tlist): tlist.group_tokens(sql.Operation, pidx, nidx) tidx = pidx - tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard), - idx=tidx + 1) + tidx, token = 
tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tidx) @recurse(sql.IdentifierList) @@ -182,7 +181,7 @@ def group_identifier_list(tlist): if func(prev_) and func(next_): tlist.group_tokens(sql.IdentifierList, pidx, nidx, extend=True) tidx = pidx - tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) + tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx) def group_brackets(tlist): @@ -198,20 +197,20 @@ def group_comments(tlist): tidx, token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx + 1) + lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx) if end is not None: eidx = tlist.token_index(end) eidx, end = tlist.token_prev(eidx, skip_ws=False) tlist.group_tokens(sql.Comment, tidx, eidx) - tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx) @recurse(sql.Where) def group_where(tlist): tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN) while token: - eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) + eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx) if end is None: end = tlist._groupable_tokens[-1] @@ -220,7 +219,7 @@ def group_where(tlist): # TODO: convert this to eidx instead of end token. # i think above values are len(tlist) and eidx-1 tlist.group_tokens(sql.Where, tidx, end) - tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) + tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx) @recurse() @@ -233,7 +232,7 @@ def group_aliased(tlist): nidx, next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True) - tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx) def group_typecasts(tlist): @@ -257,7 +256,7 @@ def group_functions(tlist): nidx, next_ = tlist.token_next(tidx) if isinstance(next_, sql.Parenthesis): tlist.group_tokens(sql.Function, tidx, nidx) - tidx, token = tlist.token_next_by(t=T.Name, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Name, idx=tidx) def group_order(tlist): @@ -268,7 +267,7 @@ def group_order(tlist): if imt(prev_, i=sql.Identifier, t=T.Number): tlist.group_tokens(sql.Identifier, pidx, tidx) tidx = pidx - tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx) @recurse() @@ -279,7 +278,7 @@ def align_comments(tlist): if isinstance(prev_, sql.TokenList): tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx) def group(stmt): diff --git a/sqlparse/filters/aligned_indent.py b/sqlparse/filters/aligned_indent.py index ed5e15e..2fea4d2 100644 --- a/sqlparse/filters/aligned_indent.py +++ b/sqlparse/filters/aligned_indent.py @@ -86,14 +86,14 @@ class AlignedIndentFilter(object): max_cond_width - condition_width[i])) tlist.insert_after(cond[-1], ws) - def _next_token(self, tlist, idx=0): + def _next_token(self, tlist, idx=-1): split_words = T.Keyword, self.split_words, True tidx, token = tlist.token_next_by(m=split_words, idx=idx) # treat "BETWEEN x and y" as a single statement if token and token.normalized == 'BETWEEN': - tidx, token = self._next_token(tlist, tidx + 1) + tidx, token = self._next_token(tlist, tidx) if token and token.normalized == 'AND': - 
tidx, token = self._next_token(tlist, tidx + 1) + tidx, token = self._next_token(tlist, tidx) return tidx, token def _split_kwds(self, tlist): @@ -106,7 +106,7 @@ class AlignedIndentFilter(object): token_indent = text_type(token) tlist.insert_before(token, self.nl(token_indent)) tidx += 1 - tidx, token = self._next_token(tlist, tidx + 1) + tidx, token = self._next_token(tlist, tidx) def _process_default(self, tlist): self._split_kwds(tlist) diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index a23a6c6..9d4a1d1 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -99,7 +99,7 @@ class SpacesAroundOperatorsFilter(object): tidx += 1 # has to shift since token inserted before it # assert tlist.token_index(token) == tidx - tidx, token = tlist.token_next_by(t=ttypes, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=ttypes, idx=tidx) def process(self, stmt): [self.process(sgroup) for sgroup in stmt.get_sublists()] diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py index d23a8d5..1b539c6 100644 --- a/sqlparse/filters/reindent.py +++ b/sqlparse/filters/reindent.py @@ -44,7 +44,7 @@ class ReindentFilter(object): def nl(self): return sql.Token(T.Whitespace, self.n + self.char * self.leading_ws) - def _next_token(self, tlist, idx=0): + def _next_token(self, tlist, idx=-1): split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR', 'GROUP', 'ORDER', 'UNION', 'VALUES', 'SET', 'BETWEEN', 'EXCEPT', 'HAVING') @@ -52,10 +52,10 @@ class ReindentFilter(object): tidx, token = tlist.token_next_by(m=m_split, idx=idx) if token and token.normalized == 'BETWEEN': - tidx, token = self._next_token(tlist, tidx + 1) + tidx, token = self._next_token(tlist, tidx) if token and token.normalized == 'AND': - tidx, token = self._next_token(tlist, tidx + 1) + tidx, token = self._next_token(tlist, tidx) return tidx, token @@ -74,10 +74,11 @@ class ReindentFilter(object): tlist.insert_before(tidx, self.nl()) tidx += 1 - tidx, token = self._next_token(tlist, tidx + 1) + tidx, token = self._next_token(tlist, tidx) def _split_statements(self, tlist): - tidx, token = tlist.token_next_by(t=(T.Keyword.DDL, T.Keyword.DML)) + ttypes = T.Keyword.DML, T.Keyword.DDL + tidx, token = tlist.token_next_by(t=ttypes) while token: pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) if prev_ and prev_.is_whitespace(): @@ -87,8 +88,7 @@ class ReindentFilter(object): if prev_: tlist.insert_before(tidx, self.nl()) tidx += 1 - tidx, token = tlist.token_next_by( - t=(T.Keyword.DDL, T.Keyword.DML), idx=tidx + 1) + tidx, token = tlist.token_next_by(t=ttypes, idx=tidx) def _process(self, tlist): func_name = '_process_{cls}'.format(cls=type(tlist).__name__) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index ed56793..4b6abf1 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -240,8 +240,9 @@ class TokenList(Token): (skip_cm and imt(tk, t=T.Comment, i=Comment))) return self._token_matching(funcs)[1] - def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): + def token_next_by(self, i=None, m=None, t=None, idx=-1, end=None): funcs = lambda tk: imt(tk, i, m, t) + idx += 1 return self._token_matching(funcs, idx, end) def token_not_matching(self, funcs, idx): -- cgit v1.2.1 From 56b28dc15023d36bab8764bea6df75e28651646e Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 14 Jun 2016 04:15:27 -0700 Subject: Make use of token_index more obvious --- sqlparse/engine/grouping.py | 10 ++++++---- sqlparse/filters/reindent.py | 1 - sqlparse/sql.py | 13 +++---------- 3 files 
changed, 9 insertions(+), 15 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index e7072d0..c52a759 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -64,7 +64,9 @@ def _group_matching(tlist, cls): # this indicates invalid sql and unbalanced tokens. # instead of break, continue in case other "valid" groups exist continue - tlist.group_tokens(cls, open_token, token) + oidx = tlist.token_index(open_token) + cidx = tlist.token_index(token) + tlist.group_tokens(cls, oidx, cidx) def group_if(tlist): @@ -196,10 +198,9 @@ def group_parenthesis(tlist): def group_comments(tlist): tidx, token = tlist.token_next_by(t=T.Comment) while token: - end = tlist.token_not_matching( + eidx, end = tlist.token_not_matching( lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx) if end is not None: - eidx = tlist.token_index(end) eidx, end = tlist.token_prev(eidx, skip_ws=False) tlist.group_tokens(sql.Comment, tidx, eidx) @@ -218,7 +219,8 @@ def group_where(tlist): end = tlist.tokens[eidx - 1] # TODO: convert this to eidx instead of end token. # i think above values are len(tlist) and eidx-1 - tlist.group_tokens(sql.Where, tidx, end) + eidx = tlist.token_index(end) + tlist.group_tokens(sql.Where, tidx, eidx) tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx) diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py index 1b539c6..68595a5 100644 --- a/sqlparse/filters/reindent.py +++ b/sqlparse/filters/reindent.py @@ -62,7 +62,6 @@ class ReindentFilter(object): def _split_kwds(self, tlist): tidx, token = self._next_token(tlist) while token: - tidx = tlist.token_index(token) pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) uprev = text_type(prev_) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 4b6abf1..9656390 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -248,7 +248,7 @@ class TokenList(Token): def token_not_matching(self, funcs, idx): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs funcs = [lambda tk: not func(tk) for func in funcs] - return self._token_matching(funcs, idx)[1] + return self._token_matching(funcs, idx) def token_matching(self, funcs, idx): return self._token_matching(funcs, idx)[1] @@ -297,13 +297,9 @@ class TokenList(Token): def group_tokens(self, grp_cls, start, end, include_end=True, extend=False): """Replace tokens by an instance of *grp_cls*.""" - if isinstance(start, int): - start_idx = start - start = self.tokens[start_idx] - else: - start_idx = self.token_index(start) + start_idx = start + start = self.tokens[start_idx] - end = end if isinstance(end, int) else self.token_index(end, start_idx) end_idx = end + include_end # will be needed later for new group_clauses @@ -390,9 +386,6 @@ class TokenList(Token): def _get_first_name(self, idx=None, reverse=False, keywords=False): """Returns the name of the first token with a name""" - if idx and not isinstance(idx, int): - idx = self.token_index(idx) + 1 - tokens = self.tokens[idx:] if idx else self.tokens tokens = reversed(tokens) if reverse else tokens types = [T.Name, T.Wildcard, T.String.Symbol] -- cgit v1.2.1 From af9b82e0b2d00732704fedf7d7b03dcb598dca84 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 14 Jun 2016 06:26:41 -0700 Subject: Reorder grouping code and func call order Remove repeated for-each/for grouping --- sqlparse/engine/grouping.py | 133 ++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 67 deletions(-) diff --git a/sqlparse/engine/grouping.py 
b/sqlparse/engine/grouping.py index c52a759..7879f76 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -9,41 +9,11 @@ from sqlparse import sql from sqlparse import tokens as T from sqlparse.utils import recurse, imt -M_ROLE = (T.Keyword, ('null', 'role')) -M_SEMICOLON = (T.Punctuation, ';') -M_COMMA = (T.Punctuation, ',') - T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float) T_STRING = (T.String, T.String.Single, T.String.Symbol) T_NAME = (T.Name, T.Name.Placeholder) -def _group_left_right(tlist, m, cls, - valid_left=lambda t: t is not None, - valid_right=lambda t: t is not None, - semicolon=False): - """Groups together tokens that are joined by a middle token. ie. x < y""" - for token in list(tlist): - if token.is_group() and not isinstance(token, cls): - _group_left_right(token, m, cls, valid_left, valid_right, - semicolon) - continue - if not token.match(*m): - continue - - tidx = tlist.token_index(token) - pidx, prev_ = tlist.token_prev(tidx) - nidx, next_ = tlist.token_next(tidx) - - if valid_left(prev_) and valid_right(next_): - if semicolon: - # only overwrite if a semicolon present. - snidx, _ = tlist.token_next_by(m=M_SEMICOLON, idx=nidx) - nidx = snidx or nidx - # Luckily, this leaves the position of `token` intact. - tlist.group_tokens(cls, pidx, nidx, extend=True) - - def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" opens = [] @@ -69,6 +39,18 @@ def _group_matching(tlist, cls): tlist.group_tokens(cls, oidx, cidx) +def group_brackets(tlist): + _group_matching(tlist, sql.SquareBrackets) + + +def group_parenthesis(tlist): + _group_matching(tlist, sql.Parenthesis) + + +def group_case(tlist): + _group_matching(tlist, sql.Case) + + def group_if(tlist): _group_matching(tlist, sql.If) @@ -77,16 +59,54 @@ def group_for(tlist): _group_matching(tlist, sql.For) -def group_foreach(tlist): - _group_matching(tlist, sql.For) - - def group_begin(tlist): _group_matching(tlist, sql.Begin) +def _group_left_right(tlist, m, cls, + valid_left=lambda t: t is not None, + valid_right=lambda t: t is not None, + semicolon=False): + """Groups together tokens that are joined by a middle token. ie. x < y""" + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + _group_left_right(token, m, cls, valid_left, valid_right, + semicolon) + continue + if not token.match(*m): + continue + + tidx = tlist.token_index(token) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) + + if valid_left(prev_) and valid_right(next_): + if semicolon: + # only overwrite if a semicolon present. + m_semicolon = T.Punctuation, ';' + snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx) + nidx = snidx or nidx + # Luckily, this leaves the position of `token` intact. 
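To see what these left/right pairings buy at the API level, a small sketch (the statement text is illustrative only; get_typecast() is the accessor already defined in sql.py):

    import sqlparse
    from sqlparse import sql

    stmt = sqlparse.parse('select foo::integer from bar')[0]
    # group_typecasts() folds "foo", "::" and "integer" into one Identifier,
    # so the typecast is reachable from the grouped node.
    _, ident = stmt.token_next_by(i=sql.Identifier)
    print(ident.get_typecast())  # integer
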
+ tlist.group_tokens(cls, pidx, nidx, extend=True) + + +def group_typecasts(tlist): + _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier) + + +def group_period(tlist): + lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier), + t=(T.Name, T.String.Symbol,)) + + rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function), + t=(T.Name, T.String.Symbol, T.Wildcard)) + + _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier, + valid_left=lfunc, valid_right=rfunc) + + def group_as(tlist): - lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.value == 'NULL' + lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.normalized == 'NULL' rfunc = lambda tk: not imt(tk, t=(T.DML, T.DDL)) _group_left_right(tlist, (T.Keyword, 'AS'), sql.Identifier, valid_left=lfunc, valid_right=rfunc) @@ -109,10 +129,6 @@ def group_comparison(tlist): valid_left=func, valid_right=func) -def group_case(tlist): - _group_matching(tlist, sql.Case) - - @recurse(sql.Identifier) def group_identifier(tlist): T_IDENT = (T.String.Symbol, T.Name) @@ -123,17 +139,6 @@ def group_identifier(tlist): tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx) -def group_period(tlist): - lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier), - t=(T.Name, T.String.Symbol,)) - - rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function), - t=(T.Name, T.String.Symbol, T.Wildcard)) - - _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier, - valid_left=lfunc, valid_right=rfunc) - - def group_arrays(tlist): tidx, token = tlist.token_next_by(i=sql.SquareBrackets) while token: @@ -168,6 +173,9 @@ def group_operator(tlist): @recurse(sql.IdentifierList) def group_identifier_list(tlist): + M_ROLE = T.Keyword, ('null', 'role') + M_COMMA = T.Punctuation, ',' + I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, sql.IdentifierList, sql.Operation) T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME + @@ -186,14 +194,6 @@ def group_identifier_list(tlist): tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx) -def group_brackets(tlist): - _group_matching(tlist, sql.SquareBrackets) - - -def group_parenthesis(tlist): - _group_matching(tlist, sql.Parenthesis) - - @recurse(sql.Comment) def group_comments(tlist): tidx, token = tlist.token_next_by(t=T.Comment) @@ -237,10 +237,6 @@ def group_aliased(tlist): tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx) -def group_typecasts(tlist): - _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier) - - @recurse(sql.Function) def group_functions(tlist): has_create = False @@ -286,11 +282,17 @@ def align_comments(tlist): def group(stmt): for func in [ group_comments, + + # _group_matching group_brackets, group_parenthesis, + group_case, + group_if, + group_for, + group_begin, + group_functions, group_where, - group_case, group_period, group_arrays, group_identifier, @@ -301,12 +303,9 @@ def group(stmt): group_aliased, group_assignment, group_comparison, + align_comments, group_identifier_list, - group_if, - group_for, - group_foreach, - group_begin, ]: func(stmt) return stmt -- cgit v1.2.1 From 74b3464d781cbad4c39cd082daa80334aa7aed78 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 14 Jun 2016 21:20:31 -0700 Subject: Re-Write grouping functions --- sqlparse/engine/grouping.py | 76 ++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 7879f76..ae214c2 100644 --- a/sqlparse/engine/grouping.py +++ 
b/sqlparse/engine/grouping.py @@ -152,46 +152,42 @@ def group_arrays(tlist): @recurse(sql.Identifier) def group_operator(tlist): - I_CYCLE = (sql.SquareBrackets, sql.Parenthesis, sql.Function, + ttypes = T_NUMERICAL + T_STRING + T_NAME + clss = (sql.SquareBrackets, sql.Parenthesis, sql.Function, sql.Identifier, sql.Operation) - # wilcards wouldn't have operations next to them - T_CYCLE = T_NUMERICAL + T_STRING + T_NAME - func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) - tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) - while token: - pidx, prev_ = tlist.token_prev(tidx) - nidx, next_ = tlist.token_next(tidx) + def match(token): + return imt(token, t=(T.Operator, T.Wildcard)) - if func(prev_) and func(next_): - token.ttype = T.Operator - tlist.group_tokens(sql.Operation, pidx, nidx) - tidx = pidx + def valid(token): + return imt(token, i=clss, t=ttypes) + + def post(tlist, pidx, tidx, nidx): + tlist[tidx].ttype = T.Operator + return pidx, nidx - tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tidx) + _group(tlist, sql.Operation, match, valid, valid, post, extend=False) -@recurse(sql.IdentifierList) def group_identifier_list(tlist): - M_ROLE = T.Keyword, ('null', 'role') - M_COMMA = T.Punctuation, ',' + m_role = T.Keyword, ('null', 'role') + m_comma = T.Punctuation, ',' + clss = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, + sql.IdentifierList, sql.Operation) + ttypes = (T_NUMERICAL + T_STRING + T_NAME + + (T.Keyword, T.Comment, T.Wildcard)) - I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, - sql.IdentifierList, sql.Operation) - T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME + - (T.Keyword, T.Comment, T.Wildcard)) + def match(token): + return imt(token, m=m_comma) - func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST) + def func(token): + return imt(token, i=clss, m=m_role, t=ttypes) - tidx, token = tlist.token_next_by(m=M_COMMA) - while token: - pidx, prev_ = tlist.token_prev(tidx) - nidx, next_ = tlist.token_next(tidx) + def post(tlist, pidx, tidx, nidx): + return pidx, nidx - if func(prev_) and func(next_): - tlist.group_tokens(sql.IdentifierList, pidx, nidx, extend=True) - tidx = pidx - tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx) + _group(tlist, sql.IdentifierList, match, + valid_left=func, valid_right=func, post=post, extend=True) @recurse(sql.Comment) @@ -309,3 +305,25 @@ def group(stmt): ]: func(stmt) return stmt + + +def _group(tlist, cls, match, + valid_left=lambda t: True, + valid_right=lambda t: True, + post=None, + extend=True): + """Groups together tokens that are joined by a middle token. ie. 
x < y""" + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + _group(token, cls, match, valid_left, valid_right, post, extend) + continue + if not match(token): + continue + + tidx = tlist.token_index(token) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) + + if valid_left(prev_) and valid_right(next_): + from_idx, to_idx = post(tlist, pidx, tidx, nidx) + tlist.group_tokens(cls, from_idx, to_idx, extend=extend) -- cgit v1.2.1 From 0acaa4c57d7169f3903af3c3df4faf95d2cbea84 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 02:39:54 -0700 Subject: Reduce calls by _group to get tk idx --- sqlparse/engine/grouping.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index ae214c2..a74f6f8 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -313,17 +313,28 @@ def _group(tlist, cls, match, post=None, extend=True): """Groups together tokens that are joined by a middle token. ie. x < y""" - for token in list(tlist): + + tidx_offset = 0 + pidx, prev_ = None, None + for idx, token in enumerate(list(tlist)): + tidx = idx - tidx_offset + + if token.is_whitespace(): + continue if token.is_group() and not isinstance(token, cls): _group(token, cls, match, valid_left, valid_right, post, extend) + pidx, prev_ = tidx, token continue if not match(token): + pidx, prev_ = tidx, token continue - tidx = tlist.token_index(token) - pidx, prev_ = tlist.token_prev(tidx) nidx, next_ = tlist.token_next(tidx) if valid_left(prev_) and valid_right(next_): from_idx, to_idx = post(tlist, pidx, tidx, nidx) - tlist.group_tokens(cls, from_idx, to_idx, extend=extend) + grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend) + tidx_offset += to_idx - from_idx + pidx, prev_ = from_idx, grp + else: + pidx, prev_ = tidx, token -- cgit v1.2.1 From 49979e9ca1159190320e2faad989c8bd267c8000 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 12:36:45 -0700 Subject: Refactor _group's prev token logic --- sqlparse/engine/grouping.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index a74f6f8..1be4f53 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -321,20 +321,18 @@ def _group(tlist, cls, match, if token.is_whitespace(): continue + if token.is_group() and not isinstance(token, cls): _group(token, cls, match, valid_left, valid_right, post, extend) - pidx, prev_ = tidx, token - continue - if not match(token): - pidx, prev_ = tidx, token - continue - nidx, next_ = tlist.token_next(tidx) + if match(token): + nidx, next_ = tlist.token_next(tidx) + if valid_left(prev_) and valid_right(next_): + from_idx, to_idx = post(tlist, pidx, tidx, nidx) + grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend) - if valid_left(prev_) and valid_right(next_): - from_idx, to_idx = post(tlist, pidx, tidx, nidx) - grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend) - tidx_offset += to_idx - from_idx - pidx, prev_ = from_idx, grp - else: - pidx, prev_ = tidx, token + tidx_offset += to_idx - from_idx + pidx, prev_ = from_idx, grp + continue + + pidx, prev_ = tidx, token -- cgit v1.2.1 From a653650432b76447255e69cd93ba2d2e2c34d037 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 03:04:41 -0700 Subject: remove extra recurse and rename vars # Conflicts: # 
sqlparse/engine/grouping.py --- sqlparse/engine/grouping.py | 73 ++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 1be4f53..b0b8836 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -118,11 +118,11 @@ def group_assignment(tlist): def group_comparison(tlist): - I_COMPERABLE = (sql.Parenthesis, sql.Function, sql.Identifier, + sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier, sql.Operation) - T_COMPERABLE = T_NUMERICAL + T_STRING + T_NAME + ttypes = T_NUMERICAL + T_STRING + T_NAME - func = lambda tk: (imt(tk, t=T_COMPERABLE, i=I_COMPERABLE) or + func = lambda tk: (imt(tk, t=ttypes, i=sqlcls) or (tk and tk.is_keyword and tk.normalized == 'NULL')) _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison, @@ -131,63 +131,74 @@ def group_comparison(tlist): @recurse(sql.Identifier) def group_identifier(tlist): - T_IDENT = (T.String.Symbol, T.Name) + ttypes = (T.String.Symbol, T.Name) - tidx, token = tlist.token_next_by(t=T_IDENT) + tidx, token = tlist.token_next_by(t=ttypes) while token: tlist.group_tokens(sql.Identifier, tidx, tidx) - tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx) + tidx, token = tlist.token_next_by(t=ttypes, idx=tidx) def group_arrays(tlist): - tidx, token = tlist.token_next_by(i=sql.SquareBrackets) - while token: - pidx, prev_ = tlist.token_prev(tidx) - if imt(prev_, i=(sql.SquareBrackets, sql.Identifier, sql.Function), - t=(T.Name, T.String.Symbol,)): - tlist.group_tokens(sql.Identifier, pidx, tidx, extend=True) - tidx = pidx - tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx) + sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function + ttypes = T.Name, T.String.Symbol + + def match(token): + return isinstance(token, sql.SquareBrackets) + + def valid_prev(token): + return imt(token, i=sqlcls, t=ttypes) + + def valid_next(token): + return True + + def post(tlist, pidx, tidx, nidx): + return pidx, tidx + + _group(tlist, sql.Identifier, match, + valid_prev, valid_next, post, extend=True, recurse=False) -@recurse(sql.Identifier) def group_operator(tlist): ttypes = T_NUMERICAL + T_STRING + T_NAME - clss = (sql.SquareBrackets, sql.Parenthesis, sql.Function, - sql.Identifier, sql.Operation) + sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function, + sql.Identifier, sql.Operation) def match(token): return imt(token, t=(T.Operator, T.Wildcard)) def valid(token): - return imt(token, i=clss, t=ttypes) + return imt(token, i=sqlcls, t=ttypes) def post(tlist, pidx, tidx, nidx): tlist[tidx].ttype = T.Operator return pidx, nidx - _group(tlist, sql.Operation, match, valid, valid, post, extend=False) + valid_prev = valid_next = valid + _group(tlist, sql.Operation, match, + valid_prev, valid_next, post, extend=False) def group_identifier_list(tlist): m_role = T.Keyword, ('null', 'role') m_comma = T.Punctuation, ',' - clss = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, - sql.IdentifierList, sql.Operation) + sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, + sql.IdentifierList, sql.Operation) ttypes = (T_NUMERICAL + T_STRING + T_NAME + (T.Keyword, T.Comment, T.Wildcard)) def match(token): return imt(token, m=m_comma) - def func(token): - return imt(token, i=clss, m=m_role, t=ttypes) + def valid(token): + return imt(token, i=sqlcls, m=m_role, t=ttypes) def post(tlist, pidx, tidx, nidx): return pidx, nidx + valid_prev = valid_next = valid _group(tlist, sql.IdentifierList, 
match, - valid_left=func, valid_right=func, post=post, extend=True) + valid_prev, valid_next, post, extend=True) @recurse(sql.Comment) @@ -308,10 +319,12 @@ def group(stmt): def _group(tlist, cls, match, - valid_left=lambda t: True, - valid_right=lambda t: True, + valid_prev=lambda t: True, + valid_next=lambda t: True, post=None, - extend=True): + extend=True, + recurse=True + ): """Groups together tokens that are joined by a middle token. ie. x < y""" tidx_offset = 0 @@ -322,12 +335,12 @@ def _group(tlist, cls, match, if token.is_whitespace(): continue - if token.is_group() and not isinstance(token, cls): - _group(token, cls, match, valid_left, valid_right, post, extend) + if recurse and token.is_group() and not isinstance(token, cls): + _group(token, cls, match, valid_prev, valid_next, post, extend) if match(token): nidx, next_ = tlist.token_next(tidx) - if valid_left(prev_) and valid_right(next_): + if valid_prev(prev_) and valid_next(next_): from_idx, to_idx = post(tlist, pidx, tidx, nidx) grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend) -- cgit v1.2.1 From d6763dc7592f6e60c3e2e712b39a1b865fc8485e Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 03:50:36 -0700 Subject: Change grouping from _left_right to _group --- sqlparse/engine/grouping.py | 89 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 18 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index b0b8836..2148b50 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -91,42 +91,95 @@ def _group_left_right(tlist, m, cls, def group_typecasts(tlist): - _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier) + def match(token): + return token.match(T.Punctuation, '::') + + def valid(token): + return token is not None + + def post(tlist, pidx, tidx, nidx): + return pidx, nidx + + valid_prev = valid_next = valid + _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) def group_period(tlist): - lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier), - t=(T.Name, T.String.Symbol,)) + def match(token): + return token.match(T.Punctuation, '.') + + def valid_prev(token): + sqlcls = sql.SquareBrackets, sql.Identifier + ttypes = T.Name, T.String.Symbol + return imt(token, i=sqlcls, t=ttypes) + + def valid_next(token): + sqlcls = sql.SquareBrackets, sql.Function + ttypes = T.Name, T.String.Symbol, T.Wildcard + return imt(token, i=sqlcls, t=ttypes) - rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function), - t=(T.Name, T.String.Symbol, T.Wildcard)) + def post(tlist, pidx, tidx, nidx): + return pidx, nidx - _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier, - valid_left=lfunc, valid_right=rfunc) + _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) def group_as(tlist): - lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.normalized == 'NULL' - rfunc = lambda tk: not imt(tk, t=(T.DML, T.DDL)) - _group_left_right(tlist, (T.Keyword, 'AS'), sql.Identifier, - valid_left=lfunc, valid_right=rfunc) + def match(token): + return token.is_keyword and token.normalized == 'AS' + + def valid_prev(token): + return token.normalized == 'NULL' or not token.is_keyword + + def valid_next(token): + ttypes = T.DML, T.DDL + return not imt(token, t=ttypes) + + def post(tlist, pidx, tidx, nidx): + return pidx, nidx + + _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) def group_assignment(tlist): - _group_left_right(tlist, (T.Assignment, ':='), sql.Assignment, - 
semicolon=True) + def match(token): + return token.match(T.Assignment, ':=') + + def valid(token): + return token is not None + + def post(tlist, pidx, tidx, nidx): + m_semicolon = T.Punctuation, ';' + snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx) + nidx = snidx or nidx + return pidx, nidx + + valid_prev = valid_next = valid + _group(tlist, sql.Assignment, match, valid_prev, valid_next, post) def group_comparison(tlist): sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier, - sql.Operation) + sql.Operation) ttypes = T_NUMERICAL + T_STRING + T_NAME - func = lambda tk: (imt(tk, t=ttypes, i=sqlcls) or - (tk and tk.is_keyword and tk.normalized == 'NULL')) + def match(token): + return token.ttype == T.Operator.Comparison + + def valid(token): + if imt(token, t=ttypes, i=sqlcls): + return True + elif token and token.is_keyword and token.normalized == 'NULL': + return True + else: + return False + + def post(tlist, pidx, tidx, nidx): + return pidx, nidx - _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison, - valid_left=func, valid_right=func) + valid_prev = valid_next = valid + _group(tlist, sql.Comparison, match, + valid_prev, valid_next, post, extend=False) @recurse(sql.Identifier) -- cgit v1.2.1 From 228059eb4da5ed2389fc7e987dba37c6d05ea3ea Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 13:59:29 -0700 Subject: remove group left_right --- sqlparse/engine/grouping.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 2148b50..b2b46bb 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -63,33 +63,6 @@ def group_begin(tlist): _group_matching(tlist, sql.Begin) -def _group_left_right(tlist, m, cls, - valid_left=lambda t: t is not None, - valid_right=lambda t: t is not None, - semicolon=False): - """Groups together tokens that are joined by a middle token. ie. x < y""" - for token in list(tlist): - if token.is_group() and not isinstance(token, cls): - _group_left_right(token, m, cls, valid_left, valid_right, - semicolon) - continue - if not token.match(*m): - continue - - tidx = tlist.token_index(token) - pidx, prev_ = tlist.token_prev(tidx) - nidx, next_ = tlist.token_next(tidx) - - if valid_left(prev_) and valid_right(next_): - if semicolon: - # only overwrite if a semicolon present. - m_semicolon = T.Punctuation, ';' - snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx) - nidx = snidx or nidx - # Luckily, this leaves the position of `token` intact. - tlist.group_tokens(cls, pidx, nidx, extend=True) - - def group_typecasts(tlist): def match(token): return token.match(T.Punctuation, '::') -- cgit v1.2.1 From caefd84779d1115b6775ea7c0039f969f3b10d8a Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 10:52:35 -0700 Subject: Change group_matching back to idx --- sqlparse/engine/grouping.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index b2b46bb..62f37a6 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -17,7 +17,16 @@ T_NAME = (T.Name, T.Name.Placeholder) def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" opens = [] - for token in list(tlist): + tidx_offset = 0 + for idx, token in enumerate(list(tlist)): + tidx = idx - tidx_offset + + if token.is_whitespace(): + # ~50% of tokens will be whitespace. 
Skipping them early + # saves three comparisons per whitespace token, at the cost of one + # extra comparison for the other ~50% of tokens. + continue + if token.is_group() and not isinstance(token, cls): # Check inside previously grouped (ie. parenthesis) if group # of different type is inside (ie, case). though ideally should @@ -26,17 +35,18 @@ def _group_matching(tlist, cls): continue if token.match(*cls.M_OPEN): - opens.append(token) + opens.append(tidx) + elif token.match(*cls.M_CLOSE): try: - open_token = opens.pop() + open_idx = opens.pop() except IndexError: # this indicates invalid sql and unbalanced tokens. # instead of break, continue in case other "valid" groups exist continue - oidx = tlist.token_index(open_token) - cidx = tlist.token_index(token) - tlist.group_tokens(cls, oidx, cidx) + close_idx = tidx + tlist.group_tokens(cls, open_idx, close_idx) + tidx_offset += close_idx - open_idx def group_brackets(tlist): -- cgit v1.2.1 From 9fcf1f2cda629cdf11a8a4ac596fb7cae0e89de9 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 15:40:27 -0700 Subject: Update AUTHORS and CHANGELOG --- AUTHORS | 1 + CHANGELOG | 1 + 2 files changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 3d3bc26..3893c8c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -33,6 +33,7 @@ Alphabetical list of contributors: * Ryan Wooden * saaj * Shen Longxing +* Sjoerd Job Postmus * spigwitmer * Tenghuan * Tim Graham diff --git a/CHANGELOG b/CHANGELOG index 6d22f45..fac9763 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,7 @@ Internal Changes sqlparse.exceptions. * sqlparse.sql.Token.to_unicode was removed. * Lots of code cleanups and modernization (thanks esp. to vmuriart!). +* Improved grouping performance. (sjoerdjob) Enhancements -- cgit v1.2.1
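
The index-offset bookkeeping that the "Change group_matching back to idx" patch introduces can be illustrated outside of sqlparse. The following is only a rough, standalone sketch of the idea on a plain Python list -- the group_parens helper and its character-list input are invented for illustration and are not part of the patches above: iterate over a snapshot of the sequence, remember how many elements earlier groupings have collapsed, and subtract that offset to translate a snapshot index into an index in the live list.

    def group_parens(tokens):
        # Stack of live-list indices of '(' tokens that are still unmatched,
        # mirroring how the patched _group_matching stores open indices.
        opens = []
        # Number of elements removed so far by earlier groupings; subtracting
        # it from a snapshot index gives the element's current index.
        tidx_offset = 0
        for idx, token in enumerate(list(tokens)):  # iterate over a snapshot
            tidx = idx - tidx_offset
            if token == '(':
                opens.append(tidx)
            elif token == ')':
                if not opens:
                    # Unbalanced input: keep going in case later pairs match.
                    continue
                open_idx, close_idx = opens.pop(), tidx
                # Collapse the span into a single nested list (the "group").
                tokens[open_idx:close_idx + 1] = [tokens[open_idx:close_idx + 1]]
                tidx_offset += close_idx - open_idx
        return tokens

    print(group_parens(list("a(b(c)d)e")))
    # ['a', ['(', 'b', ['(', 'c', ')'], 'd', ')'], 'e']

The real code differs in that it operates on TokenList objects, skips whitespace tokens, and recurses into tokens that are already grouped; the sketch only shows why tidx = idx - tidx_offset keeps indices valid after group_tokens shrinks the list.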