From dba96a147be55be7da657a17a38c21b30ff0e664 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 13 Feb 2013 13:23:52 +0100 Subject: Dynamically change bufsize to parse lengthy tokens faster (fixes #86). --- sqlparse/lexer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'sqlparse/lexer.py') diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index a92e2b7..4cec0db 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -161,7 +161,9 @@ class Lexer(object): stripnl = False tabsize = 0 flags = re.IGNORECASE | re.UNICODE - bufsize = 4096 + DEFAULT_BUFSIZE = 4096 + MAX_BUFSIZE = 2 ** 31 + bufsize = DEFAULT_BUFSIZE tokens = { 'root': [ @@ -322,9 +324,14 @@ class Lexer(object): else: assert False, "wrong state def: %r" % new_state statetokens = tokendefs[statestack[-1]] + # reset bufsize + self.bufsize = self.DEFAULT_BUFSIZE break else: if hasmore: + # we have no match, increase bufsize to parse lengthy + # tokens faster (see #86). + self.bufsize = min(self.bufsize * 2, self.MAX_BUFSIZE) buf = stream.read(self.bufsize) hasmore = len(buf) == self.bufsize text = text[pos:] + self._decode(buf) -- cgit v1.2.1