summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.org> 2015-03-26 20:44:12 +0400
committer Alexander Barkov <bar@mariadb.org> 2015-03-26 20:44:12 +0400
commit 50eee6050464e4a7737841245956a4d6c9c2dea9 (patch)
tree 455da318848564e49cd31786e46ef2d5f0aae135
parent 01d7da6785284383b2c04f2d4474feccebb0bb6f (diff)
download mariadb-git-50eee6050464e4a7737841245956a4d6c9c2dea9.tar.gz
Preparatory refactoring for:
MDEV-6218 Wrong result of CHAR_LENGTH(non-BMP-character) with 3-byte utf8
- Moving get_text() as a method to Lex_input_stream.
- Moving the unescaping part into a separate function; this piece of code will most likely move to /strings later.
- Removing Lex_input_stream::yytoklen, as it's not needed any more.
-rw-r--r-- sql/sql_class.h 4
-rw-r--r-- sql/sql_lex.cc 188
-rw-r--r-- sql/sql_lex.h 6
3 files changed, 108 insertions, 90 deletions
diff --git a/sql/sql_class.h b/sql/sql_class.h
index f3106edcb23..73637f8d8eb 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -2960,6 +2960,10 @@ public:
return (bool) (variables.sql_mode & (MODE_STRICT_TRANS_TABLES |
MODE_STRICT_ALL_TABLES));
}
+ inline bool backslash_escapes() const
+ {
+ return !MY_TEST(variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES);
+ }
inline my_time_t query_start() { query_start_used=1; return start_time; }
inline ulong query_start_sec_part()
{ query_start_sec_part_used=1; return start_time_sec_part; }
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 4e061112128..1e1bd20697b 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -281,7 +281,6 @@ void
Lex_input_stream::reset(char *buffer, unsigned int length)
{
yylineno= 1;
- yytoklen= 0;
yylval= NULL;
lookahead_token= -1;
lookahead_yylval= NULL;
@@ -641,7 +640,7 @@ static LEX_STRING get_token(Lex_input_stream *lip, uint skip, uint length)
{
LEX_STRING tmp;
lip->yyUnget(); // ptr points now after last token char
- tmp.length=lip->yytoklen=length;
+ tmp.length= length;
tmp.str= lip->m_thd->strmake(lip->get_tok_start() + skip, tmp.length);
lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip;
@@ -665,7 +664,7 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip,
const char *from, *end;
char *to;
lip->yyUnget(); // ptr points now after last token char
- tmp.length= lip->yytoklen=length;
+ tmp.length= length;
tmp.str=(char*) lip->m_thd->alloc(tmp.length+1);
from= lip->get_tok_start() + skip;
to= tmp.str;
@@ -687,135 +686,152 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip,
}
+static size_t
+my_unescape(CHARSET_INFO *cs, char *to, const char *str, const char *end,
+ int sep, bool backslash_escapes)
+{
+ char *start= to;
+ for ( ; str != end ; str++)
+ {
+#ifdef USE_MB
+ int l;
+ if (use_mb(cs) && (l= my_ismbchar(cs, str, end)))
+ {
+ while (l--)
+ *to++ = *str++;
+ str--;
+ continue;
+ }
+#endif
+ if (backslash_escapes && *str == '\\' && str + 1 != end)
+ {
+ switch(*++str) {
+ case 'n':
+ *to++='\n';
+ break;
+ case 't':
+ *to++= '\t';
+ break;
+ case 'r':
+ *to++ = '\r';
+ break;
+ case 'b':
+ *to++ = '\b';
+ break;
+ case '0':
+ *to++= 0; // Ascii null
+ break;
+ case 'Z': // ^Z must be escaped on Win32
+ *to++='\032';
+ break;
+ case '_':
+ case '%':
+ *to++= '\\'; // remember prefix for wildcard
+ /* Fall through */
+ default:
+ *to++= *str;
+ break;
+ }
+ }
+ else if (*str == sep)
+ *to++= *str++; // Two ' or "
+ else
+ *to++ = *str;
+ }
+ *to= 0;
+ return to - start;
+}
+
+
+size_t
+Lex_input_stream::unescape(CHARSET_INFO *cs, char *to,
+ const char *str, const char *end,
+ int sep)
+{
+ return my_unescape(cs, to, str, end, sep, m_thd->backslash_escapes());
+}
+
+
/*
Return an unescaped text literal without quotes
Fix sometimes to do only one scan of the string
*/
-static char *get_text(Lex_input_stream *lip, int pre_skip, int post_skip)
+bool Lex_input_stream::get_text(LEX_STRING *dst, int pre_skip, int post_skip)
{
reg1 uchar c,sep;
uint found_escape=0;
- CHARSET_INFO *cs= lip->m_thd->charset();
+ CHARSET_INFO *cs= m_thd->charset();
- lip->tok_bitmap= 0;
- sep= lip->yyGetLast(); // String should end with this
- while (! lip->eof())
+ tok_bitmap= 0;
+ sep= yyGetLast(); // String should end with this
+ while (! eof())
{
- c= lip->yyGet();
- lip->tok_bitmap|= c;
+ c= yyGet();
+ tok_bitmap|= c;
#ifdef USE_MB
{
int l;
if (use_mb(cs) &&
(l = my_ismbchar(cs,
- lip->get_ptr() -1,
- lip->get_end_of_query()))) {
- lip->skip_binary(l-1);
+ get_ptr() -1,
+ get_end_of_query()))) {
+ skip_binary(l-1);
continue;
}
}
#endif
if (c == '\\' &&
- !(lip->m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))
+ !(m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))
{ // Escaped character
found_escape=1;
- if (lip->eof())
- return 0;
- lip->yySkip();
+ if (eof())
+ return true;
+ yySkip();
}
else if (c == sep)
{
- if (c == lip->yyGet()) // Check if two separators in a row
+ if (c == yyGet()) // Check if two separators in a row
{
found_escape=1; // duplicate. Remember for delete
continue;
}
else
- lip->yyUnget();
+ yyUnget();
/* Found end. Unescape and return string */
const char *str, *end;
- char *start;
- str= lip->get_tok_start();
- end= lip->get_ptr();
+ str= get_tok_start();
+ end= get_ptr();
/* Extract the text from the token */
str += pre_skip;
end -= post_skip;
DBUG_ASSERT(end >= str);
- if (!(start= (char*) lip->m_thd->alloc((uint) (end-str)+1)))
- return (char*) ""; // Sql_alloc has set error flag
+ if (!(dst->str= (char*) m_thd->alloc((uint) (end - str) + 1)))
+ {
+ dst->str= (char*) ""; // Sql_alloc has set error flag
+ dst->length= 0;
+ return true;
+ }
- lip->m_cpp_text_start= lip->get_cpp_tok_start() + pre_skip;
- lip->m_cpp_text_end= lip->get_cpp_ptr() - post_skip;
+ m_cpp_text_start= get_cpp_tok_start() + pre_skip;
+ m_cpp_text_end= get_cpp_ptr() - post_skip;
if (!found_escape)
{
- lip->yytoklen=(uint) (end-str);
- memcpy(start,str,lip->yytoklen);
- start[lip->yytoklen]=0;
+ memcpy(dst->str, str, dst->length= (end - str));
+ dst->str[dst->length]= 0;
}
else
{
- char *to;
-
- for (to=start ; str != end ; str++)
- {
-#ifdef USE_MB
- int l;
- if (use_mb(cs) &&
- (l = my_ismbchar(cs, str, end))) {
- while (l--)
- *to++ = *str++;
- str--;
- continue;
- }
-#endif
- if (!(lip->m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) &&
- *str == '\\' && str+1 != end)
- {
- switch(*++str) {
- case 'n':
- *to++='\n';
- break;
- case 't':
- *to++= '\t';
- break;
- case 'r':
- *to++ = '\r';
- break;
- case 'b':
- *to++ = '\b';
- break;
- case '0':
- *to++= 0; // Ascii null
- break;
- case 'Z': // ^Z must be escaped on Win32
- *to++='\032';
- break;
- case '_':
- case '%':
- *to++= '\\'; // remember prefix for wildcard
- /* Fall through */
- default:
- *to++= *str;
- break;
- }
- }
- else if (*str == sep)
- *to++= *str++; // Two ' or "
- else
- *to++ = *str;
- }
- *to=0;
- lip->yytoklen=(uint) (to-start);
+ dst->length= unescape(cs, dst->str, str, end, sep);
}
- return start;
+ return false;
}
}
- return 0; // unexpected end of query
+ return true; // unexpected end of query
}
@@ -1122,12 +1138,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
}
/* Found N'string' */
lip->yySkip(); // Skip '
- if (!(yylval->lex_str.str = get_text(lip, 2, 1)))
+ if (lip->get_text(&yylval->lex_str, 2, 1))
{
state= MY_LEX_CHAR; // Read char by char
break;
}
- yylval->lex_str.length= lip->yytoklen;
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(NCHAR_STRING);
@@ -1488,12 +1503,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
}
/* " used for strings */
case MY_LEX_STRING: // Incomplete text string
- if (!(yylval->lex_str.str = get_text(lip, 1, 1)))
+ if (lip->get_text(&yylval->lex_str, 1, 1))
{
state= MY_LEX_CHAR; // Read char by char
break;
}
- yylval->lex_str.length=lip->yytoklen;
lip->body_utf8_append(lip->m_cpp_text_start);
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index a0da5a94c22..03e97b3d54a 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -1804,6 +1804,8 @@ enum enum_comment_state
class Lex_input_stream
{
+ size_t unescape(CHARSET_INFO *cs, char *to,
+ const char *str, const char *end, int sep);
public:
Lex_input_stream()
{
@@ -2088,9 +2090,6 @@ public:
/** Current line number. */
uint yylineno;
- /** Length of the last token parsed. */
- uint yytoklen;
-
/** Interface with bison, value of the last token parsed. */
LEX_YYSTYPE yylval;
@@ -2105,6 +2104,7 @@ public:
/** LALR(2) resolution, value of the look ahead token.*/
LEX_YYSTYPE lookahead_yylval;
+ bool get_text(LEX_STRING *to, int pre_skip, int post_skip);
private:
/** Pointer to the current position in the raw input stream. */
char *m_ptr;