Removing some duplicate code in THD::convert_string() & friends

1. Adding THD::convert_string(LEX_CSTRING *to,...) as a wrapper for convert_string(LEX_STRING *to,...), as LEX_CSTRING is now frequently used for conversion purposes. This reduced duplicate code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules in *.yy. 2. Adding yet another THD::convert_string() with an extra parameter "bool simple_copy_is_possible". This further reduced repeated code in the mentioned grammar rules in *.yy. 3. Deriving Lex_ident_cli_st from Lex_string_with_metadata_st, as they have very similar functionality. Moving m_quote from Lex_ident_cli_st to Lex_string_with_metadata_st, as m_quote will be used later to optimize string literals anyway (e.g. to avoid redundant copying at the tokenizer stage). Adjusting Lex_input_stream::get_text() accordingly. 4. Moving the remainder of the code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules into new methods in THD: - make_text_string_sys() - make_text_string_connection() - make_text_string_filesystem() and changing *.yy to use these new methods. This reduced the amount of similar code in sql_yacc.yy and sql_yacc_ora.yy. 5. Removing duplicate code in Lex_input_stream::body_utf8_append_ident() by reusing THD::make_text_string_sys(), thanks to #3 and #4. 6. Making the THD members charset_is_system_charset, charset_is_collation_connection, charset_is_character_set_filesystem private, as they are no longer needed externally.
author: Alexander Barkov <bar@mariadb.com> 2018-05-11 18:02:16 +0400
committer: Alexander Barkov <bar@mariadb.com> 2018-05-11 18:02:16 +0400
commit: 4a126bf3e1846612852131ee698d6091c777625a (patch)
tree: 636d172dde07c4b7de6904811548dbbf0e15811d /sql/sql_lex.h
parent: af682525a86960480bc9e14d954b15995343ffc8 (diff)
download: mariadb-git-4a126bf3e1846612852131ee698d6091c777625a.tar.gz
1 files changed, 33 insertions, 25 deletions
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 9e476f3a5eb..b78fbcf9d18 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -37,12 +37,16 @@
 
 
 /**
-  A string with metadata.
+  A string with metadata. Usually points to a string in the client
+  character set, but unlike Lex_ident_cli_st (see below) it does not
+  necessarily point to a query fragment. It can also point to memory
+  of other kinds (e.g. an additional THD allocated memory buffer
+  not overlapping with the current query text).
+
   We'll add more flags here eventually, to know if the string has, e.g.:
   - multi-byte characters
   - bad byte sequences
   - backslash escapes:   'a\nb'
-  - separator escapes:   'a''b'
   and reuse the original query fragments instead of making the string
   copy too early, in Lex_input_stream::get_text().
   This will allow to avoid unnecessary copying, as well as
@@ -50,9 +54,30 @@
 */
 struct Lex_string_with_metadata_st: public LEX_CSTRING
 {
+private:
   bool m_is_8bit; // True if the string has 8bit characters
+  char m_quote;   // Quote character, or 0 if not quoted
 public:
   void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
+  void set_metadata(bool is_8bit, char quote)
+  {
+    m_is_8bit= is_8bit;
+    m_quote= quote;
+  }
+  void set(const char *s, size_t len, bool is_8bit, char quote)
+  {
+    str= s;
+    length= len;
+    set_metadata(is_8bit, quote);
+  }
+  void set(const LEX_CSTRING *s, bool is_8bit, char quote)
+  {
+    ((LEX_CSTRING &)*this)= *s;
+    set_metadata(is_8bit, quote);
+  }
+  bool is_8bit() const { return m_is_8bit; }
+  bool is_quoted() const { return m_quote != '\0'; }
+  char quote() const { return m_quote; }
   // Get string repertoire by the 8-bit flag and the character set
   uint repertoire(CHARSET_INFO *cs) const
   {
@@ -71,44 +96,27 @@ public:
   Used to store identifiers in the client character set.
   Points to a query fragment.
 */
-struct Lex_ident_cli_st: public LEX_CSTRING
+struct Lex_ident_cli_st: public Lex_string_with_metadata_st
 {
-private:
-  bool m_is_8bit;
-  char m_quote;
 public:
   void set_keyword(const char *s, size_t len)
   {
-    str= s;
-    length= len;
-    m_is_8bit= false;
-    m_quote= '\0';
+    set(s, len, false, '\0');
   }
   void set_ident(const char *s, size_t len, bool is_8bit)
   {
-    str= s;
-    length= len;
-    m_is_8bit= is_8bit;
-    m_quote= '\0';
+    set(s, len, is_8bit, '\0');
   }
   void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote)
   {
-    str= s;
-    length= len;
-    m_is_8bit= is_8bit;
-    m_quote= quote;
+    set(s, len, is_8bit, quote);
   }
   void set_unquoted(const LEX_CSTRING *s, bool is_8bit)
   {
-    ((LEX_CSTRING &)*this)= *s;
-    m_is_8bit= is_8bit;
-    m_quote= '\0';
+    set(s, is_8bit, '\0');
   }
   const char *pos() const { return str - is_quoted(); }
   const char *end() const { return str + length + is_quoted(); }
-  bool is_quoted() const { return m_quote != '\0'; }
-  bool is_8bit() const { return m_is_8bit; }
-  char quote() const { return m_quote; }
 };
 
 
@@ -2453,7 +2461,7 @@ public:
   void body_utf8_append(const char *ptr);
   void body_utf8_append(const char *ptr, const char *end_ptr);
   void body_utf8_append_ident(THD *thd,
-                              const LEX_CSTRING *txt,
+                              const Lex_string_with_metadata_st *txt,
                               const char *end_ptr);
   void body_utf8_append_escape(THD *thd,
                                const LEX_CSTRING *txt,
author	Alexander Barkov <bar@mariadb.com>	2018-05-11 18:02:16 +0400
committer	Alexander Barkov <bar@mariadb.com>	2018-05-11 18:02:16 +0400
commit	4a126bf3e1846612852131ee698d6091c777625a (patch)
tree	636d172dde07c4b7de6904811548dbbf0e15811d /sql/sql_lex.h
parent	af682525a86960480bc9e14d954b15995343ffc8 (diff)
download	mariadb-git-4a126bf3e1846612852131ee698d6091c777625a.tar.gz