1 files changed, 620 insertions, 21 deletions
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 7abbc808632..a474d7c25e9 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -2,6 +2,7 @@
 #define SQL_SORT_INCLUDED
 
 /* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2020, MariaDB Corporation.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -19,10 +20,8 @@
 #include "my_base.h"                            /* ha_rows */
 #include <my_sys.h>                             /* qsort2_cmp */
 #include "queues.h"
+#include "sql_class.h"
 
-typedef struct st_buffpek BUFFPEK;
-
-struct SORT_FIELD;
 class Field;
 struct TABLE;
 
@@ -64,50 +63,650 @@ struct BUFFPEK_COMPARE_CONTEXT
 };
 
 
+/**
+  Descriptor for a merge chunk to be sort-merged.
+  A merge chunk is a sequence of pre-sorted records, written to a
+  temporary file. A Merge_chunk instance describes where this chunk is stored
+  in the file, and where it is located when it is in memory.
+
+  It is a POD because
+   - we read/write them from/to files.
+
+  We have accessors (getters/setters) for all struct members.
+ */
+
+struct Merge_chunk {
+public:
+  my_off_t file_position() const { return m_file_position; }
+  void set_file_position(my_off_t val) { m_file_position= val; }
+  void advance_file_position(my_off_t val) { m_file_position+= val; }
+
+  uchar *buffer_start() { return m_buffer_start; }
+  const uchar *buffer_end() const { return m_buffer_end; }
+
+  void set_buffer(uchar *start, uchar *end)
+  {
+    m_buffer_start= start;
+    m_buffer_end= end;
+  }
+  void set_buffer_start(uchar *start)
+  {
+    m_buffer_start= start;
+  }
+  void set_buffer_end(uchar *end)
+  {
+    DBUG_ASSERT(m_buffer_end == NULL || end <= m_buffer_end);
+    m_buffer_end= end;
+  }
+
+  void init_current_key() { m_current_key= m_buffer_start; }
+  uchar *current_key() { return m_current_key; }
+  void advance_current_key(uint val) { m_current_key+= val; }
+
+  void decrement_rowcount(ha_rows val) { m_rowcount-= val; }
+  void set_rowcount(ha_rows val)       { m_rowcount= val; }
+  ha_rows rowcount() const             { return m_rowcount; }
+
+  ha_rows mem_count() const { return m_mem_count; }
+  void set_mem_count(ha_rows val) { m_mem_count= val; }
+  ha_rows decrement_mem_count() { return --m_mem_count; }
+
+  ha_rows max_keys() const { return m_max_keys; }
+  void set_max_keys(ha_rows val) { m_max_keys= val; }
+
+  size_t  buffer_size() const { return m_buffer_end - m_buffer_start; }
+
+  /**
+    Tries to merge *this with *mc, returns true if successful.
+    The assumption is that *this is no longer in use,
+    and the space it has been allocated can be handed over to a
+    buffer which is adjacent to it.
+   */
+  bool merge_freed_buff(Merge_chunk *mc) const
+  {
+    if (mc->m_buffer_end == m_buffer_start)
+    {
+      mc->m_buffer_end= m_buffer_end;
+      mc->m_max_keys+= m_max_keys;
+      return true;
+    }
+    else if (mc->m_buffer_start == m_buffer_end)
+    {
+      mc->m_buffer_start= m_buffer_start;
+      mc->m_max_keys+= m_max_keys;
+      return true;
+    }
+    return false;
+  }
+
+  /// The current key for this chunk
+  uchar *m_current_key= nullptr;
+  /// Current position in the file to be sorted.
+  my_off_t m_file_position= 0;
+  /// Start of main-memory buffer for this chunk.
+  uchar *m_buffer_start= nullptr;
+  /// End of main-memory buffer for this chunk.
+  uchar *m_buffer_end= nullptr;
+  /// Number of unread rows in this chunk.
+  ha_rows m_rowcount= 0;
+  /// Number of rows in the main-memory buffer.
+  ha_rows m_mem_count= 0;
+  /// If we have fixed-size rows: max number of rows in buffer.
+  ha_rows m_max_keys= 0;
+};
+
+typedef Bounds_checked_array<SORT_ADDON_FIELD> Addon_fields_array;
+typedef Bounds_checked_array<SORT_FIELD> Sort_keys_array;
+
+/**
+  This class wraps information about usage of addon fields.
+  An Addon_fields object is used both during packing of data in the filesort
+  buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.
+
+  @see documentation for the Sort_addon_field struct.
+  @see documentation for get_addon_fields()
+ */
+class Addon_fields {
+public:
+  Addon_fields(Addon_fields_array arr)
+    : m_field_descriptors(arr),
+      m_addon_buf(),
+      m_addon_buf_length(),
+      m_using_packed_addons(false)
+  {
+    DBUG_ASSERT(!arr.is_null());
+  }
+
+  SORT_ADDON_FIELD *begin() { return m_field_descriptors.begin(); }
+  SORT_ADDON_FIELD *end()   { return m_field_descriptors.end(); }
+
+    /// rr_unpack_from_tempfile needs an extra buffer when unpacking.
+  uchar *allocate_addon_buf(uint sz)
+  {
+    m_addon_buf= (uchar *)my_malloc(PSI_INSTRUMENT_ME, sz, MYF(MY_WME | MY_THREAD_SPECIFIC));
+    if (m_addon_buf)
+      m_addon_buf_length= sz;
+    return m_addon_buf;
+  }
+
+  void free_addon_buff()
+  {
+    my_free(m_addon_buf);
+    m_addon_buf= NULL;
+    m_addon_buf_length= 0;
+  }
+
+  uchar *get_addon_buf() { return m_addon_buf; }
+  uint   get_addon_buf_length() const { return m_addon_buf_length; }
+
+  void set_using_packed_addons(bool val)
+  {
+    m_using_packed_addons= val;
+  }
+
+  bool using_packed_addons() const
+  {
+    return m_using_packed_addons;
+  }
+
+  static bool can_pack_addon_fields(uint record_length)
+  {
+    return (record_length <= (0xFFFF));
+  }
+
+  /**
+    @returns Total number of bytes used for packed addon fields.
+    the size of the length field + size of null bits + sum of field sizes.
+   */
+  static uint read_addon_length(uchar *p)
+  {
+    return size_of_length_field + uint2korr(p);
+  }
+
+  /**
+    Stores the number of bytes used for packed addon fields.
+   */
+  static void store_addon_length(uchar *p, uint sz)
+  {
+    // We actually store the length of everything *after* the length field.
+    int2store(p, sz - size_of_length_field);
+  }
+
+  static const uint size_of_length_field= 2;
+
+private:
+  Addon_fields_array m_field_descriptors;
+
+  uchar    *m_addon_buf;            ///< Buffer for unpacking addon fields.
+  uint      m_addon_buf_length;     ///< Length of the buffer.
+  bool      m_using_packed_addons;  ///< Are we packing the addon fields?
+};
+
+/**
+  This class wraps information about usage of sort keys.
+  A Sort_keys object is used both during packing of data in the filesort
+  buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.
+
+  @see SORT_FIELD struct.
+*/
+
+class Sort_keys :public Sql_alloc,
+                 public Sort_keys_array
+{
+public:
+  Sort_keys(SORT_FIELD* arr, size_t count):
+    Sort_keys_array(arr, count),
+    m_using_packed_sortkeys(false),
+    size_of_packable_fields(0),
+    sort_length_with_original_values(0),
+    sort_length_with_memcmp_values(0),
+    parameters_computed(false)
+  {
+    DBUG_ASSERT(!is_null());
+  }
+
+  bool using_packed_sortkeys() const
+  { return m_using_packed_sortkeys; }
+
+  void set_using_packed_sortkeys(bool val)
+  {
+    m_using_packed_sortkeys= val;
+  }
+  void set_size_of_packable_fields(uint len)
+  {
+    size_of_packable_fields= len;
+  }
+
+  uint get_size_of_packable_fields()
+  {
+    return size_of_packable_fields;
+  }
+
+  void set_sort_length_with_original_values(uint len)
+  {
+    sort_length_with_original_values= len;
+  }
+
+  uint get_sort_length_with_original_values()
+  {
+    return sort_length_with_original_values;
+  }
+
+  void set_sort_length_with_memcmp_values(uint len)
+  {
+    sort_length_with_memcmp_values= len;
+  }
+
+  uint get_sort_length_with_memcmp_values()
+  {
+    return sort_length_with_memcmp_values;
+  }
+
+  static void store_sortkey_length(uchar *p, uint sz)
+  {
+    int4store(p, sz - size_of_length_field);
+  }
+
+  static uint read_sortkey_length(uchar *p)
+  {
+    return size_of_length_field + uint4korr(p);
+  }
+
+  void increment_size_of_packable_fields(uint len)
+  {
+    size_of_packable_fields+= len;
+  }
+
+  void increment_original_sort_length(uint len)
+  {
+    sort_length_with_original_values+= len;
+  }
+
+  bool is_parameters_computed() { return parameters_computed; }
+  void set_parameters_computed(bool val) { parameters_computed= val; }
+
+  static const uint size_of_length_field= 4;
+
+private:
+  bool m_using_packed_sortkeys;     // Are we packing sort keys
+  uint size_of_packable_fields;     // Total length bytes for packable columns
+
+  /*
+    The sort length for all the keyparts storing the original values
+  */
+  uint sort_length_with_original_values;
+
+  /*
+    The sort length for all the keyparts storing the mem-comparable images
+  */
+  uint sort_length_with_memcmp_values;
+
+  /*
+    TRUE       parameters(like sort_length_* , size_of_packable_field)
+               are computed
+    FALSE      otherwise.
+  */
+  bool parameters_computed;
+};
+
+
+/**
+PACKED SORT KEYS
+
+Description
+
+In this optimization where we would like the pack the values of the sort key
+inside the sort buffer for each record.
+
+Contents:
+1. Background
+1.1 Implementation details
+2. Solution : Packed Sort Keys
+2.1 Packed key format
+2.2 Which format to use
+3. Special cases
+3.1 Handling very long strings
+3.2 Handling for long binary strings
+3.3 Handling very long strings with Packed sort keys
+4. Sort key columns in addon_fields
+
+1. Background
+Before this optimization of using packed sort keys, filesort() sorted the
+data using mem-comparable keys.
+
+That is, if we wanted to sort by
+
+  ORDER BY col1, col2, ... colN
+then the filesort code would for each row generate one "Sort Key"
+and then sort the rows by their Sort Keys.
+
+The Sort Keys are mem-comparable (that is, are compared by memcmp()) and
+they are of FIXED SIZE. The sort key has the same length regardless of
+what value it represents. This causes INEFFICIENT MEMORY USAGE.
+
+1.1 Implementation details
+
+make_sortkey() is the function that produces a sort key
+from a record.
+
+The function treats Field and Item objects differently.
+
+class Field has:
+
+a) void make_sort_key(uchar *buff, uint length);
+   make_sort_key is a non-virtual function which handles encoding of
+   SQL null values.
+
+b) virtual void sort_string(uchar *buff,uint length)=0;
+    sort_string produces mem-comparable image of the field value
+    for each datatype.
+
+For Items, Type_handler has a virtual function:
+
+  virtual void make_sort_key(uchar *to, Item *item,
+                             const SORT_FIELD_ATTR *sort_field,
+                             Sort_param *param) const= 0;
+  which various datatypes overload.
+
+
+2. SOLUTION: PACKED SORT KEYS
+
+Note that one can have mem-comparable keys are that are not fixed-size.
+MyRocks uses such encoding for example.
+
+However for this optimization it was decided to store the original
+(non-mem-comparable) values instead and use a datatype-aware
+key comparison function.
+
+2.1 Packed key format
+The keys are stored in a new variable-size data format called "packed".
+
+The format is as follows:
+
+  <sort_key_length><packed_value_1><packed_value2> ....... <packed_valueN>
+
+  format for a n-part sort key
+
+<sort_key_length> is the length of the whole key.
+Each packed value is encoded as follows:
+
+  <null_byte=0>  // This is a an SQL NULL
+  [<null_byte=1>] <packed_value>  // this a non-NULL value
+null_byte is present if the field/item is NULLable.
+SQL NULL is encoded as just one NULL-indicator byte. The value itself is omitted.
+
+The format of the packed_value depends on the datatype.
+For "non-packable" datatypes it is just their mem-comparable form, as before.
+
+The "packable" datatypes are currently variable-length strings and the
+packed format for them is (for binary blobs, see a note below):
+
+<length> <string>
+2.2 Which format to use
+
+The advantage of Packed Key Format is potential space savings for
+variable-length fields.
+
+The disadvantages are:
+
+a) It may actually take more space, because of sort_key_length and
+   length fields.
+b) The comparison function is more expensive.
+
+Currently the logic is: use Packed Key Format if we would save 128 or more
+bytes when constructing a sort key from values that have empty string
+for each packable component.
+
+3. SPECIAL CASES
+3.1 HANDLING VERY LONG STRINGS
+the size of sort key part was limited by @@max_sort_length variable.
+It is defined as:
+
+The number of bytes to use when sorting data values. The server uses only the
+first max_sort_length bytes of each value and ignores the rest.
+
+3.2 HANDLING VERY LONG BINARY STRINGS
+Long binary strings receive special treatment. A sort key for the long
+binary string is truncated at max_sort_length bytes like described above,
+but then a "suffix" is appended which contains the total length of the
+value before the truncation.
+
+3.3 HANDLING VERY LONG STRINGS WITH PACKED SORT KEY
+Truncating multi-byte string at N bytes is not safe because one can cut in the
+middle of a character. One is tempted to solve this by discarding the partial
+character but that's also not a good idea as in some collations multiple
+characters may produce one weight (this is called "contraction").
+
+This combination of circumstances:
+
+The string value is very long, so truncation is necessary
+The collation is "complex", so truncation is dangerous
+is deemed to be relatively rare so it was decided to just use
+the non-packed sort keys in this case.
+
+4. SORT KEY COLUMNS IN ADDON FIELDS
+Currently, each sort key column is actually stored twice
+1. as part of the sort key
+2. in the addon_fields
+This made total sense when sort key stored the mem-comparable image
+(from which one cannot restore the original value in general case).
+But since we now store the original value, we could also remove it from the
+addon_fields and further save space. This is still a limitation and needs
+to be fixed later
+
+@see Sort_keys
+
+**/
+
+/**
+  The sort record format may use one of two formats for the non-sorted part of
+  the record:
+
+  1. Use the rowid
+
+    |<sort_key>|   <rowid>  |
+    /          / ref_length /
+
+  2. Use "addon fields"
+
+    |<sort_key>|<null bits>|<field a><field b>...|
+    /          /         addon_length            /
+
+  The packed format for "addon fields"
+
+    |<sort_key>|<length>|<null bits>|<field a><field b>...|
+    /          /         addon_length                     /
+
+  <sort_key>  The key may use one of the two formats:
+              A. fixed-size mem-comparable form. The record is always
+                 sort_length bytes long.
+              B. "PackedKeyFormat" - the records are variable-size.
+
+  <key>       Fields are fixed-size, specially encoded with
+              Field::make_sort_key() so we can do byte-by-byte compare.
+
+  <length>    Contains the *actual* packed length (after packing) of
+              everything after the sort keys.
+              The size of the length field is 2 bytes,
+              which should cover most use cases: addon data <= 65535 bytes.
+              This is the same as max record size in MySQL.
+  <null bits> One bit for each nullable field, indicating whether the field
+              is null or not. May have size zero if no fields are nullable.
+  <field xx>  Are stored with field->pack(), and retrieved with
+              field->unpack(). Addon fields within a record are stored
+              consecutively, with no "holes" or padding. They will have zero
+              size for NULL values.
+
+*/
+
 class Sort_param {
 public:
   uint rec_length;            // Length of sorted records.
   uint sort_length;           // Length of sorted columns.
   uint ref_length;            // Length of record ref.
+  uint addon_length;          // Length of addon_fields
   uint res_length;            // Length of records in final sorted file/buffer.
   uint max_keys_per_buffer;   // Max keys / buffer.
   uint min_dupl_count;
   ha_rows max_rows;           // Select limit, or HA_POS_ERROR if unlimited.
   ha_rows examined_rows;      // Number of examined rows.
   TABLE *sort_form;           // For quicker make_sortkey.
-  SORT_FIELD *local_sortorder;
-  SORT_FIELD *end;
-  SORT_ADDON_FIELD *addon_field; // Descriptors for companion fields.
-  LEX_STRING addon_buf;          // Buffer & length of added packed fields.
+  /**
+    ORDER BY list with some precalculated info for filesort.
+    Array is created and owned by a Filesort instance.
+   */
+  Bounds_checked_array<SORT_FIELD> local_sortorder;
+  Addon_fields *addon_fields;     // Descriptors for companion fields.
+  Sort_keys *sort_keys;
+  bool using_pq;
 
   uchar *unique_buff;
   bool not_killable;
-  char* tmp_buffer;
+  String tmp_buffer;
   // The fields below are used only by Unique class.
   qsort2_cmp compare;
   BUFFPEK_COMPARE_CONTEXT cmp_context;
 
   Sort_param()
   {
-    memset(this, 0, sizeof(*this));
+    memset(reinterpret_cast<void*>(this), 0, sizeof(*this));
+    tmp_buffer.set_thread_specific();
+    /*
+      Fix memset() clearing the charset.
+      TODO: The constructor should be eventually rewritten not to use memset().
+    */
+    tmp_buffer.set_charset(&my_charset_bin);
   }
   void init_for_filesort(uint sortlen, TABLE *table,
                          ha_rows maxrows, bool sort_positions);
+  /// Enables the packing of addons if possible.
+  void try_to_pack_addons(ulong max_length_for_sort_data);
+
+  /// Are we packing the "addon fields"?
+  bool using_packed_addons() const
+  {
+    DBUG_ASSERT(m_using_packed_addons ==
+                (addon_fields != NULL &&
+                 addon_fields->using_packed_addons()));
+    return m_using_packed_addons;
+  }
+
+  bool using_packed_sortkeys() const
+  {
+    DBUG_ASSERT(m_using_packed_sortkeys ==
+                (sort_keys != NULL && sort_keys->using_packed_sortkeys()));
+    return m_using_packed_sortkeys;
+  }
+
+  /// Are we using "addon fields"?
+  bool using_addon_fields() const
+  {
+    return addon_fields != NULL;
+  }
+
+  uint32 get_result_length(uchar *plen)
+  {
+    if (!m_using_packed_addons)
+      return res_length;
+    return Addon_fields::read_addon_length(plen);
+  }
+
+  uint32 get_addon_length(uchar *plen)
+  {
+    if (using_packed_addons())
+      return Addon_fields::read_addon_length(plen);
+    else
+      return addon_length;
+  }
+
+  uint32 get_sort_length(uchar *plen)
+  {
+    if (using_packed_sortkeys())
+      return Sort_keys::read_sortkey_length(plen) +
+              /*
+                when addon fields are not present, then the sort_length also
+                includes the res_length. For packed keys here we add
+                the res_length
+              */
+             (using_addon_fields() ? 0: res_length);
+    else
+      return sort_length;
+  }
+
+  uint get_record_length(uchar *plen)
+  {
+    if (m_packed_format)
+    {
+      uint sort_len= get_sort_length(plen);
+      return sort_len + get_addon_length(plen + sort_len);
+    }
+    else
+      return rec_length;
+  }
+
+  /**
+    Getter for record length and result length.
+    @param record_start Pointer to record.
+    @param [out] recl   Store record length here.
+    @param [out] resl   Store result length here.
+   */
+  void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl)
+  {
+    if (m_packed_format)
+    {
+      uint sort_len= get_sort_length(record_start);
+      uint addon_len= get_addon_length(record_start + sort_len);
+      *recl= sort_len + addon_len;
+      *resl= using_addon_fields() ? addon_len : res_length;
+    }
+    else
+    {
+      *recl= rec_length;
+      *resl= res_length;
+    }
+  }
+
+  void try_to_pack_sortkeys();
+
+  qsort2_cmp get_compare_function() const
+  {
+    return using_packed_sortkeys() ?
+           get_packed_keys_compare_ptr() :
+           get_ptr_compare(sort_length);
+  }
+  void* get_compare_argument(size_t *sort_len) const
+  {
+    return using_packed_sortkeys() ?
+           (void*) this :
+           (void*) sort_len;
+  }
+
+  bool is_packed_format() const
+  {
+    return m_packed_format;
+  }
+
+private:
+  uint m_packable_length;
+  bool m_using_packed_addons; ///< caches the value of using_packed_addons()
+  /* caches the value of using_packed_sortkeys() */
+  bool m_using_packed_sortkeys;
+  bool m_packed_format;
 };
 
+typedef Bounds_checked_array<uchar> Sort_buffer;
 
-int merge_many_buff(Sort_param *param, uchar *sort_buffer,
-		    BUFFPEK *buffpek,
-		    uint *maxbuffer, IO_CACHE *t_file);
-ulong read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek,
-                     uint sort_length);
+int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer,
+                    Merge_chunk *buffpek, uint *maxbuffer, IO_CACHE *t_file);
+ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
+                     Sort_param *param, bool packing_format);
 bool merge_buffers(Sort_param *param,IO_CACHE *from_file,
-                   IO_CACHE *to_file, uchar *sort_buffer,
-                   BUFFPEK *lastbuff,BUFFPEK *Fb,
-                   BUFFPEK *Tb,int flag);
-int merge_index(Sort_param *param, uchar *sort_buffer,
-		BUFFPEK *buffpek, uint maxbuffer,
-		IO_CACHE *tempfile, IO_CACHE *outfile);
-void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length);
+                   IO_CACHE *to_file, Sort_buffer sort_buffer,
+                   Merge_chunk *lastbuff, Merge_chunk *Fb,
+                   Merge_chunk *Tb, int flag);
+int merge_index(Sort_param *param, Sort_buffer sort_buffer,
+                Merge_chunk *buffpek, uint maxbuffer,
+                IO_CACHE *tempfile, IO_CACHE *outfile);
+void reuse_freed_buff(QUEUE *queue, Merge_chunk *reuse, uint key_length);
 
 #endif /* SQL_SORT_INCLUDED */