Diffstat (limited to 'sql')
-rw-r--r-- | sql/CMakeLists.txt | 1
-rw-r--r-- | sql/field.cc | 44
-rw-r--r-- | sql/field.h | 19
-rw-r--r-- | sql/item_strfunc.cc | 10
-rw-r--r-- | sql/my_json_writer.h | 2
-rw-r--r-- | sql/opt_histogram_json.cc | 1188
-rw-r--r-- | sql/opt_histogram_json.h | 148
-rw-r--r-- | sql/opt_range.cc | 5
-rw-r--r-- | sql/share/errmsg-utf8.txt | 2
-rw-r--r-- | sql/sql_admin.cc | 2
-rw-r--r-- | sql/sql_statistics.cc | 490
-rw-r--r-- | sql/sql_statistics.h | 223
-rw-r--r-- | sql/sys_vars.cc | 3
-rw-r--r-- | sql/table.h | 11
14 files changed, 1922 insertions, 226 deletions
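For orientation before the per-file hunks: the patch adds a third histogram type, JSON_HB, whose data is stored in mysql.column_stats.histogram (widened from varbinary(255) to longblob below) as a JSON object. A hand-written illustration of the shape follows; the member values here are made up, and the authoritative format description is in the sql/opt_histogram_json.h hunk:

    {
      "target_histogram_size": 10,
      "collected_at": "2021-09-01 10:00:00 UTC",
      "collected_by": "10.7.0-MariaDB",
      "histogram_hb": [
        { "start": "Alpha", "size": 0.2, "ndv": 4 },
        { "start": "Delta", "size": 0.6, "ndv": 7 },
        { "start": "Omega", "size": 0.2, "ndv": 3, "end": "Zulu" }
      ]
    }

Each bucket's "size" is the fraction of table rows it covers; fewer buckets than target_histogram_size may be produced, as the builder comments below explain.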
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 3c9b7246309..dc1f32187e8 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -151,6 +151,7 @@ SET (SQL_SOURCE sql_analyze_stmt.cc sql_join_cache.cc create_options.cc multi_range_read.cc + opt_histogram_json.cc opt_index_cond_pushdown.cc opt_subselect.cc opt_table_elimination.cc sql_expression_cache.cc gcalc_slicescan.cc gcalc_tools.cc diff --git a/sql/field.cc b/sql/field.cc index b72b0357c23..75d15cbdea1 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1108,19 +1108,27 @@ Field_longstr::pack_sort_string(uchar *to, const SORT_FIELD_ATTR *sort_field) relative position of the field value in the numeric interval [min,max] */ -double Field::pos_in_interval_val_real(Field *min, Field *max) +double pos_in_interval_for_double(double midp_val, double min_val, + double max_val) { double n, d; - n= val_real() - min->val_real(); + n= midp_val - min_val; if (n < 0) return 0.0; - d= max->val_real() - min->val_real(); + d= max_val - min_val; if (d <= 0) return 1.0; return MY_MIN(n/d, 1.0); } +double Field::pos_in_interval_val_real(Field *min, Field *max) +{ + return pos_in_interval_for_double(val_real(), min->val_real(), + max->val_real()); +} + + static inline ulonglong char_prefix_to_ulonglong(uchar *src) { @@ -1178,22 +1186,32 @@ static inline double safe_substract(ulonglong a, ulonglong b) double Field::pos_in_interval_val_str(Field *min, Field *max, uint data_offset) { + return pos_in_interval_for_string(charset(), + ptr + data_offset, data_length(), + min->ptr + data_offset, min->data_length(), + max->ptr + data_offset, max->data_length() + ); +} + + +double pos_in_interval_for_string(CHARSET_INFO *cset, + const uchar *midp_val, uint32 midp_len, + const uchar *min_val, uint32 min_len, + const uchar *max_val, uint32 max_len) +{ uchar mp_prefix[sizeof(ulonglong)]; uchar minp_prefix[sizeof(ulonglong)]; uchar maxp_prefix[sizeof(ulonglong)]; ulonglong mp, minp, maxp; - charset()->strnxfrm(mp_prefix, sizeof(mp), - ptr + data_offset, - data_length()); - charset()->strnxfrm(minp_prefix, sizeof(minp), - min->ptr + data_offset, - min->data_length()); - charset()->strnxfrm(maxp_prefix, sizeof(maxp), - max->ptr + data_offset, - max->data_length()); - mp= char_prefix_to_ulonglong(mp_prefix); + + cset->strnxfrm(mp_prefix, sizeof(mp), midp_val, midp_len); + cset->strnxfrm(minp_prefix, sizeof(minp), min_val, min_len); + cset->strnxfrm(maxp_prefix, sizeof(maxp), max_val, max_len); + + mp= char_prefix_to_ulonglong(mp_prefix); minp= char_prefix_to_ulonglong(minp_prefix); maxp= char_prefix_to_ulonglong(maxp_prefix); + double n, d; n= safe_substract(mp, minp); if (n < 0) diff --git a/sql/field.h b/sql/field.h index 7be16a1457e..078e22c6161 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1514,11 +1514,20 @@ public: if (null_ptr) null_ptr=ADD_TO_PTR(null_ptr,ptr_diff,uchar*); } + + /* + Copy the Field's value to buff. The value will be in table->record[] + format. + */ void get_image(uchar *buff, uint length, CHARSET_INFO *cs) const { get_image(buff, length, ptr, cs); } virtual void get_image(uchar *buff, uint length, const uchar *ptr_arg, CHARSET_INFO *cs) const { memcpy(buff,ptr_arg,length); } + + /* + Set Field's value to the value in *buf. 
+ */ virtual void set_image(const uchar *buff,uint length, CHARSET_INFO *cs) { memcpy(ptr,buff,length); } @@ -1857,6 +1866,7 @@ public: { return (double) 0.5; } + virtual bool pos_through_val_str() { return false;} /* Check if comparison between the field and an item unambiguously @@ -2142,6 +2152,8 @@ public: { return pos_in_interval_val_str(min, max, length_size()); } + bool pos_through_val_str() override {return true;} + bool test_if_equality_guarantees_uniqueness(const Item *const_item) const override; SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, @@ -5895,5 +5907,12 @@ ulonglong TABLE::vers_start_id() const return static_cast<ulonglong>(vers_start_field()->val_int()); } +double pos_in_interval_for_string(CHARSET_INFO *cset, + const uchar *midp_val, uint32 midp_len, + const uchar *min_val, uint32 min_len, + const uchar *max_val, uint32 max_len); + +double pos_in_interval_for_double(double midp_val, + double min_val, double max_val); #endif /* FIELD_INCLUDED */ diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index d4bf28a9c21..00413f95e32 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -503,7 +503,7 @@ err: const char *histogram_types[] = - {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", 0}; + {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON_HB", 0}; static TYPELIB histogram_types_typelib= { array_elements(histogram_types), "histogram_types", @@ -533,6 +533,14 @@ String *Item_func_decode_histogram::val_str(String *str) null_value= 1; return 0; } + + if (type == JSON_HB) + { + // It's a JSON histogram. Return it as-is. + null_value= 0; + return res; + } + if (type == DOUBLE_PREC_HB && res->length() % 2 != 0) res->length(res->length() - 1); // one byte is unused diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h index 9e7081c96d1..7840476b878 100644 --- a/sql/my_json_writer.h +++ b/sql/my_json_writer.h @@ -238,6 +238,8 @@ public: Json_writer& add_member(const char *name, size_t len); /* Add atomic values */ + + /* Note: the add_str methods do not do escapes. Should this change? */ void add_str(const char* val); void add_str(const char* val, size_t num_bytes); void add_str(const String &str); diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc new file mode 100644 index 00000000000..f2be4e5c0fb --- /dev/null +++ b/sql/opt_histogram_json.cc @@ -0,0 +1,1188 @@ +/* + Copyright (c) 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "mariadb.h" +#include "sql_base.h" +#include "my_json_writer.h" +#include "sql_statistics.h" +#include "opt_histogram_json.h" + + +/* + @brief + Un-escape a JSON string and save it into *out. + + @detail + There's no way to tell how much space is needed for the output. + Start with a small string and increase its size until json_unescape() + succeeds. +*/ + +static bool json_unescape_to_string(const char *val, int val_len, String* out) +{ + // Make sure 'out' has some memory allocated. 
+ if (!out->alloced_length() && out->alloc(128)) + return true; + + while (1) + { + uchar *buf= (uchar*)out->ptr(); + out->length(out->alloced_length()); + + int res= json_unescape(&my_charset_utf8mb4_bin, + (const uchar*)val, + (const uchar*)val + val_len, + out->charset(), + buf, buf + out->length()); + if (res >= 0) + { + out->length(res); + return false; // Ok + } + + // We get here if the unescaped string didn't fit into memory. + if (out->alloc(out->alloced_length()*2)) + return true; + } +} + + +/* + @brief + Escape a JSON string and save it into *out. + + @detail + There's no way to tell how much space is needed for the output. + Start with a small string and increase its size until json_escape() + succeeds. +*/ + +static int json_escape_to_string(const String *str, String* out) +{ + // Make sure 'out' has some memory allocated. + if (!out->alloced_length() && out->alloc(128)) + return JSON_ERROR_OUT_OF_SPACE; + + while (1) + { + uchar *buf= (uchar*)out->ptr(); + out->length(out->alloced_length()); + const uchar *str_ptr= (const uchar*)str->ptr(); + + int res= json_escape(str->charset(), + str_ptr, + str_ptr + str->length(), + &my_charset_utf8mb4_bin, + buf, buf + out->length()); + if (res >= 0) + { + out->length(res); + return 0; // Ok + } + + if (res != JSON_ERROR_OUT_OF_SPACE) + return res; // Some conversion error + + // Out of space error. Try with a bigger buffer + if (out->alloc(out->alloced_length()*2)) + return JSON_ERROR_OUT_OF_SPACE; + } +} + + +class Histogram_json_builder : public Histogram_builder +{ + Histogram_json_hb *histogram; + /* Number of buckets in the histogram */ + uint hist_width; + + /* + Number of rows that we intend to have in the bucket. That is, this is + + n_rows_in_table / hist_width + + Actual number of rows in the buckets we produce may vary because of + "popular values" and rounding. + */ + longlong bucket_capacity; + + /* Number of the buckets already collected */ + uint n_buckets_collected; + + /* + TRUE means do not try to represent values as UTF-8 text in histogram + storage. Use start_hex/end_hex for all values. + */ + bool force_binary; + + /* Data about the bucket we are filling now */ + struct CurBucket + { + /* Number of values in the bucket so far. */ + longlong size; + + /* Number of distinct values in the bucket */ + int ndv; + }; + CurBucket bucket; + + /* Used to create the JSON representation of the histogram. */ + Json_writer writer; + +public: + + Histogram_json_builder(Histogram_json_hb *hist, Field *col, uint col_len, + ha_rows rows) + : Histogram_builder(col, col_len, rows), histogram(hist) + { + /* + When computing number of rows in the bucket, round it UP. This way, we + will not end up with a histogram that has more buckets than intended. + + We may end up producing a histogram with fewer buckets than intended, but + this is considered tolerable. 
+ */ + bucket_capacity= (longlong)round(rows2double(records) / histogram->get_width() + 0.5); + if (bucket_capacity == 0) + bucket_capacity= 1; + hist_width= histogram->get_width(); + n_buckets_collected= 0; + bucket.ndv= 0; + bucket.size= 0; + force_binary= (col->type() == MYSQL_TYPE_BIT); + + writer.start_object(); + append_histogram_params(); + + writer.add_member(Histogram_json_hb::JSON_NAME).start_array(); + } + + ~Histogram_json_builder() override = default; + +private: + bool bucket_is_empty() { return bucket.ndv == 0; } + + void append_histogram_params() + { + char buf[128]; + + time_t cur_time_t= my_time(0); + struct tm curtime; + localtime_r(&cur_time_t, &curtime); + + my_snprintf(buf, sizeof(buf), "%d-%02d-%02d %2d:%02d:%02d %s", + curtime.tm_year + 1900, + curtime.tm_mon+1, + curtime.tm_mday, + curtime.tm_hour, + curtime.tm_min, + curtime.tm_sec, + system_time_zone); + + writer.add_member("target_histogram_size").add_ull(hist_width); + writer.add_member("collected_at").add_str(buf); + writer.add_member("collected_by").add_str(server_version); + } + /* + Flush the current bucket out (to JSON output), and set it to be empty. + */ + void finalize_bucket() + { + double fract= (double) bucket.size / records; + writer.add_member("size").add_double(fract); + writer.add_member("ndv").add_ll(bucket.ndv); + writer.end_object(); + n_buckets_collected++; + + bucket.ndv= 0; + bucket.size= 0; + } + + /* + Same as finalize_bucket() but also provide the bucket's end value. + */ + bool finalize_bucket_with_end_value(void *elem) + { + if (append_column_value(elem, false)) + return true; + finalize_bucket(); + return false; + } + + /* + Write the first value group to the bucket. + @param elem The value we are writing + @param cnt The number of such values. + */ + bool start_bucket(void *elem, longlong cnt) + { + DBUG_ASSERT(bucket.size == 0); + writer.start_object(); + if (append_column_value(elem, true)) + return true; + + bucket.ndv= 1; + bucket.size= cnt; + return false; + } + + /* + Append the passed value into the JSON writer as string value + */ + bool append_column_value(void *elem, bool is_start) + { + StringBuffer<MAX_FIELD_WIDTH> val; + + // Get the text representation of the value + column->store_field_value((uchar*) elem, col_length); + String *str= column->val_str(&val); + + // Escape the value for JSON + StringBuffer<MAX_FIELD_WIDTH> escaped_val; + int rc= JSON_ERROR_ILLEGAL_SYMBOL; + if (!force_binary) + { + rc= json_escape_to_string(str, &escaped_val); + if (!rc) + { + writer.add_member(is_start? "start": "end"); + writer.add_str(escaped_val.c_ptr_safe()); + return false; + } + } + if (rc == JSON_ERROR_ILLEGAL_SYMBOL) + { + escaped_val.set_hex(val.ptr(), val.length()); + writer.add_member(is_start? "start_hex": "end_hex"); + writer.add_str(escaped_val.c_ptr_safe()); + return false; + } + return true; + } + + /* + Append a value group of cnt values. + */ + void append_to_bucket(longlong cnt) + { + bucket.ndv++; + bucket.size += cnt; + } + +public: + /* + @brief + Add data to the histogram. + + @detail + The call signals to add a "value group" of elem_cnt rows, each of which + has the same value that is provided in *elem. + + Subsequent next() calls will add values that are greater than the + current one. + + @return + 0 - OK + */ + int next(void *elem, element_count elem_cnt) override + { + counters.next(elem, elem_cnt); + ulonglong count= counters.get_count(); + + /* + Ok, we've got a "value group" of elem_cnt identical values. 
+ + If we take the values from the value group and put them into + the current bucket, how many values will be left after we've + filled the bucket? + */ + longlong overflow= bucket.size + elem_cnt - bucket_capacity; + + /* + Case #1: This value group should be put into a separate bucket, if + A. It fills the current bucket and also fills the next bucket, OR + B. It fills the current bucket, which was empty. + */ + if (overflow >= bucket_capacity || (bucket_is_empty() && overflow >= 0)) + { + // Finalize the current bucket + if (!bucket_is_empty()) + finalize_bucket(); + + // Start/end the separate bucket for this value group. + if (start_bucket(elem, elem_cnt)) + return 1; // OOM + + if (records == count) + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } + else + finalize_bucket(); + } + else if (overflow >= 0) + { + /* + Case #2: is when Case#1 doesn't hold, but we can still fill the + current bucket. + */ + + // If the bucket was empty, it would have been case #1. + DBUG_ASSERT(!bucket_is_empty()); + + /* + Finalize the current bucket. Put there enough values to make it hold + bucket_capacity values. + */ + append_to_bucket(bucket_capacity - bucket.size); + if (records == count && !overflow) + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } + else + finalize_bucket(); + + if (overflow > 0) + { + // Then, start the new bucket with the remaining values. + if (start_bucket(elem, overflow)) + return 1; + } + } + else + { + // Case #3: there's not enough values to fill the current bucket. + if (bucket_is_empty()) + { + if (start_bucket(elem, elem_cnt)) + return 1; + } + else + append_to_bucket(elem_cnt); + } + + if (records == count) + { + // This is the final value group. + if (!bucket_is_empty()) + { + if (finalize_bucket_with_end_value(elem)) + return 1; + } + } + return 0; + } + + /* + @brief + Finalize the creation of histogram + */ + void finalize() override + { + writer.end_array(); + writer.end_object(); + Binary_string *json_string= (Binary_string *) writer.output.get_string(); + histogram->set_json_text(n_buckets_collected, + json_string->c_ptr(), + (size_t)json_string->length()); + } +}; + + +Histogram_builder *Histogram_json_hb::create_builder(Field *col, uint col_len, + ha_rows rows) +{ + return new Histogram_json_builder(this, col, col_len, rows); +} + + +void Histogram_json_hb::init_for_collection(MEM_ROOT *mem_root, + Histogram_type htype_arg, + ulonglong size_arg) +{ + DBUG_ASSERT(htype_arg == JSON_HB); + size= (size_t)size_arg; +} + + +/* + A syntax sugar interface to json_string_t +*/ +class Json_string +{ + json_string_t str; +public: + explicit Json_string(const char *name) + { + json_string_set_str(&str, (const uchar*)name, + (const uchar*)name + strlen(name)); + json_string_set_cs(&str, system_charset_info); + } + json_string_t *get() { return &str; } +}; + + +/* + This [partially] saves the JSON parser state and then can rollback the parser + to it. + + The goal of this is to be able to make multiple json_key_matches() calls: + + Json_saved_parser_state save(je); + if (json_key_matches(je, KEY_NAME_1)) { + ... + return; + } + save.restore_to(je); + if (json_key_matches(je, KEY_NAME_2)) { + ... + } + + This allows one to parse JSON objects where [optional] members come in any + order. 
+*/ + +class Json_saved_parser_state +{ + const uchar *c_str; + my_wc_t c_next; + int state; +public: + explicit Json_saved_parser_state(const json_engine_t *je) : + c_str(je->s.c_str), + c_next(je->s.c_next), + state(je->state) + {} + void restore_to(json_engine_t *je) + { + je->s.c_str= c_str; + je->s.c_next= c_next; + je->state= state; + } +}; + + +/* + @brief + Read a constant from JSON document and save it in *out. + + @detail + The JSON document stores constant in text form, we need to save it in + KeyTupleFormat. String constants in JSON may be escaped. +*/ + +bool read_bucket_endpoint(json_engine_t *je, Field *field, String *out, + const char **err) +{ + if (json_read_value(je)) + return true; + + if (je->value_type != JSON_VALUE_STRING && + je->value_type != JSON_VALUE_NUMBER) + { + *err= "String or number expected"; + return true; + } + + const char* je_value= (const char*)je->value; + if (je->value_type == JSON_VALUE_STRING && je->value_escaped) + { + StringBuffer<128> unescape_buf; + if (json_unescape_to_string(je_value, je->value_len, &unescape_buf)) + { + *err= "Un-escape error"; + return true; + } + field->store_text(unescape_buf.ptr(), unescape_buf.length(), + unescape_buf.charset()); + } + else + field->store_text(je_value, je->value_len, &my_charset_utf8mb4_bin); + + out->alloc(field->pack_length()); + uint bytes= field->get_key_image((uchar*)out->ptr(), + field->key_length(), Field::itRAW); + out->length(bytes); + return false; +} + + +bool read_hex_bucket_endpoint(json_engine_t *je, Field *field, String *out, + const char **err) +{ + if (json_read_value(je)) + return true; + + if (je->value_type != JSON_VALUE_STRING || je->value_escaped || + (je->value_len & 1)) + { + *err= "Expected a hex string"; + return true; + } + StringBuffer<128> buf; + + for (auto pc= je->value; pc < je->value + je->value_len; pc+=2) + { + int hex_char1= hexchar_to_int(pc[0]); + int hex_char2= hexchar_to_int(pc[1]); + if (hex_char1 == -1 || hex_char2 == -1) + { + *err= "Expected a hex string"; + return true; + } + buf.append((hex_char1 << 4) | hex_char2); + } + + field->store_text(buf.ptr(), buf.length(), field->charset()); + out->alloc(field->pack_length()); + uint bytes= field->get_key_image((uchar*)out->ptr(), + field->key_length(), Field::itRAW); + out->length(bytes); + return false; +} + + +/* + @brief Parse a JSON reprsentation for one histogram bucket + + @param je The JSON parser object + @param field Table field we are using histogram (used to convert + endpoints from text representation to binary) + @param total_size INOUT Fraction of the table rows in the buckets parsed so + far. + @param assigned_last_end OUT TRUE<=> The bucket had "end" members, the + function has saved it in + this->last_bucket_end_endp + @param err OUT If function returns 1, this *may* be set to point to text + describing the error. + + @detail + + Parse a JSON object in this form: + + { "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"} + + Unknown members are ignored. 
+ + @return + 0 OK + 1 Parse Error + -1 EOF +*/ +int Histogram_json_hb::parse_bucket(json_engine_t *je, Field *field, + double *total_size, + bool *assigned_last_end, + const char **err) +{ + *assigned_last_end= false; + if (json_scan_next(je)) + return 1; + if (je->state != JST_VALUE) + { + if (je->state == JST_ARRAY_END) + return -1; // EOF + else + return 1; // An error + } + + if (json_scan_next(je) || je->state != JST_OBJ_START) + { + *err= "Expected an object in the buckets array"; + return 1; + } + + bool have_start= false; + bool have_size= false; + bool have_ndv= false; + + double size_d; + longlong ndv_ll; + StringBuffer<128> value_buf; + int rc; + + while (!(rc= json_scan_next(je)) && je->state != JST_OBJ_END) + { + Json_saved_parser_state save1(je); + Json_string start_str("start"); + if (json_key_matches(je, start_str.get())) + { + if (read_bucket_endpoint(je, field, &value_buf, err)) + return 1; + + have_start= true; + continue; + } + save1.restore_to(je); + + Json_string size_str("size"); + if (json_key_matches(je, size_str.get())) + { + if (json_read_value(je)) + return 1; + + const char *size= (const char*)je->value_begin; + char *size_end= (char*)je->value_end; + int conv_err; + size_d= my_strtod(size, &size_end, &conv_err); + if (conv_err) + { + *err= ".size member must be a floating-point value"; + return 1; + } + have_size= true; + continue; + } + save1.restore_to(je); + + Json_string ndv_str("ndv"); + if (json_key_matches(je, ndv_str.get())) + { + if (json_read_value(je)) + return 1; + + const char *ndv= (const char*)je->value_begin; + char *ndv_end= (char*)je->value_end; + int conv_err; + ndv_ll= my_strtoll10(ndv, &ndv_end, &conv_err); + if (conv_err) + { + *err= ".ndv member must be an integer value"; + return 1; + } + have_ndv= true; + continue; + } + save1.restore_to(je); + + Json_string end_str("end"); + if (json_key_matches(je, end_str.get())) + { + if (read_bucket_endpoint(je, field, &value_buf, err)) + return 1; + last_bucket_end_endp.assign(value_buf.ptr(), value_buf.length()); + *assigned_last_end= true; + continue; + } + save1.restore_to(je); + + // Less common endoints: + Json_string start_hex_str("start_hex"); + if (json_key_matches(je, start_hex_str.get())) + { + if (read_hex_bucket_endpoint(je, field, &value_buf, err)) + return 1; + + have_start= true; + continue; + } + save1.restore_to(je); + + Json_string end_hex_str("end_hex"); + if (json_key_matches(je, end_hex_str.get())) + { + if (read_hex_bucket_endpoint(je, field, &value_buf, err)) + return 1; + last_bucket_end_endp.assign(value_buf.ptr(), value_buf.length()); + *assigned_last_end= true; + continue; + } + save1.restore_to(je); + + + // Some unknown member. Skip it. + if (json_skip_key(je)) + return 1; + } + + if (rc) + return 1; + + if (!have_start) + { + *err= "\"start\" element not present"; + return 1; + } + if (!have_size) + { + *err= "\"size\" element not present"; + return 1; + } + if (!have_ndv) + { + *err= "\"ndv\" element not present"; + return 1; + } + + *total_size += size_d; + + buckets.push_back({std::string(value_buf.ptr(), value_buf.length()), + *total_size, ndv_ll}); + + return 0; // Ok, continue reading +} + + +/* + @brief + Parse the histogram from its on-disk JSON representation + + @detail + See opt_histogram_json.h, class Histogram_json_hb for description of the + data format. 
+ + @return + false OK + True Error +*/ + +bool Histogram_json_hb::parse(MEM_ROOT *mem_root, const char *db_name, + const char *table_name, Field *field, + Histogram_type type_arg, + const char *hist_data, size_t hist_data_len) +{ + json_engine_t je; + int rc; + const char *err= "JSON parse error"; + double total_size; + int end_element; + bool end_assigned; + DBUG_ENTER("Histogram_json_hb::parse"); + DBUG_ASSERT(type_arg == JSON_HB); + + json_scan_start(&je, &my_charset_utf8mb4_bin, + (const uchar*)hist_data, + (const uchar*)hist_data+hist_data_len); + + if (json_scan_next(&je)) + goto err; + + if (je.state != JST_OBJ_START) + { + err= "Root JSON element must be a JSON object"; + goto err; + } + + while (1) + { + if (json_scan_next(&je)) + goto err; + if (je.state == JST_OBJ_END) + break; // End of object + + if (je.state != JST_KEY) + goto err; // Can' really have this: JSON object has keys in it + + Json_string hist_key_name(JSON_NAME); + if (json_key_matches(&je, hist_key_name.get())) + { + total_size= 0.0; + end_element= -1; + if (json_scan_next(&je)) + goto err; + + if (je.state != JST_ARRAY_START) + { + err= "histogram_hb must contain an array"; + goto err; + } + + while (!(rc= parse_bucket(&je, field, &total_size, &end_assigned, &err))) + { + if (end_assigned && end_element != -1) + end_element= (int)buckets.size(); + } + if (rc > 0) // Got error other than EOF + goto err; + } + else + { + // Some unknown member. Skip it. + if (json_skip_key(&je)) + return 1; + } + } + + if (buckets.size() < 1) + { + err= "Histogram must have at least one bucket"; + goto err; + } + + if (end_element == -1) + { + buckets.back().start_value= last_bucket_end_endp; + } + else if (end_element < (int)buckets.size()) + { + err= ".end is only allowed in the last bucket"; + goto err; + } + + DBUG_RETURN(false); // Ok +err: + THD *thd= current_thd; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_JSON_HISTOGRAM_PARSE_FAILED, + ER_THD(thd, ER_JSON_HISTOGRAM_PARSE_FAILED), + db_name, table_name, + err, (je.s.c_str - (const uchar*)hist_data)); + sql_print_error(ER_THD(thd, ER_JSON_HISTOGRAM_PARSE_FAILED), + db_name, table_name, err, + (je.s.c_str - (const uchar*)hist_data)); + + DBUG_RETURN(true); +} + + +static +void store_key_image_to_rec_no_null(Field *field, const char *ptr, size_t len) +{ + MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table, + &field->table->write_set); + field->set_key_image((const uchar*)ptr, (uint)len); + dbug_tmp_restore_column_map(&field->table->write_set, old_map); +} + + +static +double position_in_interval(Field *field, const uchar *key, uint key_len, + const std::string& left, const std::string& right) +{ + double res; + if (field->pos_through_val_str()) + { + StringBuffer<64> buf1, buf2, buf3; + + store_key_image_to_rec_no_null(field, left.data(), left.size()); + String *min_str= field->val_str(&buf1); + /* + Make sure we've saved a copy of the data, not a pointer into the + field->ptr. 
We will overwrite the contents of field->ptr with the next + store_key_image_to_rec_no_null call + */ + if (&buf1 != min_str) + buf1.copy(*min_str); + else + buf1.copy(); + + store_key_image_to_rec_no_null(field, right.data(), right.size()); + String *max_str= field->val_str(&buf2); + /* Same as above */ + if (&buf2 != max_str) + buf2.copy(*max_str); + else + buf2.copy(); + + store_key_image_to_rec_no_null(field, (const char*)key, key_len); + String *midp_str= field->val_str(&buf3); + + res= pos_in_interval_for_string(field->charset(), + (const uchar*)midp_str->ptr(), midp_str->length(), + (const uchar*)buf1.ptr(), buf1.length(), + (const uchar*)buf2.ptr(), buf2.length()); + } + else + { + store_key_image_to_rec_no_null(field, left.data(), field->key_length()); + double min_val_real= field->val_real(); + + store_key_image_to_rec_no_null(field, right.data(), field->key_length()); + double max_val_real= field->val_real(); + + store_key_image_to_rec_no_null(field, (const char*)key, field->key_length()); + double midp_val_real= field->val_real(); + + res= pos_in_interval_for_double(midp_val_real, min_val_real, max_val_real); + } + return res; +} + + +double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint, + double avg_sel) +{ + const uchar *key = endpoint->key; + if (field->real_maybe_null()) + key++; + + // If the value is outside of the histogram's range, this will "clip" it to + // first or last bucket. + int endp_cmp; + int idx= find_bucket(field, key, &endp_cmp); + + double sel; + + if (buckets[idx].ndv == 1 && (endp_cmp!=0)) + { + /* + The bucket has a single value and it doesn't match! Return a very + small value. + */ + sel= 0.0; + } + else + { + /* + We get here when: + * The bucket has one value and this is the value we are looking for. + * The bucket has multiple values. Then, assume + */ + sel= (buckets[idx].cum_fract - get_left_fract(idx)) / buckets[idx].ndv; + } + return sel; +} + + +double Histogram_json_hb::get_left_fract(int idx) +{ + if (!idx) + return 0.0; + else + return buckets[idx-1].cum_fract; +} + +std::string& Histogram_json_hb::get_end_value(int idx) +{ + if (idx == (int)buckets.size()-1) + return last_bucket_end_endp; + else + return buckets[idx+1].start_value; +} + +/* + @param field The table field histogram is for. We don't care about the + field's current value, we only need its virtual functions to + perform various operations + + @param min_endp Left endpoint, or NULL if there is none + @param max_endp Right endpoint, or NULL if there is none +*/ + +double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, + key_range *max_endp, double avg_sel) +{ + double min, max; + + if (min_endp && !(field->real_maybe_null() && min_endp->key[0])) + { + bool exclusive_endp= (min_endp->flag == HA_READ_AFTER_KEY)? true: false; + const uchar *min_key= min_endp->key; + uint min_key_len= min_endp->length; + if (field->real_maybe_null()) + { + min_key++; + min_key_len--; + } + + // Find the leftmost bucket that contains the lookup value. + // (If the lookup value is to the left of all buckets, find bucket #0) + int endp_cmp; + int idx= find_bucket(field, min_key, &endp_cmp); + + double sel; + // Special handling for buckets with ndv=1: + if (buckets[idx].ndv == 1) + { + if (endp_cmp < 0) + sel= 0.0; + else if (endp_cmp > 0) + sel= 1.0; + else // endp_cmp == 0.0 + sel= (exclusive_endp)? 
1.0 : 0.0; + } + else + { + sel= position_in_interval(field, min_key, min_key_len, + buckets[idx].start_value, + get_end_value(idx)); + } + double left_fract= get_left_fract(idx); + min= left_fract + sel * (buckets[idx].cum_fract - left_fract); + } + else + min= 0.0; + + if (max_endp) + { + // The right endpoint cannot be NULL + DBUG_ASSERT(!(field->real_maybe_null() && max_endp->key[0])); + bool inclusive_endp= (max_endp->flag == HA_READ_AFTER_KEY)? true: false; + const uchar *max_key= max_endp->key; + uint max_key_len= max_endp->length; + if (field->real_maybe_null()) + { + max_key++; + max_key_len--; + } + int endp_cmp; + int idx= find_bucket(field, max_key, &endp_cmp); + + if ((endp_cmp == 0) && !inclusive_endp) + { + /* + The range is "col < $CONST" and we've found a bucket starting with + $CONST. + */ + if (idx > 0) + { + // Move to the previous bucket + endp_cmp= 1; + idx--; + } + else + endp_cmp= -1; + } + double sel; + + // Special handling for buckets with ndv=1: + if (buckets[idx].ndv == 1) + { + if (endp_cmp < 0) + sel= 0.0; + else if (endp_cmp > 0) + sel= 1.0; + else // endp_cmp == 0.0 + sel= inclusive_endp? 1.0 : 0.0; + } + else + { + sel= position_in_interval(field, max_key, max_key_len, + buckets[idx].start_value, + get_end_value(idx)); + } + double left_fract= get_left_fract(idx); + max= left_fract + sel * (buckets[idx].cum_fract - left_fract); + } + else + max= 1.0; + + return max - min; +} + + +void Histogram_json_hb::serialize(Field *field) +{ + field->store(json_text.data(), json_text.size(), &my_charset_bin); +} + + +static int SGN(int x) +{ + if (!x) + return 0; + return (x < 0)? -1 : 1; +} + + +/* + @brief + Find the leftmost histogram bucket such that "lookup_val >= start_value". + + @param field Field object (used to do value comparisons) + @param lookup_val The lookup value in KeyTupleFormat. + @param cmp OUT How the lookup_val compares to found_bucket.left_bound: + 0 - lookup_val == bucket.left_bound + >0 - lookup_val > bucket.left_bound (the most typical) + <0 - lookup_val < bucket.left_bound. This can only happen + for the first bucket, for all other buckets we would just + pick the previous bucket and have cmp>=0. + @return + The bucket index +*/ + +int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val, + int *cmp) +{ + int res; + int low= 0; + int high= (int)buckets.size() - 1; + *cmp= 1; // By default, (bucket[retval].start_value < *lookup_val) + + while (low + 1 < high) + { + int middle= (low + high) / 2; + res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val); + if (!res) + { + *cmp= res; + low= middle; + goto end; + } + else if (res < 0) + low= middle; + else //res > 0 + high= middle; + } + + /* + If low and high were assigned a value in the above loop and we got here, + then the following holds: + + bucket[low].start_value < lookup_val < bucket[high].start_value + + Besides that, there are two special cases: low=0 and high=last_bucket. + Handle them below. 
+ */ + if (low == 0) + { + res= field->key_cmp(lookup_val, (uchar*)buckets[0].start_value.data()); + if (res <= 0) + *cmp= res; + else // res>0, lookup_val > buckets[0].start_value + { + res= field->key_cmp(lookup_val, (uchar*)buckets[high].start_value.data()); + if (res >= 0) // lookup_val >= buckets[high].start_value + { + // Move to that bucket + low= high; + *cmp= res; + } + else + *cmp= 1; + } + } + else if (high == (int)buckets.size() - 1) + { + res= field->key_cmp(lookup_val, (uchar*)buckets[high].start_value.data()); + if (res >= 0) + { + // Ok the value is in the last bucket. + *cmp= res; + low= high; + } + else + { + // The value is in the 'low' bucket. + res= field->key_cmp(lookup_val, (uchar*)buckets[low].start_value.data()); + *cmp= res; + } + } + +end: + // Verification: *cmp has correct value + DBUG_ASSERT(SGN(*cmp) == + SGN(field->key_cmp(lookup_val, + (uchar*)buckets[low].start_value.data()))); + // buckets[low] <= lookup_val, with one exception of the first bucket. + DBUG_ASSERT(low == 0 || + field->key_cmp((uchar*)buckets[low].start_value.data(), lookup_val)<= 0); + // buckets[low+1] > lookup_val, with one exception of the last bucket + DBUG_ASSERT(low == (int)buckets.size()-1 || + field->key_cmp((uchar*)buckets[low+1].start_value.data(), lookup_val)> 0); + return low; +} diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h new file mode 100644 index 00000000000..e9b69869f4b --- /dev/null +++ b/sql/opt_histogram_json.h @@ -0,0 +1,148 @@ +/* + Copyright (c) 2021, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "sql_statistics.h" + +/* + An equi-height histogram which stores real values for bucket bounds. + + Handles @@histogram_type=JSON_HB + + Histogram format in JSON: + + { + // The next three are saved but not currently analyzed: + "target_histogram_size": nnn, + "collected_at": "(date and time)", + "collected_by": "(server version)", + + "histogram_hb": [ + { "start": "value", "size":nnn.nn, "ndv": nnn }, + ... + + // Optionally, start and/or end can be replaced with _hex variant + { "start_hex: "value", "size":nnn.nn, "ndv":nnn}, + + ... + { "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"}, + ] + } + + Histogram is a JSON object. It has some global properties and "histogram_hb" + member whose value is a JSON array of histogram buckets. + + Each bucket is an object with these members: + "start" - the first value in the bucket. + "size" - fraction of table rows that is contained in the bucket. + "ndv" - Number of Distinct Values in the bucket. + "end" - Optionally, the last value in the bucket. + + A bucket is a single-point bucket if it has ndv=1. + + Most buckets have no "end" member: the bucket is assumed to contain all + values up to the "start" of the next bucket. + + The exception is single-point buckets where last value is the same as the + first value. + + start/end can be replaced with start_hex/end_hex. 
In _hex variant, the + constant is encoded in hex. This encoding is used to handle so called + "unassigned characters": some non-UTF8 charsets have byte combinations that + are not mapped to any UTF8 character. +*/ + +class Histogram_json_hb : public Histogram_base +{ + size_t size; /* Number of elements in the histogram */ + + /* Collection-time only: collected histogram in the JSON form. */ + std::string json_text; + + struct Bucket + { + // The left endpoint in KeyTupleFormat. The endpoint is inclusive, this + // value is in this bucket. + std::string start_value; + + // Cumulative fraction: The fraction of table rows that fall into this + // and preceding buckets. + double cum_fract; + + // Number of distinct values in the bucket. + longlong ndv; + }; + + std::vector<Bucket> buckets; + + std::string last_bucket_end_endp; + +public: + static constexpr const char* JSON_NAME="histogram_hb"; + + bool parse(MEM_ROOT *mem_root, const char *db_name, const char *table_name, + Field *field, Histogram_type type_arg, + const char *hist_data, size_t hist_data_len) override; + + void serialize(Field *field) override; + + Histogram_builder *create_builder(Field *col, uint col_len, + ha_rows rows) override; + + // returns number of buckets in the histogram + uint get_width() override + { + return (uint)size; + } + + Histogram_type get_type() override + { + return JSON_HB; + } + + /* + @brief + This used to be the size of the histogram on disk, which was redundant + (one can check the size directly). Return the number of buckets instead. + */ + uint get_size() override + { + return (uint)size; + } + + void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, + ulonglong size) override; + + double point_selectivity(Field *field, key_range *endpoint, + double avg_sel) override; + double range_selectivity(Field *field, key_range *min_endp, + key_range *max_endp, double avg_sel) override; + + void set_json_text(ulonglong sz, const char *json_text_arg, + size_t json_text_len) + { + size= (size_t) sz; + json_text.assign(json_text_arg, json_text_len); + } + +private: + int parse_bucket(json_engine_t *je, Field *field, double *cumulative_size, + bool *assigned_last_end, const char **err); + + double get_left_fract(int idx); + std::string& get_end_value(int idx); + int find_bucket(const Field *field, const uchar *lookup_val, int *cmp); +}; + diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 06063cb9ae1..37cf054dfb0 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3277,7 +3277,10 @@ double records_in_column_ranges(PARAM *param, uint idx, break; } total_rows += rows; - } + } + if (total_rows == 0) + total_rows= MY_MIN(1, rows2double(param->table->stat_records())); + return total_rows; } diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 2d90793b90f..3b5c8010a6d 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -8913,3 +8913,5 @@ ER_PARTITION_CONVERT_SUBPARTITIONED eng "Convert partition is not supported for subpartitioned table." ER_PROVIDER_NOT_LOADED eng "MariaDB tried to use the %s, but its provider plugin is not loaded" +ER_JSON_HISTOGRAM_PARSE_FAILED + eng "Failed to parse histogram for table %s.%s: %s at offset %d." 
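The selectivity logic of the new histogram, spread over range_selectivity(), get_left_fract() and position_in_interval() above, boils down to arithmetic over cumulative bucket fractions. Here is a minimal self-contained sketch of just that arithmetic (toy code, not the patch's classes; the bucket index and the in-bucket position are taken as inputs, where the real code derives them with find_bucket() and position_in_interval()):

    #include <cstdio>
    #include <vector>

    // cum_fract: fraction of table rows in this bucket and all buckets before it.
    struct Bucket { double cum_fract; };

    // Same rule as Histogram_json_hb::get_left_fract().
    static double left_fract(const std::vector<Bucket> &b, int idx)
    {
      return idx ? b[idx - 1].cum_fract : 0.0;
    }

    // Fraction of rows lying to the left of an endpoint that sits at relative
    // position pos (in [0..1]) inside bucket idx.
    static double endpoint_fract(const std::vector<Bucket> &b, int idx, double pos)
    {
      double left= left_fract(b, idx);
      return left + pos * (b[idx].cum_fract - left);
    }

    int main()
    {
      // Three buckets holding 20%, 60% and 20% of the rows.
      std::vector<Bucket> b{{0.2}, {0.8}, {1.0}};
      // Lower endpoint halfway into bucket 0, upper endpoint a quarter into bucket 1:
      double min_f= endpoint_fract(b, 0, 0.50);   // 0.10
      double max_f= endpoint_fract(b, 1, 0.25);   // 0.35
      printf("estimated fraction of rows: %g\n", max_f - min_f);   // 0.25
      return 0;
    }

Note that ndv does not enter the range estimate at all; it matters only for point_selectivity() and for the ndv==1 special cases on the endpoints.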
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index 26e3d67641d..028838b65e4 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -1045,6 +1045,8 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, else compl_result_code= HA_ADMIN_FAILED; + if (table->table) + free_statistics_for_table(thd, table->table); if (compl_result_code) result_code= HA_ADMIN_FAILED; else diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 1f034f490c8..84d0902193b 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -28,11 +28,15 @@ #include "sql_base.h" #include "key.h" #include "sql_statistics.h" +#include "opt_histogram_json.h" #include "opt_range.h" #include "uniques.h" #include "sql_show.h" #include "sql_partition.h" +#include <vector> +#include <string> + /* The system variable 'use_stat_tables' can take one of the following values: @@ -57,8 +61,11 @@ the collected statistics in the persistent statistical tables only when the value of the variable 'use_stat_tables' is not equal to "never". -*/ - +*/ + +Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type, + THD *owner); + /* Currently there are only 3 persistent statistical tables */ static const uint STATISTICS_TABLES= 3; @@ -178,12 +185,12 @@ TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] = }, { { STRING_WITH_LEN("hist_type") }, - { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB')") }, + { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')") }, { STRING_WITH_LEN("utf8mb3") } }, { { STRING_WITH_LEN("histogram") }, - { STRING_WITH_LEN("varbinary(255)") }, + { STRING_WITH_LEN("longblob") }, { NULL, 0 } } }; @@ -307,7 +314,7 @@ public: inline void init(THD *thd, Field * table_field); inline bool add(); - inline void finish(ha_rows rows, double sample_fraction); + inline bool finish(MEM_ROOT *mem_root, ha_rows rows, double sample_fraction); inline void cleanup(); }; @@ -1064,15 +1071,23 @@ public: stat_field->store(stats->get_avg_frequency()); break; case COLUMN_STAT_HIST_SIZE: - stat_field->store(stats->histogram.get_size()); + // Note: this is dumb. the histogram size is stored with the + // histogram! + stat_field->store(stats->histogram? 
+ stats->histogram->get_size() : 0); break; case COLUMN_STAT_HIST_TYPE: - stat_field->store(stats->histogram.get_type() + 1); + if (stats->histogram) + stat_field->store(stats->histogram->get_type() + 1); + else + stat_field->set_null(); break; case COLUMN_STAT_HISTOGRAM: - stat_field->store((char *)stats->histogram.get_values(), - stats->histogram.get_size(), &my_charset_bin); - break; + if (stats->histogram) + stats->histogram->serialize(stat_field); + else + stat_field->set_null(); + break; } } } @@ -1100,6 +1115,8 @@ public: void get_stat_values() { table_field->read_stats->set_all_nulls(); + // default: hist_type=NULL means there's no histogram + table_field->read_stats->histogram_type_on_disk= INVALID_HISTOGRAM; if (table_field->read_stats->min_value) table_field->read_stats->min_value->set_null(); @@ -1111,7 +1128,7 @@ public: char buff[MAX_FIELD_WIDTH]; String val(buff, sizeof(buff), &my_charset_bin); - for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HIST_TYPE; i++) + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++) { Field *stat_field= stat_table->field[i]; @@ -1155,13 +1172,28 @@ public: table_field->read_stats->set_avg_frequency(stat_field->val_real()); break; case COLUMN_STAT_HIST_SIZE: - table_field->read_stats->histogram.set_size(stat_field->val_int()); + /* + Ignore the contents of mysql.column_stats.hist_size. We take the + size from the mysql.column_stats.histogram column, itself. + */ break; case COLUMN_STAT_HIST_TYPE: - Histogram_type hist_type= (Histogram_type) (stat_field->val_int() - - 1); - table_field->read_stats->histogram.set_type(hist_type); - break; + { + /* + Save the histogram type. The histogram itself will be read in + read_histograms_for_table(). + */ + Histogram_type hist_type= (Histogram_type) (stat_field->val_int() - + 1); + table_field->read_stats->histogram_type_on_disk= hist_type; + break; + } + case COLUMN_STAT_HISTOGRAM: + /* + Do nothing here: we take the histogram length from the 'histogram' + column itself + */ + break; } } } @@ -1182,9 +1214,9 @@ public: The method assumes that the value of histogram size and the pointer to the histogram location has been already set in the fields size and values of read_stats->histogram. - */ + */ - void get_histogram_value() + Histogram_base * load_histogram(MEM_ROOT *mem_root) { if (find_stat()) { @@ -1194,14 +1226,60 @@ public: Field *stat_field= stat_table->field[fldno]; table_field->read_stats->set_not_null(fldno); stat_field->val_str(&val); - memcpy(table_field->read_stats->histogram.get_values(), - val.ptr(), table_field->read_stats->histogram.get_size()); + Histogram_type hist_type= + table_field->read_stats->histogram_type_on_disk; + + Histogram_base *hist; + if (!(hist= create_histogram(mem_root, hist_type, NULL))) + return NULL; + Field *field= table->field[table_field->field_index]; + if (!hist->parse(mem_root, db_name->str, table_name->str, + field, hist_type, + val.ptr(), val.length())) + { + table_field->read_stats->histogram= hist; + return hist; + } + else + delete hist; } + return NULL; } - }; +bool Histogram_binary::parse(MEM_ROOT *mem_root, const char*, const char*, + Field*, Histogram_type type_arg, + const char *hist_data, size_t hist_data_len) +{ + /* On-disk an in-memory formats are the same. Just copy the data. 
*/ + type= type_arg; + size= (uint8) hist_data_len; // 'size' holds the size of histogram in bytes + if (!(values= (uchar*)alloc_root(mem_root, hist_data_len))) + return true; + + memcpy(values, hist_data, hist_data_len); + return false; +} + +/* + Save the histogram data info a table field. +*/ +void Histogram_binary::serialize(Field *field) +{ + field->store((char*)values, size, &my_charset_bin); +} + +void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, + Histogram_type htype_arg, + ulonglong size_arg) +{ + type= htype_arg; + values= (uchar*)alloc_root(mem_root, (size_t)size_arg); + size= (uint8) size_arg; +} + + /* An object of the class Index_stat is created to read statistical data on tables from the statistical table table_stat, to update @@ -1512,62 +1590,39 @@ public: } }; -/* - Histogram_builder is a helper class that is used to build histograms - for columns -*/ - -class Histogram_builder +class Histogram_binary_builder : public Histogram_builder { - Field *column; /* table field for which the histogram is built */ - uint col_length; /* size of this field */ - ha_rows records; /* number of records the histogram is built for */ Field *min_value; /* pointer to the minimal value for the field */ Field *max_value; /* pointer to the maximal value for the field */ - Histogram *histogram; /* the histogram location */ + Histogram_binary *histogram; /* the histogram location */ uint hist_width; /* the number of points in the histogram */ double bucket_capacity; /* number of rows in a bucket of the histogram */ uint curr_bucket; /* number of the current bucket to be built */ - ulonglong count; /* number of values retrieved */ - ulonglong count_distinct; /* number of distinct values retrieved */ - /* number of distinct values that occured only once */ - ulonglong count_distinct_single_occurence; -public: - Histogram_builder(Field *col, uint col_len, ha_rows rows) - : column(col), col_length(col_len), records(rows) +public: + Histogram_binary_builder(Field *col, uint col_len, ha_rows rows) + : Histogram_builder(col, col_len, rows) { Column_statistics *col_stats= col->collected_stats; min_value= col_stats->min_value; max_value= col_stats->max_value; - histogram= &col_stats->histogram; + histogram= (Histogram_binary*)col_stats->histogram; hist_width= histogram->get_width(); bucket_capacity= (double) records / (hist_width + 1); curr_bucket= 0; - count= 0; - count_distinct= 0; - count_distinct_single_occurence= 0; } - ulonglong get_count_distinct() const { return count_distinct; } - ulonglong get_count_single_occurence() const + int next(void *elem, element_count elem_cnt) override { - return count_distinct_single_occurence; - } - - int next(void *elem, element_count elem_cnt) - { - count_distinct++; - if (elem_cnt == 1) - count_distinct_single_occurence++; - count+= elem_cnt; + counters.next(elem, elem_cnt); + ulonglong count= counters.get_count(); if (curr_bucket == hist_width) return 0; if (count > bucket_capacity * (curr_bucket + 1)) { column->store_field_value((uchar *) elem, col_length); histogram->set_value(curr_bucket, - column->pos_in_interval(min_value, max_value)); + column->pos_in_interval(min_value, max_value)); curr_bucket++; while (curr_bucket != hist_width && count > bucket_capacity * (curr_bucket + 1)) @@ -1578,25 +1633,51 @@ public: } return 0; } + void finalize() override {} }; +Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len, + ha_rows rows) +{ + return new Histogram_binary_builder(col, col_len, rows); +} + + +Histogram_base 
*create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type, + THD *owner) +{ + Histogram_base *res= NULL; + switch (hist_type) { + case SINGLE_PREC_HB: + case DOUBLE_PREC_HB: + res= new Histogram_binary(); + break; + case JSON_HB: + res= new Histogram_json_hb(); + break; + default: + DBUG_ASSERT(0); + } + + if (res) + res->set_owner(owner); + return res; +} + + C_MODE_START -int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) +static int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) { Histogram_builder *hist_builder= (Histogram_builder *) arg; return hist_builder->next(elem, elem_cnt); } - - -static int count_distinct_single_occurence_walk(void *elem, - element_count count, void *arg) +int basic_stats_collector_walk(void *elem, element_count count, + void *arg) { - ((ulonglong*)arg)[0]+= 1; - if (count == 1) - ((ulonglong*)arg)[1]+= 1; + ((Basic_stats_collector*)arg)->next(elem, count); return 0; } @@ -1681,23 +1762,35 @@ public: */ void walk_tree() { - ulonglong counts[2] = {0, 0}; - tree->walk(table_field->table, - count_distinct_single_occurence_walk, counts); - distincts= counts[0]; - distincts_single_occurence= counts[1]; + Basic_stats_collector stats_collector; + tree->walk(table_field->table, basic_stats_collector_walk, + (void*)&stats_collector ); + distincts= stats_collector.get_count_distinct(); + distincts_single_occurence= stats_collector.get_count_single_occurence(); } /* @brief Calculate a histogram of the tree */ - void walk_tree_with_histogram(ha_rows rows) + bool walk_tree_with_histogram(ha_rows rows) { - Histogram_builder hist_builder(table_field, tree_key_length, rows); - tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder); - distincts= hist_builder.get_count_distinct(); - distincts_single_occurence= hist_builder.get_count_single_occurence(); + Histogram_base *hist= table_field->collected_stats->histogram; + Histogram_builder *hist_builder= + hist->create_builder(table_field, tree_key_length, rows); + + if (tree->walk(table_field->table, histogram_build_walk, + (void*)hist_builder)) + { + delete hist_builder; + return true; // Error + } + hist_builder->finalize(); + distincts= hist_builder->counters.get_count_distinct(); + distincts_single_occurence= hist_builder->counters. 
+ get_count_single_occurence(); + delete hist_builder; + return false; } ulonglong get_count_distinct() @@ -1712,20 +1805,11 @@ public: /* @brief - Get the size of the histogram in bytes built for table_field - */ - uint get_hist_size() - { - return table_field->collected_stats->histogram.get_size(); - } - - /* - @brief Get the pointer to the histogram built for table_field */ - uchar *get_histogram() + Histogram_base *get_histogram() { - return table_field->collected_stats->histogram.get_values(); + return table_field->collected_stats->histogram; } }; @@ -2125,26 +2209,13 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) ulonglong *idx_avg_frequency= (ulonglong*) alloc_root(&table->mem_root, sizeof(ulonglong) * key_parts); - uint hist_size= thd->variables.histogram_size; - Histogram_type hist_type= (Histogram_type) (thd->variables.histogram_type); - uchar *histogram= NULL; - if (hist_size > 0) - { - if ((histogram= (uchar *) alloc_root(&table->mem_root, - hist_size * columns))) - bzero(histogram, hist_size * columns); - - } - - if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency || - (hist_size && !histogram)) + if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency) DBUG_RETURN(1); table->collected_stats= table_stats; table_stats->column_stats= column_stats; table_stats->index_stats= index_stats; table_stats->idx_avg_frequency= idx_avg_frequency; - table_stats->histograms= histogram; memset(column_stats, 0, sizeof(Column_statistics) * columns); @@ -2152,10 +2223,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) { if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) { - column_stats->histogram.set_size(hist_size); - column_stats->histogram.set_type(hist_type); - column_stats->histogram.set_values(histogram); - histogram+= hist_size; + column_stats->histogram = NULL; (*field_ptr)->collected_stats= column_stats++; } } @@ -2177,6 +2245,25 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) DBUG_RETURN(0); } +/* + Free the "local" statistics for table. + We only free the statistics that is not on MEM_ROOT and needs to be + explicitly freed. +*/ +void free_statistics_for_table(THD *thd, TABLE *table) +{ + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + // Only delete the histograms that are exclusivly owned by this thread + if ((*field_ptr)->collected_stats && + (*field_ptr)->collected_stats->histogram && + (*field_ptr)->collected_stats->histogram->get_owner() == thd) + { + delete (*field_ptr)->collected_stats->histogram; + (*field_ptr)->collected_stats->histogram= NULL; + } + } +} /** @brief @@ -2383,7 +2470,8 @@ bool Column_statistics_collected::add() */ inline -void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) +bool Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, + double sample_fraction) { double val; @@ -2401,13 +2489,32 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) } if (count_distinct) { - uint hist_size= count_distinct->get_hist_size(); + uint hist_size= current_thd->variables.histogram_size; + Histogram_type hist_type= + (Histogram_type) (current_thd->variables.histogram_type); + bool have_histogram= false; + if (hist_size != 0 && hist_type != INVALID_HISTOGRAM) + { + have_histogram= true; + histogram= create_histogram(mem_root, hist_type, current_thd); + histogram->init_for_collection(mem_root, hist_type, hist_size); + } /* Compute cardinality statistics and optionally histogram. 
*/ - if (hist_size == 0) + if (!have_histogram) count_distinct->walk_tree(); else - count_distinct->walk_tree_with_histogram(rows - nulls); + { + if (count_distinct->walk_tree_with_histogram(rows - nulls)) + { + delete histogram; + histogram= NULL; + + delete count_distinct; + count_distinct= NULL; + return true; // Error + } + } ulonglong distincts= count_distinct->get_count_distinct(); ulonglong distincts_single_occurence= @@ -2442,15 +2549,14 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) set_not_null(COLUMN_STAT_AVG_FREQUENCY); } else - hist_size= 0; - histogram.set_size(hist_size); + have_histogram= false; + set_not_null(COLUMN_STAT_HIST_SIZE); - if (hist_size && distincts) + if (have_histogram && distincts && histogram) { set_not_null(COLUMN_STAT_HIST_TYPE); - histogram.set_values(count_distinct->get_histogram()); set_not_null(COLUMN_STAT_HISTOGRAM); - } + } delete count_distinct; count_distinct= NULL; } @@ -2459,7 +2565,8 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) val= 1.0; set_avg_frequency(val); set_not_null(COLUMN_STAT_AVG_FREQUENCY); - } + } + return false; } @@ -2710,7 +2817,10 @@ int collect_statistics_for_table(THD *thd, TABLE *table) continue; bitmap_set_bit(table->write_set, table_field->field_index); if (!rc) - table_field->collected_stats->finish(rows, sample_fraction); + { + rc= table_field->collected_stats->finish(&table->mem_root, rows, + sample_fraction); + } else table_field->collected_stats->cleanup(); } @@ -2790,7 +2900,7 @@ int update_statistics_for_table(THD *thd, TABLE *table) start_new_trans new_trans(thd); - if (open_stat_tables(thd, tables, TRUE)) + if ((open_stat_tables(thd, tables, TRUE))) DBUG_RETURN(rc); save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); @@ -2916,16 +3026,17 @@ int read_statistics_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) /* Read statistics from the statistical table column_stats */ stat_table= stat_tables[COLUMN_STAT].table; - ulong total_hist_size= 0; + bool have_histograms= false; Column_stat column_stat(stat_table, table); for (field_ptr= table_share->field; *field_ptr; field_ptr++) { table_field= *field_ptr; column_stat.set_key_fields(table_field); column_stat.get_stat_values(); - total_hist_size+= table_field->read_stats->histogram.get_size(); + if (table_field->read_stats->histogram_type_on_disk != INVALID_HISTOGRAM) + have_histograms= true; } - table_share->stats_cb.total_hist_size= total_hist_size; + table_share->stats_cb.have_histograms= have_histograms; /* Read statistics from the statistical table index_stats */ stat_table= stat_tables[INDEX_STAT].table; @@ -3021,6 +3132,9 @@ void delete_stat_values_for_table_share(TABLE_SHARE *table_share) delete column_stats->max_value; column_stats->max_value= NULL; } + + delete column_stats->histogram; + column_stats->histogram=NULL; } } @@ -3065,28 +3179,28 @@ int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) if (stats_cb->start_histograms_load()) { - uchar *histogram= (uchar *) alloc_root(&stats_cb->mem_root, - stats_cb->total_hist_size); - if (!histogram) - { - stats_cb->abort_histograms_load(); - DBUG_RETURN(1); - } - memset(histogram, 0, stats_cb->total_hist_size); - Column_stat column_stat(stat_tables[COLUMN_STAT].table, table); + + /* + The process of histogram loading makes use of the field it is for. Mark + all fields as readable/writable in order to allow that. 
+    */
+    MY_BITMAP *old_sets[2];
+    dbug_tmp_use_all_columns(table, old_sets,
+                             &table->read_set, &table->write_set);
+
     for (Field **field_ptr= table->s->field; *field_ptr; field_ptr++)
     {
       Field *table_field= *field_ptr;
-      if (uint hist_size= table_field->read_stats->histogram.get_size())
+      if (table_field->read_stats->histogram_type_on_disk != INVALID_HISTOGRAM)
       {
         column_stat.set_key_fields(table_field);
-        table_field->read_stats->histogram.set_values(histogram);
-        column_stat.get_histogram_value();
-        histogram+= hist_size;
+        table_field->read_stats->histogram=
+          column_stat.load_histogram(&stats_cb->mem_root);
       }
     }
     stats_cb->end_histograms_load();
+
+    dbug_tmp_restore_column_maps(&table->read_set, &table->write_set,
+                                 old_sets);
   }
   table->histograms_are_read= true;
   DBUG_RETURN(0);
@@ -3775,15 +3889,11 @@ double get_column_range_cardinality(Field *field,
       if (avg_frequency > 1.0 + 0.000001 &&
           col_stats->min_max_values_are_provided())
       {
-        Histogram *hist= &col_stats->histogram;
-        if (hist->is_usable(thd))
+        Histogram_base *hist= col_stats->histogram;
+        if (hist && hist->is_usable(thd))
         {
-          store_key_image_to_rec(field, (uchar *) min_endp->key,
-                                 field->key_length());
-          double pos= field->pos_in_interval(col_stats->min_value,
-                                             col_stats->max_value);
           res= col_non_nulls *
-               hist->point_selectivity(pos,
+               hist->point_selectivity(field, min_endp,
                                        avg_frequency / col_non_nulls);
         }
       }
@@ -3798,34 +3908,41 @@ double get_column_range_cardinality(Field *field,
     {
       if (col_stats->min_max_values_are_provided())
       {
-        double sel, min_mp_pos, max_mp_pos;
-
-        if (min_endp && !(field->null_ptr && min_endp->key[0]))
+        Histogram_base *hist= col_stats->histogram;
+        double avg_frequency= col_stats->get_avg_frequency();
+        double sel;
+        if (hist && hist->is_usable(thd))
         {
-          store_key_image_to_rec(field, (uchar *) min_endp->key,
-                                 field->key_length());
-          min_mp_pos= field->pos_in_interval(col_stats->min_value,
-                                             col_stats->max_value);
+          sel= hist->range_selectivity(field, min_endp, max_endp,
+                                       avg_frequency / col_non_nulls);
+          res= col_non_nulls * sel;
         }
         else
-          min_mp_pos= 0.0;
-        if (max_endp)
         {
-          store_key_image_to_rec(field, (uchar *) max_endp->key,
-                                 field->key_length());
-          max_mp_pos= field->pos_in_interval(col_stats->min_value,
-                                             col_stats->max_value);
-        }
-        else
-          max_mp_pos= 1.0;
+          double min_mp_pos, max_mp_pos;
+          if (min_endp && !(field->null_ptr && min_endp->key[0]))
+          {
+            store_key_image_to_rec(field, (uchar *) min_endp->key,
+                                   field->key_length());
+            min_mp_pos=
+              field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+          }
+          else
+            min_mp_pos= 0.0;
+          if (max_endp)
+          {
+            store_key_image_to_rec(field, (uchar *) max_endp->key,
+                                   field->key_length());
+            max_mp_pos=
+              field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+          }
+          else
+            max_mp_pos= 1.0;
 
-        Histogram *hist= &col_stats->histogram;
-        if (hist->is_usable(thd))
-          sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
-        else
-          sel= (max_mp_pos - min_mp_pos);
-        res= col_non_nulls * sel;
-        set_if_bigger(res, col_stats->get_avg_frequency());
+          sel= (max_mp_pos - min_mp_pos);
+          res= col_non_nulls * sel;
+          set_if_bigger(res, avg_frequency);
+        }
       }
       else
         res= col_non_nulls;
@@ -3835,13 +3952,13 @@ double get_column_range_cardinality(Field *field,
   return res;
 }
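When no histogram is usable, the fallback branch above is plain linear interpolation: both endpoints are scaled to positions in [0..1] within [min_value, max_value], their difference is taken as the selectivity, and the estimate is clamped from below by the average group size. A self-contained illustration of the arithmetic (all numbers are made up):

#include <cstdio>
#include <algorithm>

int main()
{
  double col_non_nulls= 10000;    // non-NULL rows in the column
  double avg_frequency= 4;        // average rows per distinct value
  double min_mp_pos= 0.30;        // scaled position of the range start
  double max_mp_pos= 0.45;        // scaled position of the range end

  double sel= max_mp_pos - min_mp_pos;   // fraction of the value domain
  double res= col_non_nulls * sel;       // estimated rows in the range
  res= std::max(res, avg_frequency);     // the set_if_bigger() clamp
  printf("estimated rows: %.0f\n", res); // prints 1500
  return 0;
}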
-
-
 /*
   Estimate selectivity of "col=const" using a histogram
 
-  @param pos       Position of the "const" between column's min_value and
-                   max_value. This is a number in [0..1] range.
+  @param field     The field for which the selectivity is estimated
+
+  @param endpoint  The constant
+
   @param avg_sel   Average selectivity of condition "col=const" in this table.
                    It is calculated as (#non_null_values / #distinct_values).
 
@@ -3870,9 +3987,15 @@ double get_column_range_cardinality(Field *field,
     value.
 */
 
-double Histogram::point_selectivity(double pos, double avg_sel)
+double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
+                                           double avg_sel)
 {
   double sel;
+  Column_statistics *col_stats= field->read_stats;
+  store_key_image_to_rec(field, (uchar *) endpoint->key,
+                         field->key_length());
+  double pos= field->pos_in_interval(col_stats->min_value,
+                                     col_stats->max_value);
   /* Find the bucket that contains the value 'pos'. */
   uint min= find_bucket(pos, TRUE);
   uint pos_value= (uint) (pos * prec_factor());
@@ -3906,7 +4029,7 @@
   /*
     The value 'pos' fits within one single histogram bucket.
 
-    Histogram buckets have the same numbers of rows, but they cover
+    Histogram_binary buckets have the same number of rows, but they cover
     different ranges of values.
 
     We assume that values are uniformly distributed across the [0..1] value
@@ -3951,6 +4074,43 @@
   return sel;
 }
 
+
+double Histogram_binary::range_selectivity(Field *field,
+                                           key_range *min_endp,
+                                           key_range *max_endp,
+                                           double avg_sel)
+{
+  double sel, min_mp_pos, max_mp_pos;
+  Column_statistics *col_stats= field->read_stats;
+
+  if (min_endp && !(field->null_ptr && min_endp->key[0]))
+  {
+    store_key_image_to_rec(field, (uchar *) min_endp->key,
+                           field->key_length());
+    min_mp_pos=
+      field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+  }
+  else
+    min_mp_pos= 0.0;
+  if (max_endp)
+  {
+    store_key_image_to_rec(field, (uchar *) max_endp->key,
+                           field->key_length());
+    max_mp_pos=
+      field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+  }
+  else
+    max_mp_pos= 1.0;
+
+  double bucket_sel= 1.0 / (get_width() + 1);
+  uint min= find_bucket(min_mp_pos, TRUE);
+  uint max= find_bucket(max_mp_pos, FALSE);
+  sel= bucket_sel * (max - min + 1);
+
+  set_if_bigger(sel, avg_sel);
+  return sel;
+}
+
 /*
   Check whether the table is one of the persistent statistical tables.
 */
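range_selectivity() reduces a range to whole buckets: both endpoints are mapped to bucket numbers, and each of the (max - min + 1) covered buckets contributes 1/(#buckets) of the rows, since a height-balanced histogram places roughly the same number of rows into every bucket. The sketch below replays that arithmetic outside the server; this find_bucket() is a simplified stand-in for Histogram_binary::find_bucket():

#include <cstdio>
#include <vector>
#include <algorithm>

// First (or last) bucket whose range can contain 'pos'. bounds[] holds
// the upper endpoint of every bucket except the last.
static unsigned find_bucket(const std::vector<double> &bounds,
                            double pos, bool first)
{
  auto it= first ? std::lower_bound(bounds.begin(), bounds.end(), pos)
                 : std::upper_bound(bounds.begin(), bounds.end(), pos);
  return (unsigned) (it - bounds.begin());
}

int main()
{
  // 7 bounds => 8 buckets, each holding ~1/8 of the rows.
  std::vector<double> bounds= {0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9};
  double bucket_sel= 1.0 / (bounds.size() + 1);

  double min_pos= 0.25, max_pos= 0.65;   // scaled range endpoints
  unsigned min= find_bucket(bounds, min_pos, true);
  unsigned max= find_bucket(bounds, max_pos, false);
  double sel= bucket_sel * (max - min + 1);

  printf("buckets %u..%u, selectivity %.3f\n", min, max, sel); // 2..4, 0.375
  return 0;
}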
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 35b3aa33acc..c0df15ea4ad 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -16,6 +16,9 @@
 #ifndef SQL_STATISTICS_H
 #define SQL_STATISTICS_H
 
+#include <vector>
+#include <string>
+
 /*
   For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are
   similar to the COMPLEMENTARY and PREFERABLY respectively except that
@@ -42,7 +45,9 @@ typedef
 enum enum_histogram_type
 {
   SINGLE_PREC_HB,
-  DOUBLE_PREC_HB
+  DOUBLE_PREC_HB,
+  JSON_HB,
+  INVALID_HISTOGRAM
 } Histogram_type;
 
 enum enum_stat_tables
@@ -120,6 +125,7 @@ int read_statistics_for_tables(THD *thd, TABLE_LIST *tables);
 int collect_statistics_for_table(THD *thd, TABLE *table);
 void delete_stat_values_for_table_share(TABLE_SHARE *table_share);
 int alloc_statistics_for_table(THD *thd, TABLE *table);
+void free_statistics_for_table(THD *thd, TABLE *table);
 int update_statistics_for_table(THD *thd, TABLE *table);
 int delete_statistics_for_table(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *tab);
 int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
@@ -140,9 +146,82 @@ double get_column_range_cardinality(Field *field,
 bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table);
 bool is_eits_usable(Field* field);
 
-class Histogram
+class Histogram_builder;
+
+/*
+  Common base for all histograms
+*/
+class Histogram_base
 {
+public:
+  virtual bool parse(MEM_ROOT *mem_root,
+                     const char *db_name, const char *table_name,
+                     Field *field, Histogram_type type_arg,
+                     const char *hist_data, size_t hist_data_len)= 0;
+  virtual void serialize(Field *to_field)= 0;
+
+  virtual Histogram_type get_type()= 0;
+
+  virtual uint get_width()= 0;
+
+  /*
+    The creation-time workflow is:
+    * create a histogram
+    * init_for_collection()
+    * create_builder()
+    * feed the data to the builder
+    * serialize()
+  */
+  virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
+                                   ulonglong size)= 0;
+  virtual Histogram_builder *create_builder(Field *col, uint col_len,
+                                            ha_rows rows)= 0;
+
+  /*
+    Histograms are usable only when the level of
+    optimizer_use_condition_selectivity is greater than 3.
+  */
+  bool is_usable(THD *thd)
+  {
+    return thd->variables.optimizer_use_condition_selectivity > 3;
+  }
+
+  virtual double point_selectivity(Field *field, key_range *endpoint,
+                                   double avg_sel)= 0;
+  virtual double range_selectivity(Field *field, key_range *min_endp,
+                                   key_range *max_endp, double avg_sel)= 0;
+
+  /*
+    Legacy: return the size of the histogram on disk.
+    This will be stored in the mysql.column_stats.hist_size column.
+    The value is not really needed, as one can look at
+    LENGTH(mysql.column_stats.histogram) directly.
+  */
+  virtual uint get_size()= 0;
+  virtual ~Histogram_base()= default;
+
+  Histogram_base() : owner(NULL) {}
+
+  /*
+    Memory management: a histogram may be (exclusively) "owned" by a
+    particular thread (done for histograms that are being collected).
+    By default, a histogram has owner==NULL and is not owned by any
+    particular thread.
+  */
+  THD *get_owner() { return owner; }
+  void set_owner(THD *thd) { owner= thd; }
+private:
+  THD *owner;
+};
+
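The creation-time workflow from the class comment (create the histogram, init_for_collection(), create_builder(), feed the data, serialize()) is easiest to picture with a toy builder that receives values and then emits bounds such that every bucket gets about the same number of values. ToyBuilder below is an illustrative stand-in, not the server API:

#include <cstdio>
#include <vector>
#include <algorithm>

struct ToyBuilder
{
  std::vector<double> vals;
  void next(double v) { vals.push_back(v); }  // feed one value

  // Pick 'nbounds' bounds splitting the sorted values into nbounds+1
  // roughly equal-height buckets (assumes vals.size() > nbounds).
  std::vector<double> finalize(unsigned nbounds)
  {
    std::sort(vals.begin(), vals.end());
    std::vector<double> bounds;
    for (unsigned i= 1; i <= nbounds; i++)
      bounds.push_back(vals[i * (vals.size() - 1) / (nbounds + 1)]);
    return bounds;
  }
};

int main()
{
  ToyBuilder builder;                    // create_builder()
  for (int i= 0; i < 100; i++)           // feed the data to the builder
    builder.next((double) i * i);        // skewed distribution
  for (double b : builder.finalize(3))   // 3 bounds = 4 buckets
    printf("bound %.0f\n", b);           // bounds crowd toward large values
  return 0;
}

Equal-height (rather than equal-width) bounds are what make the later estimate "each bucket holds 1/(#buckets) of the rows" valid regardless of how skewed the data is.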
+
+/*
+  A Height-balanced histogram that stores numeric fractions
+*/
+class Histogram_binary : public Histogram_base
+{
 private:
   Histogram_type type;
   uint8 size; /* Size of values array, in bytes */
@@ -155,22 +234,25 @@ private:
       return ((uint) (1 << 8) - 1);
     case DOUBLE_PREC_HB:
       return ((uint) (1 << 16) - 1);
+    default:
+      DBUG_ASSERT(0);
     }
     return 1;
   }
 
 public:
-  uint get_width()
+  uint get_width() override
   {
     switch (type) {
     case SINGLE_PREC_HB:
       return size;
     case DOUBLE_PREC_HB:
       return size / 2;
+    default:
+      DBUG_ASSERT(0);
     }
     return 0;
   }
-
 private:
   uint get_value(uint i)
   {
@@ -180,6 +262,8 @@ private:
       return (uint) (((uint8 *) values)[i]);
     case DOUBLE_PREC_HB:
       return (uint) uint2korr(values + i * 2);
+    default:
+      DBUG_ASSERT(0);
     }
     return 0;
   }
@@ -224,70 +308,124 @@ private:
   }
 
 public:
+  uint get_size() override { return (uint) size; }
 
-  uint get_size() { return (uint) size; }
-
-  Histogram_type get_type() { return type; }
-
-  uchar *get_values() { return (uchar *) values; }
+  Histogram_type get_type() override { return type; }
 
-  void set_size (ulonglong sz) { size= (uint8) sz; }
-
-  void set_type (Histogram_type t) { type= t; }
-
-  void set_values (uchar *vals) { values= (uchar *) vals; }
-
-  bool is_available() { return get_size() > 0 && get_values(); }
-
-  /*
-    This function checks that histograms should be usable only when
-    1) the level of optimizer_use_condition_selectivity > 3
-    2) histograms have been collected
-  */
-  bool is_usable(THD *thd)
-  {
-    return thd->variables.optimizer_use_condition_selectivity > 3 &&
-           is_available();
-  }
+  bool parse(MEM_ROOT *mem_root, const char*, const char*, Field*,
+             Histogram_type type_arg, const char *hist_data,
+             size_t hist_data_len) override;
+  void serialize(Field *to_field) override;
+  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
+                           ulonglong size) override;
+  Histogram_builder *create_builder(Field *col, uint col_len,
+                                    ha_rows rows) override;
 
   void set_value(uint i, double val)
   {
     switch (type) {
-    case SINGLE_PREC_HB:
+    case SINGLE_PREC_HB:
       ((uint8 *) values)[i]= (uint8) (val * prec_factor());
       return;
     case DOUBLE_PREC_HB:
       int2store(values + i * 2, val * prec_factor());
       return;
+    default:
+      DBUG_ASSERT(0);
+      return;
     }
   }
 
   void set_prev_value(uint i)
   {
     switch (type) {
-    case SINGLE_PREC_HB:
+    case SINGLE_PREC_HB:
       ((uint8 *) values)[i]= ((uint8 *) values)[i-1];
       return;
     case DOUBLE_PREC_HB:
       int2store(values + i * 2, uint2korr(values + i * 2 - 2));
       return;
+    default:
+      DBUG_ASSERT(0);
+      return;
     }
   }
 
-  double range_selectivity(double min_pos, double max_pos)
-  {
-    double sel;
-    double bucket_sel= 1.0/(get_width() + 1);
-    uint min= find_bucket(min_pos, TRUE);
-    uint max= find_bucket(max_pos, FALSE);
-    sel= bucket_sel * (max - min + 1);
-    return sel;
-  }
-
+  double range_selectivity(Field *field, key_range *min_endp,
+                           key_range *max_endp, double avg_sel) override;
+
   /* Estimate selectivity of "col=const" using a histogram */
-  double point_selectivity(double pos, double avg_sel);
+  double point_selectivity(Field *field, key_range *endpoint,
+                           double avg_sel) override;
+};
+
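Histogram_binary never stores column values themselves: set_value() stores each bucket bound as a fixed-point fraction of the [min, max] domain, quantized by prec_factor() to 8 bits for SINGLE_PREC_HB or 16 bits for DOUBLE_PREC_HB. A standalone illustration of the round trip and its error for the 16-bit case:

#include <cstdio>
#include <cstdint>

int main()
{
  const unsigned prec_factor= (1 << 16) - 1;        // DOUBLE_PREC_HB
  double pos= 0.333;                                // fraction in [0..1]
  uint16_t stored= (uint16_t) (pos * prec_factor);  // what set_value() keeps
  double restored= (double) stored / prec_factor;   // what get_value() yields
  printf("stored %u, restored %.6f, error %.1e\n",
         (unsigned) stored, restored, pos - restored);
  return 0;
}

With 16 bits the worst-case quantization error is about 1.5e-5 of the domain, far below the 1/(#buckets) granularity of the estimates themselves.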
+
+/*
+  This is used to collect the basic statistics from a Unique object:
+   - count of values
+   - count of distinct values
+   - count of distinct values that have occurred only once
+*/
+class Basic_stats_collector
+{
+  ulonglong count;          /* number of values retrieved */
+  ulonglong count_distinct; /* number of distinct values retrieved */
+  /* number of distinct values that occurred only once */
+  ulonglong count_distinct_single_occurence;
+
+public:
+  Basic_stats_collector()
+  {
+    count= 0;
+    count_distinct= 0;
+    count_distinct_single_occurence= 0;
+  }
+
+  ulonglong get_count_distinct() const { return count_distinct; }
+  ulonglong get_count_single_occurence() const
+  {
+    return count_distinct_single_occurence;
+  }
+  ulonglong get_count() const { return count; }
+
+  void next(void *elem, element_count elem_cnt)
+  {
+    count_distinct++;
+    if (elem_cnt == 1)
+      count_distinct_single_occurence++;
+    count+= elem_cnt;
+  }
+};
+
+
+/*
+  Histogram_builder is a helper class that is used to build histograms
+  for columns.
+
+  Do not create directly; call Histogram_base::create_builder(...).
+*/
+class Histogram_builder
+{
+protected:
+  Field *column;      /* table field for which the histogram is built */
+  uint col_length;    /* size of this field */
+  ha_rows records;    /* number of records the histogram is built for */
+
+  Histogram_builder(Field *col, uint col_len, ha_rows rows) :
+    column(col), col_length(col_len), records(rows)
+  {}
+
+public:
+  // A histogram builder will also collect the counters
+  Basic_stats_collector counters;
+
+  virtual int next(void *elem, element_count elem_cnt)= 0;
+  virtual void finalize()= 0;
+  virtual ~Histogram_builder(){}
+};
 
@@ -308,7 +446,6 @@ public:
 
   /* Array of records per key for index prefixes */
   ulonglong *idx_avg_frequency;
-  uchar *histograms;                  /* Sequence of histograms */
 };
@@ -370,8 +507,10 @@ private:
   ulonglong avg_frequency;
 
 public:
+  /* Histogram type as specified in mysql.column_stats.hist_type */
+  Histogram_type histogram_type_on_disk;
 
-  Histogram histogram;
+  Histogram_base *histogram;
 
   uint32 no_values_provided_bitmap()
   {
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 7127cbc00f6..447c1f07310 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -6496,7 +6496,8 @@ static Sys_var_enum Sys_histogram_type(
        "Specifies type of the histograms created by ANALYZE. "
        "Possible values are: "
        "SINGLE_PREC_HB - single precision height-balanced, "
-       "DOUBLE_PREC_HB - double precision height-balanced.",
+       "DOUBLE_PREC_HB - double precision height-balanced, "
+       "JSON_HB - height-balanced, stored as JSON.",
        SESSION_VAR(histogram_type), CMD_LINE(REQUIRED_ARG),
        histogram_types, DEFAULT(1));
 
diff --git a/sql/table.h b/sql/table.h
index 6aa75df39c6..221e18bc926 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -679,16 +679,21 @@ class TABLE_STATISTICS_CB
 public:
   MEM_ROOT  mem_root; /* MEM_ROOT to allocate statistical data for the table */
   Table_statistics *table_stats; /* Structure to access the statistical data */
-  ulong total_hist_size;         /* Total size of all histograms */
+
+  /*
+    Whether the table has histograms.
+    (If the table has none, histograms_are_ready() can finish sooner.)
+  */
+  bool have_histograms;
 
   bool histograms_are_ready() const
   {
-    return !total_hist_size || hist_state.is_ready();
+    return !have_histograms || hist_state.is_ready();
   }
 
   bool start_histograms_load()
   {
-    return total_hist_size && hist_state.start_load();
+    return have_histograms && hist_state.start_load();
   }
 
   void end_histograms_load() { hist_state.end_load(); }
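For reference, the counters accumulated by Basic_stats_collector::next() while the Unique object walks its tree of (value, occurrence-count) pairs behave like this standalone sketch (the std::map stands in for the Unique tree; in the server, element_count is an ulonglong):

#include <cstdio>
#include <map>
#include <string>

int main()
{
  // Pretend these came from a Unique tree walk: value -> occurrences.
  std::map<std::string, unsigned long long> tree=
    {{"red", 5}, {"green", 1}, {"blue", 2}, {"teal", 1}};

  unsigned long long count= 0, distinct= 0, single= 0;
  for (auto &e : tree)            // one next() call per distinct value
  {
    distinct++;                   // count_distinct
    if (e.second == 1)
      single++;                   // count_distinct_single_occurence
    count+= e.second;             // total values seen
  }
  printf("values=%llu distinct=%llu single-occurrence=%llu\n",
         count, distinct, single); // 9, 4, 2
  return 0;
}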