summaryrefslogtreecommitdiff
path: root/storage/mroonga/vendor/groonga/lib/mrb/scripts/expression_size_estimator.rb
blob: 73b44cae3dd236c13fb54f42abe48dc4b4f3410c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
module Groonga
  class ExpressionSizeEstimator
    def initialize(expression, table)
      @expression = expression
      @table = table
      @table_size = @table.size
    end

    def estimate
      builder = ScanInfoBuilder.new(@expression, Operator::OR, 0)
      data_list = builder.build
      return @table_size if data_list.nil?

      or_data_list = group_data_list(data_list)
      or_sizes = or_data_list.collect do |and_data_list|
        and_sizes = and_data_list.collect do |data|
          size = estimate_data(data)
          if data.logical_op == Operator::AND_NOT
            size = @table_size - size
            size = 0 if size < 0
          end
          size
        end
        and_sizes.min
      end
      or_sizes.max
    end

    private
    def group_data_list(data_list)
      or_data_list = [[]]
      data_list.each do |data|
        next if data.op == Operator::NOP

        and_data_list = or_data_list.last
        if and_data_list.empty?
          and_data_list << data
        else
          case data.logical_op
          when Operator::AND, Operator::AND_NOT
            and_data_list << data
          else
            and_data_list = [data]
            or_data_list << and_data_list
          end
        end
      end
      or_data_list
    end

    def estimate_data(data)
      search_index = data.search_indexes.first
      return @table_size if search_index.nil?

      index_column = resolve_index_column(search_index.index_column,
                                          data.op)
      return @table_size if index_column.nil?

      size = nil
      case data.op
      when Operator::MATCH
        size = estimate_match(data, index_column)
      when Operator::REGEXP
        size = estimate_regexp(data, index_column)
      when Operator::EQUAL
        size = estimate_equal(data, index_column)
      when Operator::LESS,
           Operator::LESS_EQUAL,
           Operator::GREATER,
           Operator::GREATER_EQUAL
        size = estimate_range(data, index_column)
      when Operator::CALL
        procedure = data.args.first
        if procedure.is_a?(Procedure) and procedure.name == "between"
          size = estimate_between(data, index_column)
        end
      end
      size || @table_size
    end

    def resolve_index_column(index_column, operator)
      while index_column.is_a?(Accessor)
        index_info = index_column.find_index(operator)
        return nil if index_info.nil?
        index_column = index_info.index
      end

      index_column
    end

    def estimate_match(data, index_column)
      index_column.estimate_size(:query => data.query.value)
    end

    def estimate_regexp(data, index_column)
      index_column.estimate_size(:query => data.query.value,
                                 :mode => data.op)
    end

    def estimate_equal(data, index_column)
      lexicon = index_column.lexicon
      term_id = lexicon[data.query]
      return 0 if term_id.nil?

      index_column.estimate_size(:term_id => term_id)
    end

    def estimate_range(data, index_column)
      lexicon = index_column.lexicon
      value = data.query.value
      options = {}
      case data.op
      when Operator::LESS
        options[:max] = value
        options[:flags] = TableCursorFlags::LT
      when Operator::LESS_EQUAL
        options[:max] = value
        options[:flags] = TableCursorFlags::LE
      when Operator::GREATER
        options[:min] = value
        options[:flags] = TableCursorFlags::GT
      when Operator::GREATER_EQUAL
        options[:min] = value
        options[:flags] = TableCursorFlags::GE
      end
      TableCursor.open(lexicon, options) do |cursor|
        index_column.estimate_size(:lexicon_cursor => cursor)
      end
    end

    def estimate_between(data, index_column)
      lexicon = index_column.lexicon
      _, _, min, min_border, max, max_border = data.args
      options = {
        :min => min,
        :max => max,
        :flags => 0,
      }
      if min_border == "include"
        options[:flags] |= TableCursorFlags::LT
      else
        options[:flags] |= TableCursorFlags::LE
      end
      if max_border == "include"
        options[:flags] |= TableCursorFlags::GT
      else
        options[:flags] |= TableCursorFlags::GE
      end

      TableCursor.open(lexicon, options) do |cursor|
        index_column.estimate_size(:lexicon_cursor => cursor)
      end
    end
  end
end