summaryrefslogtreecommitdiff
path: root/chromium/net/extras/preload_data/decoder.h
blob: b6076ac344c16ae73f01352fe9ff755228f3c02a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef NET_EXTRAS_PRELOAD_DATA_DECODER_H_
#define NET_EXTRAS_PRELOAD_DATA_DECODER_H_

#include <stdint.h>

#include <string>

namespace net {

namespace extras {

// Decodes an entry from preloaded data.
// Clients must implement ReadEntry() method to read the specific type of data
// they are interested in.
class PreloadDecoder {
 public:
  // These must match the values in net/tools/huffman_trie/trie/trie_writer.h.
  enum : char { kEndOfString = 0, kEndOfTable = 127 };

  // BitReader is a class that allows a bytestring to be read bit-by-bit.
  class BitReader {
   public:
    BitReader(const uint8_t* bytes, size_t num_bits);

    BitReader(const BitReader&) = delete;
    BitReader& operator=(const BitReader&) = delete;

    // Next sets |*out| to the next bit from the input. It returns false if no
    // more bits are available or true otherwise.
    bool Next(bool* out);

    // Read sets the |num_bits| least-significant bits of |*out| to the value of
    // the next |num_bits| bits from the input. It returns false if there are
    // insufficient bits in the input or true otherwise.
    bool Read(unsigned num_bits, uint32_t* out);

    // Decodes a size_t from the reader, putting the resulting value in |*out|.
    // Returns false if there are insufficient bits to read and true otherwise.
    //
    // This function's inverse is TrieBitBuffer::WriteSize.
    //
    // The encoding is a prefix code optimized for small values (less than 4).
    // It is designed for the lengths of prefixes in the HSTS Preload list trie.
    // Compared to the unary encoding that was previously used (where the number
    // of bits used is one plus the value being encoded), this uses one more bit
    // for encoding 0 and 1, and the same number of bits for encoding 2, and
    // fewer bits for encoding values greater than 2. At the time of writing,
    // 35% of the lengths encoded in the trie were 0 or 1, 11% were 2, and the
    // remaining 54% were greater than 2.
    //
    // This encoding scheme uses a variable number of bits to encode each value.
    // There are fixed values for 0, 1, 2, and 3, and then a simple rule is used
    // for 4 and greater. 0 uses 2 bits; 1 through 3 use 3 bits. The fixed
    // values are as follows:
    //
    //   0: 0b00
    //   1: 0b100
    //   2: 0b101
    //   3: 0b110
    //
    // Note that none of the fixed values are prefixed with 0b01 or 0b111. These
    // prefixes are used with a unary-like encoding for values 4 and above.
    // Zero or more 1s, followed by a 0, are appended to one of those prefixes.
    // Even values use the prefix 0b01, and odd values use the prefix 0b111. The
    // number of 1s to append is half the value (rounded down) minus 1.
    bool DecodeSize(size_t* out);

    // Seek sets the current offest in the input to bit number |offset|. It
    // returns true if |offset| is within the range of the input and false
    // otherwise.
    bool Seek(size_t offset);

   private:
    const uint8_t* const bytes_;
    const size_t num_bits_;
    const size_t num_bytes_;
    // current_byte_index_ contains the current byte offset in |bytes_|.
    size_t current_byte_index_ = 0;
    // current_byte_ contains the current byte of the input.
    uint8_t current_byte_;
    // num_bits_used_ contains the number of bits of |current_byte_| that have
    // been read.
    unsigned num_bits_used_ = 8;
  };

  // HuffmanDecoder is a very simple Huffman reader. The input Huffman tree is
  // simply encoded as a series of two-byte structures. The first byte
  // determines the "0" pointer for that node and the second the "1" pointer.
  // Each byte either has the MSB set, in which case the bottom 7 bits are the
  // value for that position, or else the bottom seven bits contain the index of
  // a node.
  //
  // The tree is decoded by walking rather than a table-driven approach.
  class HuffmanDecoder {
   public:
    HuffmanDecoder(const uint8_t* tree, size_t tree_bytes);

    HuffmanDecoder(const HuffmanDecoder&) = delete;
    HuffmanDecoder& operator=(const HuffmanDecoder&) = delete;

    bool Decode(PreloadDecoder::BitReader* reader, char* out) const;

   private:
    const uint8_t* const tree_;
    const size_t tree_bytes_;
  };

  PreloadDecoder(const uint8_t* huffman_tree,
                 size_t huffman_tree_size,
                 const uint8_t* trie,
                 size_t trie_bits,
                 size_t trie_root_position);

  PreloadDecoder(const PreloadDecoder&) = delete;
  PreloadDecoder& operator=(const PreloadDecoder&) = delete;

  virtual ~PreloadDecoder();

  // Resolves search keyword given by |search| in the preloaded data. Returns
  // false on internal error and true otherwise. After a successful return,
  // |*out_found| is true iff a relevant entry has been found. In the case of
  // HSTS data, |search| is the hostname being searched.
  //
  // Although this code should be robust, it never processes attacker-controlled
  // data -- it only operates on the preloaded data built into the binary.
  //
  // The preloaded data is represented as a trie and matches |search|
  // backwards. Each node in the trie starts with a number of characters, which
  // must match exactly. After that is a dispatch table which maps the next
  // character in the search keyword to another node in the trie.
  //
  // In the dispatch table, the zero character represents the "end of string"
  // (which is the *beginning* of the search keyword since we process it
  // backwards). The value in that case is special -- rather than an offset to
  // another trie node, it contains the searched entry (for HSTS data, it
  // contains whether subdomains are included, pinsets etc.). Clients must
  // implement ReadEntry to read the entry at this location.
  //
  // Dispatch tables are always given in order, but the "end of string" (zero)
  // value always comes before an entry for '.'.
  bool Decode(const std::string& search, bool* out_found);

 protected:
  virtual bool ReadEntry(BitReader* reader,
                         const std::string& search,
                         size_t current_search_offset,
                         bool* out_found) = 0;

  const HuffmanDecoder& huffman_decoder() const { return huffman_decoder_; }

 private:
  HuffmanDecoder huffman_decoder_;
  BitReader bit_reader_;

  const size_t trie_root_position_;
};

}  // namespace extras

}  // namespace net

#endif  // NET_EXTRAS_PRELOAD_DATA_DECODER_H_