summaryrefslogtreecommitdiff
path: root/gtk/gtkcountingbloomfilterprivate.h
blob: 15c7d4a07f9a1fa798aea9ccff112afe04471e98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 * Copyright © 2020 Benjamin Otte
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Authors: Benjamin Otte <otte@gnome.org>
 */

#pragma once

#include <glib.h>


G_BEGIN_DECLS

/*
 * GtkCountingBloomFilter:
 *
 * This implements a counting bloom filter. A bloom filter is a space-efficient
 * probabilistic data structure that is used to test whether an element may be
 * a member of a set.  
 * The Wikipedia links provide a lot more details into how and why this data
 * structure works and when to use it.
 *
 * This implementation is based on similar implementations in web browsers, because it's
 * original use case is the same: Making CSS lookups fast.
 *
 * As such, the number of bits is hardcoded to 12 and the elements in the set
 * are 16bit hash values.  
 * It is possible to use 32bit hash values or a different number of bits, should this
 * be considered useful.
 *
 * See: [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter),
 *   [Counting Bloom filter](https://en.wikipedia.org/wiki/Counting_Bloom_filter)
 */

/* The number of bits from the hash we care about */
#define GTK_COUNTING_BLOOM_FILTER_BITS (12)

/* The necessary size of the filter */
#define GTK_COUNTING_BLOOM_FILTER_SIZE (1 << GTK_COUNTING_BLOOM_FILTER_BITS)

typedef struct _GtkCountingBloomFilter GtkCountingBloomFilter;

struct _GtkCountingBloomFilter
{
  guint8        buckets[GTK_COUNTING_BLOOM_FILTER_SIZE];
};

static inline void      gtk_counting_bloom_filter_add           (GtkCountingBloomFilter         *self,
                                                                 guint16                         hash);
static inline void      gtk_counting_bloom_filter_remove        (GtkCountingBloomFilter         *self,
                                                                 guint16                         hash);
static inline gboolean  gtk_counting_bloom_filter_may_contain   (const GtkCountingBloomFilter   *self,
                                                                 guint16                         hash);


/*
 * GTK_COUNTING_BLOOM_FILTER_INIT:
 *
 * Initialize the bloom filter. As bloom filters are always stack-allocated,
 * initialization should happen when defining them, like:
 * ```c
 * GtkCountingBloomFilter filter = GTK_COUNTING_BLOOM_FILTER_INIT;
 * ```
 *
 * The filter does not need to be freed.
 */
#define GTK_COUNTING_BLOOM_FILTER_INIT {{0}}

/*
 * gtk_counting_bloom_filter_add:
 * @self: a `GtkCountingBloomFilter`
 * @hash: a hash value to add to the filter
 *
 * Adds the hash value to the filter.
 *
 * If the same hash value gets added multiple times, it will
 * be considered as contained in the hash until it has been
 * removed as many times.
 **/
static inline void
gtk_counting_bloom_filter_add (GtkCountingBloomFilter *self,
                               guint16                 hash)
{
  guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE;

  if (self->buckets[bucket] == 255)
    return;

  self->buckets[bucket]++;
}

/*
 * gtk_counting_bloom_filter_remove:
 * @self: a `GtkCountingBloomFilter`
 * @hash: a hash value to remove from the filter
 *
 * Removes a hash value from the filter that has previously
 * been added via gtk_counting_bloom_filter_add().
 **/
static inline void
gtk_counting_bloom_filter_remove (GtkCountingBloomFilter *self,
                                  guint16                 hash)
{
  guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE;

  if (self->buckets[bucket] == 255)
    return;

  g_assert (self->buckets[bucket] > 0);

  self->buckets[bucket]--;
}

/*
 * gtk_counting_bloom_filter_may_contain:
 * @self: a `GtkCountingBloomFilter`
 * @hash: the hash value to check
 *
 * Checks if @hash may be contained in @self.
 *
 * A return value of %FALSE means that @hash is definitely not part
 * of @self.
 *
 * A return value of %TRUE means that @hash may or may not have been
 * added to @self. In that case a different method must be used to
 * confirm that @hash is indeed part of the set.
 *
 * Returns: %FALSE if @hash is not part of @self.
 **/
static inline gboolean
gtk_counting_bloom_filter_may_contain (const GtkCountingBloomFilter *self,
                                       guint16                       hash)
{
  guint16 bucket = hash % GTK_COUNTING_BLOOM_FILTER_SIZE;

  return self->buckets[bucket] != 0;
}


G_END_DECLS