summaryrefslogtreecommitdiff
path: root/chromium/components/federated_learning/sim_hash.h
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/federated_learning/sim_hash.h')
-rw-r--r--chromium/components/federated_learning/sim_hash.h44
1 files changed, 44 insertions, 0 deletions
diff --git a/chromium/components/federated_learning/sim_hash.h b/chromium/components/federated_learning/sim_hash.h
new file mode 100644
index 00000000000..55030d9d2e3
--- /dev/null
+++ b/chromium/components/federated_learning/sim_hash.h
@@ -0,0 +1,44 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_
+#define COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <set>
+#include <string>
+#include <unordered_set>
+
+namespace federated_learning {
+
+// A bit vector with 2^64 addressable positions, stored sparsely (see below).
+class LargeBitVector {
+ public:
+ LargeBitVector();  // NOTE(review): presumably starts with no bits set.
+ LargeBitVector(const LargeBitVector&);  // Copy-constructible.
+ ~LargeBitVector();
+
+ void SetBit(uint64_t pos);  // Sets the bit at position |pos|.
+ const std::set<uint64_t>& PositionsOfSetBits() const;  // Ascending, unique.
+
+ private:
+ // Sparse representation: rather than storing all 2^64 bits, this holds only
+ // the positions of the bits that are set. std::set keeps them unique and in
+ // ascending order.
+ std::set<uint64_t> positions_of_set_bits_;
+};
+
+// Test-only hook: sets the two seeds used for generating the random Gaussian.
+void SetSeedsForTesting(uint64_t seed1, uint64_t seed2);
+
+// SimHashes the 2^64 bit vector |input| down to an |output_dimensions|-bit
+// number. |output_dimensions| must be in the range [1, 64].
+uint64_t SimHashBits(const LargeBitVector& input, size_t output_dimensions);
+
+// SimHashes the set of strings |input| down to an |output_dimensions|-bit
+// number. |output_dimensions| must be in the range [1, 64].
+uint64_t SimHashStrings(const std::unordered_set<std::string>& input,
+ size_t output_dimensions);
+
+} // namespace federated_learning
+
+#endif // COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_