iblt.cpp
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * Copyright (c) 2014-2024, The University of Memphis
4  *
5  * This file is part of PSync.
6  * See AUTHORS.md for complete list of PSync authors and contributors.
7  *
8  * PSync is free software: you can redistribute it and/or modify it under the terms
9  * of the GNU Lesser General Public License as published by the Free Software Foundation,
10  * either version 3 of the License, or (at your option) any later version.
11  *
12  * PSync is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14  * PURPOSE. See the GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License along with
17  * PSync, e.g., in COPYING.md file. If not, see <http://www.gnu.org/licenses/>.
18  *
19 
20  * This file incorporates work covered by the following copyright and
21  * permission notice:
22 
23  * The MIT License (MIT)
24 
25  * Copyright (c) 2014 Gavin Andresen
26 
27  * Permission is hereby granted, free of charge, to any person obtaining a copy
28  * of this software and associated documentation files (the "Software"), to deal
29  * in the Software without restriction, including without limitation the rights
30  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31  * copies of the Software, and to permit persons to whom the Software is
32  * furnished to do so, subject to the following conditions:
33 
34  * The above copyright notice and this permission notice shall be included in all
35  * copies or substantial portions of the Software.
36 
37  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43  * SOFTWARE.
44 */
45 
46 #include "PSync/detail/iblt.hpp"
47 #include "PSync/detail/util.hpp"
48 
49 #include <ndn-cxx/util/exception.hpp>
50 
51 namespace psync::detail {
52 
53 namespace be = boost::endian;
54 
55 constexpr size_t ENTRY_SIZE = sizeof(HashTableEntry::count) + sizeof(HashTableEntry::keySum) +
57 
58 bool
60 {
61  if (count == 1 || count == -1) {
62  uint32_t check = murmurHash3(N_HASHCHECK, keySum);
63  return keyCheck == check;
64  }
65 
66  return false;
67 }
68 
69 bool
71 {
72  return count == 0 && keySum == 0 && keyCheck == 0;
73 }
74 
75 IBLT::IBLT(size_t expectedNumEntries, CompressionScheme scheme)
76  : m_compressionScheme(scheme)
77 {
78  // 1.5x expectedNumEntries gives very low probability of decoding failure
79  size_t nEntries = expectedNumEntries + expectedNumEntries / 2;
80  // make nEntries exactly divisible by N_HASH
81  size_t remainder = nEntries % N_HASH;
82  if (remainder != 0) {
83  nEntries += (N_HASH - remainder);
84  }
85 
86  m_hashTable.resize(nEntries);
87 }
88 
89 void
90 IBLT::initialize(const ndn::name::Component& ibltName)
91 {
92  auto decompressed = decompress(m_compressionScheme, ibltName.value_bytes());
93  if (decompressed->size() != ENTRY_SIZE * m_hashTable.size()) {
94  NDN_THROW(Error("Received IBF cannot be decoded!"));
95  }
96 
97  const uint8_t* input = decompressed->data();
98  for (auto& entry : m_hashTable) {
99  entry.count = be::endian_load<int32_t, sizeof(int32_t), be::order::big>(input);
100  input += sizeof(entry.count);
101 
102  entry.keySum = be::endian_load<uint32_t, sizeof(uint32_t), be::order::big>(input);
103  input += sizeof(entry.keySum);
104 
105  entry.keyCheck = be::endian_load<uint32_t, sizeof(uint32_t), be::order::big>(input);
106  input += sizeof(entry.keyCheck);
107  }
108 }
109 
110 static inline void
111 ibltUpdate(std::vector<HashTableEntry>& ht, int32_t plusOrMinus, uint32_t key)
112 {
113  size_t bucketsPerHash = ht.size() / N_HASH;
114 
115  for (size_t i = 0; i < N_HASH; i++) {
116  size_t startEntry = i * bucketsPerHash;
117  uint32_t h = murmurHash3(i, key);
118  HashTableEntry& entry = ht.at(startEntry + (h % bucketsPerHash));
119  entry.count += plusOrMinus;
120  entry.keySum ^= key;
121  entry.keyCheck ^= murmurHash3(N_HASHCHECK, key);
122  }
123 }
124 
125 void
126 IBLT::insert(uint32_t key)
127 {
128  ibltUpdate(m_hashTable, 1, key);
129 }
130 
131 void
132 IBLT::erase(uint32_t key)
133 {
134  ibltUpdate(m_hashTable, -1, key);
135 }
136 
137 void
138 IBLT::appendToName(ndn::Name& name) const
139 {
140  std::vector<uint8_t> buffer(ENTRY_SIZE * m_hashTable.size());
141  uint8_t* output = buffer.data();
142  for (const auto& entry : m_hashTable) {
143  be::endian_store<int32_t, sizeof(int32_t), be::order::big>(output, entry.count);
144  output += sizeof(entry.count);
145 
146  be::endian_store<uint32_t, sizeof(uint32_t), be::order::big>(output, entry.keySum);
147  output += sizeof(entry.keySum);
148 
149  be::endian_store<uint32_t, sizeof(uint32_t), be::order::big>(output, entry.keyCheck);
150  output += sizeof(entry.keyCheck);
151  }
152 
153  auto compressed = compress(m_compressionScheme, buffer);
154  name.append(ndn::name::Component(std::move(compressed)));
155 }
156 
157 std::ostream&
158 operator<<(std::ostream& os, const IBLT& iblt)
159 {
160  os << "count keySum keyCheckMatch\n";
161  for (const auto& entry : iblt.getHashTable()) {
162  os << entry.count << " " << entry.keySum << " "
163  << ((entry.isEmpty() || murmurHash3(N_HASHCHECK, entry.keySum) == entry.keyCheck) ? "true" : "false")
164  << "\n";
165  }
166  return os;
167 }
168 
169 IBLTDiff
170 operator-(const IBLT& lhs, const IBLT& rhs)
171 {
172  const auto& lht = lhs.getHashTable();
173  const auto& rht = rhs.getHashTable();
174  BOOST_ASSERT(lht.size() == rht.size());
175 
176  std::vector<HashTableEntry> peeled(lht.size());
177  std::transform(lht.begin(), lht.end(), rht.begin(), peeled.begin(),
178  [] (const HashTableEntry& lhe, const HashTableEntry& rhe) {
179  HashTableEntry diff;
180  diff.count = lhe.count - rhe.count;
181  diff.keySum = lhe.keySum ^ rhe.keySum;
182  diff.keyCheck = lhe.keyCheck ^ rhe.keyCheck;
183  return diff;
184  }
185  );
186 
187  std::set<uint32_t> positive;
188  std::set<uint32_t> negative;
189 
190  size_t nErased = 0;
191  do {
192  nErased = 0;
193  for (const auto& entry : peeled) {
194  if (entry.isPure()) {
195  if (entry.count == 1) {
196  positive.insert(entry.keySum);
197  }
198  else {
199  negative.insert(entry.keySum);
200  }
201  ibltUpdate(peeled, -entry.count, entry.keySum);
202  ++nErased;
203  }
204  }
205  } while (nErased > 0);
206 
207  // If any buckets for one of the hash functions is not empty,
208  // then we didn't peel them all:
209  bool canDecode = std::all_of(peeled.begin(), peeled.end(),
210  [] (const HashTableEntry& entry) { return entry.isEmpty(); });
211  return {canDecode, std::move(positive), std::move(negative)};
212 }
213 
214 } // namespace psync::detail
Invertible Bloom Lookup Table (Invertible Bloom Filter)
Definition: iblt.hpp:95
IBLT(size_t expectedNumEntries, CompressionScheme scheme)
constructor
Definition: iblt.cpp:75
void insert(uint32_t key)
Definition: iblt.cpp:126
const std::vector< HashTableEntry > & getHashTable() const
Definition: iblt.hpp:128
void initialize(const ndn::name::Component &ibltName)
Populate the hash table using the vector representation of IBLT.
Definition: iblt.cpp:90
void appendToName(ndn::Name &name) const
Appends self to name.
Definition: iblt.cpp:138
void erase(uint32_t key)
Definition: iblt.cpp:132
constexpr size_t N_HASH
Definition: iblt.hpp:86
uint32_t murmurHash3(const void *key, size_t len, uint32_t seed)
Definition: util.cpp:58
constexpr size_t N_HASHCHECK
Definition: iblt.hpp:87
std::ostream & operator<<(std::ostream &os, const IBLT &iblt)
Definition: iblt.cpp:158
IBLTDiff operator-(const IBLT &lhs, const IBLT &rhs)
Compute the difference between two IBLTs.
Definition: iblt.cpp:170
std::shared_ptr< ndn::Buffer > compress(CompressionScheme scheme, ndn::span< const uint8_t > buffer)
Definition: util.cpp:124
std::shared_ptr< ndn::Buffer > decompress(CompressionScheme scheme, ndn::span< const uint8_t > buffer)
Definition: util.cpp:183
constexpr size_t ENTRY_SIZE
Definition: iblt.cpp:55
CompressionScheme
Definition: common.hpp:43