iblt.cpp
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * Copyright (c) 2014-2020, The University of Memphis
4  *
5  * This file is part of PSync.
6  * See AUTHORS.md for complete list of PSync authors and contributors.
7  *
8  * PSync is free software: you can redistribute it and/or modify it under the terms
9  * of the GNU Lesser General Public License as published by the Free Software Foundation,
10  * either version 3 of the License, or (at your option) any later version.
11  *
12  * PSync is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14  * PURPOSE. See the GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License along with
17  * PSync, e.g., in COPYING.md file. If not, see <http://www.gnu.org/licenses/>.
18  *
19 
20  * This file incorporates work covered by the following copyright and
21  * permission notice:
22 
23  * The MIT License (MIT)
24 
25  * Copyright (c) 2014 Gavin Andresen
26 
27  * Permission is hereby granted, free of charge, to any person obtaining a copy
28  * of this software and associated documentation files (the "Software"), to deal
29  * in the Software without restriction, including without limitation the rights
30  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31  * copies of the Software, and to permit persons to whom the Software is
32  * furnished to do so, subject to the following conditions:
33 
34  * The above copyright notice and this permission notice shall be included in all
35  * copies or substantial portions of the Software.
36 
37  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43  * SOFTWARE.
44 */
45 
46 #include "PSync/detail/iblt.hpp"
47 #include "PSync/detail/util.hpp"
48 
49 #include <ndn-cxx/util/exception.hpp>
50 
51 namespace psync {
52 namespace detail {
53 
54 namespace be = boost::endian;
55 
56 const size_t N_HASH(3);
57 const size_t N_HASHCHECK(11);
58 
59 bool
61 {
62  if (count == 1 || count == -1) {
63  uint32_t check = murmurHash3(N_HASHCHECK, keySum);
64  return keyCheck == check;
65  }
66 
67  return false;
68 }
69 
70 bool
72 {
73  return count == 0 && keySum == 0 && keyCheck == 0;
74 }
75 
76 IBLT::IBLT(size_t expectedNumEntries, CompressionScheme scheme)
77  : m_compressionScheme(scheme)
78 {
79  // 1.5x expectedNumEntries gives very low probability of decoding failure
80  size_t nEntries = expectedNumEntries + expectedNumEntries / 2;
81  // make nEntries exactly divisible by N_HASH
82  size_t remainder = nEntries % N_HASH;
83  if (remainder != 0) {
84  nEntries += (N_HASH - remainder);
85  }
86 
87  m_hashTable.resize(nEntries);
88 }
89 
90 void
91 IBLT::initialize(const ndn::name::Component& ibltName)
92 {
93  const auto& values = extractValueFromName(ibltName);
94 
95  if (3 * m_hashTable.size() != values.size()) {
96  NDN_THROW(Error("Received IBF cannot be decoded!"));
97  }
98 
99  for (size_t i = 0; i < m_hashTable.size(); i++) {
100  HashTableEntry& entry = m_hashTable.at(i);
101  if (values[i * 3] != 0) {
102  entry.count = values[i * 3];
103  entry.keySum = values[(i * 3) + 1];
104  entry.keyCheck = values[(i * 3) + 2];
105  }
106  }
107 }
108 
109 void
110 IBLT::update(int plusOrMinus, uint32_t key)
111 {
112  size_t bucketsPerHash = m_hashTable.size() / N_HASH;
113 
114  for (size_t i = 0; i < N_HASH; i++) {
115  size_t startEntry = i * bucketsPerHash;
116  uint32_t h = murmurHash3(i, key);
117  HashTableEntry& entry = m_hashTable.at(startEntry + (h % bucketsPerHash));
118  entry.count += plusOrMinus;
119  entry.keySum ^= key;
120  entry.keyCheck ^= murmurHash3(N_HASHCHECK, key);
121  }
122 }
123 
124 void
125 IBLT::insert(uint32_t key)
126 {
127  update(INSERT, key);
128 }
129 
130 void
131 IBLT::erase(uint32_t key)
132 {
133  update(ERASE, key);
134 }
135 
136 bool
137 IBLT::listEntries(std::set<uint32_t>& positive, std::set<uint32_t>& negative) const
138 {
139  IBLT peeled = *this;
140 
141  size_t nErased = 0;
142  do {
143  nErased = 0;
144  for (const auto& entry : peeled.m_hashTable) {
145  if (entry.isPure()) {
146  if (entry.count == 1) {
147  positive.insert(entry.keySum);
148  }
149  else {
150  negative.insert(entry.keySum);
151  }
152  peeled.update(-entry.count, entry.keySum);
153  ++nErased;
154  }
155  }
156  } while (nErased > 0);
157 
158  // If any buckets for one of the hash functions is not empty,
159  // then we didn't peel them all:
160  for (const auto& entry : peeled.m_hashTable) {
161  if (entry.isEmpty() != true) {
162  return false;
163  }
164  }
165 
166  return true;
167 }
168 
169 IBLT
170 IBLT::operator-(const IBLT& other) const
171 {
172  BOOST_ASSERT(m_hashTable.size() == other.m_hashTable.size());
173 
174  IBLT result(*this);
175  for (size_t i = 0; i < m_hashTable.size(); i++) {
176  HashTableEntry& e1 = result.m_hashTable.at(i);
177  const HashTableEntry& e2 = other.m_hashTable.at(i);
178  e1.count -= e2.count;
179  e1.keySum ^= e2.keySum;
180  e1.keyCheck ^= e2.keyCheck;
181  }
182 
183  return result;
184 }
185 
186 void
187 IBLT::appendToName(ndn::Name& name) const
188 {
189  constexpr size_t unitSize = sizeof(m_hashTable[0].count) +
190  sizeof(m_hashTable[0].keySum) +
191  sizeof(m_hashTable[0].keyCheck);
192 
193  size_t tableSize = unitSize * m_hashTable.size();
194  std::vector<uint8_t> table(tableSize);
195 
196  for (size_t i = 0; i < m_hashTable.size(); i++) {
197  uint32_t count = be::native_to_big(static_cast<uint32_t>(m_hashTable[i].count));
198  uint32_t keySum = be::native_to_big(static_cast<uint32_t>(m_hashTable[i].keySum));
199  uint32_t keyCheck = be::native_to_big(static_cast<uint32_t>(m_hashTable[i].keyCheck));
200 
201  std::memcpy(&table[i * unitSize], &count, sizeof(count));
202  std::memcpy(&table[(i * unitSize) + 4], &keySum, sizeof(keySum));
203  std::memcpy(&table[(i * unitSize) + 8], &keyCheck, sizeof(keyCheck));
204  }
205 
206  auto compressed = compress(m_compressionScheme, table.data(), table.size());
207  name.append(ndn::name::Component(std::move(compressed)));
208 }
209 
210 std::vector<uint32_t>
211 IBLT::extractValueFromName(const ndn::name::Component& ibltName) const
212 {
213  auto decompressedBuf = decompress(m_compressionScheme, ibltName.value(), ibltName.value_size());
214 
215  if (decompressedBuf->size() % 4 != 0) {
216  NDN_THROW(Error("Received IBF cannot be decompressed correctly!"));
217  }
218 
219  size_t n = decompressedBuf->size() / 4;
220  std::vector<uint32_t> values(n, 0);
221 
222  for (size_t i = 0; i < n; i++) {
223  uint32_t t;
224  std::memcpy(&t, &(*decompressedBuf)[i * 4], sizeof(t));
225  values[i] = be::big_to_native(t);
226  }
227 
228  return values;
229 }
230 
231 bool
232 operator==(const IBLT& iblt1, const IBLT& iblt2)
233 {
234  auto iblt1HashTable = iblt1.getHashTable();
235  auto iblt2HashTable = iblt2.getHashTable();
236  if (iblt1HashTable.size() != iblt2HashTable.size()) {
237  return false;
238  }
239 
240  size_t N = iblt1HashTable.size();
241 
242  for (size_t i = 0; i < N; i++) {
243  if (iblt1HashTable[i].count != iblt2HashTable[i].count ||
244  iblt1HashTable[i].keySum != iblt2HashTable[i].keySum ||
245  iblt1HashTable[i].keyCheck != iblt2HashTable[i].keyCheck)
246  return false;
247  }
248 
249  return true;
250 }
251 
252 bool
253 operator!=(const IBLT& iblt1, const IBLT& iblt2)
254 {
255  return !(iblt1 == iblt2);
256 }
257 
258 std::ostream&
259 operator<<(std::ostream& os, const IBLT& iblt)
260 {
261  os << "count keySum keyCheckMatch\n";
262  for (const auto& entry : iblt.getHashTable()) {
263  os << entry.count << " " << entry.keySum << " "
264  << ((entry.isEmpty() || murmurHash3(N_HASHCHECK, entry.keySum) == entry.keyCheck) ? "true" : "false")
265  << "\n";
266  }
267  return os;
268 }
269 
270 } // namespace detail
271 } // namespace psync
void appendToName(ndn::Name &name) const
Appends self to name.
Definition: iblt.cpp:187
std::ostream & operator<<(std::ostream &os, const IBLT &iblt)
Definition: iblt.cpp:259
const size_t N_HASH
Invertible Bloom Lookup Table (Invertible Bloom Filter)
Definition: iblt.hpp:81
Definition: common.hpp:33
IBLT(size_t expectedNumEntries, CompressionScheme scheme)
constructor
Definition: iblt.cpp:76
std::shared_ptr< ndn::Buffer > decompress(CompressionScheme scheme, const uint8_t *buffer, size_t bufferSize)
Definition: util.cpp:176
bool operator!=(const IBLT &iblt1, const IBLT &iblt2)
Definition: iblt.cpp:253
void insert(uint32_t key)
Definition: iblt.cpp:125
bool listEntries(std::set< uint32_t > &positive, std::set< uint32_t > &negative) const
List all the entries in the IBLT.
Definition: iblt.cpp:137
bool operator==(const IBLT &iblt1, const IBLT &iblt2)
Definition: iblt.cpp:232
CompressionScheme
Definition: common.hpp:35
std::shared_ptr< ndn::Buffer > compress(CompressionScheme scheme, const uint8_t *buffer, size_t bufferSize)
Definition: util.cpp:120
void initialize(const ndn::name::Component &ibltName)
Populate the hash table using the vector representation of IBLT.
Definition: iblt.cpp:91
std::vector< uint32_t > extractValueFromName(const ndn::name::Component &ibltName) const
Extracts IBLT from name component.
Definition: iblt.cpp:211
void erase(uint32_t key)
Definition: iblt.cpp:131
IBLT operator-(const IBLT &other) const
Definition: iblt.cpp:170
const size_t N_HASHCHECK
uint32_t murmurHash3(const void *key, size_t len, uint32_t seed)
Definition: util.cpp:61
const std::vector< HashTableEntry > & getHashTable() const
Definition: iblt.hpp:130