iblt.cpp
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * Copyright (c) 2014-2019, The University of Memphis
4  *
5  * This file is part of PSync.
6  * See AUTHORS.md for complete list of PSync authors and contributors.
7  *
8  * PSync is free software: you can redistribute it and/or modify it under the terms
9  * of the GNU Lesser General Public License as published by the Free Software Foundation,
10  * either version 3 of the License, or (at your option) any later version.
11  *
12  * PSync is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14  * PURPOSE. See the GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License along with
17  * PSync, e.g., in COPYING.md file. If not, see <http://www.gnu.org/licenses/>.
18  *
19 
20  * This file incorporates work covered by the following copyright and
21  * permission notice:
22 
23  * The MIT License (MIT)
24 
25  * Copyright (c) 2014 Gavin Andresen
26 
27  * Permission is hereby granted, free of charge, to any person obtaining a copy
28  * of this software and associated documentation files (the "Software"), to deal
29  * in the Software without restriction, including without limitation the rights
30  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31  * copies of the Software, and to permit persons to whom the Software is
32  * furnished to do so, subject to the following conditions:
33 
34  * The above copyright notice and this permission notice shall be included in all
35  * copies or substantial portions of the Software.
36 
37  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43  * SOFTWARE.
44 */
45 
46 #include "PSync/detail/iblt.hpp"
47 #include "PSync/detail/util.hpp"
48 
49 #include <boost/iostreams/device/array.hpp>
50 #include <boost/iostreams/filtering_stream.hpp>
51 #include <boost/iostreams/filter/zlib.hpp>
52 #include <boost/iostreams/copy.hpp>
53 
54 namespace psync {
55 
56 namespace bio = boost::iostreams;
57 
58 const size_t N_HASH(3);
59 const size_t N_HASHCHECK(11);
60 
61 bool
63 {
64  if (count == 1 || count == -1) {
65  uint32_t check = murmurHash3(N_HASHCHECK, keySum);
66  return keyCheck == check;
67  }
68 
69  return false;
70 }
71 
72 bool
74 {
75  return count == 0 && keySum == 0 && keyCheck == 0;
76 }
77 
78 IBLT::IBLT(size_t expectedNumEntries)
79 {
80  // 1.5x expectedNumEntries gives very low probability of decoding failure
81  size_t nEntries = expectedNumEntries + expectedNumEntries / 2;
82  // make nEntries exactly divisible by N_HASH
83  size_t remainder = nEntries % N_HASH;
84  if (remainder != 0) {
85  nEntries += (N_HASH - remainder);
86  }
87 
88  m_hashTable.resize(nEntries);
89 }
90 
91 void
92 IBLT::initialize(const ndn::name::Component& ibltName)
93 {
94  const auto& values = extractValueFromName(ibltName);
95 
96  if (3 * m_hashTable.size() != values.size()) {
97  BOOST_THROW_EXCEPTION(Error("Received IBF cannot be decoded!"));
98  }
99 
100  for (size_t i = 0; i < m_hashTable.size(); i++) {
101  HashTableEntry& entry = m_hashTable.at(i);
102  if (values[i * 3] != 0) {
103  entry.count = values[i * 3];
104  entry.keySum = values[(i * 3) + 1];
105  entry.keyCheck = values[(i * 3) + 2];
106  }
107  }
108 }
109 
110 void
111 IBLT::update(int plusOrMinus, uint32_t key)
112 {
113  size_t bucketsPerHash = m_hashTable.size() / N_HASH;
114 
115  for (size_t i = 0; i < N_HASH; i++) {
116  size_t startEntry = i * bucketsPerHash;
117  uint32_t h = murmurHash3(i, key);
118  HashTableEntry& entry = m_hashTable.at(startEntry + (h % bucketsPerHash));
119  entry.count += plusOrMinus;
120  entry.keySum ^= key;
121  entry.keyCheck ^= murmurHash3(N_HASHCHECK, key);
122  }
123 }
124 
125 void
126 IBLT::insert(uint32_t key)
127 {
128  update(INSERT, key);
129 }
130 
131 void
132 IBLT::erase(uint32_t key)
133 {
134  update(ERASE, key);
135 }
136 
137 bool
138 IBLT::listEntries(std::set<uint32_t>& positive, std::set<uint32_t>& negative) const
139 {
140  IBLT peeled = *this;
141 
142  size_t nErased = 0;
143  do {
144  nErased = 0;
145  for (const auto& entry : peeled.m_hashTable) {
146  if (entry.isPure()) {
147  if (entry.count == 1) {
148  positive.insert(entry.keySum);
149  }
150  else {
151  negative.insert(entry.keySum);
152  }
153  peeled.update(-entry.count, entry.keySum);
154  ++nErased;
155  }
156  }
157  } while (nErased > 0);
158 
159  // If any buckets for one of the hash functions is not empty,
160  // then we didn't peel them all:
161  for (const auto& entry : peeled.m_hashTable) {
162  if (entry.isEmpty() != true) {
163  return false;
164  }
165  }
166 
167  return true;
168 }
169 
170 IBLT
171 IBLT::operator-(const IBLT& other) const
172 {
173  BOOST_ASSERT(m_hashTable.size() == other.m_hashTable.size());
174 
175  IBLT result(*this);
176  for (size_t i = 0; i < m_hashTable.size(); i++) {
177  HashTableEntry& e1 = result.m_hashTable.at(i);
178  const HashTableEntry& e2 = other.m_hashTable.at(i);
179  e1.count -= e2.count;
180  e1.keySum ^= e2.keySum;
181  e1.keyCheck ^= e2.keyCheck;
182  }
183 
184  return result;
185 }
186 
187 bool
188 operator==(const IBLT& iblt1, const IBLT& iblt2)
189 {
190  auto iblt1HashTable = iblt1.getHashTable();
191  auto iblt2HashTable = iblt2.getHashTable();
192  if (iblt1HashTable.size() != iblt2HashTable.size()) {
193  return false;
194  }
195 
196  size_t N = iblt1HashTable.size();
197 
198  for (size_t i = 0; i < N; i++) {
199  if (iblt1HashTable[i].count != iblt2HashTable[i].count ||
200  iblt1HashTable[i].keySum != iblt2HashTable[i].keySum ||
201  iblt1HashTable[i].keyCheck != iblt2HashTable[i].keyCheck)
202  return false;
203  }
204 
205  return true;
206 }
207 
208 bool
209 operator!=(const IBLT& iblt1, const IBLT& iblt2)
210 {
211  return !(iblt1 == iblt2);
212 }
213 
214 std::ostream&
215 operator<<(std::ostream& out, const IBLT& iblt)
216 {
217  out << "count keySum keyCheckMatch\n";
218  for (const auto& entry : iblt.getHashTable()) {
219  out << entry.count << " " << entry.keySum << " ";
220  out << ((murmurHash3(N_HASHCHECK, entry.keySum) == entry.keyCheck) ||
221  (entry.isEmpty())? "true" : "false");
222  out << "\n";
223  }
224 
225  return out;
226 }
227 
228 void
229 IBLT::appendToName(ndn::Name& name) const
230 {
231  size_t n = m_hashTable.size();
232  size_t unitSize = (32 * 3) / 8; // hard coding
233  size_t tableSize = unitSize * n;
234 
235  std::vector<char> table(tableSize);
236 
237  for (size_t i = 0; i < n; i++) {
238  // table[i*12], table[i*12+1], table[i*12+2], table[i*12+3] --> hashTable[i].count
239 
240  table[(i * unitSize)] = 0xFF & m_hashTable[i].count;
241  table[(i * unitSize) + 1] = 0xFF & (m_hashTable[i].count >> 8);
242  table[(i * unitSize) + 2] = 0xFF & (m_hashTable[i].count >> 16);
243  table[(i * unitSize) + 3] = 0xFF & (m_hashTable[i].count >> 24);
244 
245  // table[i*12+4], table[i*12+5], table[i*12+6], table[i*12+7] --> hashTable[i].keySum
246 
247  table[(i * unitSize) + 4] = 0xFF & m_hashTable[i].keySum;
248  table[(i * unitSize) + 5] = 0xFF & (m_hashTable[i].keySum >> 8);
249  table[(i * unitSize) + 6] = 0xFF & (m_hashTable[i].keySum >> 16);
250  table[(i * unitSize) + 7] = 0xFF & (m_hashTable[i].keySum >> 24);
251 
252  // table[i*12+8], table[i*12+9], table[i*12+10], table[i*12+11] --> hashTable[i].keyCheck
253 
254  table[(i * unitSize) + 8] = 0xFF & m_hashTable[i].keyCheck;
255  table[(i * unitSize) + 9] = 0xFF & (m_hashTable[i].keyCheck >> 8);
256  table[(i * unitSize) + 10] = 0xFF & (m_hashTable[i].keyCheck >> 16);
257  table[(i * unitSize) + 11] = 0xFF & (m_hashTable[i].keyCheck >> 24);
258  }
259 
260  bio::filtering_streambuf<bio::input> in;
261  in.push(bio::zlib_compressor());
262  in.push(bio::array_source(table.data(), table.size()));
263 
264  std::stringstream sstream;
265  bio::copy(in, sstream);
266 
267  std::string compressedIBF = sstream.str();
268  name.append(compressedIBF.begin(), compressedIBF.end());
269 }
270 
271 std::vector<uint32_t>
272 IBLT::extractValueFromName(const ndn::name::Component& ibltName) const
273 {
274  std::string compressed(ibltName.value_begin(), ibltName.value_end());
275 
276  bio::filtering_streambuf<bio::input> in;
277  in.push(bio::zlib_decompressor());
278  in.push(bio::array_source(compressed.data(), compressed.size()));
279 
280  std::stringstream sstream;
281  bio::copy(in, sstream);
282  std::string ibltStr = sstream.str();
283 
284  std::vector<uint8_t> ibltValues(ibltStr.begin(), ibltStr.end());
285  size_t n = ibltValues.size() / 4;
286 
287  std::vector<uint32_t> values(n, 0);
288 
289  for (size_t i = 0; i < 4 * n; i += 4) {
290  uint32_t t = (ibltValues[i + 3] << 24) +
291  (ibltValues[i + 2] << 16) +
292  (ibltValues[i + 1] << 8) +
293  ibltValues[i];
294  values[i / 4] = t;
295  }
296 
297  return values;
298 }
299 
300 } // namespace psync
uint32_t keyCheck
Definition: iblt.hpp:63
void initialize(const ndn::name::Component &ibltName)
Populate the hash table using the vector representation of IBLT.
Definition: iblt.cpp:92
bool listEntries(std::set< uint32_t > &positive, std::set< uint32_t > &negative) const
List all the entries in the IBLT.
Definition: iblt.cpp:138
std::vector< HashTableEntry > getHashTable() const
Definition: iblt.hpp:129
IBLT operator-(const IBLT &other) const
Definition: iblt.cpp:171
uint32_t murmurHash3(uint32_t nHashSeed, const std::vector< unsigned char > &vDataToHash)
Definition: util.cpp:37
const size_t N_HASH
Invertible Bloom Lookup Table (Invertible Bloom Filter)
Definition: iblt.hpp:80
const size_t N_HASHCHECK
bool operator==(const BloomFilter &bf1, const BloomFilter &bf2)
uint32_t keySum
Definition: iblt.hpp:62
bool operator!=(const IBLT &iblt1, const IBLT &iblt2)
Definition: iblt.cpp:209
void erase(uint32_t key)
Definition: iblt.cpp:132
void insert(uint32_t key)
Definition: iblt.cpp:126
std::vector< uint32_t > extractValueFromName(const ndn::name::Component &ibltName) const
Extracts IBLT from name component.
Definition: iblt.cpp:272
bool isEmpty() const
Definition: iblt.cpp:73
bool isPure() const
Definition: iblt.cpp:62
std::ostream & operator<<(std::ostream &out, const BloomFilter &bf)
IBLT(size_t expectedNumEntries)
constructor
Definition: iblt.cpp:78
void appendToName(ndn::Name &name) const
Appends self to name.
Definition: iblt.cpp:229