Branch data Line data Source code
# 1 : : #ifndef _MINISKETCH_H_
# 2 : : #define _MINISKETCH_H_ 1
# 3 : :
# 4 : : #include <stdint.h>
# 5 : : #include <stdlib.h>
# 6 : :
# 7 : : #ifdef _MSC_VER
# 8 : : # include <compat.h>
# 9 : : #else
# 10 : : # include <unistd.h>
# 11 : : #endif
# 12 : :
# 13 : : #ifndef MINISKETCH_API
# 14 : : # if defined(_WIN32)
# 15 : : # ifdef MINISKETCH_BUILD
# 16 : : # define MINISKETCH_API __declspec(dllexport)
# 17 : : # else
# 18 : : # define MINISKETCH_API
# 19 : : # endif
# 20 : : # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(MINISKETCH_BUILD)
# 21 : : # define MINISKETCH_API __attribute__ ((visibility ("default")))
# 22 : : # else
# 23 : : # define MINISKETCH_API
# 24 : : # endif
# 25 : : #endif
# 26 : :
# 27 : : #ifdef __cplusplus
# 28 : : # if __cplusplus >= 201103L
# 29 : : # include <memory>
# 30 : : # include <vector>
# 31 : : # include <cassert>
# 32 : : # if __cplusplus >= 201703L
# 33 : : # include <optional>
# 34 : : # endif // __cplusplus >= 201703L
# 35 : : # endif // __cplusplus >= 201103L
# 36 : : extern "C" {
# 37 : : #endif // __cplusplus
# 38 : :
# 39 : : /** Opaque type for decoded sketches. */
# 40 : : typedef struct minisketch minisketch;
# 41 : :
# 42 : : /** Determine whether support for elements of `bits` bits was compiled in. */
# 43 : : MINISKETCH_API int minisketch_bits_supported(uint32_t bits);
# 44 : :
# 45 : : /** Determine the maximum number of implementations available.
# 46 : : *
# 47 : : * Multiple implementations may be available for a given element size, with
# 48 : : * different performance characteristics on different hardware.
# 49 : : *
# 50 : : * Each implementation is identified by a number from 0 to the output of this
# 51 : : * function call, inclusive. Note that not every combination of implementation
# 52 : : * and element size may exist (see further).
# 53 : : */
# 54 : : MINISKETCH_API uint32_t minisketch_implementation_max(void);
# 55 : :
# 56 : : /** Determine if the a combination of bits and implementation number is available.
# 57 : : *
# 58 : : * Returns 1 if it is, 0 otherwise.
# 59 : : */
# 60 : : MINISKETCH_API int minisketch_implementation_supported(uint32_t bits, uint32_t implementation);
# 61 : :
# 62 : : /** Construct a sketch for a given element size, implementation and capacity.
# 63 : : *
# 64 : : * If the combination of `bits` and `implementation` is unavailable, or when
# 65 : : * OOM occurs, NULL is returned. If minisketch_implementation_supported
# 66 : : * returns 1 for the specified bits and implementation, this will always succeed
# 67 : : * (except when allocation fails).
# 68 : : *
# 69 : : * If the result is not NULL, it must be destroyed using minisketch_destroy.
# 70 : : */
# 71 : : MINISKETCH_API minisketch* minisketch_create(uint32_t bits, uint32_t implementation, size_t capacity);
# 72 : :
# 73 : : /** Get the element size of a sketch in bits. */
# 74 : : MINISKETCH_API uint32_t minisketch_bits(const minisketch* sketch);
# 75 : :
# 76 : : /** Get the capacity of a sketch. */
# 77 : : MINISKETCH_API size_t minisketch_capacity(const minisketch* sketch);
# 78 : :
# 79 : : /** Get the implementation of a sketch. */
# 80 : : MINISKETCH_API uint32_t minisketch_implementation(const minisketch* sketch);
# 81 : :
# 82 : : /** Set the seed for randomizing algorithm choices to a fixed value.
# 83 : : *
# 84 : : * By default, sketches are initialized with a random seed. This is important
# 85 : : * to avoid scenarios where an attacker could force worst-case behavior.
# 86 : : *
# 87 : : * This function initializes the seed to a user-provided value (any 64-bit
# 88 : : * integer is acceptable, regardless of field size).
# 89 : : *
# 90 : : * When seed is -1, a fixed internal value with predictable behavior is
# 91 : : * used. It is only intended for testing.
# 92 : : */
# 93 : : MINISKETCH_API void minisketch_set_seed(minisketch* sketch, uint64_t seed);
# 94 : :
# 95 : : /** Clone a sketch.
# 96 : : *
# 97 : : * The result must be destroyed using minisketch_destroy.
# 98 : : */
# 99 : : MINISKETCH_API minisketch* minisketch_clone(const minisketch* sketch);
# 100 : :
# 101 : : /** Destroy a sketch.
# 102 : : *
# 103 : : * The pointer that was passed in may not be used anymore afterwards.
# 104 : : */
# 105 : : MINISKETCH_API void minisketch_destroy(minisketch* sketch);
# 106 : :
# 107 : : /** Compute the size in bytes for serializing a given sketch. */
# 108 : : MINISKETCH_API size_t minisketch_serialized_size(const minisketch* sketch);
# 109 : :
# 110 : : /** Serialize a sketch to bytes. */
# 111 : : MINISKETCH_API void minisketch_serialize(const minisketch* sketch, unsigned char* output);
# 112 : :
# 113 : : /** Deserialize a sketch from bytes. */
# 114 : : MINISKETCH_API void minisketch_deserialize(minisketch* sketch, const unsigned char* input);
# 115 : :
# 116 : : /** Add an element to a sketch.
# 117 : : *
# 118 : : * If the element to be added is too large for the sketch, the most significant
# 119 : : * bits of the element are dropped. More precisely, if the element size of
# 120 : : * `sketch` is b bits, then this function adds the unsigned integer represented
# 121 : : * by the b least significant bits of `element` to `sketch`.
# 122 : : *
# 123 : : * If the element to be added is 0 (after potentially dropping the most significant
# 124 : : * bits), then this function is a no-op. Sketches cannot contain an element with
# 125 : : * the value 0.
# 126 : : *
# 127 : : * Note that adding the same element a second time removes it again.
# 128 : : */
# 129 : : MINISKETCH_API void minisketch_add_uint64(minisketch* sketch, uint64_t element);
# 130 : :
# 131 : : /** Merge the elements of another sketch into this sketch.
# 132 : : *
# 133 : : * After merging, `sketch` will contain every element that existed in one but not
# 134 : : * both of the input sketches. It can be seen as an exclusive or operation on
# 135 : : * the set elements. If the capacity of `other_sketch` is lower than `sketch`'s,
# 136 : : * merging reduces the capacity of `sketch` to that of `other_sketch`.
# 137 : : *
# 138 : : * This function returns the capacity of `sketch` after merging has been performed
# 139 : : * (where this capacity is at least 1), or 0 to indicate that merging has failed because
# 140 : : * the two input sketches differ in their element size or implementation. If 0 is
# 141 : : * returned, `sketch` (and its capacity) have not been modified.
# 142 : : *
# 143 : : * It is also possible to perform this operation directly on the serializations
# 144 : : * of two sketches with the same element size and capacity by performing a bitwise XOR
# 145 : : * of the serializations.
# 146 : : */
# 147 : : MINISKETCH_API size_t minisketch_merge(minisketch* sketch, const minisketch* other_sketch);
# 148 : :
# 149 : : /** Decode a sketch.
# 150 : : *
# 151 : : * `output` is a pointer to an array of `max_element` uint64_t's, which will be
# 152 : : * filled with the elements in this sketch.
# 153 : : *
# 154 : : * The return value is the number of decoded elements, or -1 if decoding failed.
# 155 : : */
# 156 : : MINISKETCH_API ssize_t minisketch_decode(const minisketch* sketch, size_t max_elements, uint64_t* output);
# 157 : :
# 158 : : /** Compute the capacity needed to achieve a certain rate of false positives.
# 159 : : *
# 160 : : * A sketch with capacity c and no more than c elements can always be decoded
# 161 : : * correctly. However, if it has more than c elements, or contains just random
# 162 : : * bytes, it is possible that it will still decode, but the result will be
# 163 : : * nonsense. This can be counteracted by increasing the capacity slightly.
# 164 : : *
# 165 : : * Given a field size bits, an intended number of elements that can be decoded
# 166 : : * max_elements, and a false positive probability of 1 in 2**fpbits, this
# 167 : : * function computes the necessary capacity. It is only guaranteed to be
# 168 : : * accurate up to fpbits=256.
# 169 : : */
# 170 : : MINISKETCH_API size_t minisketch_compute_capacity(uint32_t bits, size_t max_elements, uint32_t fpbits);
# 171 : :
# 172 : : /** Compute what max_elements can be decoded for a certain rate of false positives.
# 173 : : *
# 174 : : * This is the inverse operation of minisketch_compute_capacity. It determines,
# 175 : : * given a field size bits, a capacity of a sketch, and an acceptable false
# 176 : : * positive probability of 1 in 2**fpbits, what the maximum allowed
# 177 : : * max_elements value is. If no value of max_elements would give the desired
# 178 : : * false positive probability, 0 is returned.
# 179 : : *
# 180 : : * Note that this is not an exact inverse of minisketch_compute_capacity. For
# 181 : : * example, with bits=32, fpbits=16, and max_elements=8,
# 182 : : * minisketch_compute_capacity will return 9, as capacity 8 would only have a
# 183 : : * false positive chance of 1 in 2^15.3. Increasing the capacity to 9 however
# 184 : : * decreases the fp chance to 1 in 2^47.3, enough for max_elements=9 (with fp
# 185 : : * chance of 1 in 2^18.5). Therefore, minisketch_compute_max_elements with
# 186 : : * capacity=9 will return 9.
# 187 : : */
# 188 : : MINISKETCH_API size_t minisketch_compute_max_elements(uint32_t bits, size_t capacity, uint32_t fpbits);
# 189 : :
# 190 : : #ifdef __cplusplus
# 191 : : }
# 192 : :
# 193 : : #if __cplusplus >= 201103L
# 194 : : /** Simple RAII C++11 wrapper around the minisketch API. */
# 195 : : class Minisketch
# 196 : : {
# 197 : : struct Deleter
# 198 : : {
# 199 : : void operator()(minisketch* ptr) const
# 200 : 844 : {
# 201 : 844 : minisketch_destroy(ptr);
# 202 : 844 : }
# 203 : : };
# 204 : :
# 205 : : std::unique_ptr<minisketch, Deleter> m_minisketch;
# 206 : :
# 207 : : public:
# 208 : : /** Check whether the library supports fields of the given size. */
# 209 : 0 : static bool BitsSupported(uint32_t bits) noexcept { return minisketch_bits_supported(bits); }
# 210 : :
# 211 : : /** Get the highest supported implementation number. */
# 212 : 2 : static uint32_t MaxImplementation() noexcept { return minisketch_implementation_max(); }
# 213 : :
# 214 : : /** Check whether the library supports fields with a given size and implementation number.
# 215 : : * If a particular field size `bits` is supported, implementation 0 is always supported for it.
# 216 : : * Higher implementation numbers may or may not be available as well, up to MaxImplementation().
# 217 : : */
# 218 : 46 : static bool ImplementationSupported(uint32_t bits, uint32_t implementation) noexcept { return minisketch_implementation_supported(bits, implementation); }
# 219 : :
# 220 : : /** Given field size and a maximum number of decodable elements n, compute what capacity c to
# 221 : : * use so that sketches with more elements than n have a chance no higher than 2^-fpbits of
# 222 : : * being decoded incorrectly (and will instead fail when decoding for up to n elements).
# 223 : : *
# 224 : : * See minisketch_compute_capacity for more details. */
# 225 : 0 : static size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) noexcept { return minisketch_compute_capacity(bits, max_elements, fpbits); }
# 226 : :
# 227 : : /** Reverse operation of ComputeCapacity. See minisketch_compute_max_elements. */
# 228 : 0 : static size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) noexcept { return minisketch_compute_max_elements(bits, capacity, fpbits); }
# 229 : :
# 230 : : /** Construct a clone of the specified sketch. */
# 231 : : Minisketch(const Minisketch& sketch) noexcept
# 232 : 0 : {
# 233 : 0 : if (sketch.m_minisketch) {
# 234 : 0 : m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
# 235 : 0 : }
# 236 : 0 : }
# 237 : :
# 238 : : /** Make this Minisketch a clone of the specified one. */
# 239 : : Minisketch& operator=(const Minisketch& sketch) noexcept
# 240 : 0 : {
# 241 : 0 : if (sketch.m_minisketch) {
# 242 : 0 : m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
# 243 : 0 : }
# 244 : 0 : return *this;
# 245 : 0 : }
# 246 : :
# 247 : : /** Check whether this Minisketch object is valid. */
# 248 : 0 : explicit operator bool() const noexcept { return bool{m_minisketch}; }
# 249 : :
# 250 : : /** Construct an (invalid) Minisketch object. */
# 251 : : Minisketch() noexcept = default;
# 252 : :
# 253 : : /** Move constructor. */
# 254 : 200 : Minisketch(Minisketch&&) noexcept = default;
# 255 : :
# 256 : : /** Move assignment. */
# 257 : : Minisketch& operator=(Minisketch&&) noexcept = default;
# 258 : :
# 259 : : /** Construct a Minisketch object with the specified parameters.
# 260 : : *
# 261 : : * If bits is not BitsSupported(), or the combination of bits and capacity is not
# 262 : : * ImplementationSupported(), or OOM occurs internally, an invalid Minisketch
# 263 : : * object will be constructed. Use operator bool() to check that this isn't the
# 264 : : * case before performing any other operations. */
# 265 : : Minisketch(uint32_t bits, uint32_t implementation, size_t capacity) noexcept
# 266 : 844 : {
# 267 : 844 : m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_create(bits, implementation, capacity));
# 268 : 844 : }
# 269 : :
# 270 : : /** Create a Minisketch object sufficiently large for the specified number of elements at given fpbits.
# 271 : : * It may construct an invalid object, which you may need to check for. */
# 272 : : static Minisketch CreateFP(uint32_t bits, uint32_t implementation, size_t max_elements, uint32_t fpbits) noexcept
# 273 : 0 : {
# 274 : 0 : return Minisketch(bits, implementation, ComputeCapacity(bits, max_elements, fpbits));
# 275 : 0 : }
# 276 : :
# 277 : : /** Return the field size for a (valid) Minisketch object. */
# 278 : 0 : uint32_t GetBits() const noexcept { return minisketch_bits(m_minisketch.get()); }
# 279 : :
# 280 : : /** Return the capacity for a (valid) Minisketch object. */
# 281 : 0 : size_t GetCapacity() const noexcept { return minisketch_capacity(m_minisketch.get()); }
# 282 : :
# 283 : : /** Return the implementation number for a (valid) Minisketch object. */
# 284 : 0 : uint32_t GetImplementation() const noexcept { return minisketch_implementation(m_minisketch.get()); }
# 285 : :
# 286 : : /** Set the seed for a (valid) Minisketch object. See minisketch_set_seed(). */
# 287 : : Minisketch& SetSeed(uint64_t seed) noexcept
# 288 : 0 : {
# 289 : 0 : minisketch_set_seed(m_minisketch.get(), seed);
# 290 : 0 : return *this;
# 291 : 0 : }
# 292 : :
# 293 : : /** Add (or remove, if already present) an element to a (valid) Minisketch object.
# 294 : : * See minisketch_add_uint64(). */
# 295 : : Minisketch& Add(uint64_t element) noexcept
# 296 : 1957072 : {
# 297 : 1957072 : minisketch_add_uint64(m_minisketch.get(), element);
# 298 : 1957072 : return *this;
# 299 : 1957072 : }
# 300 : :
# 301 : : /** Merge sketch into *this; both have to be valid Minisketch objects.
# 302 : : * See minisketch_merge for details. */
# 303 : : Minisketch& Merge(const Minisketch& sketch) noexcept
# 304 : 200 : {
# 305 : 200 : minisketch_merge(m_minisketch.get(), sketch.m_minisketch.get());
# 306 : 200 : return *this;
# 307 : 200 : }
# 308 : :
# 309 : : /** Decode this (valid) Minisketch object into the result vector, up to as many elements as the
# 310 : : * vector's size permits. */
# 311 : : bool Decode(std::vector<uint64_t>& result) const
# 312 : 0 : {
# 313 : 0 : ssize_t ret = minisketch_decode(m_minisketch.get(), result.size(), result.data());
# 314 : 0 : if (ret == -1) return false;
# 315 : 0 : result.resize(ret);
# 316 : 0 : return true;
# 317 : 0 : }
# 318 : :
# 319 : : /** Get the serialized size in bytes for this (valid) Minisketch object.. */
# 320 : 800 : size_t GetSerializedSize() const noexcept { return minisketch_serialized_size(m_minisketch.get()); }
# 321 : :
# 322 : : /** Serialize this (valid) Minisketch object as a byte vector. */
# 323 : : std::vector<unsigned char> Serialize() const
# 324 : 400 : {
# 325 : 400 : std::vector<unsigned char> result(GetSerializedSize());
# 326 : 400 : minisketch_serialize(m_minisketch.get(), result.data());
# 327 : 400 : return result;
# 328 : 400 : }
# 329 : :
# 330 : : /** Deserialize into this (valid) Minisketch from an object containing its bytes (which has data()
# 331 : : * and size() members). */
# 332 : : template<typename T>
# 333 : : Minisketch& Deserialize(
# 334 : : const T& obj,
# 335 : : typename std::enable_if<
# 336 : : std::is_convertible<typename std::remove_pointer<decltype(obj.data())>::type (*)[], const unsigned char (*)[]>::value &&
# 337 : : std::is_convertible<decltype(obj.size()), std::size_t>::value,
# 338 : : std::nullptr_t
# 339 : : >::type = nullptr) noexcept
# 340 : 400 : {
# 341 : 400 : assert(GetSerializedSize() == obj.size());
# 342 : 0 : minisketch_deserialize(m_minisketch.get(), obj.data());
# 343 : 400 : return *this;
# 344 : 400 : }
# 345 : :
# 346 : : #if __cplusplus >= 201703L
# 347 : : /** C++17 only: like Decode(), but up to a specified number of elements into an optional vector. */
# 348 : : std::optional<std::vector<uint64_t>> Decode(size_t max_elements) const
# 349 : 244 : {
# 350 : 244 : std::vector<uint64_t> result(max_elements);
# 351 : 244 : ssize_t ret = minisketch_decode(m_minisketch.get(), max_elements, result.data());
# 352 [ - + ]: 244 : if (ret == -1) return {};
# 353 : 244 : result.resize(ret);
# 354 : 244 : return result;
# 355 : 244 : }
# 356 : :
# 357 : : /** C++17 only: similar to Decode(), but with specified false positive probability. */
# 358 : : std::optional<std::vector<uint64_t>> DecodeFP(uint32_t fpbits) const
# 359 : 0 : {
# 360 : 0 : return Decode(ComputeMaxElements(GetBits(), GetCapacity(), fpbits));
# 361 : 0 : }
# 362 : : #endif // __cplusplus >= 201703L
# 363 : : };
# 364 : : #endif // __cplusplus >= 201103L
# 365 : : #endif // __cplusplus
# 366 : :
# 367 : : #endif // _MINISKETCH_H_
|