From 3d7a9da1700a584ff2d1a3b252f084e0de7d0c82 Mon Sep 17 00:00:00 2001 From: Alex Potsides Date: Tue, 30 Apr 2024 07:27:43 +0100 Subject: [PATCH] feat: add cuckoo filter (#2510) Add a filter type that lets us efficiently remove items as well as add them. It would be better to use the `bloom-filters` module at this point but it adds 50KB+ to browser bundles for very simple use cases so it's not suitable. We can revisit if https://github.com/Callidon/bloom-filters/issues/70 is ever resolved. --- packages/peer-collections/src/filter.ts | 11 +- packages/peer-collections/test/filter.spec.ts | 4 + packages/peer-collections/tsconfig.json | 3 + packages/utils/package.json | 9 +- .../utils/src/{ => filters}/bloom-filter.ts | 27 +-- packages/utils/src/filters/bucket.ts | 64 ++++++ packages/utils/src/filters/cuckoo-filter.ts | 197 ++++++++++++++++++ packages/utils/src/filters/fingerprint.ts | 44 ++++ packages/utils/src/filters/hashes.ts | 38 ++++ packages/utils/src/filters/index.ts | 9 + .../src/filters/scalable-cuckoo-filter.ts | 111 ++++++++++ packages/utils/src/filters/utils.ts | 3 + .../test/{ => filter}/bloom-filter.spec.ts | 34 +-- .../utils/test/filter/cuckoo-filter.spec.ts | 53 +++++ .../filter/scalable-cuckoo-filter.spec.ts | 55 +++++ 15 files changed, 626 insertions(+), 36 deletions(-) rename packages/utils/src/{ => filters}/bloom-filter.ts (83%) create mode 100644 packages/utils/src/filters/bucket.ts create mode 100644 packages/utils/src/filters/cuckoo-filter.ts create mode 100644 packages/utils/src/filters/fingerprint.ts create mode 100644 packages/utils/src/filters/hashes.ts create mode 100644 packages/utils/src/filters/index.ts create mode 100644 packages/utils/src/filters/scalable-cuckoo-filter.ts create mode 100644 packages/utils/src/filters/utils.ts rename packages/utils/test/{ => filter}/bloom-filter.spec.ts (84%) create mode 100644 packages/utils/test/filter/cuckoo-filter.spec.ts create mode 100644 packages/utils/test/filter/scalable-cuckoo-filter.spec.ts diff --git a/packages/peer-collections/src/filter.ts b/packages/peer-collections/src/filter.ts index 59349cbc03..38e9c3accc 100644 --- a/packages/peer-collections/src/filter.ts +++ b/packages/peer-collections/src/filter.ts @@ -1,15 +1,16 @@ -import { BloomFilter } from '@libp2p/utils/bloom-filter' +import { createScalableCuckooFilter } from '@libp2p/utils/filters' import type { PeerId } from '@libp2p/interface' +import type { Filter } from '@libp2p/utils/filters' /** * Uses a Bloom filter to implement a mechansim for deduplicating PeerIds in a * way that uses a fixed amount of memory. */ export class PeerFilter { - private readonly filter: BloomFilter + private readonly filter: Filter constructor (size: number, errorRate?: number) { - this.filter = BloomFilter.create(size, errorRate) + this.filter = createScalableCuckooFilter(size, errorRate) } has (peerId: PeerId): boolean { @@ -19,6 +20,10 @@ export class PeerFilter { add (peerId: PeerId): void { this.filter.add(peerId.toBytes()) } + + remove (peerId: PeerId): void { + this.filter.remove?.(peerId.toBytes()) + } } export function peerFilter (size: number): PeerFilter { diff --git a/packages/peer-collections/test/filter.spec.ts b/packages/peer-collections/test/filter.spec.ts index c30a6ba007..e60acf1fae 100644 --- a/packages/peer-collections/test/filter.spec.ts +++ b/packages/peer-collections/test/filter.spec.ts @@ -12,5 +12,9 @@ describe('peer-filter', () => { filter.add(peer) expect(filter.has(peer)).to.be.true() + + filter.remove(peer) + + expect(filter.has(peer)).to.be.false() }) }) diff --git a/packages/peer-collections/tsconfig.json b/packages/peer-collections/tsconfig.json index 4795efb1d1..b2d1cb966d 100644 --- a/packages/peer-collections/tsconfig.json +++ b/packages/peer-collections/tsconfig.json @@ -16,6 +16,9 @@ }, { "path": "../peer-id-factory" + }, + { + "path": "../utils" } ] } diff --git a/packages/utils/package.json b/packages/utils/package.json index 9490980d8f..eb39c44bdc 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -56,14 +56,14 @@ "types": "./dist/src/array-equals.d.ts", "import": "./dist/src/array-equals.js" }, - "./bloom-filter": { - "types": "./dist/src/bloom-filter.d.ts", - "import": "./dist/src/bloom-filter.js" - }, "./close-source": { "types": "./dist/src/close-source.d.ts", "import": "./dist/src/close-source.js" }, + "./filters": { + "types": "./dist/src/filters/index.d.ts", + "import": "./dist/src/filters/index.js" + }, "./ip-port-to-multiaddr": { "types": "./dist/src/ip-port-to-multiaddr.d.ts", "import": "./dist/src/ip-port-to-multiaddr.js" @@ -137,6 +137,7 @@ "@libp2p/logger": "^4.0.11", "@multiformats/multiaddr": "^12.2.1", "@multiformats/multiaddr-matcher": "^1.2.0", + "@sindresorhus/fnv1a": "^3.1.0", "@types/murmurhash3js-revisited": "^3.0.3", "delay": "^6.0.0", "get-iterator": "^2.0.1", diff --git a/packages/utils/src/bloom-filter.ts b/packages/utils/src/filters/bloom-filter.ts similarity index 83% rename from packages/utils/src/bloom-filter.ts rename to packages/utils/src/filters/bloom-filter.ts index 308cbfc0ac..a41bfd59bb 100644 --- a/packages/utils/src/bloom-filter.ts +++ b/packages/utils/src/filters/bloom-filter.ts @@ -4,6 +4,7 @@ import mur from 'murmurhash3js-revisited' import { Uint8ArrayList } from 'uint8arraylist' import { alloc } from 'uint8arrays/alloc' import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' +import type { Filter } from './index.js' const LN2_SQUARED = Math.LN2 * Math.LN2 @@ -13,16 +14,7 @@ export interface BloomFilterOptions { bits?: number } -export class BloomFilter { - /** - * Create a `BloomFilter` with the smallest `bits` and `hashes` value for the - * specified item count and error rate. - */ - static create (itemcount: number, errorRate: number = 0.005): BloomFilter { - const opts = optimize(itemcount, errorRate) - return new BloomFilter(opts) - } - +export class BloomFilter implements Filter { public readonly seeds: number[] public readonly bits: number public buffer: Uint8Array @@ -111,9 +103,18 @@ export class BloomFilter { } } -function optimize (itemcount: number, errorRate: number = 0.005): { bits: number, hashes: number } { - const bits = Math.round(-1 * itemcount * Math.log(errorRate) / LN2_SQUARED) - const hashes = Math.round((bits / itemcount) * Math.LN2) +/** + * Create a `BloomFilter` with the smallest `bits` and `hashes` value for the + * specified item count and error rate. + */ +export function createBloomFilter (itemcount: number, errorRate: number = 0.005): Filter { + const opts = optimize(itemcount, errorRate) + return new BloomFilter(opts) +} + +function optimize (itemCount: number, errorRate: number = 0.005): { bits: number, hashes: number } { + const bits = Math.round(-1 * itemCount * Math.log(errorRate) / LN2_SQUARED) + const hashes = Math.round((bits / itemCount) * Math.LN2) return { bits, hashes } } diff --git a/packages/utils/src/filters/bucket.ts b/packages/utils/src/filters/bucket.ts new file mode 100644 index 0000000000..5462c13943 --- /dev/null +++ b/packages/utils/src/filters/bucket.ts @@ -0,0 +1,64 @@ +import { Fingerprint } from './fingerprint.js' +import { getRandomInt } from './utils.js' + +export class Bucket { + private readonly contents: Array + + constructor (size: number) { + this.contents = new Array(size).fill(null) + } + + has (fingerprint: Fingerprint): boolean { + if (!(fingerprint instanceof Fingerprint)) { + throw new TypeError('Invalid Fingerprint') + } + + return this.contents.some((fp) => { + return fingerprint.equals(fp) + }) + } + + add (fingerprint: Fingerprint): boolean { + if (!(fingerprint instanceof Fingerprint)) { + throw new TypeError('Invalid Fingerprint') + } + + for (let i = 0; i < this.contents.length; i++) { + if (this.contents[i] == null) { + this.contents[i] = fingerprint + return true + } + } + + return true + } + + swap (fingerprint: Fingerprint): Fingerprint | null { + if (!(fingerprint instanceof Fingerprint)) { + throw new TypeError('Invalid Fingerprint') + } + + const i = getRandomInt(0, this.contents.length - 1) + const current = this.contents[i] + this.contents[i] = fingerprint + + return current + } + + remove (fingerprint: Fingerprint): boolean { + if (!(fingerprint instanceof Fingerprint)) { + throw new TypeError('Invalid Fingerprint') + } + + const found = this.contents.findIndex((fp) => { + return fingerprint.equals(fp) + }) + + if (found > -1) { + this.contents[found] = null + return true + } else { + return false + } + } +} diff --git a/packages/utils/src/filters/cuckoo-filter.ts b/packages/utils/src/filters/cuckoo-filter.ts new file mode 100644 index 0000000000..7b6d4e26de --- /dev/null +++ b/packages/utils/src/filters/cuckoo-filter.ts @@ -0,0 +1,197 @@ +import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' +import { Bucket } from './bucket.js' +import { Fingerprint, MAX_FINGERPRINT_SIZE } from './fingerprint.js' +import { fnv1a, type Hash } from './hashes.js' +import { getRandomInt } from './utils.js' +import type { Filter } from './index.js' + +const maxCuckooCount = 500 + +export interface CuckooFilterInit { + /** + * How many items the filter is expected to contain + */ + filterSize: number + + /** + * How many items to put in each bucket + */ + bucketSize?: number + + /** + * How many bytes the fingerprint is expected to be + */ + fingerprintSize?: number + + /** + * A non-cryptographic hash implementation + */ + hash?: Hash + + /** + * A number used to seed the hash + */ + seed?: number +} + +export class CuckooFilter implements Filter { + private readonly bucketSize: number + private readonly filterSize: number + private readonly fingerprintSize: number + private readonly buckets: Bucket[] + public count: number + private readonly hash: Hash + private readonly seed: number + + constructor (init: CuckooFilterInit) { + this.filterSize = init.filterSize + this.bucketSize = init.bucketSize ?? 4 + this.fingerprintSize = init.fingerprintSize ?? 2 + this.count = 0 + this.buckets = [] + this.hash = init.hash ?? fnv1a + this.seed = init.seed ?? getRandomInt(0, Math.pow(2, 10)) + } + + add (item: Uint8Array | string): boolean { + if (typeof item === 'string') { + item = uint8ArrayFromString(item) + } + + const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize) + const j = this.hash.hash(item, this.seed) % this.filterSize + const k = (j ^ fingerprint.hash()) % this.filterSize + + if (this.buckets[j] == null) { + this.buckets[j] = new Bucket(this.bucketSize) + } + + if (this.buckets[k] == null) { + this.buckets[k] = new Bucket(this.bucketSize) + } + + if (this.buckets[j].add(fingerprint) || this.buckets[k].add(fingerprint)) { + this.count++ + return true + } + + const rand = [j, k] + let i = rand[getRandomInt(0, rand.length - 1)] + + if (this.buckets[i] == null) { + this.buckets[i] = new Bucket(this.bucketSize) + } + + for (let n = 0; n < maxCuckooCount; n++) { + const swapped = this.buckets[i].swap(fingerprint) + + if (swapped == null) { + continue + } + + i = (i ^ swapped.hash()) % this.filterSize + + if (this.buckets[i] == null) { + this.buckets[i] = new Bucket(this.bucketSize) + } + + if (this.buckets[i].add(swapped)) { + this.count++ + + return true + } else { + continue + } + } + + return false + } + + has (item: Uint8Array | string): boolean { + if (typeof item === 'string') { + item = uint8ArrayFromString(item) + } + + const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize) + const j = this.hash.hash(item, this.seed) % this.filterSize + const inJ = this.buckets[j]?.has(fingerprint) ?? false + + if (inJ) { + return inJ + } + + const k = (j ^ fingerprint.hash()) % this.filterSize + + return this.buckets[k]?.has(fingerprint) ?? false + } + + remove (item: Uint8Array | string): boolean { + if (typeof item === 'string') { + item = uint8ArrayFromString(item) + } + + const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize) + const j = this.hash.hash(item, this.seed) % this.filterSize + const inJ = this.buckets[j]?.remove(fingerprint) ?? false + + if (inJ) { + this.count-- + return inJ + } + + const k = (j ^ fingerprint.hash()) % this.filterSize + const inK = this.buckets[k]?.remove(fingerprint) ?? false + + if (inK) { + this.count-- + } + + return inK + } + + get reliable (): boolean { + return Math.floor(100 * (this.count / this.filterSize)) <= 95 + } +} + +// max load constants, defined in the cuckoo paper +const MAX_LOAD = { + 1: 0.5, + 2: 0.84, + 4: 0.95, + 8: 0.98 +} + +function calculateBucketSize (errorRate: number = 0.001): 2 | 4 | 8 { + if (errorRate > 0.002) { + return 2 + } + + if (errorRate > 0.00001) { + return 4 + } + + return 8 +} + +export function optimize (maxItems: number, errorRate: number = 0.001): CuckooFilterInit { + // https://www.eecs.harvard.edu/~michaelm/postscripts/cuckoo-conext2014.pdf + // Section 5.1 Optimal Bucket Size + const bucketSize = calculateBucketSize(errorRate) + const load = MAX_LOAD[bucketSize] + + // https://stackoverflow.com/questions/57555236/how-to-size-a-cuckoo-filter/57617208#57617208 + const filterSize = Math.round(maxItems / load) + const fingerprintSize = Math.min(Math.ceil(Math.log(filterSize / bucketSize)) + 2, MAX_FINGERPRINT_SIZE) + + return { + filterSize, + bucketSize, + fingerprintSize + } +} + +export function createCuckooFilter (maxItems: number, errorRate: number = 0.005): Filter { + const opts = optimize(maxItems, errorRate) + return new CuckooFilter(opts) +} diff --git a/packages/utils/src/filters/fingerprint.ts b/packages/utils/src/filters/fingerprint.ts new file mode 100644 index 0000000000..f34693de35 --- /dev/null +++ b/packages/utils/src/filters/fingerprint.ts @@ -0,0 +1,44 @@ +import { alloc as uint8ArrayAlloc } from 'uint8arrays/alloc' +import { equals as uint8ArrayEquals } from 'uint8arrays/equals' +import type { Hash } from './hashes' + +export const MAX_FINGERPRINT_SIZE = 64 + +export class Fingerprint { + private readonly fp: Uint8Array + private readonly h: Hash + private readonly seed: number + + constructor (buf: Uint8Array, hash: Hash, seed: number, fingerprintSize: number = 2) { + if (fingerprintSize > MAX_FINGERPRINT_SIZE) { + throw new TypeError('Invalid Fingerprint Size') + } + + const fnv = hash.hashV(buf, seed) + const fp = uint8ArrayAlloc(fingerprintSize) + + for (let i = 0; i < fp.length; i++) { + fp[i] = fnv[i] + } + + if (fp.length === 0) { + fp[0] = 7 + } + + this.fp = fp + this.h = hash + this.seed = seed + } + + hash (): number { + return this.h.hash(this.fp, this.seed) + } + + equals (other?: any): boolean { + if (!(other?.fp instanceof Uint8Array)) { + return false + } + + return uint8ArrayEquals(this.fp, other.fp) + } +} diff --git a/packages/utils/src/filters/hashes.ts b/packages/utils/src/filters/hashes.ts new file mode 100644 index 0000000000..489f841e6e --- /dev/null +++ b/packages/utils/src/filters/hashes.ts @@ -0,0 +1,38 @@ +import fnv1aHash from '@sindresorhus/fnv1a' +import mur from 'murmurhash3js-revisited' +import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' + +export interface Hash { + hash(input: Uint8Array, seed: number): number + hashV(input: Uint8Array, seed: number): Uint8Array +} + +export const murmur3: Hash = { + hash: (input, seed) => { + return mur.x86.hash32(input, seed) + }, + hashV: (input, seed) => { + return numberToBuffer(murmur3.hash(input, seed)) + } +} + +export const fnv1a: Hash = { + hash: (input) => { + return Number(fnv1aHash(input, { + size: 32 + })) + }, + hashV: (input, seed) => { + return numberToBuffer(fnv1a.hash(input, seed)) + } +} + +export function numberToBuffer (num: bigint | number): Uint8Array { + let hex = num.toString(16) + + if (hex.length % 2 === 1) { + hex = `0${hex}` + } + + return uint8ArrayFromString(hex, 'base16') +} diff --git a/packages/utils/src/filters/index.ts b/packages/utils/src/filters/index.ts new file mode 100644 index 0000000000..6f13a9aa1f --- /dev/null +++ b/packages/utils/src/filters/index.ts @@ -0,0 +1,9 @@ +export { BloomFilter, createBloomFilter, type BloomFilterOptions } from './bloom-filter.js' +export { CuckooFilter, createCuckooFilter, type CuckooFilterInit } from './cuckoo-filter.js' +export { ScalableCuckooFilter, createScalableCuckooFilter, type ScalableCuckooFilterInit } from './scalable-cuckoo-filter.js' + +export interface Filter { + add(item: Uint8Array | string): void + has(item: Uint8Array | string): boolean + remove?(buf: Uint8Array | string): boolean +} diff --git a/packages/utils/src/filters/scalable-cuckoo-filter.ts b/packages/utils/src/filters/scalable-cuckoo-filter.ts new file mode 100644 index 0000000000..247bcb93f0 --- /dev/null +++ b/packages/utils/src/filters/scalable-cuckoo-filter.ts @@ -0,0 +1,111 @@ +import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' +import { CuckooFilter, optimize, type CuckooFilterInit } from './cuckoo-filter.js' +import { fnv1a, type Hash } from './hashes.js' +import { getRandomInt } from './utils.js' +import type { Filter } from './index.js' + +export interface ScalableCuckooFilterInit extends CuckooFilterInit { + /** + * A number to multiply maxItems by when adding new sub-filters + */ + scale?: number +} + +export class ScalableCuckooFilter implements Filter { + private readonly filterSize: number + private readonly bucketSize: number + private readonly fingerprintSize: number + private readonly scale: number + private readonly filterSeries: CuckooFilter[] + private readonly hash: Hash + private readonly seed: number + + constructor (init: ScalableCuckooFilterInit) { + this.bucketSize = init.bucketSize ?? 4 + this.filterSize = init.filterSize ?? (1 << 18) / this.bucketSize + this.fingerprintSize = init.fingerprintSize ?? 2 + this.scale = init.scale ?? 2 + this.hash = init.hash ?? fnv1a + this.seed = init.seed ?? getRandomInt(0, Math.pow(2, 10)) + this.filterSeries = [ + new CuckooFilter({ + filterSize: this.filterSize, + bucketSize: this.bucketSize, + fingerprintSize: this.fingerprintSize, + hash: this.hash, + seed: this.seed + }) + ] + } + + add (item: Uint8Array | string): boolean { + if (typeof item === 'string') { + item = uint8ArrayFromString(item) + } + + if (this.has(item)) { + return true + } + + let current = this.filterSeries.find((cuckoo) => { + return cuckoo.reliable + }) + + if (current == null) { + const curSize = this.filterSize * Math.pow(this.scale, this.filterSeries.length) + + current = new CuckooFilter({ + filterSize: curSize, + bucketSize: this.bucketSize, + fingerprintSize: this.fingerprintSize, + hash: this.hash, + seed: this.seed + }) + + this.filterSeries.push(current) + } + + return current.add(item) + } + + has (item: Uint8Array | string): boolean { + if (typeof item === 'string') { + item = uint8ArrayFromString(item) + } + + for (let i = 0; i < this.filterSeries.length; i++) { + if (this.filterSeries[i].has(item)) { + return true + } + } + + return false + } + + remove (item: Uint8Array | string): boolean { + if (typeof item === 'string') { + item = uint8ArrayFromString(item) + } + + for (let i = 0; i < this.filterSeries.length; i++) { + if (this.filterSeries[i].remove(item)) { + return true + } + } + + return false + } + + get count (): number { + return this.filterSeries.reduce((acc, curr) => { + return acc + curr.count + }, 0) + } +} + +export function createScalableCuckooFilter (maxItems: number, errorRate: number = 0.001, options?: Pick): Filter { + return new ScalableCuckooFilter({ + ...optimize(maxItems, errorRate), + ...(options ?? {}) + }) +} diff --git a/packages/utils/src/filters/utils.ts b/packages/utils/src/filters/utils.ts new file mode 100644 index 0000000000..732025ca3b --- /dev/null +++ b/packages/utils/src/filters/utils.ts @@ -0,0 +1,3 @@ +export function getRandomInt (min: number, max: number): number { + return Math.floor(Math.random() * (max - min)) + min +} diff --git a/packages/utils/test/bloom-filter.spec.ts b/packages/utils/test/filter/bloom-filter.spec.ts similarity index 84% rename from packages/utils/test/bloom-filter.spec.ts rename to packages/utils/test/filter/bloom-filter.spec.ts index 27f8377c56..0672278129 100644 --- a/packages/utils/test/bloom-filter.spec.ts +++ b/packages/utils/test/filter/bloom-filter.spec.ts @@ -1,7 +1,7 @@ // ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE import { expect } from 'aegir/chai' import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' -import { BloomFilter } from '../src/bloom-filter.js' +import { BloomFilter, createBloomFilter } from '../../src/filters/bloom-filter.js' function hasBitsSet (buffer: Uint8Array): number { let isset = 0 @@ -33,27 +33,29 @@ describe('bloom-filter', () => { expect(filter.seeds[4]).to.equal(5) }) - describe('createOptimal()', () => { + describe('createBloomFilter()', () => { it('creates a filter with good defaults', () => { - let filter = BloomFilter.create(95) - expect(filter.bits).to.equal(1048) - expect(filter.seeds.length).to.equal(8) + let filter = createBloomFilter(95) + expect(filter).to.have.property('bits', 1048) + expect(filter).to.have.property('seeds').with.lengthOf(8) - filter = BloomFilter.create(148) - expect(filter.bits).to.equal(1632) - expect(filter.seeds.length).to.equal(8) + filter = createBloomFilter(148) + expect(filter).to.have.property('bits', 1632) + expect(filter).to.have.property('seeds').with.lengthOf(8) - filter = BloomFilter.create(10) - expect(filter.bits).to.equal(110) - expect(filter.seeds.length).to.equal(8) + filter = createBloomFilter(10) + expect(filter).to.have.property('bits', 110) + expect(filter).to.have.property('seeds').with.lengthOf(8) }) - it('createOptimal() lets you specify an error rate', () => { - let filter = BloomFilter.create(20000) - expect(filter.bits).to.equal(220555) + it('createBloomFilter() lets you specify an error rate', () => { + let filter = createBloomFilter(20000) + expect(filter).to.have.property('bits', 220555) + // @ts-expect-error private field const previous = filter.bits - filter = BloomFilter.create(20000, 0.2) + filter = createBloomFilter(20000, 0.2) + // @ts-expect-error private field expect(filter.bits).to.be.below(previous) }) }) @@ -141,7 +143,7 @@ describe('bloom-filter', () => { return result } - const filter = BloomFilter.create(100) + const filter = createBloomFilter(100) const words: string[] = [] for (let i = 0; i < 100; i++) { diff --git a/packages/utils/test/filter/cuckoo-filter.spec.ts b/packages/utils/test/filter/cuckoo-filter.spec.ts new file mode 100644 index 0000000000..823d2c6d8d --- /dev/null +++ b/packages/utils/test/filter/cuckoo-filter.spec.ts @@ -0,0 +1,53 @@ +import { randomBytes } from '@libp2p/crypto' +import { expect } from 'aegir/chai' +import { CuckooFilter } from '../../src/filters/cuckoo-filter.js' + +describe('cuckoo-filter', () => { + let keys: Uint8Array[] + let cuckoo: CuckooFilter + + beforeEach(() => { + keys = [] + cuckoo = new CuckooFilter({ + filterSize: 1500, + bucketSize: 6, + fingerprintSize: 4 + }) + }) + + it('add 1500 keys', () => { + for (let i = 0; i < 1500; i++) { + const rand = randomBytes(36) + keys.push(rand) + + expect(cuckoo.add(rand)).to.be.true() + } + + expect(cuckoo.count).to.equal(1500) + }) + + it('check keys are in filter', () => { + for (const key of keys) { + expect(cuckoo.has(key)).to.be.true() + } + }) + + it('removes keys', () => { + for (const key of keys) { + expect(cuckoo.remove(key)).to.be.true() + expect(cuckoo.has(key)).to.be.false() + } + + expect(cuckoo.count).to.equal(0) + }) + + it('becomes unreliable', () => { + while (true) { + cuckoo.add(randomBytes(36)) + + if (!cuckoo.reliable) { + break + } + } + }) +}) diff --git a/packages/utils/test/filter/scalable-cuckoo-filter.spec.ts b/packages/utils/test/filter/scalable-cuckoo-filter.spec.ts new file mode 100644 index 0000000000..cd8a172ac4 --- /dev/null +++ b/packages/utils/test/filter/scalable-cuckoo-filter.spec.ts @@ -0,0 +1,55 @@ +import { randomBytes } from '@libp2p/crypto' +import { expect } from 'aegir/chai' +import { ScalableCuckooFilter, createScalableCuckooFilter } from '../../src/filters/scalable-cuckoo-filter.js' + +describe('scalable-cuckoo-filter', () => { + let keys: Uint8Array[] + let cuckoo: ScalableCuckooFilter + + beforeEach(() => { + keys = [] + cuckoo = new ScalableCuckooFilter({ + filterSize: 1500, + bucketSize: 6, + fingerprintSize: 4 + }) + }) + + it('add 150k keys', () => { + for (let i = 0; i < 150000; i++) { + const rand = randomBytes(36) + keys.push(rand) + + expect(cuckoo.add(rand)).to.be.true() + } + + // collisions may occur + expect(cuckoo.count).to.be.greaterThan(140000) + expect(cuckoo).to.have.nested.property('filterSeries.length') + .that.is.greaterThan(1) + }) + + it('check keys are in filter', () => { + for (const key of keys) { + expect(cuckoo.has(key)).to.be.true() + } + }) + + it('removes keys', () => { + for (const key of keys) { + expect(cuckoo.remove(key)).to.be.true() + expect(cuckoo.has(key)).to.be.false() + } + + expect(cuckoo.count).to.equal(0) + }) + + it('optimises input', () => { + const filter = createScalableCuckooFilter(100000, 0.001) + const key = randomBytes(32) + + filter.add(key) + + expect(filter.has(key)).to.equal(true) + }) +})