Skip to content

Commit

Permalink
feat: add cuckoo filter (#2510)
Browse files Browse the repository at this point in the history
Add a filter type that lets us efficiently remove items as well as
add them.

It would be better to use the `bloom-filters` module at this point
but it adds 50KB+ to browser bundles for very simple use cases so
it's not suitable.  We can revisit if Callidon/bloom-filters#70
is ever resolved.
  • Loading branch information
achingbrain authored Apr 30, 2024
1 parent 3bc94b4 commit 3d7a9da
Show file tree
Hide file tree
Showing 15 changed files with 626 additions and 36 deletions.
11 changes: 8 additions & 3 deletions packages/peer-collections/src/filter.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import { BloomFilter } from '@libp2p/utils/bloom-filter'
import { createScalableCuckooFilter } from '@libp2p/utils/filters'
import type { PeerId } from '@libp2p/interface'
import type { Filter } from '@libp2p/utils/filters'

/**
* Uses a Cuckoo filter to implement a mechanism for deduplicating PeerIds in a
* way that uses a fixed amount of memory.
*/
export class PeerFilter {
private readonly filter: BloomFilter
private readonly filter: Filter

constructor (size: number, errorRate?: number) {
this.filter = BloomFilter.create(size, errorRate)
this.filter = createScalableCuckooFilter(size, errorRate)
}

has (peerId: PeerId): boolean {
Expand All @@ -19,6 +20,10 @@ export class PeerFilter {
add (peerId: PeerId): void {
this.filter.add(peerId.toBytes())
}

remove (peerId: PeerId): void {
this.filter.remove?.(peerId.toBytes())
}
}

export function peerFilter (size: number): PeerFilter {
Expand Down
4 changes: 4 additions & 0 deletions packages/peer-collections/test/filter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,9 @@ describe('peer-filter', () => {
filter.add(peer)

expect(filter.has(peer)).to.be.true()

filter.remove(peer)

expect(filter.has(peer)).to.be.false()
})
})
3 changes: 3 additions & 0 deletions packages/peer-collections/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
},
{
"path": "../peer-id-factory"
},
{
"path": "../utils"
}
]
}
9 changes: 5 additions & 4 deletions packages/utils/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@
"types": "./dist/src/array-equals.d.ts",
"import": "./dist/src/array-equals.js"
},
"./bloom-filter": {
"types": "./dist/src/bloom-filter.d.ts",
"import": "./dist/src/bloom-filter.js"
},
"./close-source": {
"types": "./dist/src/close-source.d.ts",
"import": "./dist/src/close-source.js"
},
"./filters": {
"types": "./dist/src/filters/index.d.ts",
"import": "./dist/src/filters/index.js"
},
"./ip-port-to-multiaddr": {
"types": "./dist/src/ip-port-to-multiaddr.d.ts",
"import": "./dist/src/ip-port-to-multiaddr.js"
Expand Down Expand Up @@ -137,6 +137,7 @@
"@libp2p/logger": "^4.0.11",
"@multiformats/multiaddr": "^12.2.1",
"@multiformats/multiaddr-matcher": "^1.2.0",
"@sindresorhus/fnv1a": "^3.1.0",
"@types/murmurhash3js-revisited": "^3.0.3",
"delay": "^6.0.0",
"get-iterator": "^2.0.1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import mur from 'murmurhash3js-revisited'
import { Uint8ArrayList } from 'uint8arraylist'
import { alloc } from 'uint8arrays/alloc'
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
import type { Filter } from './index.js'

const LN2_SQUARED = Math.LN2 * Math.LN2

Expand All @@ -13,16 +14,7 @@ export interface BloomFilterOptions {
bits?: number
}

export class BloomFilter {
/**
* Create a `BloomFilter` with the smallest `bits` and `hashes` value for the
* specified item count and error rate.
*/
static create (itemcount: number, errorRate: number = 0.005): BloomFilter {
const opts = optimize(itemcount, errorRate)
return new BloomFilter(opts)
}

export class BloomFilter implements Filter {
public readonly seeds: number[]
public readonly bits: number
public buffer: Uint8Array
Expand Down Expand Up @@ -111,9 +103,18 @@ export class BloomFilter {
}
}

function optimize (itemcount: number, errorRate: number = 0.005): { bits: number, hashes: number } {
const bits = Math.round(-1 * itemcount * Math.log(errorRate) / LN2_SQUARED)
const hashes = Math.round((bits / itemcount) * Math.LN2)
/**
 * Build a Bloom filter sized for the expected number of items.
 *
 * The `bits` and `hashes` options are derived by `optimize` from the item
 * count and the acceptable error rate, then handed to the `BloomFilter`
 * constructor.
 *
 * @param itemcount - how many items the filter is expected to hold
 * @param errorRate - acceptable false-positive rate, defaults to 0.005
 * @returns the new filter, exposed through the generic `Filter` interface
 */
export function createBloomFilter (itemcount: number, errorRate: number = 0.005): Filter {
  return new BloomFilter(optimize(itemcount, errorRate))
}

/**
 * Work out Bloom filter dimensions for an item count and error rate.
 *
 * - bits   = round(-n * ln(p) / ln(2)^2)
 * - hashes = round((bits / n) * ln(2))
 *
 * @param itemCount - expected number of items (n)
 * @param errorRate - acceptable false-positive rate (p), defaults to 0.005
 */
function optimize (itemCount: number, errorRate: number = 0.005): { bits: number, hashes: number } {
  const bitCount = Math.round(-1 * itemCount * Math.log(errorRate) / LN2_SQUARED)
  const bitsPerItem = bitCount / itemCount
  const hashCount = Math.round(bitsPerItem * Math.LN2)

  return {
    bits: bitCount,
    hashes: hashCount
  }
}
Expand Down
64 changes: 64 additions & 0 deletions packages/utils/src/filters/bucket.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { Fingerprint } from './fingerprint.js'
import { getRandomInt } from './utils.js'

/**
 * A fixed-size group of fingerprint slots. Empty slots hold `null`.
 */
export class Bucket {
  private readonly contents: Array<Fingerprint | null>

  /**
   * @param size - number of fingerprint slots in this bucket
   */
  constructor (size: number) {
    this.contents = new Array(size).fill(null)
  }

  /**
   * Returns true if any slot holds a fingerprint equal to the passed one.
   *
   * @throws {TypeError} if the argument is not a `Fingerprint`
   */
  has (fingerprint: Fingerprint): boolean {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    return this.contents.some((fp) => {
      return fingerprint.equals(fp)
    })
  }

  /**
   * Store the fingerprint in the first empty slot.
   *
   * @returns true if the fingerprint was stored, false if every slot is
   * already occupied (the bucket is left unchanged in that case)
   * @throws {TypeError} if the argument is not a `Fingerprint`
   */
  add (fingerprint: Fingerprint): boolean {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    for (let i = 0; i < this.contents.length; i++) {
      if (this.contents[i] == null) {
        this.contents[i] = fingerprint
        return true
      }
    }

    // no free slot - report failure so the caller can try another bucket
    // instead of believing the fingerprint was stored
    return false
  }

  /**
   * Overwrite a randomly chosen slot with the passed fingerprint.
   *
   * @returns the previous occupant of the slot, or `null` if it was empty
   * @throws {TypeError} if the argument is not a `Fingerprint`
   */
  swap (fingerprint: Fingerprint): Fingerprint | null {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    const i = getRandomInt(0, this.contents.length - 1)
    const current = this.contents[i]
    this.contents[i] = fingerprint

    return current
  }

  /**
   * Clear the first slot holding a fingerprint equal to the passed one.
   *
   * @returns true if a slot was cleared, false if no match was found
   * @throws {TypeError} if the argument is not a `Fingerprint`
   */
  remove (fingerprint: Fingerprint): boolean {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    const found = this.contents.findIndex((fp) => {
      return fingerprint.equals(fp)
    })

    if (found > -1) {
      this.contents[found] = null
      return true
    }

    return false
  }
}
197 changes: 197 additions & 0 deletions packages/utils/src/filters/cuckoo-filter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
import { Bucket } from './bucket.js'
import { Fingerprint, MAX_FINGERPRINT_SIZE } from './fingerprint.js'
import { fnv1a, type Hash } from './hashes.js'
import { getRandomInt } from './utils.js'
import type { Filter } from './index.js'

// Upper bound on the number of relocation attempts `CuckooFilter.add` makes
// before it gives up and returns false
const maxCuckooCount = 500

/**
* Options accepted by the `CuckooFilter` constructor
*/
export interface CuckooFilterInit {
/**
* How many items the filter is expected to contain. `CuckooFilter` also uses
* this value as the modulus when deriving bucket indexes.
*/
filterSize: number

/**
* How many items to put in each bucket
*
* @default 4
*/
bucketSize?: number

/**
* How many bytes the fingerprint is expected to be
*
* @default 2
*/
fingerprintSize?: number

/**
* A non-cryptographic hash implementation, defaults to `fnv1a`
*/
hash?: Hash

/**
* A number used to seed the hash. When omitted the filter picks one with
* `getRandomInt(0, Math.pow(2, 10))`.
*/
seed?: number
}

/**
 * A filter that stores a short fingerprint of each item in one of two
 * candidate buckets, which allows items to be removed as well as added.
 *
 * Buckets are created lazily the first time an index is written to.
 */
export class CuckooFilter implements Filter {
  private readonly bucketSize: number
  private readonly filterSize: number
  private readonly fingerprintSize: number
  private readonly buckets: Bucket[]
  /**
   * Number of successful `add` calls minus successful `remove` calls
   */
  public count: number
  private readonly hash: Hash
  private readonly seed: number

  constructor (init: CuckooFilterInit) {
    this.filterSize = init.filterSize
    this.bucketSize = init.bucketSize ?? 4
    this.fingerprintSize = init.fingerprintSize ?? 2
    this.count = 0
    this.buckets = []
    this.hash = init.hash ?? fnv1a
    this.seed = init.seed ?? getRandomInt(0, Math.pow(2, 10))
  }

  /**
   * Add an item to the filter. Strings are converted to bytes first.
   *
   * The fingerprint is placed in either of its two candidate buckets. If both
   * refuse it, existing fingerprints are relocated to their alternative
   * buckets, up to `maxCuckooCount` times, to make room.
   *
   * @returns true if the item was stored, false if no free slot could be
   * found within the relocation limit
   */
  add (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
    const j = this.hash.hash(item, this.seed) % this.filterSize
    // NOTE(review): `^` produces a signed 32-bit integer, so this index can be
    // negative if `fingerprint.hash()` has its top bit set - confirm the range
    // of `Fingerprint.hash()`
    const k = (j ^ fingerprint.hash()) % this.filterSize

    if (this.buckets[j] == null) {
      this.buckets[j] = new Bucket(this.bucketSize)
    }

    if (this.buckets[k] == null) {
      this.buckets[k] = new Bucket(this.bucketSize)
    }

    if (this.buckets[j].add(fingerprint) || this.buckets[k].add(fingerprint)) {
      this.count++
      return true
    }

    // both candidate buckets refused the fingerprint - pick one of them and
    // start evicting. Both buckets were created above.
    const rand = [j, k]
    let i = rand[getRandomInt(0, rand.length - 1)]

    // the fingerprint that currently has no slot. It starts as the new item
    // and becomes whichever fingerprint was evicted most recently.
    let homeless = fingerprint

    for (let n = 0; n < maxCuckooCount; n++) {
      const evicted = this.buckets[i].swap(homeless)

      if (evicted == null) {
        // the randomly chosen slot was empty so nothing was displaced and
        // every fingerprint now has a home
        this.count++
        return true
      }

      // the evicted fingerprint must be re-homed in its alternative bucket
      homeless = evicted
      i = (i ^ homeless.hash()) % this.filterSize

      if (this.buckets[i] == null) {
        this.buckets[i] = new Bucket(this.bucketSize)
      }

      if (this.buckets[i].add(homeless)) {
        this.count++
        return true
      }

      // alternative bucket is full as well - the next iteration evicts one of
      // its occupants to make room for `homeless`
    }

    // relocation limit reached, the last evicted fingerprint could not be
    // placed
    return false
  }

  /**
   * Test whether an item may be in the filter by looking for its fingerprint
   * in the first candidate bucket and then in the alternative one.
   */
  has (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
    const j = this.hash.hash(item, this.seed) % this.filterSize
    const inJ = this.buckets[j]?.has(fingerprint) ?? false

    if (inJ) {
      return inJ
    }

    const k = (j ^ fingerprint.hash()) % this.filterSize

    return this.buckets[k]?.has(fingerprint) ?? false
  }

  /**
   * Remove one copy of the item's fingerprint, trying the first candidate
   * bucket and then the alternative one. `count` is decremented on success.
   *
   * @returns true if a fingerprint was removed
   */
  remove (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
    const j = this.hash.hash(item, this.seed) % this.filterSize
    const inJ = this.buckets[j]?.remove(fingerprint) ?? false

    if (inJ) {
      this.count--
      return inJ
    }

    const k = (j ^ fingerprint.hash()) % this.filterSize
    const inK = this.buckets[k]?.remove(fingerprint) ?? false

    if (inK) {
      this.count--
    }

    return inK
  }

  /**
   * True while `count / filterSize`, floored to a whole percentage, is no
   * more than 95
   */
  get reliable (): boolean {
    return Math.floor(100 * (this.count / this.filterSize)) <= 95
  }
}

// Max load constants, defined in the cuckoo paper. Keys are bucket sizes and
// values are the load factor that `optimize` divides the expected item count
// by when sizing a filter. `calculateBucketSize` only ever selects 2, 4 or 8,
// so the entry for 1 is not used by `optimize`.
const MAX_LOAD = {
1: 0.5,
2: 0.84,
4: 0.95,
8: 0.98
}

/**
 * Choose how many fingerprints each bucket holds for a target error rate.
 *
 * - above 0.002           -> 2
 * - above 0.00001         -> 4
 * - anything else         -> 8
 *
 * @param errorRate - acceptable false-positive rate, defaults to 0.001
 */
function calculateBucketSize (errorRate: number = 0.001): 2 | 4 | 8 {
  if (errorRate > 0.002) {
    return 2
  }

  return errorRate > 0.00001 ? 4 : 8
}

/**
 * Derive `CuckooFilter` options for an expected item count and error rate.
 *
 * The bucket size comes from `calculateBucketSize`, the filter size is the
 * item count divided by the max load for that bucket size, and the
 * fingerprint size is capped at `MAX_FINGERPRINT_SIZE`.
 *
 * @param maxItems - how many items the filter should be able to hold
 * @param errorRate - acceptable false-positive rate, defaults to 0.001
 */
export function optimize (maxItems: number, errorRate: number = 0.001): CuckooFilterInit {
  // https://www.eecs.harvard.edu/~michaelm/postscripts/cuckoo-conext2014.pdf
  // Section 5.1 Optimal Bucket Size
  const slotsPerBucket = calculateBucketSize(errorRate)
  const targetLoad = MAX_LOAD[slotsPerBucket]

  // https://stackoverflow.com/questions/57555236/how-to-size-a-cuckoo-filter/57617208#57617208
  const totalSize = Math.round(maxItems / targetLoad)
  const wantedFingerprintSize = Math.ceil(Math.log(totalSize / slotsPerBucket)) + 2

  return {
    filterSize: totalSize,
    bucketSize: slotsPerBucket,
    fingerprintSize: Math.min(wantedFingerprintSize, MAX_FINGERPRINT_SIZE)
  }
}

/**
 * Build a `CuckooFilter` whose options are derived by `optimize` from the
 * expected item count and acceptable error rate.
 *
 * @param maxItems - how many items the filter should be able to hold
 * @param errorRate - acceptable false-positive rate, defaults to 0.005
 * @returns the new filter, exposed through the generic `Filter` interface
 */
export function createCuckooFilter (maxItems: number, errorRate: number = 0.005): Filter {
  return new CuckooFilter(optimize(maxItems, errorRate))
}
Loading

0 comments on commit 3d7a9da

Please sign in to comment.