diff --git a/packages/utils/package.json b/packages/utils/package.json index 76e1529c..8ff02299 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -58,12 +58,10 @@ "@ipld/dag-cbor": "^9.2.0", "@ipld/dag-json": "^10.2.0", "@ipld/dag-pb": "^4.1.0", - "@libp2p/crypto": "^5.0.0", "@libp2p/interface": "^2.0.0", "@libp2p/logger": "^5.0.0", "@libp2p/utils": "^6.0.0", "@multiformats/dns": "^1.0.1", - "@types/murmurhash3js-revisited": "^3.0.3", "any-signal": "^4.1.1", "blockstore-core": "^5.0.0", "cborg": "^4.0.9", @@ -76,13 +74,12 @@ "it-merge": "^3.0.3", "mortice": "^3.0.4", "multiformats": "^13.1.0", - "murmurhash3js-revisited": "^3.0.0", "p-defer": "^4.0.1", "progress-events": "^1.0.0", - "uint8arraylist": "^2.4.8", "uint8arrays": "^5.0.2" }, "devDependencies": { + "@libp2p/crypto": "^5.0.0", "@libp2p/peer-id": "^5.0.0", "@types/sinon": "^17.0.3", "aegir": "^44.0.1", diff --git a/packages/utils/src/abstract-session.ts b/packages/utils/src/abstract-session.ts index 889df563..c2033445 100644 --- a/packages/utils/src/abstract-session.ts +++ b/packages/utils/src/abstract-session.ts @@ -1,11 +1,12 @@ import { DEFAULT_SESSION_MIN_PROVIDERS, DEFAULT_SESSION_MAX_PROVIDERS, InsufficientProvidersError } from '@helia/interface' import { TypedEventEmitter, setMaxListeners } from '@libp2p/interface' +import { createScalableCuckooFilter } from '@libp2p/utils/filters' import { Queue } from '@libp2p/utils/queue' import { base64 } from 'multiformats/bases/base64' import pDefer from 'p-defer' -import { BloomFilter } from './bloom-filter.js' import type { BlockBroker, BlockRetrievalOptions, CreateSessionOptions } from '@helia/interface' import type { AbortOptions, ComponentLogger, Logger } from '@libp2p/interface' +import type { Filter } from '@libp2p/utils/filters' import type { CID } from 'multiformats/cid' import type { DeferredPromise } from 'p-defer' import type { ProgressEvent } from 'progress-events' @@ -31,7 +32,7 @@ export abstract class AbstractSession = {}): Promise { diff --git a/packages/utils/src/bloom-filter.ts b/packages/utils/src/bloom-filter.ts deleted file mode 100644 index 308cbfc0..00000000 --- a/packages/utils/src/bloom-filter.ts +++ /dev/null @@ -1,141 +0,0 @@ -// ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE -import { randomBytes } from '@libp2p/crypto' -import mur from 'murmurhash3js-revisited' -import { Uint8ArrayList } from 'uint8arraylist' -import { alloc } from 'uint8arrays/alloc' -import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' - -const LN2_SQUARED = Math.LN2 * Math.LN2 - -export interface BloomFilterOptions { - seeds?: number[] - hashes?: number - bits?: number -} - -export class BloomFilter { - /** - * Create a `BloomFilter` with the smallest `bits` and `hashes` value for the - * specified item count and error rate. - */ - static create (itemcount: number, errorRate: number = 0.005): BloomFilter { - const opts = optimize(itemcount, errorRate) - return new BloomFilter(opts) - } - - public readonly seeds: number[] - public readonly bits: number - public buffer: Uint8Array - - constructor (options: BloomFilterOptions = {}) { - if (options.seeds != null) { - this.seeds = options.seeds - } else { - this.seeds = generateSeeds(options.hashes ?? 8) - } - - this.bits = options.bits ?? 1024 - this.buffer = alloc(Math.ceil(this.bits / 8)) - } - - /** - * Add an item to the filter - */ - add (item: Uint8Array | string): void { - if (typeof item === 'string') { - item = uint8ArrayFromString(item) - } - - for (let i = 0; i < this.seeds.length; i++) { - const hash = mur.x86.hash32(item, this.seeds[i]) - const bit = hash % this.bits - - this.setbit(bit) - } - } - - /** - * Test if the filter has an item. If it returns false it definitely does not - * have the item. If it returns true, it probably has the item but there's - * an `errorRate` chance it doesn't. - */ - has (item: Uint8Array | string): boolean { - if (typeof item === 'string') { - item = uint8ArrayFromString(item) - } - - for (let i = 0; i < this.seeds.length; i++) { - const hash = mur.x86.hash32(item, this.seeds[i]) - const bit = hash % this.bits - - const isSet = this.getbit(bit) - - if (!isSet) { - return false - } - } - - return true - } - - /** - * Reset the filter - */ - clear (): void { - this.buffer.fill(0) - } - - setbit (bit: number): void { - let pos = 0 - let shift = bit - while (shift > 7) { - pos++ - shift -= 8 - } - - let bitfield = this.buffer[pos] - bitfield |= (0x1 << shift) - this.buffer[pos] = bitfield - } - - getbit (bit: number): boolean { - let pos = 0 - let shift = bit - while (shift > 7) { - pos++ - shift -= 8 - } - - const bitfield = this.buffer[pos] - return (bitfield & (0x1 << shift)) !== 0 - } -} - -function optimize (itemcount: number, errorRate: number = 0.005): { bits: number, hashes: number } { - const bits = Math.round(-1 * itemcount * Math.log(errorRate) / LN2_SQUARED) - const hashes = Math.round((bits / itemcount) * Math.LN2) - - return { bits, hashes } -} - -function generateSeeds (count: number): number[] { - let buf: Uint8ArrayList - let j: number - const seeds = [] - - for (let i = 0; i < count; i++) { - buf = new Uint8ArrayList(randomBytes(4)) - seeds[i] = buf.getUint32(0, true) - - // Make sure we don't end up with two identical seeds, - // which is unlikely but possible. - for (j = 0; j < i; j++) { - if (seeds[i] === seeds[j]) { - i-- - break - } - } - } - - return seeds -} diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 02676ba0..a1a13ed3 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -44,8 +44,6 @@ import type { MultihashHasher } from 'multiformats/hashes/interface' export { AbstractSession } from './abstract-session.js' export type { AbstractCreateSessionOptions, BlockstoreSessionEvents, AbstractSessionComponents } from './abstract-session.js' -export { BloomFilter } from './bloom-filter.js' -export type { BloomFilterOptions } from './bloom-filter.js' export type { BlockStorage, BlockStorageInit } diff --git a/packages/utils/test/bloom-filter.spec.ts b/packages/utils/test/bloom-filter.spec.ts deleted file mode 100644 index 27f8377c..00000000 --- a/packages/utils/test/bloom-filter.spec.ts +++ /dev/null @@ -1,199 +0,0 @@ -// ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE -import { expect } from 'aegir/chai' -import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' -import { BloomFilter } from '../src/bloom-filter.js' - -function hasBitsSet (buffer: Uint8Array): number { - let isset = 0 - for (let i = 0; i < buffer.length; i++) { - isset |= (buffer[i] !== 0 ? 1 : 0) - } - return isset -} - -describe('bloom-filter', () => { - it('constructs a filter of the requested size', () => { - const filter = new BloomFilter({ hashes: 4, bits: 32 }) - expect(filter.seeds).to.have.lengthOf(4) - expect(filter.bits).to.equal(32) - expect(filter.buffer).to.be.an.instanceOf(Uint8Array) - }) - - it('zeroes out its storage buffer', () => { - const filter = new BloomFilter({ hashes: 3, bits: 64 }) - for (let i = 0; i < filter.buffer.length; i++) { - expect(filter.buffer[i]).to.equal(0) - } - }) - - it('uses passed-in seeds if provided', () => { - const filter = new BloomFilter({ bits: 256, seeds: [1, 2, 3, 4, 5] }) - expect(filter.seeds.length).to.equal(5) - expect(filter.seeds[0]).to.equal(1) - expect(filter.seeds[4]).to.equal(5) - }) - - describe('createOptimal()', () => { - it('creates a filter with good defaults', () => { - let filter = BloomFilter.create(95) - expect(filter.bits).to.equal(1048) - expect(filter.seeds.length).to.equal(8) - - filter = BloomFilter.create(148) - expect(filter.bits).to.equal(1632) - expect(filter.seeds.length).to.equal(8) - - filter = BloomFilter.create(10) - expect(filter.bits).to.equal(110) - expect(filter.seeds.length).to.equal(8) - }) - - it('createOptimal() lets you specify an error rate', () => { - let filter = BloomFilter.create(20000) - expect(filter.bits).to.equal(220555) - const previous = filter.bits - - filter = BloomFilter.create(20000, 0.2) - expect(filter.bits).to.be.below(previous) - }) - }) - - describe('setbit() and getbit()', () => { - it('sets the specified bit', () => { - const filter = new BloomFilter({ hashes: 3, bits: 16 }) - - filter.setbit(0) - let val = filter.getbit(0) - expect(val).to.equal(true) - - filter.setbit(1) - val = filter.getbit(1) - expect(val).to.equal(true) - - val = filter.getbit(2) - expect(val).to.equal(false) - - filter.setbit(10) - val = filter.getbit(10) - expect(val).to.equal(true) - }) - - it('can set all bits', () => { - let i: number - let value: number - - const filter = new BloomFilter({ hashes: 3, bits: 16 }) - expect(filter.buffer.length).to.equal(2) - - for (i = 0; i < 16; i++) { - filter.setbit(i) - } - - for (i = 0; i < 2; i++) { - value = filter.buffer[i] - expect(value).to.equal(255) - } - }) - - it('slides over into the next buffer slice when setting bits', () => { - let val - const filter = new BloomFilter({ hashes: 3, bits: 64 }) - - filter.setbit(8) - val = filter.buffer[1] - expect(val).to.equal(1) - - filter.setbit(17) - val = filter.buffer[2] - expect(val).to.equal(2) - - filter.setbit(34) - val = filter.buffer[4] - expect(val).to.equal(4) - }) - }) - - describe('add()', () => { - it('can store buffers', () => { - const filter = new BloomFilter({ hashes: 4, bits: 128 }) - - expect(hasBitsSet(filter.buffer)).to.equal(0) - filter.add(uint8ArrayFromString('cat')) - expect(hasBitsSet(filter.buffer)).to.equal(1) - }) - - it('can store strings', () => { - const filter = new BloomFilter({ hashes: 4, bits: 128 }) - filter.add('cat') - - expect(hasBitsSet(filter.buffer)).to.equal(1) - }) - - it('can add a hundred random items', () => { - const alpha = '0123456789abcdefghijklmnopqrstuvwxyz' - function randomWord (length?: number): string { - length = length ?? Math.ceil(Math.random() * 20) - let result = '' - for (let i = 0; i < length; i++) { - result += alpha[Math.floor(Math.random() * alpha.length)] - } - - return result - } - - const filter = BloomFilter.create(100) - const words: string[] = [] - - for (let i = 0; i < 100; i++) { - const w = randomWord() - words.push(w) - filter.add(w) - } - - for (let i = 0; i < words.length; i++) { - expect(filter.has(words[i])).to.equal(true) - } - }) - }) - - describe('has()', () => { - it('returns true when called on a stored item', () => { - const filter = new BloomFilter({ hashes: 3, bits: 16 }) - filter.add('cat') - - expect(hasBitsSet(filter.buffer)).to.equal(1) - expect(filter.has('cat')).to.be.true() - }) - - it('returns false for items not in the set (mostly)', () => { - const filter = new BloomFilter({ hashes: 4, bits: 50 }) - filter.add('cat') - expect(filter.has('dog')).to.be.false() - }) - - it('responds appropriately for arrays of added items', () => { - const filter = new BloomFilter({ hashes: 3, bits: 128 }) - filter.add('cat') - filter.add('dog') - filter.add('wallaby') - - expect(filter.has('cat')).to.equal(true) - expect(filter.has('dog')).to.equal(true) - expect(filter.has('wallaby')).to.equal(true) - expect(filter.has('orange')).to.equal(false) - }) - }) - - describe('clear()', () => { - it('clears the filter', () => { - const filter = new BloomFilter({ hashes: 3, bits: 128 }) - filter.add('cat') - filter.add('dog') - filter.add('wallaby') - expect(hasBitsSet(filter.buffer)).to.equal(1) - - filter.clear() - expect(hasBitsSet(filter.buffer)).to.equal(0) - }) - }) -})