diff --git a/package-lock.json b/package-lock.json index 73cb07e..97d0a5c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,7 @@ "@ipld/dag-json": "^10.1.7", "@ipld/dag-pb": "^4.0.8", "@web3-storage/content-claims": "^4.0.5", - "@web3-storage/gateway-lib": "github:web3-storage/gateway-lib#5a027e05be62f985407b46bce70748f543d302b7", + "@web3-storage/gateway-lib": "^5.0.0", "cardex": "^3.0.0", "dagula": "^7.3.0", "http-range-parse": "^1.0.0", @@ -5507,17 +5507,16 @@ } }, "node_modules/@web3-storage/gateway-lib": { - "version": "4.1.0", - "resolved": "git+ssh://git@github.com/web3-storage/gateway-lib.git#5a027e05be62f985407b46bce70748f543d302b7", - "integrity": "sha512-k4ZkHCu1D/iFy/ZyFlg6F+Sh8APLrA+wb9np5+kXegP6M7Ho1uY6npVubHA/Cw3B/0kf/kB0RAVhwNeHLOukVA==", - "license": "Apache-2.0 OR MIT", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@web3-storage/gateway-lib/-/gateway-lib-5.0.0.tgz", + "integrity": "sha512-jplyNJpXyv9/EUv0oQr9+35+CoHw6o8V3nzgNremUE9Akt4oyU6UKq6iutb1ARrtjVNNWr9ww8G2kVW3LgkhTA==", "dependencies": { "@httpland/range-parser": "^1.2.0", "@ipld/car": "^5.2.6", "@web3-storage/handlebars": "^1.0.0", "bytes": "^3.1.2", "chardet": "^2.0.0", - "dagula": "github:web3-storage/dagula#feat/add-block-streaming-interface", + "dagula": "^8.0.0", "magic-bytes.js": "^1.8.0", "mrmime": "^1.0.1", "multiformats": "^13.0.1", @@ -5526,6 +5525,46 @@ "uint8arrays": "^5.0.1" } }, + "node_modules/@web3-storage/gateway-lib/node_modules/dagula": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/dagula/-/dagula-8.0.0.tgz", + "integrity": "sha512-VtcVOK2idIf2/2Yoi+zviq9OveksiB2cyJAT6FASrzngomBSSN516ym4b8wM6v6aq66gxaBFo9p9D2ZUCcrEDw==", + "dependencies": { + "@chainsafe/libp2p-noise": "^11.0.0", + "@chainsafe/libp2p-yamux": "^4.0.2", + "@ipld/car": "^5.2.6", + "@ipld/dag-cbor": "^9.0.8", + "@ipld/dag-json": "^10.0.0", + "@ipld/dag-pb": "^4.0.8", + "@libp2p/interface-connection": "^3.0.8", + "@libp2p/interface-peer-id": "^2.0.1", + "@libp2p/interface-registrar": "^2.0.8", + "@libp2p/interfaces": "^3.3.1", + "@libp2p/mplex": "^7.1.1", + "@libp2p/tcp": "^6.0.9", + "@libp2p/websockets": "^5.0.3", + "@multiformats/blake2": "^2.0.1", + "@multiformats/multiaddr": "^11.3.0", + "archy": "^1.0.0", + "conf": "^11.0.1", + "debug": "^4.3.4", + "ipfs-unixfs": "^11.1.0", + "ipfs-unixfs-exporter": "^13.3.0", + "it-length-prefixed": "^8.0.4", + "it-pipe": "^2.0.3", + "libp2p": "^0.42.2", + "multiformats": "^13.0.1", + "p-defer": "^4.0.0", + "protobufjs": "^7.0.0", + "sade": "^1.8.1", + "streaming-iterables": "^8.0.1", + "timeout-abort-controller": "^3.0.0", + "varint": "^6.0.0" + }, + "bin": { + "dagula": "bin.js" + } + }, "node_modules/@web3-storage/gateway-lib/node_modules/uint8arrays": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/uint8arrays/-/uint8arrays-5.0.1.tgz", diff --git a/package.json b/package.json index 4d0b831..12b42ea 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,7 @@ "@ipld/dag-json": "^10.1.7", "@ipld/dag-pb": "^4.0.8", "@web3-storage/content-claims": "^4.0.5", - "@web3-storage/gateway-lib": "github:web3-storage/gateway-lib#5a027e05be62f985407b46bce70748f543d302b7", + "@web3-storage/gateway-lib": "^5.0.0", "cardex": "^3.0.0", "dagula": "^7.3.0", "http-range-parse": "^1.0.0", diff --git a/src/lib/blockstore.js b/src/lib/blockstore.js index 013d5ec..0afdb9b 100644 --- a/src/lib/blockstore.js +++ b/src/lib/blockstore.js @@ -1,6 +1,5 @@ import { readBlockHead, asyncIterableReader } from '@ipld/car/decoder' import { base58btc } from 'multiformats/bases/base58' -import * as Link from 'multiformats/link' import defer from 'p-defer' import { OrderedCarBlockBatcher } from './block-batch.js' import * as IndexEntry from './dag-index/entry.js' @@ -39,17 +38,11 @@ export class R2Blockstore { if (!entry) return if (IndexEntry.isLocated(entry)) { - // if host is "w3s.link" then content can be found in CARPARK - const url = entry.site.location.find(l => l.hostname === 'w3s.link') - // TODO: allow fetch from _any_ URL - if (!url) return - - const link = Link.parse(url.pathname.split('/')[2]) - const digestString = base58btc.encode(link.multihash.bytes) - const key = `${digestString}/${digestString}.blob` - const res = await this._dataBucket.get(key, { range: entry.site.range }) - if (!res) return - return { cid, bytes: new Uint8Array(await res.arrayBuffer()) } + const keyAndOptions = IndexEntry.toBucketGet(entry) + if (!keyAndOptions) return + + const res = await this._dataBucket.get(keyAndOptions[0], keyAndOptions[1]) + return res ? { cid, bytes: new Uint8Array(await res.arrayBuffer()) } : undefined } const carPath = `${entry.origin}/${entry.origin}.car` @@ -91,23 +84,11 @@ export class R2Blockstore { const entry = await this._idx.get(cid) if (!entry) return - // stream API exists only for blobs (i.e. location claimed) - if (IndexEntry.isLocated(entry)) { - // if host is "w3s.link" then content can be found in CARPARK - const url = entry.site.location.find(l => l.hostname === 'w3s.link') - // TODO: allow fetch from any URL - if (!url) return - - const link = Link.parse(url.pathname.split('/')[2]) - const digestString = base58btc.encode(link.multihash.bytes) - const key = `${digestString}/${digestString}.blob` - const first = entry.site.range.offset + (options?.range?.[0] ?? 0) - const last = entry.site.range.offset + (options?.range?.[1] ?? (entry.site.range.length - 1)) - const range = { offset: first, length: last - first + 1 } - const res = await this._dataBucket.get(key, { range }) - if (!res) return - return /** @type {ReadableStream} */ (res.body) - } + const keyAndOptions = IndexEntry.toBucketGet(entry, options) + if (!keyAndOptions) return + + const res = await this._dataBucket.get(keyAndOptions[0], keyAndOptions[1]) + return /** @type {ReadableStream|undefined} */ (res?.body) } } diff --git a/src/lib/dag-index/entry.js b/src/lib/dag-index/entry.js index ad273f6..5b9f29a 100644 --- a/src/lib/dag-index/entry.js +++ b/src/lib/dag-index/entry.js @@ -1,3 +1,7 @@ +import * as Link from 'multiformats/link' +import { base58btc } from 'multiformats/bases/base58' +import { CAR_CODE } from '../../constants.js' + /** * An index entry is "located" if a content claim has specified it's location * i.e. it is of type `LocatedIndexEntry`. @@ -6,3 +10,30 @@ * @returns {entry is import('./api.js').LocatedIndexEntry} */ export const isLocated = entry => 'site' in entry + +/** + * @param {import('./api.js').IndexEntry} entry + * @param {import('dagula').RangeOptions} [options] + * @returns {[key: string, options: import('@cloudflare/workers-types').R2GetOptions]|undefined} + */ +export const toBucketGet = (entry, options) => { + if (!isLocated(entry)) return + + // if host is "w3s.link" then content can be found in CARPARK + const url = entry.site.location.find(l => l.hostname === 'w3s.link') + if (!url) return + + const link = Link.parse(url.pathname.split('/')[2]) + + let key + if (link.code === CAR_CODE) { + key = `${link}/${link}.car` + } else { + const digestString = base58btc.encode(link.multihash.bytes) + key = `${digestString}/${digestString}.blob` + } + const first = entry.site.range.offset + (options?.range?.[0] ?? 0) + const last = entry.site.range.offset + (options?.range?.[1] ?? (entry.site.range.length - 1)) + const range = { offset: first, length: last - first + 1 } + return [key, { range }] +} diff --git a/test/helpers/builder.js b/test/helpers/builder.js index 8797f39..7e7a17f 100644 --- a/test/helpers/builder.js +++ b/test/helpers/builder.js @@ -3,6 +3,7 @@ import { pack } from 'ipfs-car/pack' import * as Link from 'multiformats/link' import { sha256 } from 'multiformats/hashes/sha2' import { base58btc } from 'multiformats/bases/base58' +import * as raw from 'multiformats/codecs/raw' import { CarIndexer } from '@ipld/car' import { concat } from 'uint8arrays' import { TreewalkCarSplitter } from 'carbites/treewalk' @@ -13,7 +14,7 @@ import * as CAR from './car.js' /** * @typedef {import('multiformats').ToString} MultihashString - * @typedef {import('multiformats').ToString} ShardCID + * @typedef {import('multiformats').ToString} ShardCID * @typedef {number} Offset */ @@ -107,24 +108,28 @@ export class Builder { /** @type {import('cardex/api').CARLink[]} */ const carCids = [] + /** @type {import('multiformats').Link[]} */ + const blobCids = [] /** @type {Array<{ cid: import('multiformats').Link, carCid: import('cardex/api').CARLink }>} */ const indexes = [] const splitter = await TreewalkCarSplitter.fromIterable(out, TARGET_SHARD_SIZE) for await (const car of splitter.cars()) { const carBytes = concat(await collect(car)) - const carCid = Link.create(CAR.code, await sha256.digest(carBytes)) if (options.asBlob) { - this.#carpark.put(toBlobKey(carCid.multihash), carBytes) + const blobCid = Link.create(raw.code, await sha256.digest(carBytes)) + this.#carpark.put(toBlobKey(blobCid.multihash), carBytes) + blobCids.push(blobCid) } else { + const carCid = Link.create(CAR.code, await sha256.digest(carBytes)) this.#carpark.put(toCarKey(carCid), carBytes) if (options.satnav ?? true) { await this.#writeIndex(carCid, carBytes) } indexes.push(await this.#writeIndexCar(carBytes)) + carCids.push(carCid) } - carCids.push(carCid) } if (!options.asBlob && (options.dudewhere ?? true)) { @@ -132,7 +137,7 @@ export class Builder { await this.#writeLinks(dataCid, carCids) } - return { dataCid, carCids, indexes } + return { dataCid, carCids, indexes, blobCids } } /** diff --git a/test/helpers/content-claims.js b/test/helpers/content-claims.js index 009ed2a..af798a1 100644 --- a/test/helpers/content-claims.js +++ b/test/helpers/content-claims.js @@ -119,10 +119,10 @@ export const generateClaims = async (signer, dataCid, carCid, carStream, indexCi /** * @param {import('@ucanto/interface').Signer} signer - * @param {import('cardex/api').CARLink} carCid + * @param {import('multiformats').Link} shard * @param {ReadableStream} carStream CAR file data */ -export const generateBlockLocationClaims = async (signer, carCid, carStream) => { +export const generateBlockLocationClaims = async (signer, shard, carStream) => { /** @type {Claims} */ const claims = new LinkMap() @@ -131,8 +131,8 @@ export const generateBlockLocationClaims = async (signer, carCid, carStream) => .pipeTo(new WritableStream({ async write ({ cid, blockOffset, blockLength }) { const blocks = claims.get(cid) ?? [] - const location = new URL(`https://w3s.link/ipfs/${carCid}?format=raw`) - blocks.push(await generateLocationClaim(signer, carCid, location, blockOffset, blockLength)) + const location = new URL(`https://w3s.link/ipfs/${shard}?format=raw`) + blocks.push(await generateLocationClaim(signer, shard, location, blockOffset, blockLength)) claims.set(cid, blocks) } })) diff --git a/test/index.spec.js b/test/index.spec.js index ce1fff5..9b81c74 100644 --- a/test/index.spec.js +++ b/test/index.spec.js @@ -228,14 +228,14 @@ describe('freeway', () => { it('should use location content claim', async () => { const input = [{ path: 'sargo.tar.xz', content: randomBytes(MAX_CAR_BYTES_IN_MEMORY + 1) }] // no dudewhere or satnav so only content claims can satisfy the request - const { dataCid, carCids } = await builder.add(input, { asBlob: true }) + const { dataCid, blobCids } = await builder.add(input, { asBlob: true }) const carpark = await miniflare.getR2Bucket('CARPARK') - const res = await carpark.get(toBlobKey(carCids[0].multihash)) + const res = await carpark.get(toBlobKey(blobCids[0].multihash)) assert(res) // @ts-expect-error nodejs ReadableStream does not implement ReadableStream interface correctly - const claims = await generateBlockLocationClaims(claimsService.signer, carCids[0], res.body) + const claims = await generateBlockLocationClaims(claimsService.signer, blobCids[0], res.body) claimsService.setClaims(claims) const res1 = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${dataCid}/${input[0].path}`) @@ -400,24 +400,24 @@ describe('freeway', () => { it('should GET a byte range of raw block', async () => { const input = [{ path: 'sargo.tar.xz', content: randomBytes(MAX_CAR_BYTES_IN_MEMORY + 1) }] // no dudewhere or satnav so only content claims can satisfy the request - const { carCids } = await builder.add(input, { asBlob: true }) + const { blobCids } = await builder.add(input, { asBlob: true }) const carpark = await miniflare.getR2Bucket('CARPARK') - const res0 = await carpark.get(toBlobKey(carCids[0].multihash)) + const res0 = await carpark.get(toBlobKey(blobCids[0].multihash)) assert(res0) - const url = new URL(`https://w3s.link/ipfs/${carCids[0]}?format=raw`) - const claim = await generateLocationClaim(claimsService.signer, carCids[0], url, 0, input[0].content.length) - claimsService.setClaims(new LinkMap([[carCids[0], [claim]]])) + const url = new URL(`https://w3s.link/ipfs/${blobCids[0]}?format=raw`) + const claim = await generateLocationClaim(claimsService.signer, blobCids[0], url, 0, input[0].content.length) + claimsService.setClaims(new LinkMap([[blobCids[0], [claim]]])) - const res1 = await carpark.get(toBlobKey(carCids[0].multihash)) + const res1 = await carpark.get(toBlobKey(blobCids[0].multihash)) assert(res1) await /** @type {ReadableStream} */ (res1.body) .pipeThrough(new CARReaderStream()) .pipeTo(new WritableStream({ async write ({ bytes, blockOffset, blockLength }) { - const res = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${carCids[0]}?format=raw`, { + const res = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${blobCids[0]}?format=raw`, { headers: { Range: `bytes=${blockOffset}-${blockOffset + blockLength - 1}` } @@ -430,7 +430,7 @@ describe('freeway', () => { assert.equal(contentLength, bytes.length) assert.equal(res.headers.get('Content-Range'), `bytes ${blockOffset}-${blockOffset + blockLength - 1}/${input[0].content.length}`) assert.equal(res.headers.get('Content-Type'), 'application/vnd.ipld.raw') - assert.equal(res.headers.get('Etag'), `"${carCids[0]}.raw"`) + assert.equal(res.headers.get('Etag'), `"${blobCids[0]}.raw"`) } })) }) @@ -438,14 +438,14 @@ describe('freeway', () => { it('should GET a multipart byte range of raw block', async () => { const input = [{ path: 'sargo.tar.xz', content: randomBytes(MAX_CAR_BYTES_IN_MEMORY + 1) }] // no dudewhere or satnav so only content claims can satisfy the request - const { carCids } = await builder.add(input, { asBlob: true }) + const { blobCids } = await builder.add(input, { asBlob: true }) - const url = new URL(`https://w3s.link/ipfs/${carCids[0]}?format=raw`) - const claim = await generateLocationClaim(claimsService.signer, carCids[0], url, 0, input[0].content.length) - claimsService.setClaims(new LinkMap([[carCids[0], [claim]]])) + const url = new URL(`https://w3s.link/ipfs/${blobCids[0]}?format=raw`) + const claim = await generateLocationClaim(claimsService.signer, blobCids[0], url, 0, input[0].content.length) + claimsService.setClaims(new LinkMap([[blobCids[0], [claim]]])) const carpark = await miniflare.getR2Bucket('CARPARK') - const res0 = await carpark.get(toBlobKey(carCids[0].multihash)) + const res0 = await carpark.get(toBlobKey(blobCids[0].multihash)) assert(res0) /** @type {Array} */ @@ -454,7 +454,7 @@ describe('freeway', () => { .pipeThrough(new CARReaderStream()) .pipeTo(new WritableStream({ write (block) { blocks.push(block) } })) - const res1 = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${carCids[0]}?format=raw`, { + const res1 = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${blobCids[0]}?format=raw`, { headers: { Range: `bytes=${blocks.map(b => `${b.blockOffset}-${b.blockOffset + b.blockLength - 1}`).join(',')}` }