diff --git a/package-lock.json b/package-lock.json index 5547e4e..97d0a5c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,7 @@ "@ipld/dag-json": "^10.1.7", "@ipld/dag-pb": "^4.0.8", "@web3-storage/content-claims": "^4.0.5", - "@web3-storage/gateway-lib": "^4.1.1", + "@web3-storage/gateway-lib": "^5.0.0", "cardex": "^3.0.0", "dagula": "^7.3.0", "http-range-parse": "^1.0.0", @@ -27,6 +27,7 @@ "@cloudflare/workers-types": "^4.20231218.0", "@ucanto/principal": "^8.1.0", "ava": "^5.3.1", + "byteranges": "^1.1.0", "carbites": "^1.0.6", "carstream": "^2.1.0", "dotenv": "^16.3.1", @@ -1656,6 +1657,15 @@ "resolved": "https://registry.npmjs.org/@handlebars/parser/-/parser-1.1.0.tgz", "integrity": "sha512-rR7tJoSwJ2eooOpYGxGGW95sLq6GXUaS1UtWvN7pei6n2/okYvCGld9vsUTvkl2migxbkszsycwtMf/GEc1k1A==" }, + "node_modules/@httpland/range-parser": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@httpland/range-parser/-/range-parser-1.2.0.tgz", + "integrity": "sha512-8hOuBe2Jyatrmj060orVaRwU5lh5NPzqk5w8L4hraztr3fqurSY4seILUzt8nxYlwp33skVADoEi95LMuo8WQw==", + "dependencies": { + "@miyauci/isx": "1.1.1", + "@miyauci/prelude": "1.0.0" + } + }, "node_modules/@humanwhocodes/config-array": { "version": "0.11.14", "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.14.tgz", @@ -4048,6 +4058,16 @@ "node": ">=12.18" } }, + "node_modules/@miyauci/isx": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@miyauci/isx/-/isx-1.1.1.tgz", + "integrity": "sha512-I675QDyEPVM8Ya/Yfr908YxhFMyoht6xysqKmEmkuohheyTs1ZO6mzm6tI/ddKfGNDymatwqiMtqfXjLI0seWg==" + }, + "node_modules/@miyauci/prelude": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@miyauci/prelude/-/prelude-1.0.0.tgz", + "integrity": "sha512-jHT170+byyB9G/RWFZ+UgZCVcFAB9+hjnPnK8LZJUbhIgbz3SQXyEmuQIO00mi2FLmCWgHN3rVRFEEcx11woEg==" + }, "node_modules/@multiformats/blake2": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/@multiformats/blake2/-/blake2-2.0.2.tgz", @@ -5487,22 +5507,64 @@ } }, "node_modules/@web3-storage/gateway-lib": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/@web3-storage/gateway-lib/-/gateway-lib-4.1.1.tgz", - "integrity": "sha512-MQiugH09qF6Ja8wm6z6mr3p9iKbvn8LS+q+mK6RTmymbNBpGjx9K4d/FDYbsn1rwD3xsQ2GkVVYRdKmHUZ+8OA==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@web3-storage/gateway-lib/-/gateway-lib-5.0.0.tgz", + "integrity": "sha512-jplyNJpXyv9/EUv0oQr9+35+CoHw6o8V3nzgNremUE9Akt4oyU6UKq6iutb1ARrtjVNNWr9ww8G2kVW3LgkhTA==", "dependencies": { + "@httpland/range-parser": "^1.2.0", "@ipld/car": "^5.2.6", "@web3-storage/handlebars": "^1.0.0", "bytes": "^3.1.2", "chardet": "^2.0.0", - "dagula": "^7.3.0", + "dagula": "^8.0.0", "magic-bytes.js": "^1.8.0", "mrmime": "^1.0.1", "multiformats": "^13.0.1", + "multipart-byte-range": "^2.0.1", "timeout-abort-controller": "^3.0.0", "uint8arrays": "^5.0.1" } }, + "node_modules/@web3-storage/gateway-lib/node_modules/dagula": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/dagula/-/dagula-8.0.0.tgz", + "integrity": "sha512-VtcVOK2idIf2/2Yoi+zviq9OveksiB2cyJAT6FASrzngomBSSN516ym4b8wM6v6aq66gxaBFo9p9D2ZUCcrEDw==", + "dependencies": { + "@chainsafe/libp2p-noise": "^11.0.0", + "@chainsafe/libp2p-yamux": "^4.0.2", + "@ipld/car": "^5.2.6", + "@ipld/dag-cbor": "^9.0.8", + "@ipld/dag-json": "^10.0.0", + "@ipld/dag-pb": "^4.0.8", + "@libp2p/interface-connection": "^3.0.8", + "@libp2p/interface-peer-id": "^2.0.1", + "@libp2p/interface-registrar": "^2.0.8", 
+ "@libp2p/interfaces": "^3.3.1", + "@libp2p/mplex": "^7.1.1", + "@libp2p/tcp": "^6.0.9", + "@libp2p/websockets": "^5.0.3", + "@multiformats/blake2": "^2.0.1", + "@multiformats/multiaddr": "^11.3.0", + "archy": "^1.0.0", + "conf": "^11.0.1", + "debug": "^4.3.4", + "ipfs-unixfs": "^11.1.0", + "ipfs-unixfs-exporter": "^13.3.0", + "it-length-prefixed": "^8.0.4", + "it-pipe": "^2.0.3", + "libp2p": "^0.42.2", + "multiformats": "^13.0.1", + "p-defer": "^4.0.0", + "protobufjs": "^7.0.0", + "sade": "^1.8.1", + "streaming-iterables": "^8.0.1", + "timeout-abort-controller": "^3.0.0", + "varint": "^6.0.0" + }, + "bin": { + "dagula": "bin.js" + } + }, "node_modules/@web3-storage/gateway-lib/node_modules/uint8arrays": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/uint8arrays/-/uint8arrays-5.0.1.tgz", @@ -6255,6 +6317,14 @@ "npm": ">=7.0.0" } }, + "node_modules/byteranges": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/byteranges/-/byteranges-1.1.0.tgz", + "integrity": "sha512-c1bjg97QZleneWOirBABMvzz4qsxb9KoIFYKilQxMovb5N5MwLHnDuVt7k98Q/kL42hI1HcgG67q7yszFZJuqA==", + "engines": { + "node": ">=14" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -6732,8 +6802,8 @@ }, "node_modules/dagula": { "version": "7.3.0", - "resolved": "https://registry.npmjs.org/dagula/-/dagula-7.3.0.tgz", - "integrity": "sha512-ZF+4sg64BP9UV82+2HtgmT/yDEC6shlJgJOu8wGv1o9GyjcOliGsx4cxhxJa8AL0NNsjU4PzgaUaUiNvJPDidg==", + "resolved": "git+ssh://git@github.com/web3-storage/dagula.git#8c0075d5b4441d49600c2676f5d98526e8569196", + "license": "Apache-2.0 OR MIT", "dependencies": { "@chainsafe/libp2p-noise": "^11.0.0", "@chainsafe/libp2p-yamux": "^4.0.2", @@ -6762,7 +6832,7 @@ "p-defer": "^4.0.0", "protobufjs": "^7.0.0", "sade": "^1.8.1", - "streaming-iterables": "^7.0.4", + "streaming-iterables": "^8.0.1", "timeout-abort-controller": "^3.0.0", "varint": "^6.0.0" }, @@ -11355,6 +11425,14 @@ "resolved": "https://registry.npmjs.org/multiformats/-/multiformats-13.0.1.tgz", "integrity": "sha512-bt3R5iXe2O8xpp3wkmQhC73b/lC4S2ihU8Dndwcsysqbydqb8N+bpP116qMcClZ17g58iSIwtXUTcg2zT4sniA==" }, + "node_modules/multipart-byte-range": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/multipart-byte-range/-/multipart-byte-range-2.0.1.tgz", + "integrity": "sha512-WhtupMhoBKS9YzDszVBpeGqwJTBjdqTgXneqo0tJyUwlTn2h7Pbtr+mdaVV554dLU68jICgefaQfqcPkl62m3Q==", + "dependencies": { + "byteranges": "^1.1.0" + } + }, "node_modules/murmurhash3js-revisited": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/murmurhash3js-revisited/-/murmurhash3js-revisited-3.0.0.tgz", @@ -13142,11 +13220,11 @@ "integrity": "sha512-v+dm9bNVfOYsY1OrhaCrmyOcYoSeVvbt+hHZ0Au+T+p1y+0Uyj9aMaGIeUTT6xdpRbWzDeYKvfOslPhggQMcsg==" }, "node_modules/streaming-iterables": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/streaming-iterables/-/streaming-iterables-7.1.0.tgz", - "integrity": "sha512-t2KmiLVhqafTRqGefD98s5XAMskfkfprr/BTzPIZz0kWB23iyR7XUkY03yjUf4aZpAuuV2/2SUOVri3LgKuOKw==", + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/streaming-iterables/-/streaming-iterables-8.0.1.tgz", + "integrity": "sha512-yfQdmUB1b+rGLZkD/r6YisT/eNOjZxBAckXKlzYNmRJnwSzHaiScykD8gsQceFcShtK09qAbLhOqvzIpnBPoDQ==", "engines": { - "node": ">=14" + "node": ">=18" } }, "node_modules/streamsearch": { diff --git a/package.json b/package.json index 917903a..12b42ea 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,7 @@ "@ipld/dag-json": "^10.1.7", 
"@ipld/dag-pb": "^4.0.8", "@web3-storage/content-claims": "^4.0.5", - "@web3-storage/gateway-lib": "^4.1.1", + "@web3-storage/gateway-lib": "^5.0.0", "cardex": "^3.0.0", "dagula": "^7.3.0", "http-range-parse": "^1.0.0", @@ -54,6 +54,7 @@ "@cloudflare/workers-types": "^4.20231218.0", "@ucanto/principal": "^8.1.0", "ava": "^5.3.1", + "byteranges": "^1.1.0", "carbites": "^1.0.6", "carstream": "^2.1.0", "dotenv": "^16.3.1", diff --git a/src/index.js b/src/index.js index 9dafcd0..4939f0b 100644 --- a/src/index.js +++ b/src/index.js @@ -4,7 +4,7 @@ import { withCorsHeaders, withContentDispositionHeader, withErrorHandler, - withHttpGet, + createWithHttpMethod, withCdnCache, withParsedIpfsUrl, withFixedLengthStream, @@ -19,13 +19,15 @@ import { withContentClaimsDagula, withHttpRangeUnsupported, withVersionHeader, - withCarHandler + withCarBlockHandler } from './middleware.js' /** * @typedef {import('./bindings.js').Environment} Environment * @typedef {import('@web3-storage/gateway-lib').IpfsUrlContext} IpfsUrlContext - * @typedef {import('@web3-storage/gateway-lib').DagulaContext} DagulaContext + * @typedef {import('@web3-storage/gateway-lib').BlockContext} BlockContext + * @typedef {import('@web3-storage/gateway-lib').DagContext} DagContext + * @typedef {import('@web3-storage/gateway-lib').UnixfsContext} UnixfsContext */ export default { @@ -37,30 +39,47 @@ export default { withContext, withCorsHeaders, withVersionHeader, - withContentDispositionHeader, withErrorHandler, withParsedIpfsUrl, - withCarHandler, - withHttpRangeUnsupported, - withHttpGet, + createWithHttpMethod('GET', 'HEAD'), + withCarBlockHandler, withContentClaimsDagula, + withFormatRawHandler, + withHttpRangeUnsupported, + withFormatCarHandler, + withContentDispositionHeader, withFixedLengthStream ) - return middleware(handler)(request, env, ctx) + return middleware(handleUnixfs)(request, env, ctx) } } -/** @type {import('@web3-storage/gateway-lib').Handler} */ -async function handler (request, env, ctx) { - const { headers } = request - const { searchParams } = ctx - if (!searchParams) throw new Error('missing URL search params') - - if (searchParams.get('format') === 'raw' || headers.get('Accept')?.includes('application/vnd.ipld.raw')) { - return await handleBlock(request, env, ctx) +/** + * @type {import('@web3-storage/gateway-lib').Middleware} + */ +export function withFormatRawHandler (handler) { + return async (request, env, ctx) => { + const { headers } = request + const { searchParams } = ctx + if (!searchParams) throw new Error('missing URL search params') + if (searchParams.get('format') === 'raw' || headers.get('Accept')?.includes('application/vnd.ipld.raw')) { + return await handleBlock(request, env, ctx) + } + return handler(request, env, ctx) // pass to other handlers } - if (searchParams.get('format') === 'car' || headers.get('Accept')?.includes('application/vnd.ipld.car')) { - return await handleCar(request, env, ctx) +} + +/** + * @type {import('@web3-storage/gateway-lib').Middleware} + */ +export function withFormatCarHandler (handler) { + return async (request, env, ctx) => { + const { headers } = request + const { searchParams } = ctx + if (!searchParams) throw new Error('missing URL search params') + if (searchParams.get('format') === 'car' || headers.get('Accept')?.includes('application/vnd.ipld.car')) { + return await handleCar(request, env, ctx) + } + return handler(request, env, ctx) // pass to other handlers } - return await handleUnixfs(request, env, ctx) } diff --git a/src/lib/blockstore.js 
b/src/lib/blockstore.js index 87d8430..0afdb9b 100644 --- a/src/lib/blockstore.js +++ b/src/lib/blockstore.js @@ -2,9 +2,11 @@ import { readBlockHead, asyncIterableReader } from '@ipld/car/decoder' import { base58btc } from 'multiformats/bases/base58' import defer from 'p-defer' import { OrderedCarBlockBatcher } from './block-batch.js' +import * as IndexEntry from './dag-index/entry.js' /** * @typedef {import('multiformats').UnknownLink} UnknownLink + * @typedef {import('dagula').Blockstore} Blockstore * @typedef {import('dagula').Block} Block * @typedef {import('@cloudflare/workers-types').R2Bucket} R2Bucket */ @@ -16,6 +18,8 @@ const MAX_ENCODED_BLOCK_LENGTH = (1024 * 1024 * 2) + 39 + 61 * A blockstore that is backed by an R2 bucket which contains CARv2 * MultihashIndexSorted indexes alongside CAR files. It can read DAGs split * across multiple CARs. + * + * @implements {Blockstore} */ export class R2Blockstore { /** @@ -32,6 +36,15 @@ export class R2Blockstore { // console.log(`get ${cid}`) const entry = await this._idx.get(cid) if (!entry) return + + if (IndexEntry.isLocated(entry)) { + const keyAndOptions = IndexEntry.toBucketGet(entry) + if (!keyAndOptions) return + + const res = await this._dataBucket.get(keyAndOptions[0], keyAndOptions[1]) + return res ? { cid, bytes: new Uint8Array(await res.arrayBuffer()) } : undefined + } + const carPath = `${entry.origin}/${entry.origin}.car` const range = { offset: entry.offset } const res = await this._dataBucket.get(carPath, { range }) @@ -51,6 +64,32 @@ export class R2Blockstore { reader.cancel() return { cid, bytes } } + + /** @param {UnknownLink} cid */ + async stat (cid) { + const entry = await this._idx.get(cid) + if (!entry) return + + // stat API exists only for blobs (i.e. location claimed) + if (IndexEntry.isLocated(entry)) { + return { size: entry.site.range.length } + } + } + + /** + * @param {UnknownLink} cid + * @param {import('dagula').AbortOptions & import('dagula').RangeOptions} [options] + */ + async stream (cid, options) { + const entry = await this._idx.get(cid) + if (!entry) return + + const keyAndOptions = IndexEntry.toBucketGet(entry, options) + if (!keyAndOptions) return + + const res = await this._dataBucket.get(keyAndOptions[0], keyAndOptions[1]) + return /** @type {ReadableStream|undefined} */ (res?.body) + } } export class BatchingR2Blockstore extends R2Blockstore { @@ -199,6 +238,12 @@ export class BatchingR2Blockstore extends R2Blockstore { const entry = await this._idx.get(cid) if (!entry) return + // TODO: batch with multipart gyte range request when we switch to reading + // from any URL. + if (IndexEntry.isLocated(entry)) { + return super.get(cid) + } + this.#batcher.add({ carCid: entry.origin, blockCid: cid, offset: entry.offset }) if (!entry.multihash) throw new Error('missing entry multihash') diff --git a/src/lib/dag-index/api.ts b/src/lib/dag-index/api.ts index 1ff44a9..a358135 100644 --- a/src/lib/dag-index/api.ts +++ b/src/lib/dag-index/api.ts @@ -1,11 +1,31 @@ -import { UnknownLink } from 'multiformats/link' +import { MultihashDigest, UnknownLink } from 'multiformats' import { MultihashIndexItem } from 'cardex/multihash-index-sorted/api' import { CARLink } from 'cardex/api' +import { ByteRange } from '@web3-storage/content-claims/client/api' -export interface IndexEntry extends MultihashIndexItem { +/** + * A legacy index entry for which the exact location of the blob that contains + * the block is unknown - assumed to be present in a bucket that freeway has + * access to. 
+ */ +export interface NotLocatedIndexEntry extends MultihashIndexItem { origin: CARLink } +/** + * An index entry where the exact location of the block (URL and byte offset + + * length) has been found via a content claim. + */ +export interface LocatedIndexEntry { + digest: MultihashDigest + site: { + location: URL[], + range: Required + } +} + +export type IndexEntry = NotLocatedIndexEntry | LocatedIndexEntry + export interface Index { get (c: UnknownLink): Promise } diff --git a/src/lib/dag-index/car.js b/src/lib/dag-index/car.js index 1d78ebb..79db37d 100644 --- a/src/lib/dag-index/car.js +++ b/src/lib/dag-index/car.js @@ -4,7 +4,7 @@ import defer from 'p-defer' /** * @typedef {import('multiformats').UnknownLink} UnknownLink - * @typedef {import('./api.js').IndexEntry} IndexEntry + * @typedef {import('./api.js').NotLocatedIndexEntry} NotLocatedIndexEntry * @typedef {import('multiformats').ToString} MultihashString * @typedef {import('./api.js').Index} Index */ @@ -26,7 +26,7 @@ export class MultiCarIndex { /** * @param {UnknownLink} cid - * @returns {Promise} + * @returns {Promise} */ async get (cid) { const deferred = defer() @@ -56,10 +56,10 @@ export class StreamingCarIndex { /** @type {import('../../bindings.js').IndexSource} */ #source - /** @type {Map} */ + /** @type {Map} */ #idx = new Map() - /** @type {Map>>} */ + /** @type {Map>>} */ #promisedIdx = new Map() /** @type {boolean} */ @@ -86,7 +86,7 @@ export class StreamingCarIndex { const { done, value } = await idxReader.read() if (done) break - const entry = /** @type {IndexEntry} */(value) + const entry = /** @type {import('./api.js').NotLocatedIndexEntry} */(value) entry.origin = entry.origin ?? this.#source.origin const key = mhToString(entry.multihash) diff --git a/src/lib/dag-index/content-claims.js b/src/lib/dag-index/content-claims.js index cfe7b91..58add91 100644 --- a/src/lib/dag-index/content-claims.js +++ b/src/lib/dag-index/content-claims.js @@ -1,9 +1,6 @@ /* global ReadableStream */ import * as Link from 'multiformats/link' import * as raw from 'multiformats/codecs/raw' -import { base32 } from 'multiformats/bases/base32' -import * as Digest from 'multiformats/hashes/digest' -import { varint } from 'multiformats' import * as Claims from '@web3-storage/content-claims/client' import { MultihashIndexSortedReader } from 'cardex/multihash-index-sorted' import { Map as LinkMap } from 'lnmap' @@ -11,6 +8,7 @@ import * as CAR from '../car.js' /** * @typedef {import('multiformats').UnknownLink} UnknownLink + * @typedef {import('./api.js').NotLocatedIndexEntry} NotLocatedIndexEntry * @typedef {import('./api.js').IndexEntry} IndexEntry * @typedef {import('./api.js').Index} Index */ @@ -98,14 +96,13 @@ export class ContentClaimsIndex { const claims = await Claims.read(cid, { serviceURL: this.#serviceURL }) for (const claim of claims) { if (claim.type === 'assert/location' && claim.range?.length != null) { - const origin = locationToShardCID(claim.location[0]) - if (!origin) continue - - const { multihash } = cid - // convert offset to CARv2 index offset (start of block header) - const offset = claim.range.offset - (varint.encodingLength(cid.bytes.length + claim.range.length) + cid.bytes.length) - - this.#cache.set(cid, { origin, multihash, digest: multihash.bytes, offset }) + this.#cache.set(cid, { + digest: cid.multihash, + site: { + location: claim.location.map(l => new URL(l)), + range: { offset: claim.range.offset, length: claim.range.length } + } + }) continue } @@ -136,7 +133,11 @@ export class ContentClaimsIndex { 
const entries = await decodeIndex(content, block.bytes) for (const entry of entries) { - this.#cache.set(Link.create(raw.code, entry.multihash), entry) + const entryCid = Link.create(raw.code, entry.multihash) + // do not overwrite an existing LocatedIndexEntry + if (!this.#cache.has(entryCid)) { + this.#cache.set(entryCid, entry) + } } } break @@ -169,30 +170,7 @@ const decodeIndex = async (origin, bytes) => { while (true) { const { done, value } = await reader.read() if (done) break - entries.push(/** @type {IndexEntry} */({ origin, ...value })) + entries.push(/** @type {NotLocatedIndexEntry} */({ origin, ...value })) } return entries } - -/** - * Attempts to extract a CAR CID from a bucket location URL. - * - * @param {string} key - */ -const locationToShardCID = key => { - const filename = String(key.split('/').at(-1)) - const [hash] = filename.split('.') - try { - // recent buckets encode CAR CID in filename - const cid = Link.parse(hash).toV1() - if (isCARLink(cid)) return cid - throw new Error('not a CAR CID') - } catch (err) { - // older buckets base32 encode a CAR multihash .car - try { - const digestBytes = base32.baseDecode(hash) - const digest = Digest.decode(digestBytes) - return Link.create(CAR.code, digest) - } catch {} - } -} diff --git a/src/lib/dag-index/entry.js b/src/lib/dag-index/entry.js new file mode 100644 index 0000000..5b9f29a --- /dev/null +++ b/src/lib/dag-index/entry.js @@ -0,0 +1,39 @@ +import * as Link from 'multiformats/link' +import { base58btc } from 'multiformats/bases/base58' +import { CAR_CODE } from '../../constants.js' + +/** + * An index entry is "located" if a content claim has specified it's location + * i.e. it is of type `LocatedIndexEntry`. + * + * @param {import('./api.js').IndexEntry} entry + * @returns {entry is import('./api.js').LocatedIndexEntry} + */ +export const isLocated = entry => 'site' in entry + +/** + * @param {import('./api.js').IndexEntry} entry + * @param {import('dagula').RangeOptions} [options] + * @returns {[key: string, options: import('@cloudflare/workers-types').R2GetOptions]|undefined} + */ +export const toBucketGet = (entry, options) => { + if (!isLocated(entry)) return + + // if host is "w3s.link" then content can be found in CARPARK + const url = entry.site.location.find(l => l.hostname === 'w3s.link') + if (!url) return + + const link = Link.parse(url.pathname.split('/')[2]) + + let key + if (link.code === CAR_CODE) { + key = `${link}/${link}.car` + } else { + const digestString = base58btc.encode(link.multihash.bytes) + key = `${digestString}/${digestString}.blob` + } + const first = entry.site.range.offset + (options?.range?.[0] ?? 0) + const last = entry.site.range.offset + (options?.range?.[1] ?? 
(entry.site.range.length - 1)) + const range = { offset: first, length: last - first + 1 } + return [key, { range }] +} diff --git a/src/middleware.js b/src/middleware.js index 78b6ff2..0c7ba1c 100644 --- a/src/middleware.js +++ b/src/middleware.js @@ -16,7 +16,9 @@ import { handleCarBlock } from './handlers/car-block.js' * @typedef {import('./bindings.js').Environment} Environment * @typedef {import('@web3-storage/gateway-lib').IpfsUrlContext} IpfsUrlContext * @typedef {import('./bindings.js').IndexSourcesContext} IndexSourcesContext - * @typedef {import('@web3-storage/gateway-lib').DagulaContext} DagulaContext + * @typedef {import('@web3-storage/gateway-lib').BlockContext} BlockContext + * @typedef {import('@web3-storage/gateway-lib').DagContext} DagContext + * @typedef {import('@web3-storage/gateway-lib').UnixfsContext} UnixfsContext */ /** @@ -41,11 +43,12 @@ export function withHttpRangeUnsupported (handler) { * * @type {import('@web3-storage/gateway-lib').Middleware} */ -export function withCarHandler (handler) { +export function withCarBlockHandler (handler) { return async (request, env, ctx) => { - const { dataCid } = ctx + const { dataCid, searchParams } = ctx if (!dataCid) throw new Error('missing data CID') - if (dataCid.code !== CAR_CODE) { + // if not CAR codec, or if a different format has been requested... + if (dataCid.code !== CAR_CODE || searchParams.get('format') || request.headers.get('Accept')) { return handler(request, env, ctx) // pass to other handlers } return handleCarBlock(request, env, ctx) @@ -55,7 +58,7 @@ export function withCarHandler (handler) { /** * Creates a dagula instance backed by the R2 blockstore backed by content claims. * - * @type {import('@web3-storage/gateway-lib').Middleware} + * @type {import('@web3-storage/gateway-lib').Middleware} */ export function withContentClaimsDagula (handler) { return async (request, env, ctx) => { @@ -74,7 +77,7 @@ export function withContentClaimsDagula (handler) { const blockstore = new BatchingR2Blockstore(env.CARPARK, index) const dagula = new Dagula(blockstore) - return handler(request, env, { ...ctx, dagula }) + return handler(request, env, { ...ctx, blocks: dagula, dag: dagula, unixfs: dagula }) } } @@ -160,7 +163,7 @@ export function withIndexSources (handler) { /** * Creates a dagula instance backed by the R2 blockstore fallback with index sources. 
- * @type {import('@web3-storage/gateway-lib').Middleware} + * @type {import('@web3-storage/gateway-lib').Middleware} */ export function withDagulaFallback (handler) { return async (request, env, ctx) => { @@ -194,7 +197,7 @@ export function withDagulaFallback (handler) { } const dagula = new Dagula(blockstore) - return handler(request, env, { ...ctx, dagula }) + return handler(request, env, { ...ctx, blocks: dagula, dag: dagula, unixfs: dagula }) } } diff --git a/test/helpers/builder.js b/test/helpers/builder.js index 60c2fb8..7e7a17f 100644 --- a/test/helpers/builder.js +++ b/test/helpers/builder.js @@ -2,6 +2,8 @@ import { pack } from 'ipfs-car/pack' import * as Link from 'multiformats/link' import { sha256 } from 'multiformats/hashes/sha2' +import { base58btc } from 'multiformats/bases/base58' +import * as raw from 'multiformats/codecs/raw' import { CarIndexer } from '@ipld/car' import { concat } from 'uint8arrays' import { TreewalkCarSplitter } from 'carbites/treewalk' @@ -12,7 +14,7 @@ import * as CAR from './car.js' /** * @typedef {import('multiformats').ToString} MultihashString - * @typedef {import('multiformats').ToString} ShardCID + * @typedef {import('multiformats').ToString} ShardCID * @typedef {number} Offset */ @@ -35,16 +37,6 @@ export class Builder { this.#dudewhere = dudewhere } - /** - * @param {Uint8Array} bytes CAR file bytes - */ - async #writeCar (bytes) { - /** @type {import('cardex/api.js').CARLink} */ - const cid = Link.create(CAR.code, await sha256.digest(bytes)) - await this.#carpark.put(`${cid}/${cid}.car`, bytes) - return cid - } - /** * @param {import('multiformats').Link} cid CAR CID * @param {Uint8Array} bytes CAR file bytes @@ -99,10 +91,10 @@ export class Builder { /** * @param {import('ipfs-car/pack').PackProperties['input']} input - * @param {Omit & { + * @param {Omit & ({ * dudewhere?: boolean * satnav?: boolean - * }} [options] + * } | { asBlob?: boolean })} [options] */ async add (input, options = {}) { const { root, out } = await pack({ @@ -116,26 +108,36 @@ export class Builder { /** @type {import('cardex/api').CARLink[]} */ const carCids = [] + /** @type {import('multiformats').Link[]} */ + const blobCids = [] /** @type {Array<{ cid: import('multiformats').Link, carCid: import('cardex/api').CARLink }>} */ const indexes = [] const splitter = await TreewalkCarSplitter.fromIterable(out, TARGET_SHARD_SIZE) for await (const car of splitter.cars()) { const carBytes = concat(await collect(car)) - const carCid = await this.#writeCar(carBytes) - if (options.satnav ?? true) { - await this.#writeIndex(carCid, carBytes) + + if (options.asBlob) { + const blobCid = Link.create(raw.code, await sha256.digest(carBytes)) + this.#carpark.put(toBlobKey(blobCid.multihash), carBytes) + blobCids.push(blobCid) + } else { + const carCid = Link.create(CAR.code, await sha256.digest(carBytes)) + this.#carpark.put(toCarKey(carCid), carBytes) + if (options.satnav ?? true) { + await this.#writeIndex(carCid, carBytes) + } + indexes.push(await this.#writeIndexCar(carBytes)) + carCids.push(carCid) } - indexes.push(await this.#writeIndexCar(carBytes)) - carCids.push(carCid) } - if (options.dudewhere ?? true) { + if (!options.asBlob && (options.dudewhere ?? 
true)) { // @ts-ignore old multiformats in ipfs-car await this.#writeLinks(dataCid, carCids) } - return { dataCid, carCids, indexes } + return { dataCid, carCids, indexes, blobCids } } /** @@ -173,6 +175,15 @@ export class Builder { } } +/** @param {import('multiformats').Link} cid */ +export const toCarKey = cid => `${cid}/${cid}.car` + +/** @param {import('multiformats').MultihashDigest} digest */ +export const toBlobKey = digest => { + const digestString = base58btc.encode(digest.bytes) + return `${digestString}/${digestString}.blob` +} + /** * @template T * @param {AsyncIterable} collectable diff --git a/test/helpers/content-claims.js b/test/helpers/content-claims.js index 25a9467..af798a1 100644 --- a/test/helpers/content-claims.js +++ b/test/helpers/content-claims.js @@ -119,10 +119,10 @@ export const generateClaims = async (signer, dataCid, carCid, carStream, indexCi /** * @param {import('@ucanto/interface').Signer} signer - * @param {import('cardex/api').CARLink} carCid + * @param {import('multiformats').Link} shard * @param {ReadableStream} carStream CAR file data */ -export const generateLocationClaims = async (signer, carCid, carStream) => { +export const generateBlockLocationClaims = async (signer, shard, carStream) => { /** @type {Claims} */ const claims = new LinkMap() @@ -130,23 +130,9 @@ export const generateLocationClaims = async (signer, carCid, carStream) => { .pipeThrough(new CARReaderStream()) .pipeTo(new WritableStream({ async write ({ cid, blockOffset, blockLength }) { - const invocation = Assert.location.invoke({ - issuer: signer, - audience: signer, - with: signer.did(), - nb: { - content: cid, - location: [ - /** @type {import('@ucanto/interface').URI<'http:'>} */ - (`http://localhost/${carCid}/${carCid}.car`) - ], - range: { offset: blockOffset, length: blockLength } - } - }) - const blocks = claims.get(cid) ?? [] - // @ts-expect-error - blocks.push(await encode(invocation)) + const location = new URL(`https://w3s.link/ipfs/${shard}?format=raw`) + blocks.push(await generateLocationClaim(signer, shard, location, blockOffset, blockLength)) claims.set(cid, blocks) } })) @@ -154,9 +140,33 @@ export const generateLocationClaims = async (signer, carCid, carStream) => { return claims } +/** + * @param {import('@ucanto/interface').Signer} signer + * @param {import('multiformats').UnknownLink} content + * @param {URL} location + * @param {number} offset + * @param {number} length + */ +export const generateLocationClaim = async (signer, content, location, offset, length) => { + const invocation = Assert.location.invoke({ + issuer: signer, + audience: signer, + with: signer.did(), + nb: { + content, + location: [ + // @ts-expect-error string is not ${string}:$string + location.toString() + ], + range: { offset, length } + } + }) + return await encode(invocation) +} + /** * Encode a claim to a block. - * @param {import('@ucanto/interface').IssuedInvocationView} invocation + * @param {import('@ucanto/interface').IPLDViewBuilder} invocation */ const encode = async invocation => { const view = await invocation.buildIPLDView() @@ -179,7 +189,7 @@ export const mockClaimsService = async () => { const server = http.createServer(async (req, res) => { callCount++ const content = Link.parse(String(req.url?.split('/')[2])) - const blocks = claims.get(content) ?? [] + const blocks = [...claims.get(content) ?? 
[]] const readable = new ReadableStream({ pull (controller) { const block = blocks.shift() diff --git a/test/index.spec.js b/test/index.spec.js index 3375801..9b81c74 100644 --- a/test/index.spec.js +++ b/test/index.spec.js @@ -1,12 +1,19 @@ import { describe, before, beforeEach, after, it } from 'node:test' import assert from 'node:assert' +import { Buffer } from 'node:buffer' import { randomBytes } from 'node:crypto' import { Miniflare } from 'miniflare' import { equals } from 'uint8arrays' import { CarIndexer, CarReader } from '@ipld/car' -import { Builder } from './helpers/builder.js' +import * as Link from 'multiformats/link' +import { sha256 } from 'multiformats/hashes/sha2' +import * as raw from 'multiformats/codecs/raw' +import { Map as LinkMap } from 'lnmap' +import { CARReaderStream } from 'carstream' +import * as ByteRanges from 'byteranges' +import { Builder, toBlobKey } from './helpers/builder.js' import { MAX_CAR_BYTES_IN_MEMORY } from '../src/constants.js' -import { generateClaims, generateLocationClaims, mockClaimsService } from './helpers/content-claims.js' +import { generateClaims, generateBlockLocationClaims, mockClaimsService, generateLocationClaim } from './helpers/content-claims.js' describe('freeway', () => { /** @type {Miniflare} */ @@ -221,17 +228,14 @@ describe('freeway', () => { it('should use location content claim', async () => { const input = [{ path: 'sargo.tar.xz', content: randomBytes(MAX_CAR_BYTES_IN_MEMORY + 1) }] // no dudewhere or satnav so only content claims can satisfy the request - const { dataCid, carCids } = await builder.add(input, { - dudewhere: true, - satnav: true - }) + const { dataCid, blobCids } = await builder.add(input, { asBlob: true }) const carpark = await miniflare.getR2Bucket('CARPARK') - const res = await carpark.get(`${carCids[0]}/${carCids[0]}.car`) + const res = await carpark.get(toBlobKey(blobCids[0].multihash)) assert(res) // @ts-expect-error nodejs ReadableStream does not implement ReadableStream interface correctly - const claims = await generateLocationClaims(claimsService.signer, carCids[0], res.body) + const claims = await generateBlockLocationClaims(claimsService.signer, blobCids[0], res.body) claimsService.setClaims(claims) const res1 = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${dataCid}/${input[0].path}`) @@ -342,4 +346,133 @@ describe('freeway', () => { assert.equal(res.headers.get('Content-Type'), 'application/vnd.ipld.car; version=1;') assert.equal(res.headers.get('Etag'), `"${carCids[0]}"`) }) + + it('should GET a raw block', async () => { + const input = randomBytes(138) + const cid = Link.create(raw.code, await sha256.digest(input)) + + const carpark = await miniflare.getR2Bucket('CARPARK') + const blobKey = toBlobKey(cid.multihash) + await carpark.put(blobKey, input) + + const url = new URL(`https://w3s.link/ipfs/${cid}?format=raw`) + const claim = await generateLocationClaim(claimsService.signer, cid, url, 0, input.length) + claimsService.setClaims(new LinkMap([[cid, [claim]]])) + + const res = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${cid}?format=raw`) + assert(res.ok) + + const output = new Uint8Array(await res.arrayBuffer()) + assert.equal(output.length, input.length) + + const contentLength = parseInt(res.headers.get('Content-Length') ?? 
'0') + assert(contentLength) + assert.equal(contentLength, input.length) + assert.equal(res.headers.get('Content-Type'), 'application/vnd.ipld.raw') + assert.equal(res.headers.get('Etag'), `"${cid}.raw"`) + }) + + it('should HEAD a raw block', async () => { + const input = randomBytes(138) + const cid = Link.create(raw.code, await sha256.digest(input)) + + const carpark = await miniflare.getR2Bucket('CARPARK') + const blobKey = toBlobKey(cid.multihash) + await carpark.put(blobKey, input) + + const url = new URL(`https://w3s.link/ipfs/${cid}?format=raw`) + const claim = await generateLocationClaim(claimsService.signer, cid, url, 0, input.length) + claimsService.setClaims(new LinkMap([[cid, [claim]]])) + + const res = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${cid}?format=raw`, { + method: 'HEAD' + }) + assert(res.ok) + + const contentLength = parseInt(res.headers.get('Content-Length') ?? '0') + assert(contentLength) + + assert.equal(contentLength, input.length) + assert.equal(res.headers.get('Accept-Ranges'), 'bytes') + assert.equal(res.headers.get('Etag'), `"${cid}.raw"`) + }) + + it('should GET a byte range of raw block', async () => { + const input = [{ path: 'sargo.tar.xz', content: randomBytes(MAX_CAR_BYTES_IN_MEMORY + 1) }] + // no dudewhere or satnav so only content claims can satisfy the request + const { blobCids } = await builder.add(input, { asBlob: true }) + + const carpark = await miniflare.getR2Bucket('CARPARK') + const res0 = await carpark.get(toBlobKey(blobCids[0].multihash)) + assert(res0) + + const url = new URL(`https://w3s.link/ipfs/${blobCids[0]}?format=raw`) + const claim = await generateLocationClaim(claimsService.signer, blobCids[0], url, 0, input[0].content.length) + claimsService.setClaims(new LinkMap([[blobCids[0], [claim]]])) + + const res1 = await carpark.get(toBlobKey(blobCids[0].multihash)) + assert(res1) + + await /** @type {ReadableStream} */ (res1.body) + .pipeThrough(new CARReaderStream()) + .pipeTo(new WritableStream({ + async write ({ bytes, blockOffset, blockLength }) { + const res = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${blobCids[0]}?format=raw`, { + headers: { + Range: `bytes=${blockOffset}-${blockOffset + blockLength - 1}` + } + }) + assert(res.ok) + assert(equals(new Uint8Array(await res.arrayBuffer()), bytes)) + + const contentLength = parseInt(res.headers.get('Content-Length') ?? 
'0') + assert(contentLength) + assert.equal(contentLength, bytes.length) + assert.equal(res.headers.get('Content-Range'), `bytes ${blockOffset}-${blockOffset + blockLength - 1}/${input[0].content.length}`) + assert.equal(res.headers.get('Content-Type'), 'application/vnd.ipld.raw') + assert.equal(res.headers.get('Etag'), `"${blobCids[0]}.raw"`) + } + })) + }) + + it('should GET a multipart byte range of raw block', async () => { + const input = [{ path: 'sargo.tar.xz', content: randomBytes(MAX_CAR_BYTES_IN_MEMORY + 1) }] + // no dudewhere or satnav so only content claims can satisfy the request + const { blobCids } = await builder.add(input, { asBlob: true }) + + const url = new URL(`https://w3s.link/ipfs/${blobCids[0]}?format=raw`) + const claim = await generateLocationClaim(claimsService.signer, blobCids[0], url, 0, input[0].content.length) + claimsService.setClaims(new LinkMap([[blobCids[0], [claim]]])) + + const carpark = await miniflare.getR2Bucket('CARPARK') + const res0 = await carpark.get(toBlobKey(blobCids[0].multihash)) + assert(res0) + + /** @type {Array} */ + const blocks = [] + await /** @type {ReadableStream} */ (res0.body) + .pipeThrough(new CARReaderStream()) + .pipeTo(new WritableStream({ write (block) { blocks.push(block) } })) + + const res1 = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${blobCids[0]}?format=raw`, { + headers: { + Range: `bytes=${blocks.map(b => `${b.blockOffset}-${b.blockOffset + b.blockLength - 1}`).join(',')}` + } + }) + assert(res1.ok) + + const contentType = res1.headers.get('Content-Type') + assert(contentType) + + const boundary = contentType.replace('multipart/byteranges; boundary=', '') + const body = Buffer.from(await res1.arrayBuffer()) + + const parts = ByteRanges.parse(body, boundary) + assert.equal(parts.length, blocks.length) + + for (let i = 0; i < parts.length; i++) { + assert.equal(parts[i].type, 'application/vnd.ipld.raw') + assert(equals(parts[i].octets, blocks[i]?.bytes)) + } + }) })
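Review note: a minimal worked sketch of the located-entry range math this diff introduces in src/lib/dag-index/entry.js. The entry shape and the offset/length formula are taken directly from the diff; the CID, offsets and lengths below are made-up illustration values, not real data.

// Hypothetical LocatedIndexEntry, shaped as defined in src/lib/dag-index/api.ts:
// a location claim says the block occupies bytes [96, 96 + 138) of the stored blob.
const entry = {
  site: {
    location: [new URL('https://w3s.link/ipfs/bafkexample?format=raw')], // made-up CID
    range: { offset: 96, length: 138 }
  }
}

// dagula-style RangeOptions asking for bytes 10..19 *within* the block.
const options = { range: [10, 19] }

// Same formula as toBucketGet(): translate the block-relative range into an
// absolute byte range on the bucket object.
const first = entry.site.range.offset + (options.range?.[0] ?? 0)                             // 96 + 10 = 106
const last = entry.site.range.offset + (options.range?.[1] ?? (entry.site.range.length - 1))  // 96 + 19 = 115
const r2GetOptions = { range: { offset: first, length: last - first + 1 } }                   // length 10

console.log(r2GetOptions) // { range: { offset: 106, length: 10 } }

The key half of the same helper mirrors the toCarKey/toBlobKey helpers added to test/helpers/builder.js: CAR-codec links resolve to a `${cid}/${cid}.car` key, anything else to a base58btc-encoded multihash `${digest}/${digest}.blob` key.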