Skip to content

Commit

Permalink
Merge pull request #31 from filecoin-saturn/cid-detection
Browse files Browse the repository at this point in the history
fix: detect cids in query parameters
  • Loading branch information
guanzo authored Jan 19, 2024
2 parents ec2263a + ba22e41 commit b294f39
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 51 deletions.
14 changes: 7 additions & 7 deletions src/sw/controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { v4 as uuidv4 } from 'uuid'
import * as Sentry from '@sentry/browser'

import { Interceptor } from './interceptor.js'
import { findCIDInURL } from '../utils.js'
import { findCIDPathInURL } from '../utils.js'

const FILTERED_HOSTS = [
'images.studio.metaplex.com',
Expand Down Expand Up @@ -48,11 +48,11 @@ export class Controller {
}

const { url } = event.request
const cid = findCIDInURL(url)
const cidPath = findCIDPathInURL(url)

if (cid) {
debug('cid', cid, url)
event.respondWith(fetchCID(cid, this.saturn, this.clientId, event))
if (cidPath) {
debug('cidPath', cidPath, url)
event.respondWith(fetchCID(cidPath, this.saturn, this.clientId, event))
}
})
}
Expand All @@ -76,12 +76,12 @@ function getClientKey() {
return clientKey
}

async function fetchCID (cid, saturn, clientId, event) {
async function fetchCID(cidPath, saturn, clientId, event) {
let response = null
const { request } = event

try {
const interceptor = new Interceptor(cid, saturn, clientId, event)
const interceptor = new Interceptor(cidPath, saturn, clientId, event)
response = await interceptor.fetch()
} catch (err) {
debug(`${request.url}: fetchCID err %O`, err)
Expand Down
7 changes: 2 additions & 5 deletions src/sw/interceptor.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,15 @@ import toIterable from 'browser-readablestream-to-it'
import createDebug from 'debug'
import * as Sentry from '@sentry/browser'

import { getCidPathFromURL } from '../utils.js'

const debug = createDebug('sw')
const cl = console.log

export class Interceptor {
static nocache = false // request/response skips L1 cache entirely
static bypasscache = false // request skips L1 cache, response gets cached.

constructor(cid, saturn, clientId, event) {
this.cid = cid
this.cidPath = getCidPathFromURL(event.request.url, cid)
constructor(cidPath, saturn, clientId, event) {
this.cidPath = cidPath
this.saturn = saturn
this.clientId = clientId
this.event = event
Expand Down
74 changes: 55 additions & 19 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,67 @@ export class Deferred {
}

// Modified from https://github.com/PinataCloud/ipfs-gateway-tools/blob/34533f3d5f3c0dd616327e2e5443072c27ea569d/src/index.js#L6
/**
 * Returns the first CID found in the non-query part of `url`.
 *
 * Everything after the first `?` is dropped and the remainder is split on
 * `/`. Each segment is tested as-is and then truncated at its first `.`,
 * which covers subdomain gateways such as `<cid>.ipfs.dweb.link`.
 *
 * @param {string} url - URL (or URL-like string) to scan.
 * @returns {string|null} The CID, or `null` when no segment is a CID.
 */
export function findCIDInURL (url) {
  const splitUrl = url.split('?')[0].split('/')
  for (const split of splitUrl) {
    if (isIPFS.cid(split)) {
      return split
    }
    const splitOnDot = split.split('.')[0]
    if (isIPFS.cid(splitOnDot)) {
      return splitOnDot
    }
  }

  return null
}

/**
 * Finds a CID, together with any file path that follows it, in `url`.
 *
 * The host + path is searched first, then every query parameter value, so
 * proxied gateway URLs such as `?url=ipfs.io/ipfs/<cid>/cat.png` are also
 * detected. The first component that yields a CID wins.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string|null} `<cid>` or `<cid>/<path>`; `null` when `url` cannot
 *   be parsed or no CID is found.
 */
export function findCIDPathInURL(url) {
  let urlObj
  try {
    urlObj = new URL(url)
  } catch (err) {
    // Not an absolute URL, so there is nothing to search.
    return null
  }

  const { hostname, pathname, searchParams, href } = urlObj

  const searchStrings = [
    hostname + pathname, // checks for path based or subdomain based cids.
    ...searchParams.values(), // params could contain cid URLs, e.g. ?url=ipfs.io/ipfs/<cid>
  ]

  for (const str of searchStrings) {
    const { cid, path } = findCIDPathInUrlComponent(str)

    // sanity check if parsed cid appears in URL
    if (cid && href.includes(cid)) {
      return path ? `${cid}/${path}` : cid
    }
  }

  return null
}

/**
 * Builds the `<cid>/<path>` string for an already-known `cid` from `url`.
 *
 * @param {string} url - Absolute URL; `new URL(url)` throws if it is not.
 * @param {string} cid - CID previously detected in `url`.
 * @returns {string|undefined} Everything after `/ipfs/` for path-style URLs,
 *   or `cid + pathname` when the hostname contains the CID; `undefined` when
 *   neither form matches.
 */
export function getCidPathFromURL(url, cid) {
  const { hostname, pathname } = new URL(url)
  let cidPath

  if (pathname.startsWith('/ipfs/')) {
    cidPath = pathname.replace('/ipfs/', '')
  } else if (hostname.includes(cid)) {
    // https://<cid>.ipfs.dweb.link/cat.png -> https://saturn.ms/ipfs/<cid>/cat.png
    cidPath = cid + pathname
  }

  return cidPath
}

/**
 * Searches one URL component (host + path, or a query parameter value) for a
 * CID and the file path that follows it.
 *
 * @param {string} str - Component to search; an optional `http(s)://` is
 *   stripped before splitting on `/`.
 * @returns {{cid: string|null, path: string|null}} The CID and the remaining
 *   path (`null` when empty); both `null` when no CID is found.
 */
function findCIDPathInUrlComponent(str) {
  const splitStr = str.replace(/https?:\/\//, '').split('/')

  // Assumes the rest of the segments after the cid form the file path.
  const segmentsToPath = i => splitStr.slice(i).join('/') || null

  for (let i = 0; i < splitStr.length; i++) {
    const segment = splitStr[i]
    if (isIPFS.cid(segment)) {
      return { cid: segment, path: segmentsToPath(i + 1) }
    }

    // Subdomain gateways (<cid>.ipfs.dweb.link) and decorated names
    // (<cid>.png) carry the CID before the first dot.
    const splitOnDot = segment.split('.')[0]
    if (isIPFS.cid(splitOnDot)) {
      // The path is whatever follows the matching segment. For a host match
      // (i === 0) that is every path segment; for a later match it must not
      // re-include the segment that held the CID.
      return { cid: splitOnDot, path: segmentsToPath(i + 1) }
    }
  }

  return { cid: null, path: null }
}
86 changes: 66 additions & 20 deletions test/utils.spec.js
Original file line number Diff line number Diff line change
@@ -1,41 +1,87 @@
import assert from 'node:assert/strict'
import { describe, it } from 'node:test'
import { findCIDInURL, getCidPathFromURL } from '#src/utils.js'
import { findCIDPathInURL } from '#src/utils.js'

// Covers the CID lookup helpers exported from src/utils.js.
describe('controller', () => {
  // --- findCIDInURL / getCidPathFromURL ---

  it('should find cid in the subdomain', () => {
    const cid = 'bafybeigt4657qnz5bi2pa7tdsbiobny55hkpt5vupgnueex22tzvwxfiym'
    const url = `https://${cid}.ipfs.dweb.link`

    const foundCid = findCIDInURL(url)
    assert.strictEqual(foundCid, cid)
  })

  it('should find cid in the url path', () => {
    const cid = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV'
    const url = `https://ipfs.io/ipfs/${cid}`

    const foundCid = findCIDInURL(url)
    assert.strictEqual(foundCid, cid)
  })

  it('should find cidPath in the subdomain', () => {
    const cid = 'bafybeigt4657qnz5bi2pa7tdsbiobny55hkpt5vupgnueex22tzvwxfiym'
    const path = 'hello/world.png'
    const cidPath = `${cid}/${path}`
    const url = `https://${cid}.ipfs.dweb.link/${path}`

    const foundCidPath = getCidPathFromURL(url, cid)
    assert.strictEqual(foundCidPath, cidPath)
  })

  it('should find cidPath in the url path', () => {
    const cid = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV'
    const path = 'hello/world.png'
    const cidPath = `${cid}/${path}`
    const url = `https://ipfs.io/ipfs/${cid}/${path}`

    const foundCidPath = getCidPathFromURL(url, cid)
    assert.strictEqual(foundCidPath, cidPath)
  })

  // --- findCIDPathInURL: CID in the host or path ---

  it('finds the cid in the subdomain', () => {
    const cid = 'bafybeigt4657qnz5bi2pa7tdsbiobny55hkpt5vupgnueex22tzvwxfiym'
    const url = `https://${cid}.ipfs.dweb.link`

    assert.strictEqual(findCIDPathInURL(url), cid)
  })

  it('finds the cidPath in the subdomain', () => {
    const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
    const path = 'test/cat.png'
    const cidPath = `${cid}/${path}`
    const url = `https://${cid}.ipfs.dweb.link/${path}`

    assert.strictEqual(findCIDPathInURL(url), cidPath)
  })

  it('finds the cid in the url path', () => {
    const cid = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV'
    const url = `https://ipfs.io/ipfs/${cid}`

    assert.strictEqual(findCIDPathInURL(url), cid)
  })

  it('finds the cidPath in the url path', () => {
    const cidPath = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV/cat.png'
    const url = `https://ipfs.io/ipfs/${cidPath}`

    assert.strictEqual(findCIDPathInURL(url), cidPath)
  })

  // --- findCIDPathInURL: CID inside a query parameter ---

  it('finds the cid in an encoded query param', () => {
    const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
    const url = `https://proxy.com/?url=ipfs.io%2Fipfs%2F${cid}/`

    assert.strictEqual(findCIDPathInURL(url), cid)
  })

  it('finds the cidPath in an encoded query param', () => {
    const cidPath = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily/test/cat.png'
    const url = `https://proxy.com/?url=https%3A%2F%2Fipfs.io%2Fipfs%2F${cidPath}`

    assert.strictEqual(findCIDPathInURL(url), cidPath)
  })

  it('finds the subdomain cid in an encoded query param', () => {
    const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
    const param = `${cid}.ipfs.dweb.link`
    const url = `https://proxy.com/?url=${param}`

    assert.strictEqual(findCIDPathInURL(url), cid)
  })

  it('finds the subdomain cidPath in an encoded query param', () => {
    const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
    const path = 'dog/cow/cat.png'
    const cidPath = `${cid}/${path}`
    const param = `https%3A%2F%2F${cid}.ipfs.dweb.link/${path}`
    const url = `https://proxy.com/?url=${param}`

    assert.strictEqual(findCIDPathInURL(url), cidPath)
  })

  it('finds the plain cid (no /ipfs/ prefix) in an encoded query param', () => {
    const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
    const url = `https://proxy.com/?cid=${cid}`

    assert.strictEqual(findCIDPathInURL(url), cid)
  })

  it('finds the plain cidPath (no /ipfs/ prefix) in an encoded query param', () => {
    const cidPath = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily/test/cat.png'
    const url = `https://proxy.com/?cid=${cidPath}`

    assert.strictEqual(findCIDPathInURL(url), cidPath)
  })

  // --- findCIDPathInURL: no match ---

  it('returns null if cid not found', () => {
    const url = 'https://example.com/hello/world.png'

    assert.strictEqual(findCIDPathInURL(url), null)
  })
})

0 comments on commit b294f39

Please sign in to comment.