From 65d3b4732d623860afa0df44c869fcfe51330b47 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Fri, 8 Apr 2022 15:55:56 -0500 Subject: [PATCH] WIP _redirects refactored --- core/corehttp/gateway_handler.go | 150 +++++++++++++++--- .../gateway_handler_unixfs__redirects.go | 139 ++++++++++++++++ go.mod | 4 + go.sum | 6 + test/sharness/t0109-gateway-web-_redirects.sh | 62 ++++++++ 5 files changed, 343 insertions(+), 18 deletions(-) create mode 100644 core/corehttp/gateway_handler_unixfs__redirects.go create mode 100644 test/sharness/t0109-gateway-web-_redirects.sh diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index c59ea766b0f6..a295422fb6ea 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -292,24 +292,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - // Resolve path to the final DAG node for the ETag - resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) - switch err { - case nil: - case coreiface.ErrOffline: - webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) - return - default: - // if Accept is text/html, see if ipfs-404.html is present - if i.servePretty404IfPresent(w, r, contentPath) { - logger.Debugw("serve pretty 404 if present") - return - } - - webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) - return - } - // Detect when explicit Accept header or ?format parameter are present responseFormat, formatParams, err := customResponseFormat(r) if err != nil { @@ -317,6 +299,17 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) + + var ok bool + var resolvedPath ipath.Resolved + if responseFormat == "" { + resolvedPath, contentPath, ok = i.handleUnixfsPathResolution(w, r, responseFormat, contentPath, logger) + } else { + resolvedPath, contentPath, ok = i.handleNonUnixfsPathResolution(w, r, responseFormat, contentPath, logger) + } + if !ok { + return + } trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) // Finish early if client already has matching Etag @@ -934,3 +927,124 @@ func (i *gatewayHandler) handledSetHeaders(w http.ResponseWriter, r *http.Reques return false } + +func (i *gatewayHandler) handleNonUnixfsPathResolution(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, logger *zap.SugaredLogger) (ipath.Resolved, ipath.Path, bool) { + // Resolve the path for the provided contentPath + resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) + + switch err { + case nil: + return resolvedPath, contentPath, true + case coreiface.ErrOffline: + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) + return nil, nil, false + default: + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) + return nil, nil, false + } +} + +// Resolve the provided path. +// If we can't resolve the path, then for Unixfs requests, look for a _redirects file in the root CID path. +// If _redirects file exists, attempt to match redirect rules for the path. +// If a rule matches, either redirect or rewrite as determined by the rule. +// For rewrites, we need to attempt to resolve the rewrite path as well, and if it doesn't resolve, this time we just return the error. +func (i *gatewayHandler) handleUnixfsPathResolution(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, logger *zap.SugaredLogger) (ipath.Resolved, ipath.Path, bool) { + // Resolve the path for the provided contentPath + resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) + + switch err { + case nil: + // TODO: I believe for the force option, we might need to short circuit this, and thus we would need to read the redirects file first + return resolvedPath, contentPath, true + case coreiface.ErrOffline: + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) + return nil, nil, false + default: + // If we can't resolve the path + // Only look for _redirects file if we have Unixfs and Origin isolation + if hasOriginIsolation(r) { + // Check for _redirects file and redirect as needed + redirectsFile, err := i.getRedirectsFile(r) + if err != nil { + switch err.(type) { + case resolver.ErrNoLink: + // _redirects files doesn't exist, so don't error + default: + // TODO(JJ): During tests we get multibase.ErrUnsupportedEncoding + // This comes from multibase and I assume is due to a fake or otherwise bad CID being in the test. + internalWebError(w, err) + return nil, nil, false + } + } else { + // _redirects file exists, so parse it and redirect + redirected, newPath, err := i.handleRedirectsFile(w, r, redirectsFile, logger) + if err != nil { + err = fmt.Errorf("trouble processing _redirects file at %q: %w", redirectsFile.String(), err) + internalWebError(w, err) + return nil, nil, false + } + + if redirected { + return nil, nil, false + } + + // 200 is treated as a rewrite, so update the path and continue + if newPath != "" { + // Reassign contentPath and resolvedPath since the URL was rewritten + contentPath = ipath.New(newPath) + resolvedPath, err = i.api.ResolvePath(r.Context(), contentPath) + if err != nil { + internalWebError(w, err) + return nil, nil, false + } + logger.Debugf("_redirects: 200 rewrite. newPath=%v", newPath) + + return resolvedPath, contentPath, true + } + } + } + + // if Accept is text/html, see if ipfs-404.html is present + // This logic isn't documented and will likely be removed at some point. + // Any 404 logic in _redirects above will have already run by this time, so it's really an extra fall back + if i.servePretty404IfPresent(w, r, contentPath) { + logger.Debugw("serve pretty 404 if present") + return nil, nil, false + } + + // Fallback + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) + return nil, nil, false + } +} + +func (i *gatewayHandler) serve404(w http.ResponseWriter, r *http.Request, content404Path ipath.Path) error { + resolved404Path, err := i.api.ResolvePath(r.Context(), content404Path) + if err != nil { + return err + } + + node, err := i.api.Unixfs().Get(r.Context(), resolved404Path) + if err != nil { + return err + } + defer node.Close() + + f, ok := node.(files.File) + if !ok { + return fmt.Errorf("could not convert node for 404 page to file") + } + + size, err := f.Size() + if err != nil { + return fmt.Errorf("could not get size of 404 page") + } + + log.Debugw("using _redirects 404 file", "path", content404Path) + w.Header().Set("Content-Type", "text/html") + w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + w.WriteHeader(http.StatusNotFound) + _, err = io.CopyN(w, f, size) + return err +} diff --git a/core/corehttp/gateway_handler_unixfs__redirects.go b/core/corehttp/gateway_handler_unixfs__redirects.go new file mode 100644 index 000000000000..d08028600121 --- /dev/null +++ b/core/corehttp/gateway_handler_unixfs__redirects.go @@ -0,0 +1,139 @@ +package corehttp + +import ( + "errors" + "fmt" + "net/http" + gopath "path" + "strings" + + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "github.com/tj/go-redirects" + "github.com/ucarion/urlpath" + "go.uber.org/zap" +) + +func (i *gatewayHandler) handleRedirectsFile(w http.ResponseWriter, r *http.Request, redirectsFilePath ipath.Resolved, logger *zap.SugaredLogger) (bool, string, error) { + // Convert the path into a file node + node, err := i.api.Unixfs().Get(r.Context(), redirectsFilePath) + if err != nil { + return false, "", fmt.Errorf("could not get _redirects node: %v", err) + } + defer node.Close() + + // Convert the node into a file + f, ok := node.(files.File) + if !ok { + return false, "", fmt.Errorf("could not convert _redirects node to file") + } + + // Parse redirect rules from file + redirectRules, err := redirects.Parse(f) + if err != nil { + return false, "", fmt.Errorf("could not parse redirect rules: %v", err) + } + logger.Debugf("redirectRules=%v", redirectRules) + + // Attempt to match a rule to the URL path, and perform the corresponding redirect or rewrite + pathParts := strings.Split(r.URL.Path, "/") + if len(pathParts) > 3 { + // All paths should start with /ipfs/cid/, so get the path after that + urlPath := "/" + strings.Join(pathParts[3:], "/") + rootPath := strings.Join(pathParts[:3], "/") + // Trim off the trailing / + urlPath = strings.TrimSuffix(urlPath, "/") + + logger.Debugf("_redirects: urlPath=", urlPath) + for _, rule := range redirectRules { + // get rule.From, trim trailing slash, ... + fromPath := urlpath.New(strings.TrimSuffix(rule.From, "/")) + logger.Debugf("_redirects: fromPath=%v", strings.TrimSuffix(rule.From, "/")) + match, ok := fromPath.Match(urlPath) + if !ok { + continue + } + + // We have a match! Perform substitutions. + toPath := rule.To + toPath = replacePlaceholders(toPath, match) + toPath = replaceSplat(toPath, match) + + logger.Debugf("_redirects: toPath=%v", toPath) + + // Rewrite + if rule.Status == 200 { + // Prepend the rootPath + toPath = rootPath + rule.To + return false, toPath, nil + } + + // Or 404 + if rule.Status == 404 { + toPath = rootPath + rule.To + content404Path := ipath.New(toPath) + err = i.serve404(w, r, content404Path) + return true, toPath, err + } + + // Or redirect + http.Redirect(w, r, toPath, rule.Status) + return true, toPath, nil + } + } + + // No redirects matched + return false, "", nil +} + +func replacePlaceholders(to string, match urlpath.Match) string { + if len(match.Params) > 0 { + for key, value := range match.Params { + to = strings.ReplaceAll(to, ":"+key, value) + } + } + + return to +} + +func replaceSplat(to string, match urlpath.Match) string { + return strings.ReplaceAll(to, ":splat", match.Trailing) +} + +// Returns a resolved path to the _redirects file located in the root CID path of the requested path +func (i *gatewayHandler) getRedirectsFile(r *http.Request) (ipath.Resolved, error) { + // r.URL.Path is the full ipfs path to the requested resource, + // regardless of whether path or subdomain resolution is used. + rootPath, err := getRootPath(r.URL.Path) + if err != nil { + return nil, err + } + + path := ipath.New(gopath.Join(rootPath, "_redirects")) + resolvedPath, err := i.api.ResolvePath(r.Context(), path) + if err != nil { + return nil, err + } + return resolvedPath, nil +} + +// Returns the root CID path for the given path +func getRootPath(path string) (string, error) { + if strings.HasPrefix(path, ipfsPathPrefix) && strings.Count(gopath.Clean(path), "/") >= 2 { + parts := strings.Split(path, "/") + return gopath.Join(ipfsPathPrefix, parts[2]), nil + } else { + return "", errors.New("failed to get root CID path") + } +} + +func hasOriginIsolation(r *http.Request) bool { + _, gw := r.Context().Value("gw-hostname").(string) + _, dnslink := r.Context().Value("dnslink-hostname").(string) + + if gw || dnslink { + return true + } else { + return false + } +} diff --git a/go.mod b/go.mod index d89921df1448..6801e815c1b5 100644 --- a/go.mod +++ b/go.mod @@ -102,6 +102,8 @@ require ( github.com/prometheus/client_golang v1.11.0 github.com/stretchr/testify v1.7.0 github.com/syndtr/goleveldb v1.0.0 + github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37 // indirect + github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb // indirect github.com/whyrusleeping/go-sysinfo v0.0.0-20190219211824-4a357d4b90b1 github.com/whyrusleeping/multiaddr-filter v0.0.0-20160516205228-e903e4adabd7 go.opencensus.io v0.23.0 @@ -121,4 +123,6 @@ require ( golang.org/x/sys v0.0.0-20211025112917-711f33c9992c ) +replace github.com/tj/go-redirects => ../go-redirects + go 1.16 diff --git a/go.sum b/go.sum index 5163daaa5b3c..27d158ee11ff 100644 --- a/go.sum +++ b/go.sum @@ -1368,9 +1368,14 @@ github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpP github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e h1:T5PdfK/M1xyrHwynxMIVMWLS7f/qHwfslZphxtGnw7s= github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e/go.mod h1:XDKHRm5ThF8YJjx001LtgelzsoaEcvnA7lVWz9EeX3g= +github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk= +github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37 h1:K11tjwz8zTTSZkz4TUjfLN+y8uJWP38BbyPqZ2yB/Yk= +github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37/go.mod h1:E0E2H2gQA+uoi27VCSU+a/BULPtadQA78q3cpTjZbZw= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM= +github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb h1:Ywfo8sUltxogBpFuMOFRrrSifO788kAFxmvVw31PtQQ= +github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb/go.mod h1:ikPs9bRWicNw3S7XpJ8sK/smGwU9WcSVU3dy9qahYBM= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= @@ -1951,6 +1956,7 @@ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o= diff --git a/test/sharness/t0109-gateway-web-_redirects.sh b/test/sharness/t0109-gateway-web-_redirects.sh new file mode 100644 index 000000000000..a0789aed1dc7 --- /dev/null +++ b/test/sharness/t0109-gateway-web-_redirects.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +test_description="Test HTTP Gateway _redirects support" + +. lib/test-lib.sh + +test_init_ipfs +test_launch_ipfs_daemon + +## ============================================================================ +## Test _redirects file support +## ============================================================================ + +# Directory tree crafted to test _redirects file support +test_expect_success "Add the _redirects file test directory" ' + mkdir -p testredirect/ && + echo "index.html" > testredirect/index.html && + echo "one.html" > testredirect/one.html && + echo "two.html" > testredirect/two.html && + echo "^/redirect-one$ /one.html" > testredirect/_redirects && + echo "^/301-redirect-one$ /one.html 301" >> testredirect/_redirects && + echo "^/302-redirect-two$ /two.html 302" >> testredirect/_redirects && + echo "^/200-index$ /index.html 200" >> testredirect/_redirects && + echo "^/*$ /index.html 200" >> testredirect/_redirects && + REDIRECTS_DIR_CID=$(ipfs add -Qr --cid-version 1 testredirect) +' + +REDIRECTS_DIR_HOSTNAME="${REDIRECTS_DIR_CID}.ipfs.localhost:$GWAY_PORT" + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/redirect-one redirects with default of 302, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/redirect-one" > response && + test_should_contain "one.html" response && + test_should_contain "302 Found" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/301-redirect-one redirects with 301, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/301-redirect-one" > response && + test_should_contain "one.html" response && + test_should_contain "301 Moved Permanently" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/302-redirect-two redirects with 302, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/302-redirect-two" > response && + test_should_contain "two.html" response && + test_should_contain "302 Found" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/200-index returns 200, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/200-index" > response && + test_should_contain "index.html" response && + test_should_contain "200 OK" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/has-no-redirects-entry returns 404, since not in _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/has-no-redirects-entry" > response && + test_should_contain "404 Not Found" response +' + +test_expect_success "request for http://127.0.0.1:$GWAY_PORT/ipfs/$REDIRECTS_DIR_CID/301-redirect-one returns 404, no _redirects since no origin isolation" ' + curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$REDIRECTS_DIR_CID/301-redirect-one" > response && + test_should_contain "404 Not Found" response +' \ No newline at end of file