diff --git a/README.md b/README.md index 9eb1063c..fb045aa4 100644 --- a/README.md +++ b/README.md @@ -48,11 +48,10 @@ See the full list of available options at ### Remote URL ### The URL of the original image to load is specified as the remainder of the -path, without any encoding. For example, +path and may be URL encoded. For example, `http://localhost/200/https://willnorris.com/logo.jpg`. -In order to [optimize caching][], it is recommended that URLs not contain query -strings. +If remote image URLs contain query strings, it is recommended to URL-encode them when passing to imageproxy in order to [optimize caching][]. [optimize caching]: http://www.stevesouders.com/blog/2008/08/23/revving-filenames-dont-use-querystring/ @@ -75,6 +74,7 @@ x0.15 | 15% original height, proportional width | 200x,q60 200x,png | 200px wide, converted to PNG format | 200x,png cx175,cw400,ch300,100x | crop to 400x300px starting at (175,0), scale to 100px wide | cx175,cw400,ch300,100x +x | no options, don't transform, just proxy | x The [smart crop feature](https://godoc.org/willnorris.com/go/imageproxy#hdr-Smart_Crop) can best be seen by comparing crops of [this source image][judah-sheets], with diff --git a/data.go b/data.go index 5df4a695..0692cc63 100644 --- a/data.go +++ b/data.go @@ -308,39 +308,36 @@ func (r Request) String() string { // NewRequest parses an http.Request into an imageproxy Request. Options and // the remote image URL are specified in the request path, formatted as: -// /{options}/{remote_url}. Options may be omitted, so a request path may -// simply contain /{remote_url}. The remote URL must be an absolute "http" or -// "https" URL, should not be URL encoded, and may contain a query string. +// /{options}/{remote_url}. Options may not be omitted, but `x` or `0x0` can +// be used as noop option (/x/{remote_url}). +// The remote URL must be an absolute "http(s)" URL unless BaseURL is set. +// The remote URL may be URL encoded. // // Assuming an imageproxy server running on localhost, the following are all // valid imageproxy requests: // // http://localhost/100x200/http://example.com/image.jpg // http://localhost/100x200,r90/http://example.com/image.jpg?foo=bar -// http://localhost//http://example.com/image.jpg -// http://localhost/http://example.com/image.jpg +// http://localhost/x/http://example.com/image.jpg +// http://localhost/x/http%3A%2F%2Fexample.com%2Fimage.jpg func NewRequest(r *http.Request, baseURL *url.URL) (*Request, error) { var err error req := &Request{Original: r} path := r.URL.EscapedPath()[1:] // strip leading slash - req.URL, err = parseURL(path) - if err != nil || !req.URL.IsAbs() { - // first segment should be options - parts := strings.SplitN(path, "/", 2) - if len(parts) != 2 { - return nil, URLError{"too few path segments", r.URL} - } - - var err error - req.URL, err = parseURL(parts[1]) - if err != nil { - return nil, URLError{fmt.Sprintf("unable to parse remote URL: %v", err), r.URL} - } + // first segment should be options + parts := strings.SplitN(path, "/", 2) + if len(parts) != 2 { + return nil, URLError{"too few path segments", r.URL} + } - req.Options = ParseOptions(parts[0]) + req.URL, err = parseURL(parts[1]) + if err != nil { + return nil, URLError{fmt.Sprintf("unable to parse remote URL: %v", err), r.URL} } + req.Options = ParseOptions(parts[0]) + if baseURL != nil { req.URL = baseURL.ResolveReference(req.URL) } @@ -353,8 +350,16 @@ func NewRequest(r *http.Request, baseURL *url.URL) (*Request, error) { return nil, URLError{"remote URL must have http or https scheme", r.URL} } - // query string is always part of the remote URL - req.URL.RawQuery = r.URL.RawQuery + // only append original (unencoded) query string + // if we don't have one already. Example: + // http://localhost/x/https%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld?more=query + // should be https://example.com/foo/bar?hello=world + // not https://example.com/foo/bar?more=query + if len(r.URL.RawQuery) > 0 && len(req.URL.RawQuery) == 0 { + // query string is always part of the remote URL + req.URL.RawQuery = r.URL.RawQuery + } + return req, nil } @@ -363,6 +368,42 @@ var reCleanedURL = regexp.MustCompile(`^(https?):/+([^/])`) // parseURL parses s as a URL, handling URLs that have been munged by // path.Clean or a webserver that collapses multiple slashes. func parseURL(s string) (*url.URL, error) { + var err error + + // convert http:/example.com to http://example.com s = reCleanedURL.ReplaceAllString(s, "$1://$2") + + s, err = decodeURL(s) + if err != nil { + return nil, err + } + return url.Parse(s) } + +var reAbsURL = regexp.MustCompile(`^https?`) +var reDecodedURL = regexp.MustCompile(`^https?://`) + +func decodeURL(s string) (string, error) { + var err error + // don't try to decode unless looks like abs url + if !reAbsURL.MatchString(s) { + return s, nil + } + + // preserve original value in case we are not able to decode + decodedURL := s + maxDecodeAttempts := 3 + for i := 0; !reDecodedURL.MatchString(decodedURL) && i < maxDecodeAttempts; i++ { + decodedURL, err = url.QueryUnescape(decodedURL) + if err != nil { + return "", URLError{"remote URL could not be decoded", nil} + } + } + if reDecodedURL.MatchString(decodedURL) { + return decodedURL, nil + } else { + // return original value. might be relative url (e.g. https/foo.jpg) + return s, nil + } +} diff --git a/data_test.go b/data_test.go index 80d3c3dd..b6a0de25 100644 --- a/data_test.go +++ b/data_test.go @@ -113,6 +113,9 @@ func TestNewRequest(t *testing.T) { {"http://localhost//example.com/foo", "", emptyOptions, true}, {"http://localhost//ftp://example.com/foo", "", emptyOptions, true}, + // invalid URL because options now required + {"http://localhost/http://example.com/foo", "", emptyOptions, true}, + // invalid options. These won't return errors, but will not fully parse the options { "http://localhost/s/http://example.com/", @@ -125,15 +128,19 @@ func TestNewRequest(t *testing.T) { // valid URLs { - "http://localhost/http://example.com/foo", + "http://localhost//http://example.com/foo", "http://example.com/foo", emptyOptions, false, }, { - "http://localhost//http://example.com/foo", + "http://localhost/x/http://example.com/foo", "http://example.com/foo", emptyOptions, false, }, { - "http://localhost//https://example.com/foo", + "http://localhost/x/http://example.com/foo", + "http://example.com/foo", emptyOptions, false, + }, + { + "http://localhost/0x0/https://example.com/foo", "https://example.com/foo", emptyOptions, false, }, { @@ -141,21 +148,46 @@ func TestNewRequest(t *testing.T) { "http://example.com/foo", Options{Width: 1, Height: 2}, false, }, { - "http://localhost//http://example.com/foo?bar", + "http://localhost/0x0/http://example.com/foo?bar", "http://example.com/foo?bar", emptyOptions, false, }, { - "http://localhost/http:/example.com/foo", + "http://localhost/x/http:/example.com/foo", "http://example.com/foo", emptyOptions, false, }, { - "http://localhost/http:///example.com/foo", + "http://localhost/x/http:///example.com/foo", "http://example.com/foo", emptyOptions, false, }, { // escaped path - "http://localhost/http://example.com/%2C", + "http://localhost/x/http://example.com/%2C", "http://example.com/%2C", emptyOptions, false, }, + // unescaped querystring + { + "http://localhost/x/http://example.com/foo/bar?hello=world", + "http://example.com/foo/bar?hello=world", emptyOptions, false, + }, + // escaped remote including querystring + { + "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld", + "http://example.com/foo/bar?hello=world", emptyOptions, false, + }, + { + "http://localhost/x/https%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld", + "https://example.com/foo/bar?hello=world", emptyOptions, false, + }, + // multi-escaped remote + { + "http://localhost/x/https%25253A%25252F%25252Fexample.com%25252Ffoo%25252Fbar%25253Fhello%25253Dworld", + "https://example.com/foo/bar?hello=world", emptyOptions, false, + }, + // escaped remote containing double escaped url as param + // test that we don't over-decode remote url breaking parameters + { + "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld%26url%3Dhttps%253A%252F%252Fwww.example.com%252F%253Ffoo%253Dbar%2526hello%253Dworld", + "http://example.com/foo/bar?hello=world&url=https%3A%2F%2Fwww.example.com%2F%3Ffoo%3Dbar%26hello%3Dworld", emptyOptions, false, + }, } for _, tt := range tests { @@ -186,16 +218,72 @@ func TestNewRequest(t *testing.T) { } func TestNewRequest_BaseURL(t *testing.T) { - req, _ := http.NewRequest("GET", "/x/path", nil) - base, _ := url.Parse("https://example.com/") - - r, err := NewRequest(req, base) - if err != nil { - t.Errorf("NewRequest(%v, %v) returned unexpected error: %v", req, base, err) + tests := []struct { + BaseURL string // base url to use + URL string // input URL to parse as an imageproxy request + RemoteURL string // expected URL of remote image parsed from input + Options Options // expected options parsed from input + ExpectError bool // whether an error is expected from NewRequest + }{ + { + "http://example.com/", + "http://localhost/x/foo", + "http://example.com/foo", emptyOptions, false, + }, + { + "http://example.com/hello", + "http://localhost/x//foo/bar", + "http://example.com/foo/bar", emptyOptions, false, + }, + // if BaseURL doesn't have trailing slash + // URL.ResolveReference will strip last directory + { + "http://example.com/hello/", + "http://localhost/x/foo/bar", + "http://example.com/hello/foo/bar", emptyOptions, false, + }, + { + "http://example.com/hello/", + "http://localhost/x/../foo/bar", + "http://example.com/foo/bar", emptyOptions, false, + }, + // relative remote urls should not have URL Decoding even if + // they start with http... (dirname) + { + "http://example.com/hello/", + "http://localhost/x/httpdir/rela%20tive", + "http://example.com/hello/httpdir/rela%20tive", emptyOptions, false, + }, } - want := "https://example.com/path#0x0" - if got := r.String(); got != want { - t.Errorf("NewRequest(%v, %v) returned %q, want %q", req, base, got, want) + for _, tt := range tests { + req, err := http.NewRequest("GET", tt.URL, nil) + if err != nil { + t.Errorf("http.NewRequest(%q) returned error: %v", tt.URL, err) + continue + } + base, err := url.Parse(tt.BaseURL) + if err != nil { + t.Errorf("url.Parse(%q) returned error: %v", tt.BaseURL, err) + continue + } + + r, err := NewRequest(req, base) + if tt.ExpectError { + if err == nil { + t.Errorf("NewRequest(%v, %v) did not return expected error", req, base) + } + continue + } else if err != nil { + t.Errorf("NewRequest(%v, %v) returned unexpected error: %v", req, base, err) + continue + } + + if got, want := r.URL.String(), tt.RemoteURL; got != want { + t.Errorf("NewRequest(%q, %q) request URL = %v, want %v", tt.URL, tt.BaseURL, got, want) + } + if got, want := r.Options, tt.Options; got != want { + t.Errorf("NewRequest(%q, %q) request options = %v, want %v", tt.URL, tt.BaseURL, got, want) + } } } diff --git a/imageproxy_test.go b/imageproxy_test.go index 86bd6a6d..2d2c8b87 100644 --- a/imageproxy_test.go +++ b/imageproxy_test.go @@ -382,10 +382,10 @@ func TestProxy_ServeHTTP(t *testing.T) { code int // expected response status code }{ {"/favicon.ico", http.StatusOK}, - {"//foo", http.StatusBadRequest}, // invalid request URL - {"/http://bad.test/", http.StatusForbidden}, // Disallowed host - {"/http://good.test/error", http.StatusInternalServerError}, // HTTP protocol error - {"/http://good.test/nocontent", http.StatusNoContent}, // non-OK response + {"/x/foo", http.StatusBadRequest}, // invalid request URL + {"/x/http://bad.test/", http.StatusForbidden}, // Disallowed host + {"/x/http://good.test/error", http.StatusInternalServerError}, // HTTP protocol error + {"/x/http://good.test/nocontent", http.StatusNoContent}, // non-OK response {"/100/http://good.test/png", http.StatusOK}, {"/100/http://good.test/plain", http.StatusForbidden}, // non-image response @@ -413,7 +413,7 @@ func TestProxy_ServeHTTP_is304(t *testing.T) { }, } - req, _ := http.NewRequest("GET", "http://localhost/http://good.test/etag", nil) + req, _ := http.NewRequest("GET", "http://localhost/x/http://good.test/etag", nil) req.Header.Add("If-None-Match", `"tag"`) resp := httptest.NewRecorder() p.ServeHTTP(resp, req)