From 1201687da268c11459891a80ca1972aeaca8db88 Mon Sep 17 00:00:00 2001 From: Nicolas George Date: Thu, 30 Jul 2020 00:02:10 +0200 Subject: [PATCH] lavf/url: rewrite ff_make_absolute_url() using ff_url_decompose(). Also add and update some tests. Change the semantic a little, because for filesytem paths symlinks complicate things. See the comments in the code for detail. Fix trac tickets #8813 and 8814. --- libavformat/tests/url.c | 60 ++++++++- libavformat/url.c | 261 ++++++++++++++++++++-------------------- libavformat/url.h | 4 +- tests/ref/fate/url | 54 ++++++++- 4 files changed, 245 insertions(+), 134 deletions(-) diff --git a/libavformat/tests/url.c b/libavformat/tests/url.c index e7d259ab7dabf..2440ae08bc555 100644 --- a/libavformat/tests/url.c +++ b/libavformat/tests/url.c @@ -49,7 +49,13 @@ static void test_decompose(const char *url) static void test(const char *base, const char *rel) { char buf[200], buf2[200]; - ff_make_absolute_url(buf, sizeof(buf), base, rel); + int ret; + + ret = ff_make_absolute_url(buf, sizeof(buf), base, rel); + if (ret < 0) { + printf("%50s %-20s => error %s\n", base, rel, av_err2str(ret)); + return; + } printf("%50s %-20s => %s\n", base, rel, buf); if (base) { /* Test in-buffer replacement */ @@ -104,6 +110,58 @@ int main(void) test("http://server/foo/bar", "/../../../../../other/url"); test("http://server/foo/bar", "/test/../../../../../other/url"); test("http://server/foo/bar", "/test/../../test/../../../other/url"); + test("http://server/foo/bar", "file:../baz/qux"); + test("http://server/foo//bar/", "../../"); + test("file:../tmp/foo", "../bar/"); + test("file:../tmp/foo", "file:../bar/"); + test("http://server/foo/bar", "./"); + test("http://server/foo/bar", ".dotfile"); + test("http://server/foo/bar", "..doubledotfile"); + test("http://server/foo/bar", "double..dotfile"); + test("http://server/foo/bar", "doubledotfile.."); + + /* From https://tools.ietf.org/html/rfc3986#section-5.4 */ + test("http://a/b/c/d;p?q", "g:h"); // g:h + test("http://a/b/c/d;p?q", "g"); // http://a/b/c/g + test("http://a/b/c/d;p?q", "./g"); // http://a/b/c/g + test("http://a/b/c/d;p?q", "g/"); // http://a/b/c/g/ + test("http://a/b/c/d;p?q", "/g"); // http://a/g + test("http://a/b/c/d;p?q", "//g"); // http://g + test("http://a/b/c/d;p?q", "?y"); // http://a/b/c/d;p?y + test("http://a/b/c/d;p?q", "g?y"); // http://a/b/c/g?y + test("http://a/b/c/d;p?q", "#s"); // http://a/b/c/d;p?q#s + test("http://a/b/c/d;p?q", "g#s"); // http://a/b/c/g#s + test("http://a/b/c/d;p?q", "g?y#s"); // http://a/b/c/g?y#s + test("http://a/b/c/d;p?q", ";x"); // http://a/b/c/;x + test("http://a/b/c/d;p?q", "g;x"); // http://a/b/c/g;x + test("http://a/b/c/d;p?q", "g;x?y#s"); // http://a/b/c/g;x?y#s + test("http://a/b/c/d;p?q", ""); // http://a/b/c/d;p?q + test("http://a/b/c/d;p?q", "."); // http://a/b/c/ + test("http://a/b/c/d;p?q", "./"); // http://a/b/c/ + test("http://a/b/c/d;p?q", ".."); // http://a/b/ + test("http://a/b/c/d;p?q", "../"); // http://a/b/ + test("http://a/b/c/d;p?q", "../g"); // http://a/b/g + test("http://a/b/c/d;p?q", "../.."); // http://a/ + test("http://a/b/c/d;p?q", "../../"); // http://a/ + test("http://a/b/c/d;p?q", "../../g"); // http://a/g + test("http://a/b/c/d;p?q", "../../../g"); // http://a/g + test("http://a/b/c/d;p?q", "../../../../g"); // http://a/g + test("http://a/b/c/d;p?q", "/./g"); // http://a/g + test("http://a/b/c/d;p?q", "/../g"); // http://a/g + test("http://a/b/c/d;p?q", "g."); // http://a/b/c/g. + test("http://a/b/c/d;p?q", ".g"); // http://a/b/c/.g + test("http://a/b/c/d;p?q", "g.."); // http://a/b/c/g.. + test("http://a/b/c/d;p?q", "..g"); // http://a/b/c/..g + test("http://a/b/c/d;p?q", "./../g"); // http://a/b/g + test("http://a/b/c/d;p?q", "./g/."); // http://a/b/c/g/ + test("http://a/b/c/d;p?q", "g/./h"); // http://a/b/c/g/h + test("http://a/b/c/d;p?q", "g/../h"); // http://a/b/c/h + test("http://a/b/c/d;p?q", "g;x=1/./y"); // http://a/b/c/g;x=1/y + test("http://a/b/c/d;p?q", "g;x=1/../y"); // http://a/b/c/y + test("http://a/b/c/d;p?q", "g?y/./x"); // http://a/b/c/g?y/./x + test("http://a/b/c/d;p?q", "g?y/../x"); // http://a/b/c/g?y/../x + test("http://a/b/c/d;p?q", "g#s/./x"); // http://a/b/c/g#s/./x + test("http://a/b/c/d;p?q", "g#s/../x"); // http://a/b/c/g#s/../x printf("\nTesting av_url_split:\n"); test2("/foo/bar"); diff --git a/libavformat/url.c b/libavformat/url.c index b92409f6b2f38..3c858f0257c8c 100644 --- a/libavformat/url.c +++ b/libavformat/url.c @@ -149,146 +149,149 @@ int ff_url_decompose(URLComponents *uc, const char *url, const char *end) return 0; } -static void trim_double_dot_url(char *buf, const char *rel, int size) +static int append_path(char *root, char *out_end, char **rout, + const char *in, const char *in_end) { - const char *p = rel; - const char *root = rel; - char tmp_path[MAX_URL_SIZE] = {0, }; - char *sep; - char *node; - - /* Get the path root of the url which start by "://" */ - if (p && (sep = strstr(p, "://"))) { - sep += 3; - root = strchr(sep, '/'); - if (!root) - return; - } - - /* set new current position if the root node is changed */ - p = root; - while (p && (node = strstr(p, ".."))) { - av_strlcat(tmp_path, p, node - p + strlen(tmp_path)); - p = node + 3; - sep = strrchr(tmp_path, '/'); - if (sep) - sep[0] = '\0'; - else - tmp_path[0] = '\0'; + char *out = *rout; + const char *d, *next; + + if (in < in_end && *in == '/') + in++; /* already taken care of */ + while (in < in_end) { + d = find_delim("/", in, in_end); + next = d + (d < in_end && *d == '/'); + if (d - in == 1 && in[0] == '.') { + /* skip */ + } else if (d - in == 2 && in[0] == '.' && in[1] == '.') { + av_assert1(out[-1] == '/'); + if (out - root > 1) + while (out > root && (--out)[-1] != '/'); + } else { + if (out_end - out < next - in) + return AVERROR(ENOMEM); + memmove(out, in, next - in); + out += next - in; + } + in = next; } - - if (!av_stristart(p, "/", NULL) && root != rel) - av_strlcat(tmp_path, "/", size); - - av_strlcat(tmp_path, p, size); - /* start set buf after temp path process. */ - av_strlcpy(buf, rel, root - rel + 1); - - if (!av_stristart(tmp_path, "/", NULL) && root != rel) - av_strlcat(buf, "/", size); - - av_strlcat(buf, tmp_path, size); + *rout = out; + return 0; } -void ff_make_absolute_url(char *buf, int size, const char *base, +int ff_make_absolute_url(char *buf, int size, const char *base, const char *rel) { - char *sep, *path_query; - char *root, *p; - char tmp_path[MAX_URL_SIZE]; - - memset(tmp_path, 0, sizeof(tmp_path)); - /* Absolute path, relative to the current server */ - if (base && strstr(base, "://") && rel[0] == '/') { - if (base != buf) - av_strlcpy(buf, base, size); - sep = strstr(buf, "://"); - if (sep) { - /* Take scheme from base url */ - if (rel[1] == '/') { - sep[1] = '\0'; - } else { - /* Take scheme and host from base url */ - sep += 3; - sep = strchr(sep, '/'); - if (sep) - *sep = '\0'; - } - } - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); - return; + URLComponents ub, uc; + char *out, *out_end, *path; + const char *keep, *base_path_end; + int use_base_path, simplify_path = 0, ret; + + /* This is tricky. + For HTTP, http://server/site/page + ../media/file + should resolve into http://server/media/file + but for filesystem access, dir/playlist + ../media/file + should resolve into dir/../media/file + because dir could be a symlink, and .. points to + the actual parent of the target directory. + + We'll consider that URLs with an actual scheme and authority, + i.e. starting with scheme://, need parent dir simplification, + while bare paths or pseudo-URLs starting with proto: without + the double slash do not. + + For real URLs, the processing is similar to the algorithm described + here: + https://tools.ietf.org/html/rfc3986#section-5 + */ + + if (!size) + return AVERROR(ENOMEM); + out = buf; + out_end = buf + size - 1; + + if (!base) + base = ""; + if ((ret = ff_url_decompose(&ub, base, NULL) < 0) || + (ret = ff_url_decompose(&uc, rel, NULL) < 0)) + goto error; + + keep = ub.url; +#define KEEP(component, also) do { \ + if (uc.url_component_end_##component == uc.url && \ + ub.url_component_end_##component > keep) { \ + keep = ub.url_component_end_##component; \ + also \ + } \ + } while (0) + KEEP(scheme, ); + KEEP(authority_full, simplify_path = 1;); + KEEP(path,); + KEEP(query,); + KEEP(fragment,); +#undef KEEP +#define COPY(start, end) do { \ + size_t len = end - start; \ + if (len > out_end - out) { \ + ret = AVERROR(ENOMEM); \ + goto error; \ + } \ + memmove(out, start, len); \ + out += len; \ + } while (0) + COPY(ub.url, keep); + COPY(uc.url, uc.path); + + use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path; + if (uc.path > uc.url) + use_base_path = 0; + if (URL_COMPONENT_HAVE(uc, path) && uc.path[0] == '/') + use_base_path = 0; + if (use_base_path) { + base_path_end = ub.url_component_end_path; + if (URL_COMPONENT_HAVE(uc, path)) + while (base_path_end > ub.path && base_path_end[-1] != '/') + base_path_end--; } - /* If rel actually is an absolute url, just copy it */ - if (!base || strstr(rel, "://") || rel[0] == '/') { - memset(buf, 0, size); - trim_double_dot_url(buf, rel, size); - return; - } - if (base != buf) - av_strlcpy(buf, base, size); - - /* Strip off any query string from base */ - path_query = strchr(buf, '?'); - if (path_query) - *path_query = '\0'; - - /* Is relative path just a new query part? */ - if (rel[0] == '?') { - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); - return; - } - - root = p = buf; - /* Get the path root of the url which start by "://" */ - if (p && strstr(p, "://")) { - sep = strstr(p, "://"); - if (sep) { - sep += 3; - root = strchr(sep, '/'); - if (!root) - return; + if (keep > ub.path) + simplify_path = 0; + if (URL_COMPONENT_HAVE(uc, scheme)) + simplify_path = 0; + if (URL_COMPONENT_HAVE(uc, authority)) + simplify_path = 1; + /* No path at all, leave it */ + if (!use_base_path && !URL_COMPONENT_HAVE(uc, path)) + simplify_path = 0; + + if (simplify_path) { + const char *root = "/"; + COPY(root, root + 1); + path = out; + if (use_base_path) { + ret = append_path(path, out_end, &out, ub.path, base_path_end); + if (ret < 0) + goto error; } + if (URL_COMPONENT_HAVE(uc, path)) { + ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path); + if (ret < 0) + goto error; + } + } else { + if (use_base_path) + COPY(ub.path, base_path_end); + COPY(uc.path, uc.url_component_end_path); } - /* Remove the file name from the base url */ - sep = strrchr(buf, '/'); - if (sep && sep <= root) - sep = root; - - if (sep) - sep[1] = '\0'; - else - buf[0] = '\0'; - while (av_strstart(rel, "..", NULL) && sep) { - /* Remove the path delimiter at the end */ - if (sep > root) { - sep[0] = '\0'; - sep = strrchr(buf, '/'); - } + COPY(uc.url_component_end_path, uc.end); +#undef COPY + *out = 0; + return 0; - /* If the next directory name to pop off is "..", break here */ - if (!strcmp(sep ? &sep[1] : buf, "..")) { - /* Readd the slash we just removed */ - av_strlcat(buf, "/", size); - break; - } - /* Cut off the directory name */ - if (sep) - sep[1] = '\0'; - else - buf[0] = '\0'; - rel += 3; - } - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); +error: + snprintf(buf, size, "invalid:%s", + ret == AVERROR(ENOMEM) ? "truncated" : + ret == AVERROR(EINVAL) ? "syntax_error" : ""); + return ret; } AVIODirEntry *ff_alloc_dir_entry(void) diff --git a/libavformat/url.h b/libavformat/url.h index ae27da5c73097..e33edf0cb980c 100644 --- a/libavformat/url.h +++ b/libavformat/url.h @@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char *proto, * @param base the base url, may be equal to buf. * @param rel the new url, which is interpreted relative to base */ -void ff_make_absolute_url(char *buf, int size, const char *base, - const char *rel); +int ff_make_absolute_url(char *buf, int size, const char *base, + const char *rel); /** * Allocate directory entry with default values. diff --git a/tests/ref/fate/url b/tests/ref/fate/url index 84cf85abdd690..7e6395c47bf69 100644 --- a/tests/ref/fate/url +++ b/tests/ref/fate/url @@ -46,9 +46,9 @@ http://[::1]:8080/dev/null => Testing ff_make_absolute_url: (null) baz => baz /foo/bar baz => /foo/baz - /foo/bar ../baz => /baz + /foo/bar ../baz => /foo/../baz /foo/bar /baz => /baz - /foo/bar ../../../baz => /baz + /foo/bar ../../../baz => /foo/../../../baz http://server/foo/ baz => http://server/foo/baz http://server/foo/bar baz => http://server/foo/baz http://server/foo/ ../baz => http://server/baz @@ -62,6 +62,56 @@ Testing ff_make_absolute_url: http://server/foo/bar /../../../../../other/url => http://server/other/url http://server/foo/bar /test/../../../../../other/url => http://server/other/url http://server/foo/bar /test/../../test/../../../other/url => http://server/other/url + http://server/foo/bar file:../baz/qux => file:../baz/qux + http://server/foo//bar/ ../../ => http://server/foo/ + file:../tmp/foo ../bar/ => file:../tmp/../bar/ + file:../tmp/foo file:../bar/ => file:../bar/ + http://server/foo/bar ./ => http://server/foo/ + http://server/foo/bar .dotfile => http://server/foo/.dotfile + http://server/foo/bar ..doubledotfile => http://server/foo/..doubledotfile + http://server/foo/bar double..dotfile => http://server/foo/double..dotfile + http://server/foo/bar doubledotfile.. => http://server/foo/doubledotfile.. + http://a/b/c/d;p?q g:h => g:h + http://a/b/c/d;p?q g => http://a/b/c/g + http://a/b/c/d;p?q ./g => http://a/b/c/g + http://a/b/c/d;p?q g/ => http://a/b/c/g/ + http://a/b/c/d;p?q /g => http://a/g + http://a/b/c/d;p?q //g => http://g + http://a/b/c/d;p?q ?y => http://a/b/c/d;p?y + http://a/b/c/d;p?q g?y => http://a/b/c/g?y + http://a/b/c/d;p?q #s => http://a/b/c/d;p?q#s + http://a/b/c/d;p?q g#s => http://a/b/c/g#s + http://a/b/c/d;p?q g?y#s => http://a/b/c/g?y#s + http://a/b/c/d;p?q ;x => http://a/b/c/;x + http://a/b/c/d;p?q g;x => http://a/b/c/g;x + http://a/b/c/d;p?q g;x?y#s => http://a/b/c/g;x?y#s + http://a/b/c/d;p?q => http://a/b/c/d;p?q + http://a/b/c/d;p?q . => http://a/b/c/ + http://a/b/c/d;p?q ./ => http://a/b/c/ + http://a/b/c/d;p?q .. => http://a/b/ + http://a/b/c/d;p?q ../ => http://a/b/ + http://a/b/c/d;p?q ../g => http://a/b/g + http://a/b/c/d;p?q ../.. => http://a/ + http://a/b/c/d;p?q ../../ => http://a/ + http://a/b/c/d;p?q ../../g => http://a/g + http://a/b/c/d;p?q ../../../g => http://a/g + http://a/b/c/d;p?q ../../../../g => http://a/g + http://a/b/c/d;p?q /./g => http://a/g + http://a/b/c/d;p?q /../g => http://a/g + http://a/b/c/d;p?q g. => http://a/b/c/g. + http://a/b/c/d;p?q .g => http://a/b/c/.g + http://a/b/c/d;p?q g.. => http://a/b/c/g.. + http://a/b/c/d;p?q ..g => http://a/b/c/..g + http://a/b/c/d;p?q ./../g => http://a/b/g + http://a/b/c/d;p?q ./g/. => http://a/b/c/g/ + http://a/b/c/d;p?q g/./h => http://a/b/c/g/h + http://a/b/c/d;p?q g/../h => http://a/b/c/h + http://a/b/c/d;p?q g;x=1/./y => http://a/b/c/g;x=1/y + http://a/b/c/d;p?q g;x=1/../y => http://a/b/c/y + http://a/b/c/d;p?q g?y/./x => http://a/b/c/g?y/./x + http://a/b/c/d;p?q g?y/../x => http://a/b/c/g?y/../x + http://a/b/c/d;p?q g#s/./x => http://a/b/c/g#s/./x + http://a/b/c/d;p?q g#s/../x => http://a/b/c/g#s/../x Testing av_url_split: /foo/bar => -1 /foo/bar