diff --git a/link_finder.go b/link_finder.go index c8bc1a0..91ceb4b 100644 --- a/link_finder.go +++ b/link_finder.go @@ -23,7 +23,7 @@ var atomToAttributes = map[atom.Atom][]string{ atom.Meta: {"content"}, } -var imageDescriptorPattern = regexp.MustCompile(" [^ ]*$") +var imageDescriptorPattern = regexp.MustCompile(`(\S)\s+\S+\s*$`) type linkFinder struct { linkFilterer linkFilterer @@ -75,7 +75,7 @@ func (f linkFinder) parseLinks(n *html.Node, a string) []string { switch a { case "srcset": for _, s := range strings.Split(s, ",") { - ss = append(ss, imageDescriptorPattern.ReplaceAllString(f.trimSpace(s), "")) + ss = append(ss, f.trimSpace(imageDescriptorPattern.ReplaceAllString(s, "$1"))) } case "content": switch scrape.Attr(n, "property") { diff --git a/link_finder_test.go b/link_finder_test.go index 92fc564..739bd63 100644 --- a/link_finder_test.go +++ b/link_finder_test.go @@ -189,23 +189,18 @@ func TestLinkFinderFindMetaTags(t *testing.T) { assert.Nil(t, err) } -func TestLinkFinderFindLinkWithSpaces(t *testing.T) { +func TestLinkFinderFindDataSchemeLinkWithSpaces(t *testing.T) { b, err := url.Parse("http://foo.com") assert.Nil(t, err) n, err := html.Parse(strings.NewReader( - htmlWithBody(` - - `)), + htmlWithBody(``)), ) assert.Nil(t, err) ls := newTestLinkFinder().Find(n, b) - err, ok := ls["http://foo.com/foo.html"] + err, ok := ls["data:text/plain, Hello,%20world!"] assert.True(t, ok) assert.Nil(t, err) }