diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dbe9c82 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.vscode/ \ No newline at end of file diff --git a/doi_info.go b/doi_info.go index f1f88f1..826e4f9 100644 --- a/doi_info.go +++ b/doi_info.go @@ -9,6 +9,9 @@ type DoiInfo struct { // Manuscript describes an open access manuscript that can be // selected by the user. type Manuscript struct { - Description string `json:"description"` // Description of this manuscript - Location string `json:"location"` // Location URI of manuascript (e.g. pdf) + Location string `json:"url"` // Location URI of manuscript (e.g. pdf) + RepositoryInstitution string `json:"repositoryLabel"` // Readable label for the repository where the article can be found + Type string `json:"type"` // The MIME type of the manuscript file + Source string `json:"source"` // The API where we found the file + Name string `json:"name"` // The file name } diff --git a/doi_info_cache.go b/doi_info_cache.go index f3a1326..d411262 100644 --- a/doi_info_cache.go +++ b/doi_info_cache.go @@ -82,7 +82,7 @@ func (c *DoiCache) GetOrAdd(doi string, fetchDoi func() (*DoiInfo, error)) (*Doi return cached, err } - // OK, now execute the doi getch function and unlock the cache entry when done. + // OK, now execute the doi fetch function and unlock the cache entry when done. 
defer entry.Unlock() if entry.info, err = fetchDoi(); err != nil { diff --git a/doi_info_cache_test.go b/doi_info_cache_test.go index c605184..e37f3b1 100644 --- a/doi_info_cache_test.go +++ b/doi_info_cache_test.go @@ -17,8 +17,8 @@ func TestSimpleCase(t *testing.T) { expected := &pass.DoiInfo{ Manuscripts: []pass.Manuscript{ { - Description: "Foo", - Location: "Bar", + RepositoryInstitution: "Foo", + Location: "Bar", }, }, } @@ -27,8 +27,8 @@ func TestSimpleCase(t *testing.T) { return &pass.DoiInfo{ Manuscripts: []pass.Manuscript{ { - Description: "Foo", - Location: "Bar", + RepositoryInstitution: "Foo", + Location: "Bar", }, }, }, nil @@ -83,8 +83,8 @@ func TestContested(t *testing.T) { expected := &pass.DoiInfo{ Manuscripts: []pass.Manuscript{ { - Description: "Foo", - Location: "Bar", + RepositoryInstitution: "Foo", + Location: "Bar", }, }, } @@ -98,8 +98,8 @@ func TestContested(t *testing.T) { return &pass.DoiInfo{ Manuscripts: []pass.Manuscript{ { - Description: "Foo", - Location: "Bar", + RepositoryInstitution: "Foo", + Location: "Bar", }, }, }, nil @@ -177,8 +177,8 @@ func didCompute(cache *pass.DoiCache, doi string) bool { return &pass.DoiInfo{ Manuscripts: []pass.Manuscript{ { - Description: "Foo", - Location: "Bar", + RepositoryInstitution: "Foo", + Location: "Bar", }, }, }, nil diff --git a/download_service.go b/download_service.go index 6be5549..a39a2d1 100644 --- a/download_service.go +++ b/download_service.go @@ -3,7 +3,9 @@ package main import ( "io" "io/ioutil" + "log" "net/http" + URL "net/url" "github.com/pkg/errors" ) @@ -41,18 +43,23 @@ func (d DownloadService) Download(doi, url string) (string, error) { defer resp.Body.Close() - if resp.StatusCode > 299 { + if resp.StatusCode > 303 { body, _ := ioutil.ReadAll(resp.Body) - return "", errors.Errorf("download of '%s' faied with %d %s", url, resp.StatusCode, string(body)) + return "", errors.Errorf("download of '%s' failed with %d %s", url, resp.StatusCode, string(body)) } return 
d.Fedora.PostBinary(d.Dest, resp.Body, resp.Header.Get(headerContentType)) - } func (d DownloadService) verifyURL(doi string, info *DoiInfo, url string) error { for _, m := range info.Manuscripts { - if m.Location == url { + + decodedURLForPdf, err := URL.QueryUnescape(m.Location) + if err != nil { + log.Printf("file name decoding failed: %s", err) + } + + if decodedURLForPdf == url { return nil // We found the matching URL. Done! } } diff --git a/download_service_http.go b/download_service_http.go index 244103e..6665df1 100644 --- a/download_service_http.go +++ b/download_service_http.go @@ -13,7 +13,6 @@ type Downloader interface { func DownloadServiceHandler(svc Downloader) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Allowing GET just so I can test in browser quickly if r.Method != http.MethodPost { w.WriteHeader(http.StatusMethodNotAllowed) return @@ -36,8 +35,8 @@ func DownloadServiceHandler(svc Downloader) http.Handler { downloadLocation, err := svc.Download(doi, uri) if err != nil { - var badReqest ErrorBadInput - if errors.As(err, &badReqest) { + var badRequest ErrorBadInput + if errors.As(err, &badRequest) { w.WriteHeader(http.StatusBadRequest) } else { w.WriteHeader(http.StatusInternalServerError) diff --git a/integration_test.go b/integration_test.go index 81d773d..b41aedc 100644 --- a/integration_test.go +++ b/integration_test.go @@ -28,7 +28,9 @@ func TestIntegration(t *testing.T) { downloadURL := info.Manuscripts[0].Location - binaryURI := postBinary(t, doi, fmt.Sprintf("%s?doi=%s&url=%s", downloadEndpoint, url.QueryEscape(doi), url.QueryEscape(downloadURL))) + binaryURI := postBinary(t, doi, fmt.Sprintf("%s?doi=%s&url=%s", downloadEndpoint, url.QueryEscape(doi), downloadURL)) + + println(binaryURI) // Now, make sure we can HEAD the created binary headRequest, _ := http.NewRequest(http.MethodHead, binaryURI, nil) @@ -38,11 +40,11 @@ func TestIntegration(t *testing.T) { t.Fatalf("HEAD of resulting binary 
failed: %v", err) } - if resp.Header.Get("Content-Type") != "application/pdf" { + if resp.Header.Get("Content-Type") != "application/pdf;charset=ISO-8859-1" { t.Fatalf("Got wrong content type for PDF!") } - if resp.Header.Get("Content-Length") != "710000" { + if resp.Header.Get("Content-Length") != "1024793" { t.Fatalf("Didn't get expected content length for pdf file") } } diff --git a/lookup_service_http.go b/lookup_service_http.go index a8df78b..d0be0e9 100644 --- a/lookup_service_http.go +++ b/lookup_service_http.go @@ -6,7 +6,7 @@ import ( "net/http" ) -// LookupService looks up a DOI and provides information addociated with it +// LookupService looks up a DOI and provides information associated with it type LookupService interface { Lookup(doi string) (*DoiInfo, error) } diff --git a/lookup_service_http_test.go b/lookup_service_http_test.go index 5691c11..d9cbc67 100644 --- a/lookup_service_http_test.go +++ b/lookup_service_http_test.go @@ -39,7 +39,7 @@ func TestNoDoi(t *testing.T) { resp := httptest.NewRecorder() pass.LookupServiceHandler(NoLookupService{}).ServeHTTP( - resp, httptest.NewRequest(http.MethodGet, "/foo?param=notDoi", nil)) + resp, httptest.NewRequest(http.MethodGet, "/lookup?param=notDoi", nil)) if resp.Code != http.StatusBadRequest { t.Errorf("Expected bad request error code") @@ -51,12 +51,18 @@ func TestResponse(t *testing.T) { info := &pass.DoiInfo{ Manuscripts: []pass.Manuscript{ { - Description: "One", - Location: "http://example.org/first", + RepositoryInstitution: "One", + Location: "http://example.org/first", + Type: "application/pdf", + Source: "Unpaywall", + Name: "first", }, { - Description: "Two", - Location: "http://example.org/second", + RepositoryInstitution: "Two", + Location: "http://example.org/second", + Type: "application/pdf", + Source: "Unpaywall", + Name: "second", }, }, } diff --git a/unpaywall.go b/unpaywall.go index 8649d18..b455e3c 100644 --- a/unpaywall.go +++ b/unpaywall.go @@ -3,11 +3,10 @@ package main import ( 
"encoding/json" "fmt" + "log" "net/http" -) - -const ( - unpaywallAcceptedVersion = "acceptedVersion" + "net/url" + "strings" ) // UnpaywallService looks up DOI info from unpaywall @@ -20,13 +19,13 @@ type UnpaywallService struct { // DOI lookup response from unpaywall type unpaywallDOIResponse struct { - OaLocations []unpaywallLocation `json:"oa_locations"` + BestOaLocation unpaywallLocation `json:"best_oa_location"` } type unpaywallLocation struct { - URLForPdf string `json:"url_for_pdf"` - Version string `json:"version"` - Evidence string `json:"evidence"` + URLForPdf string `json:"url_for_pdf"` + Version string `json:"version"` + RepositoryInstitution string `json:"repository_institution"` } // Lookup looks up DOI info for a given DOI @@ -42,14 +41,28 @@ func (u UnpaywallService) Lookup(doi string) (*DoiInfo, error) { var doiResponse DoiInfo - // Just return the all accepted versions we encounter? dunno - for _, location := range results.OaLocations { - if location.Version == unpaywallAcceptedVersion && location.URLForPdf != "" { - doiResponse.Manuscripts = append(doiResponse.Manuscripts, Manuscript{ - Description: location.Evidence, - Location: location.URLForPdf, - }) - } + // For now we'll only return the best location for the manuscript + location := results.BestOaLocation + + // Get the file name from the decoded url for pdf + // but log any problems do not cause response to fail + var fileName string + decodedURLForPdf, err := url.QueryUnescape(location.URLForPdf) + if err != nil { + log.Printf("file name decoding failed: %s", err) + } else { + splitURLForPdf := strings.Split(decodedURLForPdf, "/") + fileName = splitURLForPdf[len(splitURLForPdf)-1] + } + + if location.URLForPdf != "" { + doiResponse.Manuscripts = append(doiResponse.Manuscripts, Manuscript{ + Location: location.URLForPdf, + RepositoryInstitution: location.RepositoryInstitution, + Type: "application/pdf", + Source: "Unpaywall", + Name: fileName, + }) } return &doiResponse, nil diff --git 
a/unpaywall_test.go b/unpaywall_test.go index 6d2f564..5eb4692 100644 --- a/unpaywall_test.go +++ b/unpaywall_test.go @@ -19,8 +19,11 @@ func TestUnpaywall(t *testing.T) { expected := &pass.DoiInfo{ Manuscripts: []pass.Manuscript{ { - Description: "oa repository (via OAI-PMH doi match)", - Location: "http://europepmc.org/articles/pmc4221854?pdf=render", + Location: "https://dash.harvard.edu/bitstream/1/12285462/1/Nanometer-Scale%20Thermometry.pdf", + RepositoryInstitution: "Harvard University - Digital Access to Scholarship at Harvard (DASH)", + Type: "application/pdf", + Source: "Unpaywall", + Name: "Nanometer-Scale Thermometry.pdf", }, }, }