diff --git a/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderFileRoundTripTest.java b/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderFileRoundTripTest.java index 0b21981af..bec1a1c7b 100644 --- a/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderFileRoundTripTest.java +++ b/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderFileRoundTripTest.java @@ -83,7 +83,7 @@ public void testRoundTripCvsFile() throws PassCliException, SQLException, IOExce verifyGrantTwo(); String contentUpTs = Files.readString(GRANT_UPTS_PATH); - assertEquals(passUpdater.getLatestUpdate() + "\n", contentUpTs); + assertEquals(passUpdater.getLatestUpdate() + System.lineSeparator(), contentUpTs); } } diff --git a/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderS3RoundTripTest.java b/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderS3RoundTripTest.java index aa1f0ac48..0f00c3a9a 100644 --- a/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderS3RoundTripTest.java +++ b/pass-grant-loader/src/test/java/org/eclipse/pass/support/grant/GrantLoaderS3RoundTripTest.java @@ -123,7 +123,7 @@ public void testRoundTripCvsFileS3() throws PassCliException, SQLException, IOEx S3Resource actualTestGrantUpTs1 = s3Template.download("test-bucket", "s3-testgrantupdatets"); try (InputStream inputStream = actualTestGrantUpTs1.getInputStream()) { String contentUpTs = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); - assertEquals(passUpdater.getLatestUpdate() + "\n", contentUpTs); + assertEquals(passUpdater.getLatestUpdate() + System.lineSeparator(), contentUpTs); } // WHEN - run again to verify grant update timestamps @@ -132,7 +132,8 @@ public void testRoundTripCvsFileS3() throws PassCliException, SQLException, IOEx "load", "s3://test-bucket/test-pull.csv", null); S3Resource actualTestGrantUpTs2 = s3Template.download("test-bucket", 
"s3-testgrantupdatets"); - String expectedGrantUpdateTs = firstLastUpdate + "\n" + passUpdater.getLatestUpdate() + "\n"; + String expectedGrantUpdateTs = firstLastUpdate + System.lineSeparator() + passUpdater.getLatestUpdate() + + System.lineSeparator(); try (InputStream inputStream = actualTestGrantUpTs2.getInputStream()) { String contentUpTs = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); assertEquals(expectedGrantUpdateTs, contentUpTs); diff --git a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java index 5537cab0b..a3301c1bf 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java +++ b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java @@ -44,6 +44,9 @@ public class NihmsHarvester { /** * Initiate harvester with required properties + * + * @param urlBuilder object to build the URL that the NihmsHarvester will use to pull data from + * @param nihmsHarvesterDownloader downloader object for downloaded the harvested data */ public NihmsHarvester(UrlBuilder urlBuilder, NihmsHarvesterDownloader nihmsHarvesterDownloader) { diff --git a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java index 8bc795393..fa2b5af9a 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java +++ b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java @@ -78,6 +78,11 @@ public class NihmsHarvesterCLIRunner implements CommandLineRunner { private final NihmsHarvester nihmsHarvester; + /** + * Constructor for the 
NihmsHarvesterCLIRunner + * + * @param nihmsHarvester Object that is responsible for initiating and managing the data downloads + */ public NihmsHarvesterCLIRunner(NihmsHarvester nihmsHarvester) { this.nihmsHarvester = nihmsHarvester; } diff --git a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterConfig.java b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterConfig.java index 5e2991846..7f6fbc9c9 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterConfig.java +++ b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterConfig.java @@ -34,6 +34,11 @@ public class NihmsHarvesterConfig { @Value("${nihmsetl.http.read-timeout-ms}") private Long nihmsReadTimeoutMs; + /** + * The OkHttpClient that has the connection properties set from the application.properties + * + * @return OkHttpClient + */ @Bean public OkHttpClient okHttpClient() { return new OkHttpClient.Builder() diff --git a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterDownloader.java b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterDownloader.java index 14c401760..e4f2c9544 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterDownloader.java +++ b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterDownloader.java @@ -49,6 +49,9 @@ public class NihmsHarvesterDownloader { /** * Initiate harvester with required properties + * + * @param okHttpClient The OkHttpClient that handles the data extraction from external REST sources. + * @param downloadDirectory The directory where the NihmsLoader will download the external data to. 
*/ public NihmsHarvesterDownloader(OkHttpClient okHttpClient, @Value("${nihmsetl.data.dir}") String downloadDirectory) { @@ -69,6 +72,13 @@ public NihmsHarvesterDownloader(OkHttpClient okHttpClient, } } + /** + * Handles the downloads of external NIHMS data + * @param url The URL from where the data is sourced + * @param status The status of the NIHMS data. Can either be: compliant, noncompliant, or inprocess. + * @throws IOException This exception is thrown if there is an error persisting data + * @throws InterruptedException This exception is thrown if there is a network connection interruption. + */ public void download(URL url, NihmsStatus status) throws IOException, InterruptedException { File outputFile = newFile(status); LOG.debug("Retrieving: {}", url); diff --git a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/UrlBuilder.java b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/UrlBuilder.java index 87716d528..a58ae72f7 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/UrlBuilder.java +++ b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/UrlBuilder.java @@ -50,6 +50,12 @@ public class UrlBuilder { private final Environment environment; + /** + * Constructor for the UrlBuilder. Sets environment either profiles or properties using SpringBoot Configuration. + * + * @param environment enables the accessing of configuration properties in the UrlBuilder, such as the api schema, + * api host, and api path. 
+ */ public UrlBuilder(Environment environment) { this.environment = environment; } diff --git a/pass-nihms-loader/nihms-data-transform-load/pom.xml b/pass-nihms-loader/nihms-data-transform-load/pom.xml index 3554f37f3..6bf5cd84f 100644 --- a/pass-nihms-loader/nihms-data-transform-load/pom.xml +++ b/pass-nihms-loader/nihms-data-transform-load/pom.xml @@ -13,6 +13,7 @@ NIHMS Data Transform/Load + 3.3.1 3.9.6 @@ -130,6 +131,12 @@ ${commons-io.version} test + + org.wiremock + wiremock-standalone + ${wiremock-version} + test + diff --git a/pass-nihms-loader/nihms-data-transform-load/src/main/java/org/eclipse/pass/loader/nihms/entrez/PmidLookup.java b/pass-nihms-loader/nihms-data-transform-load/src/main/java/org/eclipse/pass/loader/nihms/entrez/PmidLookup.java index 64eb9c6a6..1265a103a 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/main/java/org/eclipse/pass/loader/nihms/entrez/PmidLookup.java +++ b/pass-nihms-loader/nihms-data-transform-load/src/main/java/org/eclipse/pass/loader/nihms/entrez/PmidLookup.java @@ -33,6 +33,7 @@ import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; /** @@ -54,8 +55,9 @@ public class PmidLookup { * delayed responses. 
* https://www.ncbi.nlm.nih.gov/books/NBK25497/ */ - private static final String DEFAULT_ENTREZ_PATH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary" + - ".fcgi?db=pubmed&retmode=json&rettype=abstract&id=%s"; + @Value("${pmc.entrez.service.url}") + private String ENTREZ_PATH; + private static final Long DEFAULT_ENTREZ_TIME_OUT = Long.valueOf("400"); private static final String JSON_ERROR_KEY = "error"; @@ -92,6 +94,7 @@ public JSONObject retrievePubMedRecordAsJson(String pmid) { jsonRecord = retrieveJsonFromApi(pmid); if (jsonRecord == null) { // pause and retry once to allow for API limitations + LOG.info("Entrez URL: {}", ENTREZ_PATH); LOG.info("Pausing before trying to pull PMID {} from Entrez again", pmid); TimeUnit.MILLISECONDS.sleep(DEFAULT_ENTREZ_TIME_OUT); jsonRecord = retrieveJsonFromApi(pmid); @@ -111,7 +114,7 @@ public JSONObject retrievePubMedRecordAsJson(String pmid) { */ private JSONObject retrieveJsonFromApi(String pmid) { JSONObject root; - String path = String.format(DEFAULT_ENTREZ_PATH, pmid); + String path = String.format(ENTREZ_PATH, pmid); try { HttpClient client = HttpClientBuilder .create() diff --git a/pass-nihms-loader/nihms-data-transform-load/src/main/resources/application.properties b/pass-nihms-loader/nihms-data-transform-load/src/main/resources/application.properties index f6d949059..c006e661c 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/main/resources/application.properties +++ b/pass-nihms-loader/nihms-data-transform-load/src/main/resources/application.properties @@ -3,6 +3,8 @@ nihmsetl.repository.id=${NIHMS_REPO_ID:} nihmsetl.data.dir=/data/nihmsloader/data nihmsetl.pmcurl.template=https://www.ncbi.nlm.nih.gov/pmc/articles/%s/ +pmc.entrez.service.url=https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&rettype=abstract&id=%s + pass.client.url=${PASS_CLIENT_URL:localhost:8080} pass.client.user=${PASS_CLIENT_USER:fakeuser} pass.client.password=${PASS_CLIENT_PASSWORD:fakepassword} \ 
No newline at end of file diff --git a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/NihmsSubmissionEtlITBase.java b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/NihmsSubmissionEtlITBase.java index 850e28f4a..72b14d434 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/NihmsSubmissionEtlITBase.java +++ b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/NihmsSubmissionEtlITBase.java @@ -72,6 +72,10 @@ @Testcontainers @DirtiesContext public abstract class NihmsSubmissionEtlITBase { + //including high ascii to test for ascii handling + protected final String title = "Article A ε4"; + protected final String doi = "10.1000/a.abcd.1234"; + protected final String issue = "3"; static { MavenXpp3Reader reader = new MavenXpp3Reader(); diff --git a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadCompliantIT.java b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadCompliantIT.java index 85f4331dc..3eab90345 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadCompliantIT.java +++ b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadCompliantIT.java @@ -51,10 +51,6 @@ */ public class TransformAndLoadCompliantIT extends NihmsSubmissionEtlITBase { private final String pmcid1 = "PMC12345678"; - private final String title = "Article A"; - private final String doi = "10.1000/a.abcd.1234"; - private final String issue = "3"; - protected String pmid1; protected String awardNumber1; protected String nihmsId1; diff --git a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadInProcessIT.java 
b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadInProcessIT.java index 0f6f481e0..d737da326 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadInProcessIT.java +++ b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadInProcessIT.java @@ -47,9 +47,6 @@ */ public class TransformAndLoadInProcessIT extends NihmsSubmissionEtlITBase { private final String nihmsId1 = "NIHMS987654321"; - private final String title = "Article A"; - private final String doi = "10.1000/a.abcd.1234"; - private final String issue = "3"; /** * Tests when the publication is completely new and is an in-process diff --git a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadNonCompliantIT.java b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadNonCompliantIT.java index dfb06f790..941726cb1 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadNonCompliantIT.java +++ b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/TransformAndLoadNonCompliantIT.java @@ -51,10 +51,6 @@ @ExtendWith(MockitoExtension.class) public class TransformAndLoadNonCompliantIT extends NihmsSubmissionEtlITBase { private final String nihmsId1 = "NIHMS987654321"; - private final String title = "Article A"; - private final String doi = "10.1000/a.abcd.1234"; - private final String issue = "3"; - /** * Tests when the publication is completely new and is non-compliant diff --git a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/entrez/EntrezPmidLookupTest.java b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/entrez/EntrezPmidLookupTest.java index a7978c95f..6958bb47c 100644 
--- a/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/entrez/EntrezPmidLookupTest.java +++ b/pass-nihms-loader/nihms-data-transform-load/src/test/java/org/eclipse/pass/loader/nihms/entrez/EntrezPmidLookupTest.java @@ -15,44 +15,77 @@ */ package org.eclipse.pass.loader.nihms.entrez; +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.get; +import static com.github.tomakehurst.wiremock.client.WireMock.stubFor; +import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; + +import com.github.tomakehurst.wiremock.client.WireMock; +import com.github.tomakehurst.wiremock.junit5.WireMockTest; +import org.apache.commons.io.IOUtils; +import org.eclipse.pass.loader.nihms.NihmsTransformLoadCLIRunner; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.test.context.TestPropertySource; /** * @author Karen Hanson - * @version $Id$ */ +@SpringBootTest +@TestPropertySource("classpath:test-application.properties") +@WireMockTest(httpPort = 9911) public class EntrezPmidLookupTest { + @Autowired + protected PmidLookup pmidLookup; + + // Needed so tests can run after application starts + @MockBean private NihmsTransformLoadCLIRunner nihmsTransformLoadCLIRunner; + @Test - public void testGetEntrezRecordJson() { - PmidLookup apiService = new PmidLookup(); + public void testGetEntrezRecordJson() throws IOException, URISyntaxException { + String entrez = IOUtils.toString(getClass().getClassLoader(). 
+ getResourceAsStream("pmidrecord.json"), StandardCharsets.UTF_8); - String pmid = "29249144"; + String pmid = "11111111"; - JSONObject pmr = apiService.retrievePubMedRecordAsJson(pmid); - assertTrue(pmr.getString("source").contains("Proteome")); + stubFor(get(urlPathEqualTo("/entrez/eutils/esummary.fcgi")) + .withQueryParam("db", WireMock.equalTo("pubmed")) + .withQueryParam("retmode", WireMock.equalTo("json")) + .withQueryParam("rettype", WireMock.equalTo("abstract")) + .withQueryParam("id", WireMock.equalTo(pmid)) + .willReturn(aResponse().withStatus(200).withBody(entrez))); + JSONObject pmr = pmidLookup.retrievePubMedRecordAsJson(pmid); + assertTrue(pmr.getString("source").contains("Journal A")); } @Test - public void testGetPubMedRecord() { - PmidLookup pmidLookup = new PmidLookup(); - String pmid = "29249144"; - PubMedEntrezRecord record = pmidLookup.retrievePubMedRecord(pmid); - assertEquals("10.1021/acs.jproteome.7b00775", record.getDoi()); - } + public void testGetPubMedRecord() throws IOException { + String entrez = IOUtils.toString(getClass().getClassLoader(). 
+ getResourceAsStream("pmidrecord.json"), StandardCharsets.UTF_8); + + String pmid = "11111111"; + + stubFor(get(urlPathEqualTo("/entrez/eutils/esummary.fcgi")) + .withQueryParam("db", WireMock.equalTo("pubmed")) + .withQueryParam("retmode", WireMock.equalTo("json")) + .withQueryParam("rettype", WireMock.equalTo("abstract")) + .withQueryParam("id", WireMock.equalTo(pmid)) + .willReturn(aResponse().withStatus(200).withBody(entrez))); - @Test - public void testGetPubMedRecordWithHighAsciiChars() { - PmidLookup pmidLookup = new PmidLookup(); - String pmid = "27648456"; PubMedEntrezRecord record = pmidLookup.retrievePubMedRecord(pmid); - assertEquals("10.1002/acn3.333", record.getDoi()); - assertEquals("Age-dependent effects of APOE ε4 in preclinical Alzheimer's disease.", record.getTitle()); + assertEquals("10.1000/a.abcd.1234", record.getDoi()); + //test to ensure that it can handle high ascii characters + assertEquals("Article A ε4", record.getTitle()); } - } diff --git a/pass-nihms-loader/nihms-data-transform-load/src/test/resources/pmidrecord.json b/pass-nihms-loader/nihms-data-transform-load/src/test/resources/pmidrecord.json index 7fd2192ca..0498226ca 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/test/resources/pmidrecord.json +++ b/pass-nihms-loader/nihms-data-transform-load/src/test/resources/pmidrecord.json @@ -16,8 +16,7 @@ } ], "lastauthor": "Fuchsia A", - "title": "Article A", - "sorttitle": "Article A", + "title": "Article A ε4", "volume": "140", "issue": "3", "pages": "111-113", diff --git a/pass-nihms-loader/nihms-data-transform-load/src/test/resources/test-application.properties b/pass-nihms-loader/nihms-data-transform-load/src/test/resources/test-application.properties index 0138a682b..03332c9d1 100644 --- a/pass-nihms-loader/nihms-data-transform-load/src/test/resources/test-application.properties +++ b/pass-nihms-loader/nihms-data-transform-load/src/test/resources/test-application.properties @@ -1,5 +1,7 @@ # directory to harvest data 
from nihmsetl.data.dir=./src/test/resources/data +# PubMed Central Entrez Service URL +pmc.entrez.service.url=http://localhost:9911/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&rettype=abstract&id=%s # pass-core properties pass.client.url=http://localhost:8080/ pass.client.user=backend