Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mock Entrez PMIDlookup #112

Merged
merged 15 commits into from
Jul 9, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public void testRoundTripCvsFile() throws PassCliException, SQLException, IOExce
verifyGrantTwo();

String contentUpTs = Files.readString(GRANT_UPTS_PATH);
assertEquals(passUpdater.getLatestUpdate() + "\n", contentUpTs);
assertEquals(passUpdater.getLatestUpdate() + System.lineSeparator(), contentUpTs);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public void testRoundTripCvsFileS3() throws PassCliException, SQLException, IOEx
S3Resource actualTestGrantUpTs1 = s3Template.download("test-bucket", "s3-testgrantupdatets");
try (InputStream inputStream = actualTestGrantUpTs1.getInputStream()) {
String contentUpTs = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
assertEquals(passUpdater.getLatestUpdate() + "\n", contentUpTs);
assertEquals(passUpdater.getLatestUpdate() + System.lineSeparator(), contentUpTs);
}

// WHEN - run again to verify grant update timestamps
Expand All @@ -132,7 +132,8 @@ public void testRoundTripCvsFileS3() throws PassCliException, SQLException, IOEx
"load", "s3://test-bucket/test-pull.csv", null);

S3Resource actualTestGrantUpTs2 = s3Template.download("test-bucket", "s3-testgrantupdatets");
String expectedGrantUpdateTs = firstLastUpdate + "\n" + passUpdater.getLatestUpdate() + "\n";
String expectedGrantUpdateTs = firstLastUpdate + System.lineSeparator() + passUpdater.getLatestUpdate()
+ System.lineSeparator();
try (InputStream inputStream = actualTestGrantUpTs2.getInputStream()) {
String contentUpTs = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
assertEquals(expectedGrantUpdateTs, contentUpTs);
Expand Down
2 changes: 1 addition & 1 deletion pass-nihms-loader/nihms-data-harvest/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@
</dependencies>

<build>
<finalName>nihms-data-harvest-cli</finalName>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>org.eclipse.pass.loader.nihms.NihmsHarvesterCLI</mainClass>
<finalName>nihms-data-harvest-cli</finalName>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this needs to remain here.

Copy link
Contributor Author

@tsande16 tsande16 Jul 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this one I was getting compilation errors. I think there was a recent mvn change or something. I will put it back in to get the error message.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. This has been reverted, but will be looked into in the future, per our discussion.

<classifier>exec</classifier>
</configuration>
<executions>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ public class NihmsHarvester {

/**
* Initiate harvester with required properties
*
* @param urlBuilder object to build the URL that the NihmsHarvester will use to pull data from
* @param nihmsHarvesterDownloader downloader object for downloading the harvested data
*/
public NihmsHarvester(UrlBuilder urlBuilder,
NihmsHarvesterDownloader nihmsHarvesterDownloader) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ public class NihmsHarvesterCLIRunner implements CommandLineRunner {

private final NihmsHarvester nihmsHarvester;

/**
* Creates the NihmsHarvesterCLIRunner and stores the supplied harvester for later use by this runner.
*
* @param nihmsHarvester the harvester this runner holds; it is the object responsible for initiating and
* managing the data downloads
*/
public NihmsHarvesterCLIRunner(NihmsHarvester nihmsHarvester) {
this.nihmsHarvester = nihmsHarvester;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ public class NihmsHarvesterConfig {
@Value("${nihmsetl.http.read-timeout-ms}")
private Long nihmsReadTimeoutMs;

/**
* The OkHttpClient that has the connection properties set from the application.properties
*
* @return OkHttpClient
*/
@Bean
public OkHttpClient okHttpClient() {
return new OkHttpClient.Builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ public class NihmsHarvesterDownloader {

/**
* Initiate harvester with required properties
*
* @param okHttpClient The OkHttpClient that handles the data extraction from external REST sources.
* @param downloadDirectory The directory where the NihmsLoader will download the external data to.
*/
public NihmsHarvesterDownloader(OkHttpClient okHttpClient,
@Value("${nihmsetl.data.dir}") String downloadDirectory) {
Expand All @@ -69,6 +72,13 @@ public NihmsHarvesterDownloader(OkHttpClient okHttpClient,
}
}

/**
* Handles the downloads of external NIHMS data
* @param url The URL from where the data is sourced
* @param status The status of the NIHMS data. Can either be: compliant, noncompliant, or inprocess.
* @throws IOException This exception is thrown if there is an error persisting data
* @throws InterruptedException This exception is thrown if there is a network connection interruption.
*/
public void download(URL url, NihmsStatus status) throws IOException, InterruptedException {
File outputFile = newFile(status);
LOG.debug("Retrieving: {}", url);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ public class UrlBuilder {

private final Environment environment;

/**
* Constructor for the UrlBuilder. Stores the environment, which exposes profiles and properties
* through Spring Boot configuration.
*
* @param environment enables the accessing of configuration properties in the UrlBuilder, such as the api schema,
* api host, and api path.
*/
public UrlBuilder(Environment environment) {
this.environment = environment;
}
Expand Down
7 changes: 7 additions & 0 deletions pass-nihms-loader/nihms-data-transform-load/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<name>NIHMS Data Transform/Load</name>

<properties>
<wiremock-version>3.3.1</wiremock-version>
<maven-model.version>3.9.6</maven-model.version>
</properties>

Expand Down Expand Up @@ -130,6 +131,12 @@
<version>${commons-io.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wiremock</groupId>
<artifactId>wiremock-standalone</artifactId>
<version>${wiremock-version}</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

/**
Expand All @@ -54,8 +55,11 @@ public class PmidLookup {
* delayed responses.
* https://www.ncbi.nlm.nih.gov/books/NBK25497/
*/
private static final String DEFAULT_ENTREZ_PATH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary" +
".fcgi?db=pubmed&retmode=json&rettype=abstract&id=%s";
/*private static final String DEFAULT_ENTREZ_PATH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary" +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can delete this too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

".fcgi?db=pubmed&retmode=json&rettype=abstract&id=%s";*/
@Value("${pmc.entrez.service.url}")
private String ENTREZ_PATH;

private static final Long DEFAULT_ENTREZ_TIME_OUT = Long.valueOf("400");

private static final String JSON_ERROR_KEY = "error";
Expand Down Expand Up @@ -92,6 +96,7 @@ public JSONObject retrievePubMedRecordAsJson(String pmid) {
jsonRecord = retrieveJsonFromApi(pmid);
if (jsonRecord == null) {
// pause and retry once to allow for API limitations
LOG.info("Entrez URL:",ENTREZ_PATH);
LOG.info("Pausing before trying to pull PMID {} from Entrez again", pmid);
TimeUnit.MILLISECONDS.sleep(DEFAULT_ENTREZ_TIME_OUT);
jsonRecord = retrieveJsonFromApi(pmid);
Expand All @@ -111,7 +116,7 @@ public JSONObject retrievePubMedRecordAsJson(String pmid) {
*/
private JSONObject retrieveJsonFromApi(String pmid) {
JSONObject root;
String path = String.format(DEFAULT_ENTREZ_PATH, pmid);
String path = String.format(ENTREZ_PATH, pmid);
try {
HttpClient client = HttpClientBuilder
.create()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ nihmsetl.repository.id=${NIHMS_REPO_ID:}
nihmsetl.data.dir=/data/nihmsloader/data
nihmsetl.pmcurl.template=https://www.ncbi.nlm.nih.gov/pmc/articles/%s/

pmc.entrez.service.url=https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&rettype=abstract&id=%s

pass.client.url=${PASS_CLIENT_URL:localhost:8080}
pass.client.user=${PASS_CLIENT_USER:fakeuser}
pass.client.password=${PASS_CLIENT_PASSWORD:fakepassword}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
@Testcontainers
@DirtiesContext
public abstract class NihmsSubmissionEtlITBase {
// includes a non-ASCII character (ε) to test handling of characters outside the ASCII range
protected final String title = "Article A ε4";
protected final String doi = "10.1000/a.abcd.1234";
protected final String issue = "3";

static {
MavenXpp3Reader reader = new MavenXpp3Reader();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@
*/
public class TransformAndLoadCompliantIT extends NihmsSubmissionEtlITBase {
private final String pmcid1 = "PMC12345678";
private final String title = "Article A";
private final String doi = "10.1000/a.abcd.1234";
private final String issue = "3";

protected String pmid1;
protected String awardNumber1;
protected String nihmsId1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@
*/
public class TransformAndLoadInProcessIT extends NihmsSubmissionEtlITBase {
private final String nihmsId1 = "NIHMS987654321";
private final String title = "Article A";
private final String doi = "10.1000/a.abcd.1234";
private final String issue = "3";

/**
* Tests when the publication is completely new and is an in-process
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@
@ExtendWith(MockitoExtension.class)
public class TransformAndLoadNonCompliantIT extends NihmsSubmissionEtlITBase {
private final String nihmsId1 = "NIHMS987654321";
private final String title = "Article A";
private final String doi = "10.1000/a.abcd.1234";
private final String issue = "3";


/**
* Tests when the publication is completely new and is non-compliant
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,44 +15,81 @@
*/
package org.eclipse.pass.loader.nihms.entrez;

import static com.github.tomakehurst.wiremock.client.WireMock.aResponse;
import static com.github.tomakehurst.wiremock.client.WireMock.get;
import static com.github.tomakehurst.wiremock.client.WireMock.stubFor;
import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;

import com.github.tomakehurst.wiremock.client.WireMock;
import com.github.tomakehurst.wiremock.junit5.WireMockTest;
import org.apache.commons.io.IOUtils;
import org.eclipse.pass.loader.nihms.NihmsTransformLoadCLIRunner;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.test.context.TestPropertySource;

/**
* @author Karen Hanson
* @version $Id$
*/
@SpringBootTest
@TestPropertySource("classpath:test-application.properties")
@WireMockTest(httpPort = 9911)
public class EntrezPmidLookupTest {

@Autowired
protected PmidLookup pmidLookup;

// Needed so tests can run after application starts
@MockBean private NihmsTransformLoadCLIRunner nihmsTransformLoadCLIRunner;

@Value("${pmc.entrez.service.url}")
private String ENTREZ_PATH;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be deleted; it doesn't look like it is being used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


@Test
public void testGetEntrezRecordJson() {
PmidLookup apiService = new PmidLookup();
public void testGetEntrezRecordJson() throws IOException, URISyntaxException {
String entrez = IOUtils.toString(getClass().getClassLoader().
getResourceAsStream("pmidrecord.json"), StandardCharsets.UTF_8);

String pmid = "29249144";
String pmid = "11111111";

JSONObject pmr = apiService.retrievePubMedRecordAsJson(pmid);
assertTrue(pmr.getString("source").contains("Proteome"));
stubFor(get(urlPathEqualTo("/entrez/eutils/esummary.fcgi"))
.withQueryParam("db", WireMock.equalTo("pubmed"))
.withQueryParam("retmode", WireMock.equalTo("json"))
.withQueryParam("rettype", WireMock.equalTo("abstract"))
.withQueryParam("id", WireMock.equalTo(pmid))
.willReturn(aResponse().withStatus(200).withBody(entrez)));

JSONObject pmr = pmidLookup.retrievePubMedRecordAsJson(pmid);
assertTrue(pmr.getString("source").contains("Journal A"));
}

@Test
public void testGetPubMedRecord() {
PmidLookup pmidLookup = new PmidLookup();
String pmid = "29249144";
PubMedEntrezRecord record = pmidLookup.retrievePubMedRecord(pmid);
assertEquals("10.1021/acs.jproteome.7b00775", record.getDoi());
}
public void testGetPubMedRecord() throws IOException {
String entrez = IOUtils.toString(getClass().getClassLoader().
getResourceAsStream("pmidrecord.json"), StandardCharsets.UTF_8);

String pmid = "11111111";

stubFor(get(urlPathEqualTo("/entrez/eutils/esummary.fcgi"))
.withQueryParam("db", WireMock.equalTo("pubmed"))
.withQueryParam("retmode", WireMock.equalTo("json"))
.withQueryParam("rettype", WireMock.equalTo("abstract"))
.withQueryParam("id", WireMock.equalTo(pmid))
.willReturn(aResponse().withStatus(200).withBody(entrez)));

@Test
public void testGetPubMedRecordWithHighAsciiChars() {
PmidLookup pmidLookup = new PmidLookup();
String pmid = "27648456";
PubMedEntrezRecord record = pmidLookup.retrievePubMedRecord(pmid);
assertEquals("10.1002/acn3.333", record.getDoi());
assertEquals("Age-dependent effects of APOE ε4 in preclinical Alzheimer's disease.", record.getTitle());
assertEquals("10.1000/a.abcd.1234", record.getDoi());
// verify that non-ASCII characters (here, ε) are handled correctly
assertEquals("Article A ε4", record.getTitle());
}

}
Loading
Loading