diff --git a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java index d04f86809..5537cab0b 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java +++ b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvester.java @@ -16,12 +16,14 @@ package org.eclipse.pass.loader.nihms; import java.net.URL; +import java.time.LocalDate; +import java.time.Period; +import java.time.format.DateTimeFormatter; import java.util.HashMap; import java.util.Map; import java.util.Set; import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; import org.eclipse.pass.loader.nihms.model.NihmsStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,6 +37,8 @@ public class NihmsHarvester { private static final Logger LOG = LoggerFactory.getLogger(NihmsHarvester.class); + static final int DEFAULT_HARVEST_MONTHS = 12; + private final UrlBuilder urlBuilder; private final NihmsHarvesterDownloader nihmsHarvesterDownloader; @@ -51,22 +55,19 @@ public NihmsHarvester(UrlBuilder urlBuilder, * Retrieve files from NIHMS based on status list and startDate provided * * @param statusesToDownload list of {@code NihmsStatus} types to download from the NIHMS website - * @param startDate formatted as {@code yyyy-mm}, can be null to default to 1 year prior to harvest date + * @param harvestPeriodMonths number of months of data to query */ - public void harvest(Set statusesToDownload, String startDate) { + public void harvest(Set statusesToDownload, int harvestPeriodMonths) { if (CollectionUtils.isEmpty(statusesToDownload)) { throw new RuntimeException("statusesToDownload list cannot be empty"); } - if (!validStartDate(startDate)) { - throw new RuntimeException( - String.format("The startDate %s is not valid. The date must be formatted as mm-yyyy", startDate)); - } try { Map params = new HashMap<>(); - if (StringUtils.isNotEmpty(startDate)) { - startDate = startDate.replace("-", "/"); + if (harvestPeriodMonths != DEFAULT_HARVEST_MONTHS) { + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("MM/uuuu"); + String startDate = LocalDate.now().minus(Period.ofMonths(harvestPeriodMonths)).format(formatter); LOG.info("Filtering with Start Date " + startDate); params.put("pdf", startDate); } @@ -94,15 +95,4 @@ public void harvest(Set statusesToDownload, String startDate) { } } - /** - * null or empty are OK for start date, but a badly formatted date that does not have the format mm-yyyy should - * return false - * - * @param startDate true if valid start date (empty or formatted mm-yyyy) - * @return true if valid start date (empty or formatted mm-yyyy) - */ - public static boolean validStartDate(String startDate) { - return (StringUtils.isEmpty(startDate) || startDate.matches("^(0?[1-9]|1[012])-(\\d{4})$")); - } - } diff --git a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java index b12168281..8bc795393 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java +++ b/pass-nihms-loader/nihms-data-harvest/src/main/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIRunner.java @@ -68,14 +68,13 @@ public class NihmsHarvesterCLIRunner implements CommandLineRunner { private boolean inProcess = false; /** - * The start date from which to load . + * The number of months of data to nihms harvest. */ - @Option(name = "-s", aliases = {"-startDate", "--startDate"}, - usage = "DateTime to start the query against NIHMS data. This will cause " - + "a return of all records published since the date provided. Syntax must be mm-yyyy. This value " + - "will override the " - + "NIHMS system default which is one year before the current month") - private String startDate = ""; + @Option(name = "-m", aliases = {"-harvestMonths", "--harvestMonths"}, + usage = "Period of time by month to query against NIHMS data. For example, to query for the past 3 " + + "months of nihms data, the argument would be -harvestMonths=3. This value will override the NIHMS " + + "system default which is one year before the current month.") + private int harvestMonths = NihmsHarvester.DEFAULT_HARVEST_MONTHS; private final NihmsHarvester nihmsHarvester; @@ -97,7 +96,7 @@ public void run(String... args) { } Set statusesToProcess = new HashSet<>(); - String startDateFilter = this.startDate; + int harvestPeriodMonths = this.harvestMonths; //select statuses to process if (this.compliant) { @@ -114,7 +113,7 @@ public void run(String... args) { } /* Run the package generation application proper */ - nihmsHarvester.harvest(statusesToProcess, startDateFilter); + nihmsHarvester.harvest(statusesToProcess, harvestPeriodMonths); } catch (CmdLineException e) { /** diff --git a/pass-nihms-loader/nihms-data-harvest/src/test/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIHarvestMonthsTest.java b/pass-nihms-loader/nihms-data-harvest/src/test/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIHarvestMonthsTest.java new file mode 100644 index 000000000..423554697 --- /dev/null +++ b/pass-nihms-loader/nihms-data-harvest/src/test/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLIHarvestMonthsTest.java @@ -0,0 +1,76 @@ +/* + * + * * Copyright 2024 Johns Hopkins University + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.eclipse.pass.loader.nihms; + +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.verify; + +import java.io.IOException; +import java.net.URL; +import java.time.LocalDate; +import java.time.Period; +import java.time.format.DateTimeFormatter; +import java.util.EnumSet; + +import org.eclipse.pass.loader.nihms.model.NihmsStatus; +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.boot.test.mock.mockito.SpyBean; +import org.springframework.test.context.TestPropertySource; + +/** + * @author Russ Poetker (rpoetke1@jh.edu) + */ +@SpringBootTest(classes = NihmsHarvesterCLI.class, args = {"--harvestMonths=3"}) +@TestPropertySource( + locations = "classpath:test-application.properties", + properties = { + "nihmsetl.api.url.param.pdf=", + "nihmsetl.api.url.param.pdt=" + }) +public class NihmsHarvesterCLIHarvestMonthsTest { + + @SpyBean NihmsHarvester nihmsHarvester; + @MockBean NihmsHarvesterDownloader nihmsHarvesterDownloader; + + @Test + public void testHarvesterCLI_WithHarvestMonths() throws IOException, InterruptedException { + // GIVEN/WHEN + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("MM/uuuu"); + // 3 months passed in args up top in @SpringBootTest + String expectedPdf = LocalDate.now().minus(Period.ofMonths(3)).format(formatter) + .replace("/", "%2F"); + // THEN + verify(nihmsHarvester).harvest(eq(EnumSet.allOf(NihmsStatus.class)), anyInt()); + verify(nihmsHarvesterDownloader).download( + eq(new URL("https://www.ncbi.nlm.nih.gov/pmc/utils/pacm/c?pdf=" + expectedPdf + + "&api-token=test-token&inst=JOHNS-HOPKINS-TEST&format=csv&ipf=4134401-TEST")), + eq(NihmsStatus.COMPLIANT)); + verify(nihmsHarvesterDownloader).download( + eq(new URL("https://www.ncbi.nlm.nih.gov/pmc/utils/pacm/p?pdf=" + expectedPdf + + "&api-token=test-token&inst=JOHNS-HOPKINS-TEST&format=csv&ipf=4134401-TEST")), + eq(NihmsStatus.IN_PROCESS)); + verify(nihmsHarvesterDownloader).download( + eq(new URL("https://www.ncbi.nlm.nih.gov/pmc/utils/pacm/n?pdf=" + expectedPdf + + "&api-token=test-token&inst=JOHNS-HOPKINS-TEST&format=csv&ipf=4134401-TEST")), + eq(NihmsStatus.NON_COMPLIANT)); + } + +} \ No newline at end of file diff --git a/pass-nihms-loader/nihms-data-harvest/src/test/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLITest.java b/pass-nihms-loader/nihms-data-harvest/src/test/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLITest.java index 9bcd4ea2c..abfa3215b 100644 --- a/pass-nihms-loader/nihms-data-harvest/src/test/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLITest.java +++ b/pass-nihms-loader/nihms-data-harvest/src/test/java/org/eclipse/pass/loader/nihms/NihmsHarvesterCLITest.java @@ -17,7 +17,7 @@ */ package org.eclipse.pass.loader.nihms; -import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.verify; @@ -46,7 +46,7 @@ public class NihmsHarvesterCLITest { public void testHarvesterCLI() throws IOException, InterruptedException { // GIVEN/WHEN // THEN - verify(nihmsHarvester).harvest(eq(EnumSet.allOf(NihmsStatus.class)), any()); + verify(nihmsHarvester).harvest(eq(EnumSet.allOf(NihmsStatus.class)), anyInt()); verify(nihmsHarvesterDownloader).download( eq(new URL("https://www.ncbi.nlm.nih.gov/pmc/utils/pacm/c?pdt=07%2F2019&pdf=07%2F2018" + "&api-token=test-token&inst=JOHNS-HOPKINS-TEST&format=csv&ipf=4134401-TEST")), diff --git a/pass-nihms-loader/nihms-docker/entrypoint.sh b/pass-nihms-loader/nihms-docker/entrypoint.sh index 88589c857..cfc38f1dc 100644 --- a/pass-nihms-loader/nihms-docker/entrypoint.sh +++ b/pass-nihms-loader/nihms-docker/entrypoint.sh @@ -1,7 +1,7 @@ #!/bin/sh # Execute NIHMS harvest -java -jar nihms-data-harvest-cli-exec.jar +java -jar nihms-data-harvest-cli-exec.jar "$@" # Execute NIHMS transform and load into PASS java -jar nihms-data-transform-load-exec.jar \ No newline at end of file