From a5f2729da6de097e50c48e39fc294b4c218aa572 Mon Sep 17 00:00:00 2001 From: Karen Hanson Date: Tue, 19 Jun 2018 16:57:23 -0400 Subject: [PATCH] Improvements to grant matching, RepositoryCopy status control (#18) * To improve matching of awardNumber to NIHMS data, anything after a "-" is removed * Ensure you can't switch a RepositoryCopy.copyStatus back from COMPLETE. This showed up when 2 pmids were assigned to the same DOI with one compliant and the other non-compliant. It tried to switch the copyStatus to null. There is now a warning and a COMPLETE RepositoryCopy cannot be undone. * Fix an error message that cites the record number instead of CSV row number * Increment to version 1.2.3 --- entrez-pmid-lookup/pom.xml | 2 +- nihms-data-harvest-cli/pom.xml | 2 +- nihms-data-harvest/pom.xml | 2 +- nihms-data-transform-load-cli/pom.xml | 2 +- nihms-data-transform-load/pom.xml | 2 +- .../pass/loader/nihms/NihmsCsvProcessor.java | 2 +- .../nihms/NihmsPublicationToSubmission.java | 15 +++++++++++---- .../loader/nihms/NihmsRepoCopyStatusTest.java | 12 ++++++------ nihms-etl-integration/pom.xml | 2 +- nihms-etl-model/pom.xml | 2 +- nihms-etl-util/pom.xml | 2 +- nihms-pass-client/pom.xml | 2 +- .../pass/client/nihms/NihmsPassClientService.java | 6 ++++++ pom.xml | 2 +- 14 files changed, 34 insertions(+), 21 deletions(-) diff --git a/entrez-pmid-lookup/pom.xml b/entrez-pmid-lookup/pom.xml index 676105d..6b322af 100644 --- a/entrez-pmid-lookup/pom.xml +++ b/entrez-pmid-lookup/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 entrez-pmid-lookup diff --git a/nihms-data-harvest-cli/pom.xml b/nihms-data-harvest-cli/pom.xml index 31b315f..b87cb79 100644 --- a/nihms-data-harvest-cli/pom.xml +++ b/nihms-data-harvest-cli/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-data-harvest-cli jar diff --git a/nihms-data-harvest/pom.xml b/nihms-data-harvest/pom.xml index 9013300..3a42f16 100644 ---
a/nihms-data-harvest/pom.xml +++ b/nihms-data-harvest/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-data-harvest diff --git a/nihms-data-transform-load-cli/pom.xml b/nihms-data-transform-load-cli/pom.xml index 163a518..5bc1236 100644 --- a/nihms-data-transform-load-cli/pom.xml +++ b/nihms-data-transform-load-cli/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-data-transform-load-cli NIHMS Data Transform/Load Command Line Interface diff --git a/nihms-data-transform-load/pom.xml b/nihms-data-transform-load/pom.xml index f950686..0fb8b98 100644 --- a/nihms-data-transform-load/pom.xml +++ b/nihms-data-transform-load/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-data-transform-load NIHMS Data Transform/Load diff --git a/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsCsvProcessor.java b/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsCsvProcessor.java index 076e107..c74acc5 100644 --- a/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsCsvProcessor.java +++ b/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsCsvProcessor.java @@ -160,7 +160,7 @@ private void consumeRow(CSVRecord row, Consumer pubConsumer) { } catch (Exception ex) { failCount = failCount + 1; - LOG.error("A problem occurred while processing csv row {} with pmid {}. The record was not imported successfully.", recCount, pub.getPmid(), ex); + LOG.error("A problem occurred while processing csv row {} with pmid {}. 
The record was not imported successfully.", recCount+1, pub.getPmid(), ex); } } diff --git a/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsPublicationToSubmission.java b/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsPublicationToSubmission.java index 7122553..e7a5b9a 100644 --- a/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsPublicationToSubmission.java +++ b/nihms-data-transform-load/src/main/java/org/dataconservancy/pass/loader/nihms/NihmsPublicationToSubmission.java @@ -260,12 +260,13 @@ private RepositoryCopy retrieveOrCreateRepositoryCopy(NihmsPublication pub, URI } repoCopy.setExternalIds(externalIds); - //check if copystatus changed + //check if copystatus changed, but do not change from completed to something else, this indicates a data issue. CopyStatus copyStatus = repoCopy.getCopyStatus(); CopyStatus newCopyStatus = calcRepoCopyStatus(pub, copyStatus); - if (!copyStatus.equals(newCopyStatus)) { + + if (copyStatus==null || !copyStatus.equals(newCopyStatus)) { repoCopy.setCopyStatus(newCopyStatus); - submissionDTO.setUpdateRepositoryCopy(true); + submissionDTO.setUpdateRepositoryCopy(true); } } return repoCopy; @@ -420,6 +421,13 @@ public static CopyStatus calcRepoCopyStatus(NihmsPublication pub, CopyStatus cur if (pub.getNihmsStatus().equals(NihmsStatus.COMPLIANT)) { return CopyStatus.COMPLETE; } + //do not let the status go back from COMPLETE - once it is COMPLETE any attempt to change status back is probably bad data + if (currCopyStatus!=null && currCopyStatus.equals(CopyStatus.COMPLETE)) { + LOG.warn("A NIHMS record for a publication with PMID {} is attempting to change the status of a COMPLETED Repository Copy. " + + "This may be due to two PMIDs being assigned to a single DOI, so the data should be checked. 
The status will not " + + "be changed.", pub.getPmid()); + return CopyStatus.COMPLETE; + } CopyStatus newStatus = null; @@ -435,7 +443,6 @@ public static CopyStatus calcRepoCopyStatus(NihmsPublication pub, CopyStatus cur // NIHMS but log a warning if (currCopyStatus!=null) { if (newStatus==null - || currCopyStatus.equals(CopyStatus.COMPLETE) || (currCopyStatus.equals(CopyStatus.IN_PROGRESS) && newStatus.equals(CopyStatus.ACCEPTED))) { LOG.warn("The status of the RepositoryCopy in PASS was at a later stage than the current NIHMS status would imply. " + "Rolled back from \"{}\" to \"{}\" for pmid {}", currCopyStatus.toString(), (newStatus==null ? "(null)" : newStatus.toString()), pub.getPmid()); diff --git a/nihms-data-transform-load/src/test/java/org/dataconservancy/pass/loader/nihms/NihmsRepoCopyStatusTest.java b/nihms-data-transform-load/src/test/java/org/dataconservancy/pass/loader/nihms/NihmsRepoCopyStatusTest.java index 1665599..daddcec 100644 --- a/nihms-data-transform-load/src/test/java/org/dataconservancy/pass/loader/nihms/NihmsRepoCopyStatusTest.java +++ b/nihms-data-transform-load/src/test/java/org/dataconservancy/pass/loader/nihms/NihmsRepoCopyStatusTest.java @@ -80,10 +80,10 @@ public void testCalcCopyStatusAccepted() { CopyStatus status = NihmsPublicationToSubmission.calcRepoCopyStatus(pub, null); assertEquals(CopyStatus.ACCEPTED, status); - //status has gone out of alignment with PASS - PASS status is saying complete. This should roll back - //the status to accepted and log a warning. + //status has gone out of alignment with PASS - PASS status is saying complete. This should not roll back + //the status to accepted and it should log a warning that something is attempting to take status out of complete. 
status = NihmsPublicationToSubmission.calcRepoCopyStatus(pub, CopyStatus.COMPLETE); - assertEquals(CopyStatus.ACCEPTED, status); + assertEquals(CopyStatus.COMPLETE, status); //it was accepted, and is still accepted status = NihmsPublicationToSubmission.calcRepoCopyStatus(pub, CopyStatus.ACCEPTED); @@ -104,9 +104,9 @@ public void testCalcCopyStatusStalled() { CopyStatus status = NihmsPublicationToSubmission.calcRepoCopyStatus(pub, null); assertEquals(CopyStatus.STALLED, status); - //should be stalled even if was previously complete + //if previous status was complete, should be complete regardless status = NihmsPublicationToSubmission.calcRepoCopyStatus(pub, CopyStatus.COMPLETE); - assertEquals(CopyStatus.STALLED, status); + assertEquals(CopyStatus.COMPLETE, status); } @@ -129,7 +129,7 @@ public void testCalcCopyStatusInProgress() { //status has gone out of alignment with PASS - PASS is ahead sometime. This should roll back //the status to received and log a warning. status = NihmsPublicationToSubmission.calcRepoCopyStatus(pub, CopyStatus.COMPLETE); - assertEquals(CopyStatus.IN_PROGRESS, status); + assertEquals(CopyStatus.COMPLETE, status); //this time, the submission has been tagged since it was accepted. 
pub = newTestPub(); diff --git a/nihms-etl-integration/pom.xml b/nihms-etl-integration/pom.xml index 84fbbef..f8e8157 100644 --- a/nihms-etl-integration/pom.xml +++ b/nihms-etl-integration/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-etl-integration NIHMS ELT Integration Tests diff --git a/nihms-etl-model/pom.xml b/nihms-etl-model/pom.xml index 34a7574..beefdac 100644 --- a/nihms-etl-model/pom.xml +++ b/nihms-etl-model/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-etl-model diff --git a/nihms-etl-util/pom.xml b/nihms-etl-util/pom.xml index b2633fa..b74f9da 100644 --- a/nihms-etl-util/pom.xml +++ b/nihms-etl-util/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-etl-util NIHMS ETL Utilities diff --git a/nihms-pass-client/pom.xml b/nihms-pass-client/pom.xml index 59b44db..891d938 100644 --- a/nihms-pass-client/pom.xml +++ b/nihms-pass-client/pom.xml @@ -5,7 +5,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 nihms-pass-client diff --git a/nihms-pass-client/src/main/java/org/dataconservancy/pass/client/nihms/NihmsPassClientService.java b/nihms-pass-client/src/main/java/org/dataconservancy/pass/client/nihms/NihmsPassClientService.java index 8fe604a..2f5b198 100644 --- a/nihms-pass-client/src/main/java/org/dataconservancy/pass/client/nihms/NihmsPassClientService.java +++ b/nihms-pass-client/src/main/java/org/dataconservancy/pass/client/nihms/NihmsPassClientService.java @@ -143,6 +143,12 @@ public Grant findMostRecentGrantByAwardNumber(String awardNumber) { if (!awardNumber.equals(modAwardNum)) { grantIds.addAll(client.findAllByAttribute(Grant.class, AWARD_NUMBER_FLD, modAwardNum)); } + + //if there is a "-##" at the end of the award number, remove it and search again + if (modAwardNum.contains("-") && modAwardNum.indexOf("-") > 9) { + modAwardNum = modAwardNum.substring(0, modAwardNum.indexOf("-")); 
+ grantIds.addAll(client.findAllByAttribute(Grant.class, AWARD_NUMBER_FLD, modAwardNum)); + } List grants = new ArrayList(); for (URI id : grantIds) { diff --git a/pom.xml b/pom.xml index c550fcf..d656b8f 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ org.dataconservancy.pass pass-nihms-submission-etl - 1.2.2 + 1.2.3 pom PASS NIHMS Submission Extract-Transform-Load