Skip to content

Commit

Permalink
Allow External ID Resolution via local MCF files (#133)
Browse files Browse the repository at this point in the history
  • Loading branch information
enjoythecode authored Jun 30, 2022
1 parent a840834 commit 088c09b
Show file tree
Hide file tree
Showing 12 changed files with 653 additions and 26 deletions.
3 changes: 3 additions & 0 deletions tool/src/main/java/org/datacommons/tool/Processor.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ private void processNodes(Mcf.McfType type, File file)
while ((n = parser.parseNextNode()) != null) {
n = McfMutator.mutate(n.toBuilder(), logCtx);

if (idResolver != null && type == Mcf.McfType.INSTANCE_MCF) {
idResolver.addLocalGraph(n);
}
if (existenceChecker != null && type == Mcf.McfType.INSTANCE_MCF) {
// Add instance MCF nodes to ExistenceChecker. We load all the nodes up first
// before we check them later in checkNodes().
Expand Down
3 changes: 2 additions & 1 deletion tool/src/test/java/org/datacommons/tool/GenMcfTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ public class GenMcfTest {
"fataltmcf", 2,
"resolution", 5,
"statchecks", 3,
"successtmcf", 3);
"successtmcf", 3,
"localidresolution", 5);

@Test
public void GenMcfTest() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
DateTime,isoCode,wikidataId,CumulativeCount_MedicalTest_ConditionCOVID_19_Positive
2020-01-01,pseudoIsoCodeSimple,pseudoWikidataIdSimple,1
2020-01-02,pseudoIsoCodeDiverging,pseudoWikidataIdDiverging,2
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Node: E:COVID19_cases_india->E0
typeOf: dcs:StatVarObservation
variableMeasured: dcs:CumulativeCount_MedicalTest_ConditionCOVID_19_Positive
observationAbout: E:COVID19_cases_india->E1
observationDate: C:COVID19_cases_india->DateTime
value: C:COVID19_cases_india->CumulativeCount_MedicalTest_ConditionCOVID_19_Positive

Node: E:COVID19_cases_india->E1
typeOf: schema:Place
isoCode: C:COVID19_cases_india->isoCode
wikidataId: C:COVID19_cases_india->wikidataId

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# 1. Simple local resolution of external IDs (isoCode, wikidataId) for a node of type Place
Node: dcid:pseudoPlaceSimple
typeOf: schema:Place
isoCode: pseudoIsoCodeSimple
wikidataId: pseudoWikidataIdSimple

# 2. Local resolution that should give an error because the two external IDs below resolve to different DCIDs
## isoCode
Node: dcid:pseudoPlaceDivergent_AAA
typeOf: schema:Place
isoCode: pseudoIsoCodeDiverging

## wikidataId
Node: dcid:pseudoPlaceDivergent_BBB
typeOf: schema:Place
wikidataId: pseudoWikidataIdDiverging
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# From covid.csv:2
# Error: Failed to assign DCID :: type: 'Place', node: 'COVID19_cases_india/E1/1'
Node: COVID19_cases_india/E1/1
isoCode: "pseudoIsoCodeDiverging"
wikidataId: "pseudoWikidataIdDiverging"
typeOf: dcid:Place

# From covid.csv:2
# Error: Unable to assign DCID due to unresolved local reference :: ref: 'COVID19_cases_india/E1/1', node: 'COVID19_cases_india/E0/1'
Node: COVID19_cases_india/E0/1
observationDate: "2020-01-02"
observationAbout: l:COVID19_cases_india/E1/1
variableMeasured: dcid:CumulativeCount_MedicalTest_ConditionCOVID_19_Positive
value: 2
typeOf: dcid:StatVarObservation

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Node: dcid:pseudoPlaceSimple
dcid: "pseudoPlaceSimple"
isoCode: "pseudoIsoCodeSimple"
wikidataId: "pseudoWikidataIdSimple"
typeOf: dcid:Place

Node: dcid:pseudoPlaceDivergent_BBB
dcid: "pseudoPlaceDivergent_BBB"
wikidataId: "pseudoWikidataIdDiverging"
typeOf: dcid:Place

Node: dcid:pseudoPlaceDivergent_AAA
dcid: "pseudoPlaceDivergent_AAA"
isoCode: "pseudoIsoCodeDiverging"
typeOf: dcid:Place

Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"levelSummary": {
"LEVEL_INFO": {
"counters": {
"NumRowSuccesses": "2",
"NumPVSuccesses": "11",
"Existence_NumChecks": "33",
"NumNodeSuccesses": "2",
"Existence_NumDcCalls": "2"
}
},
"LEVEL_ERROR": {
"counters": {
"Resolution_DivergingDcidsForExternalIds_isoCode_wikidataId": "1",
"Resolution_IrreplaceableLocalRef": "1",
"Resolution_DcidAssignmentFailure_Place": "1",
"Resolution_UnassignableNodeDcid": "1"
}
}
},
"entries": [{
"level": "LEVEL_ERROR",
"location": {
"file": "covid.csv",
"lineNumber": "2"
},
"userMessage": "Found diverging DCIDs for external IDs :: extId1: 'pseudoIsoCodeDiverging, dcid1: 'pseudoPlaceDivergent_AAA', property1: 'isoCode, extId2: 'pseudoWikidataIdDiverging', dcid2:pseudoPlaceDivergent_BBB, property2: 'wikidataId', node: 'COVID19_cases_india/E1/1'",
"counterKey": "Resolution_DivergingDcidsForExternalIds_isoCode_wikidataId"
}, {
"level": "LEVEL_ERROR",
"location": {
"file": "covid.csv",
"lineNumber": "2"
},
"userMessage": "Failed to assign DCID :: type: 'Place', node: 'COVID19_cases_india/E1/1'",
"counterKey": "Resolution_DcidAssignmentFailure_Place"
}, {
"level": "LEVEL_ERROR",
"location": {
"file": "covid.csv",
"lineNumber": "2"
},
"userMessage": "Unable to replace a local reference :: ref: 'COVID19_cases_india/E1/1', node: 'COVID19_cases_india/E0/1'",
"counterKey": "Resolution_IrreplaceableLocalRef"
}, {
"level": "LEVEL_ERROR",
"location": {
"file": "covid.csv",
"lineNumber": "2"
},
"userMessage": "Unable to assign DCID due to unresolved local reference :: ref: 'COVID19_cases_india/E1/1', node: 'COVID19_cases_india/E0/1'",
"counterKey": "Resolution_UnassignableNodeDcid"
}],
"commandArgs": {
"existenceChecks": true,
"resolution": "RESOLUTION_MODE_FULL",
"numThreads": 1,
"statChecks": true
}
}
Loading

0 comments on commit 088c09b

Please sign in to comment.