Skip to content

Commit

Permalink
Various Updates to StatChecker (#80)
Browse files Browse the repository at this point in the history
* various updates

* address pr comments
  • Loading branch information
chejennifer authored Sep 24, 2021
1 parent 907c4a5 commit 7529566
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 85 deletions.
2 changes: 1 addition & 1 deletion tool/src/main/java/org/datacommons/tool/WriterPair.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public void close() throws IOException {
private BufferedWriter newWriter(Args.OutputFileType type) throws IOException {
String filePath = args.outputFiles.get(type).toString();
if (csvFile != null) {
String fileSuffix = FilenameUtils.removeExtension(csvFile.getName()) + ".csv";
String fileSuffix = FilenameUtils.removeExtension(csvFile.getName()) + ".mcf";
filePath = FilenameUtils.removeExtension(filePath) + "_" + fileSuffix;
}
return new BufferedWriter(new FileWriter(filePath));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
},
"LEVEL_WARNING": {
"counters": {
"StatsCheck_MaxPercentFluctuationGreaterThan50": "1",
"Existence_MissingReference_Property": "2",
"Existence_MissingReference_typeOf": "1"
}
Expand Down Expand Up @@ -110,37 +109,6 @@
"userMessage": "Found a local ref to an unresolvable node :: ref: 'l:SVId', property: 'variableMeasured', node: 'UnresSVObsId'",
"counterKey": "Resolution_ReferenceToFailedNode_variableMeasured"
}],
"statsCheckSummary": [{
"placeDcid": "wikidataId/Q1186",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_MaxPercentFluctuationGreaterThan50",
"problemPoints": [{
"date": "2020-01-30",
"values": [{
"value": 1.0,
"locations": [{
"file": "covid.csv",
"lineNumber": "2"
}]
}]
}, {
"date": "2020-02-02",
"values": [{
"value": 2.0,
"locations": [{
"file": "covid.csv",
"lineNumber": "3"
}]
}]
}],
"percentDifference": 1.0
}]
}],
"commandArgs": {
"existenceChecks": true,
"resolution": "RESOLUTION_MODE_FULL",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,32 @@
DateTime,geoId,CumulativeCount_MedicalTest_ConditionCOVID_19_Positive
2020-01-02,geoId/07,1
2020-01-02,geoId/201,1
2020-01-02,geoId/211,1
2020-01-02,geoId/221,1
2020-01-02,geoId/231,1
2020-01-02,geoId/241,1
2020-01-02,geoId/25111,1
2020-01-02,geoId/26111,1
2020-01-02,geoId/27111,1
2020-01-02,geoId/28111,1
2020-01-02,geoId/29111,1
2020-01-02,geoId/3011111,1
2020-01-02,geoId/3111111,1
2020-01-30,geoId/06,1
2020-02-02,geoId/06,2
2020-02-02,geoId/07,0
2020-02-02,geoId/201,1
2020-02-02,geoId/211,1
2020-02-02,geoId/221,1
2020-02-02,geoId/231,1
2020-02-02,geoId/241,1
2020-02-02,geoId/25111,1
2020-02-02,geoId/26111,1
2020-02-02,geoId/27111,1
2020-02-02,geoId/28111,1
2020-02-02,geoId/29111,1
2020-02-02,geoId/3011111,1
2020-02-02,geoId/3111111,1
2020-02-03,geoId/06,3
2020-02-03,geoId/06,8
2020-03-02,geoId/06,1
Expand All @@ -20,4 +44,16 @@ DateTime,geoId,CumulativeCount_MedicalTest_ConditionCOVID_19_Positive
2020-04-02,geoId/07,112
2020-05-02,geoId/07,5
2020-05-03,geoId/06,8
2020-06-02,geoId/07,275
2020-06-02,geoId/07,275
2020-06-02,geoId/201,1
2020-06-02,geoId/211,1
2020-06-02,geoId/221,1
2020-06-02,geoId/231,1
2020-06-02,geoId/241,1
2020-06-02,geoId/25111,1
2020-06-02,geoId/26111,1
2020-06-02,geoId/27111,1
2020-06-02,geoId/28111,1
2020-06-02,geoId/29111,1
2020-06-02,geoId/3011111,1
2020-06-02,geoId/3111111,1
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
"levelSummary": {
"LEVEL_INFO": {
"counters": {
"NumRowSuccesses": "21",
"NumPVSuccesses": "147",
"Existence_NumChecks": "132",
"NumNodeSuccesses": "21",
"NumRowSuccesses": "57",
"NumPVSuccesses": "399",
"Existence_NumChecks": "348",
"NumNodeSuccesses": "57",
"Existence_NumDcCalls": "1"
}
},
Expand All @@ -14,7 +14,7 @@
"StatsCheck_Inconsistent_Date_Granularity": "1",
"StatsCheck_Inconsistent_Values": "1",
"StatsCheck_MaxPercentFluctuationGreaterThan500": "1",
"StatsCheck_Data_Holes": "1"
"StatsCheck_Data_Holes": "13"
}
},
"LEVEL_ERROR": {
Expand All @@ -27,7 +27,7 @@
"level": "LEVEL_ERROR",
"location": {
"file": "covid.csv",
"lineNumber": "7"
"lineNumber": "31"
},
"userMessage": "Found nodes with different values for the same StatVarObservation :: observationAbout: 'geoId/06', variableMeasured: 'CumulativeCount_MedicalTest_ConditionCOVID_19_Positive', observationDate: '2020-02-03', value1: 3.0, value2: 8.0",
"counterKey": "Sanity_InconsistentSvObsValues"
Expand All @@ -47,19 +47,30 @@
"value": 3.0,
"locations": [{
"file": "covid.csv",
"lineNumber": "6"
"lineNumber": "30"
}]
}, {
"value": 8.0,
"locations": [{
"file": "covid.csv",
"lineNumber": "7"
"lineNumber": "31"
}]
}]
}]
}, {
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Data hole found between the dates: 2020-03-03T00:00 and 2020-05-03T00:00"
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-30, 2020-02-02, 2020-02-03, 2020-03-02, 2020-03-03, 2020-05-03"
}]
}, {
"placeDcid": "geoId/3111111",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/0601",
Expand All @@ -76,11 +87,99 @@
"value": 1.0,
"locations": [{
"file": "covid.csv",
"lineNumber": "16"
"lineNumber": "40"
}]
}]
}]
}]
}, {
"placeDcid": "geoId/25111",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/28111",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/231",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/29111",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/221",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/241",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/211",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/26111",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
"measurementMethod": "",
"observationPeriod": "",
"scalingFactor": "",
"unit": "",
"validationCounters": [{
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-02, 2020-02-02, 2020-06-02"
}]
}, {
"placeDcid": "geoId/07",
"statVarDcid": "CumulativeCount_MedicalTest_ConditionCOVID_19_Positive",
Expand All @@ -96,7 +195,7 @@
"value": 0.0,
"locations": [{
"file": "covid.csv",
"lineNumber": "5"
"lineNumber": "17"
}]
}]
}, {
Expand All @@ -105,11 +204,11 @@
"value": 1.0,
"locations": [{
"file": "covid.csv",
"lineNumber": "10"
"lineNumber": "34"
}]
}]
}],
"percentDifference": 1000000.0
"percentDifference": 1.0E8
}]
}],
"commandArgs": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
}]
}, {
"counterKey": "StatsCheck_Data_Holes",
"additionalDetails": "Data hole found between the dates: 2020-03-03T00:00 and 2020-05-03T00:00"
"additionalDetails": "Possible data hole found. Dates in this series: 2020-01-30, 2020-02-02, 2020-02-03, 2020-03-02, 2020-03-03, 2020-05-03"
}]
}, {
"placeDcid": "geoId/0601",
Expand Down Expand Up @@ -106,7 +106,7 @@
}]
}]
}],
"percentDifference": 1.3333333333333333
"percentDifference": 133.33
}]
}],
"commandArgs": {
Expand Down
Loading

0 comments on commit 7529566

Please sign in to comment.