diff --git a/docs/usage.md b/docs/usage.md index b559a523..5e8ccb04 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -121,6 +121,12 @@ Specifies the directory to write output files. Default is `dc_generated/` within current working directory. +### `-ep`, `--existence-checks-place` + +Specifies whether to perform existence checks for places found in the `observationAbout` property in StatVarObservation nodes. + +Defaults to `false`. + ### `-s`, `--stat-checks` Checks integrity of time series by checking for holes, variance in values, etc. diff --git a/tool/src/main/java/org/datacommons/tool/Args.java b/tool/src/main/java/org/datacommons/tool/Args.java index 6d1b0b1e..da394bd4 100644 --- a/tool/src/main/java/org/datacommons/tool/Args.java +++ b/tool/src/main/java/org/datacommons/tool/Args.java @@ -19,6 +19,7 @@ class Args { public int numThreads = 1; public Path outputDir = null; public boolean generateSummaryReport = true; + public boolean checkObservationAbout = false; public String toString() { StringBuilder argStr = new StringBuilder(); @@ -34,6 +35,7 @@ public String toString() { if (samplePlaces != null) { argStr.append(", sample-places=" + Strings.join(samplePlaces, ':')); } + argStr.append(", observation-about=" + checkObservationAbout); return argStr.toString(); } @@ -50,6 +52,7 @@ public Debug.CommandArgs toProto() { } argsBuilder.setStatChecks(doStatChecks); if (samplePlaces != null) argsBuilder.addAllSamplePlaces(samplePlaces); + argsBuilder.setObservationAbout(checkObservationAbout); return argsBuilder.build(); } diff --git a/tool/src/main/java/org/datacommons/tool/GenMcf.java b/tool/src/main/java/org/datacommons/tool/GenMcf.java index f09719b3..3267d7fa 100644 --- a/tool/src/main/java/org/datacommons/tool/GenMcf.java +++ b/tool/src/main/java/org/datacommons/tool/GenMcf.java @@ -74,6 +74,7 @@ public Integer call() throws IOException, TemplateException { args.outputFiles.put(type, Paths.get(parent.outputDir.getPath(), fName)); } 
args.generateSummaryReport = parent.generateSummaryReport; + args.checkObservationAbout = parent.checkObservationAbout; // Process all the things. return Processor.process(args); diff --git a/tool/src/main/java/org/datacommons/tool/Lint.java b/tool/src/main/java/org/datacommons/tool/Lint.java index 46202176..fcbf8147 100644 --- a/tool/src/main/java/org/datacommons/tool/Lint.java +++ b/tool/src/main/java/org/datacommons/tool/Lint.java @@ -67,6 +67,7 @@ public Integer call() throws IOException, TemplateException { args.fileGroup = FileGroup.build(files, spec, delimiter, logger); args.outputDir = parent.outputDir.toPath(); args.generateSummaryReport = parent.generateSummaryReport; + args.checkObservationAbout = parent.checkObservationAbout; return Processor.process(args); } } diff --git a/tool/src/main/java/org/datacommons/tool/Main.java b/tool/src/main/java/org/datacommons/tool/Main.java index 9390ef95..9be3edc3 100644 --- a/tool/src/main/java/org/datacommons/tool/Main.java +++ b/tool/src/main/java/org/datacommons/tool/Main.java @@ -106,6 +106,16 @@ class Main { description = "Generates a summary report in html format. Defaults to true.") public boolean generateSummaryReport; + @CommandLine.Option( + names = {"-ep", "--existence-checks-place"}, + defaultValue = "false", + scope = CommandLine.ScopeType.INHERIT, + description = + "Specifies whether to perform existence checks for places found in " + + "the `observationAbout` property in StatVarObservation nodes. " + + "Defaults to false.") + public boolean checkObservationAbout; + public static void main(String... 
args) { System.exit( new CommandLine(new Main()).setCaseInsensitiveEnumValuesAllowed(true).execute(args)); diff --git a/tool/src/main/java/org/datacommons/tool/Processor.java b/tool/src/main/java/org/datacommons/tool/Processor.java index ec30d497..efac3341 100644 --- a/tool/src/main/java/org/datacommons/tool/Processor.java +++ b/tool/src/main/java/org/datacommons/tool/Processor.java @@ -237,7 +237,8 @@ private void processTable(File csvFile) g = McfMutator.mutate(g.toBuilder(), logCtx); // This will set counters/messages in logCtx. - boolean success = McfChecker.check(g, existenceChecker, statVarState, logCtx); + boolean success = + McfChecker.check(g, existenceChecker, statVarState, args.checkObservationAbout, logCtx); if (args.resolutionMode != Args.ResolutionMode.NONE) { g = resolveCommon(g, writerPair); diff --git a/tool/src/test/resources/org/datacommons/tool/genmcf/fataltmcf/output/report.json b/tool/src/test/resources/org/datacommons/tool/genmcf/fataltmcf/output/report.json index c3632c47..04b47242 100644 --- a/tool/src/test/resources/org/datacommons/tool/genmcf/fataltmcf/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/genmcf/fataltmcf/output/report.json @@ -116,7 +116,7 @@ "file": "FatalTmcf.tmcf", "lineNumber": "19" }, - "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E13', property: 'dcid' node: 'E:SVTest->E3'", + "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E10', property: 'dcid' node: 'E:SVTest->E3'", "counterKey": "Sanity_TmcfMissingEntityDef" }, { "level": "LEVEL_ERROR", @@ -124,16 +124,16 @@ "file": "FatalTmcf.tmcf", "lineNumber": "19" }, - "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E10', property: 'dcid' node: 'E:SVTest->E3'", - "counterKey": "Sanity_TmcfMissingEntityDef" + "userMessage": "Column referred to in TMCF is missing from CSV header :: column: 'dcid1', node: 'E:SVTest->E3'", + 
"counterKey": "Sanity_TmcfMissingColumn" }, { "level": "LEVEL_ERROR", "location": { "file": "FatalTmcf.tmcf", "lineNumber": "19" }, - "userMessage": "Column referred to in TMCF is missing from CSV header :: column: 'dcid1', node: 'E:SVTest->E3'", - "counterKey": "Sanity_TmcfMissingColumn" + "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E13', property: 'dcid' node: 'E:SVTest->E3'", + "counterKey": "Sanity_TmcfMissingEntityDef" }, { "level": "LEVEL_ERROR", "location": { @@ -195,6 +195,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_FULL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/tool/src/test/resources/org/datacommons/tool/genmcf/localidresolution/output/report.json b/tool/src/test/resources/org/datacommons/tool/genmcf/localidresolution/output/report.json index cc6870c2..f4b03607 100644 --- a/tool/src/test/resources/org/datacommons/tool/genmcf/localidresolution/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/genmcf/localidresolution/output/report.json @@ -55,6 +55,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_FULL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/tool/src/test/resources/org/datacommons/tool/genmcf/resolution/output/report.json b/tool/src/test/resources/org/datacommons/tool/genmcf/resolution/output/report.json index 431422ea..91f0760a 100644 --- a/tool/src/test/resources/org/datacommons/tool/genmcf/resolution/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/genmcf/resolution/output/report.json @@ -194,6 +194,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_FULL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git 
a/tool/src/test/resources/org/datacommons/tool/genmcf/statchecks/output/report.json b/tool/src/test/resources/org/datacommons/tool/genmcf/statchecks/output/report.json index b3840bde..3efcc180 100644 --- a/tool/src/test/resources/org/datacommons/tool/genmcf/statchecks/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/genmcf/statchecks/output/report.json @@ -215,6 +215,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_FULL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/tool/src/test/resources/org/datacommons/tool/genmcf/successtmcf/output/report.json b/tool/src/test/resources/org/datacommons/tool/genmcf/successtmcf/output/report.json index b2bd7918..752d4e52 100644 --- a/tool/src/test/resources/org/datacommons/tool/genmcf/successtmcf/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/genmcf/successtmcf/output/report.json @@ -710,6 +710,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_FULL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/tool/src/test/resources/org/datacommons/tool/lint/allfiletypes/output/report.json b/tool/src/test/resources/org/datacommons/tool/lint/allfiletypes/output/report.json index e40d9f7d..eb134a82 100644 --- a/tool/src/test/resources/org/datacommons/tool/lint/allfiletypes/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/lint/allfiletypes/output/report.json @@ -933,6 +933,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_LOCAL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/tool/src/test/resources/org/datacommons/tool/lint/mcfonly/output/report.json b/tool/src/test/resources/org/datacommons/tool/lint/mcfonly/output/report.json index ad5cfbf1..67b68cc5 100644 --- 
a/tool/src/test/resources/org/datacommons/tool/lint/mcfonly/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/lint/mcfonly/output/report.json @@ -561,6 +561,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_LOCAL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/tool/src/test/resources/org/datacommons/tool/lint/nocsv/output/report.json b/tool/src/test/resources/org/datacommons/tool/lint/nocsv/output/report.json index bb368d50..897050a4 100644 --- a/tool/src/test/resources/org/datacommons/tool/lint/nocsv/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/lint/nocsv/output/report.json @@ -583,7 +583,7 @@ "file": "NoCsv.tmcf", "lineNumber": "3" }, - "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E30', property: 'observationAbout' node: 'E:SVTest->E0'", + "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E3', property: 'observationAbout' node: 'E:SVTest->E0'", "counterKey": "Sanity_TmcfMissingEntityDef" }, { "level": "LEVEL_ERROR", @@ -591,7 +591,7 @@ "file": "NoCsv.tmcf", "lineNumber": "3" }, - "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E3', property: 'observationAbout' node: 'E:SVTest->E0'", + "userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E30', property: 'observationAbout' node: 'E:SVTest->E0'", "counterKey": "Sanity_TmcfMissingEntityDef" }, { "level": "LEVEL_ERROR", @@ -702,6 +702,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_LOCAL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/tool/src/test/resources/org/datacommons/tool/lint/statchecks/output/report.json b/tool/src/test/resources/org/datacommons/tool/lint/statchecks/output/report.json index 25fa27b7..e537f599 
100644 --- a/tool/src/test/resources/org/datacommons/tool/lint/statchecks/output/report.json +++ b/tool/src/test/resources/org/datacommons/tool/lint/statchecks/output/report.json @@ -113,6 +113,7 @@ "existenceChecks": true, "resolution": "RESOLUTION_MODE_LOCAL", "numThreads": 1, - "statChecks": true + "statChecks": true, + "observationAbout": false } } \ No newline at end of file diff --git a/util/src/main/java/org/datacommons/proto/Debug.java b/util/src/main/java/org/datacommons/proto/Debug.java index 363d4f19..1bf70f59 100644 --- a/util/src/main/java/org/datacommons/proto/Debug.java +++ b/util/src/main/java/org/datacommons/proto/Debug.java @@ -5276,6 +5276,19 @@ public interface CommandArgsOrBuilder * @return The bytes of the samplePlaces at the given index. */ com.google.protobuf.ByteString getSamplePlacesBytes(int index); + + /** + * optional bool observation_about = 6; + * + * @return Whether the observationAbout field is set. + */ + boolean hasObservationAbout(); + /** + * optional bool observation_about = 6; + * + * @return The observationAbout. + */ + boolean getObservationAbout(); } /** Protobuf type {@code org.datacommons.proto.CommandArgs} */ public static final class CommandArgs extends com.google.protobuf.GeneratedMessageV3 @@ -5365,6 +5378,12 @@ private CommandArgs( samplePlaces_.add(bs); break; } + case 48: + { + bitField0_ |= 0x00000010; + observationAbout_ = input.readBool(); + break; + } default: { if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) { @@ -5668,6 +5687,25 @@ public com.google.protobuf.ByteString getSamplePlacesBytes(int index) { return samplePlaces_.getByteString(index); } + public static final int OBSERVATION_ABOUT_FIELD_NUMBER = 6; + private boolean observationAbout_; + /** + * optional bool observation_about = 6; + * + * @return Whether the observationAbout field is set. 
+ */ + public boolean hasObservationAbout() { + return ((bitField0_ & 0x00000010) != 0); + } + /** + * optional bool observation_about = 6; + * + * @return The observationAbout. + */ + public boolean getObservationAbout() { + return observationAbout_; + } + private byte memoizedIsInitialized = -1; @java.lang.Override @@ -5697,6 +5735,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io for (int i = 0; i < samplePlaces_.size(); i++) { com.google.protobuf.GeneratedMessageV3.writeString(output, 5, samplePlaces_.getRaw(i)); } + if (((bitField0_ & 0x00000010) != 0)) { + output.writeBool(6, observationAbout_); + } unknownFields.writeTo(output); } @@ -5726,6 +5767,9 @@ public int getSerializedSize() { size += dataSize; size += 1 * getSamplePlacesList().size(); } + if (((bitField0_ & 0x00000010) != 0)) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(6, observationAbout_); + } size += unknownFields.getSerializedSize(); memoizedSize = size; return size; @@ -5758,6 +5802,10 @@ public boolean equals(final java.lang.Object obj) { if (getStatChecks() != other.getStatChecks()) return false; } if (!getSamplePlacesList().equals(other.getSamplePlacesList())) return false; + if (hasObservationAbout() != other.hasObservationAbout()) return false; + if (hasObservationAbout()) { + if (getObservationAbout() != other.getObservationAbout()) return false; + } if (!unknownFields.equals(other.unknownFields)) return false; return true; } @@ -5789,6 +5837,10 @@ public int hashCode() { hash = (37 * hash) + SAMPLE_PLACES_FIELD_NUMBER; hash = (53 * hash) + getSamplePlacesList().hashCode(); } + if (hasObservationAbout()) { + hash = (37 * hash) + OBSERVATION_ABOUT_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getObservationAbout()); + } hash = (29 * hash) + unknownFields.hashCode(); memoizedHashCode = hash; return hash; @@ -5938,6 +5990,8 @@ public Builder clear() { bitField0_ = (bitField0_ & ~0x00000008); 
samplePlaces_ = com.google.protobuf.LazyStringArrayList.EMPTY; bitField0_ = (bitField0_ & ~0x00000010); + observationAbout_ = false; + bitField0_ = (bitField0_ & ~0x00000020); return this; } @@ -5988,6 +6042,10 @@ public org.datacommons.proto.Debug.CommandArgs buildPartial() { bitField0_ = (bitField0_ & ~0x00000010); } result.samplePlaces_ = samplePlaces_; + if (((from_bitField0_ & 0x00000020) != 0)) { + result.observationAbout_ = observationAbout_; + to_bitField0_ |= 0x00000010; + } result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -6062,6 +6120,9 @@ public Builder mergeFrom(org.datacommons.proto.Debug.CommandArgs other) { } onChanged(); } + if (other.hasObservationAbout()) { + setObservationAbout(other.getObservationAbout()); + } this.mergeUnknownFields(other.unknownFields); onChanged(); return this; @@ -6378,6 +6439,47 @@ public Builder addSamplePlacesBytes(com.google.protobuf.ByteString value) { return this; } + private boolean observationAbout_; + /** + * optional bool observation_about = 6; + * + * @return Whether the observationAbout field is set. + */ + public boolean hasObservationAbout() { + return ((bitField0_ & 0x00000020) != 0); + } + /** + * optional bool observation_about = 6; + * + * @return The observationAbout. + */ + public boolean getObservationAbout() { + return observationAbout_; + } + /** + * optional bool observation_about = 6; + * + * @param value The observationAbout to set. + * @return This builder for chaining. + */ + public Builder setObservationAbout(boolean value) { + bitField0_ |= 0x00000020; + observationAbout_ = value; + onChanged(); + return this; + } + /** + * optional bool observation_about = 6; + * + * @return This builder for chaining. 
+ */ + public Builder clearObservationAbout() { + bitField0_ = (bitField0_ & ~0x00000020); + observationAbout_ = false; + onChanged(); + return this; + } + @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { @@ -12657,29 +12759,30 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { + "ons.proto.Log.CounterSet:\0028\001\"c\n\005Level\022\025\n" + "\021LEVEL_UNSPECIFIED\020\000\022\016\n\nLEVEL_INFO\020\001\022\021\n\r" + "LEVEL_WARNING\020\002\022\017\n\013LEVEL_ERROR\020\003\022\017\n\013LEVE" - + "L_FATAL\020\004J\004\010\002\020\003\"\262\002\n\013CommandArgs\022\030\n\020exist" + + "L_FATAL\020\004J\004\010\002\020\003\"\315\002\n\013CommandArgs\022\030\n\020exist" + "ence_checks\030\001 \001(\010\022E\n\nresolution\030\002 \001(\01621." + "org.datacommons.proto.CommandArgs.Resolu" + "tionMode\022\023\n\013num_threads\030\003 \001(\005\022\023\n\013stat_ch" - + "ecks\030\004 \001(\010\022\025\n\rsample_places\030\005 \003(\t\"\200\001\n\016Re" - + "solutionMode\022\037\n\033RESOLUTION_MODE_UNSPECIF" - + "IED\020\000\022\030\n\024RESOLUTION_MODE_NONE\020\001\022\031\n\025RESOL" - + "UTION_MODE_LOCAL\020\002\022\030\n\024RESOLUTION_MODE_FU" - + "LL\020\003\"\251\001\n\tDataPoint\022\014\n\004date\030\001 \001(\t\022:\n\006valu" - + "es\030\002 \003(\0132*.org.datacommons.proto.DataPoi" - + "nt.DataValue\032R\n\tDataValue\022\r\n\005value\030\001 \001(\001" - + "\0226\n\tlocations\030\002 \003(\0132#.org.datacommons.pr" - + "oto.Log.Location\"\234\003\n\024StatValidationResul" - + "t\022\022\n\nplace_dcid\030\001 \001(\t\022\025\n\rstat_var_dcid\030\002" - + " \001(\t\022\032\n\022measurement_method\030\003 \001(\t\022\032\n\022obse" - + "rvation_period\030\004 \001(\t\022\026\n\016scaling_factor\030\005" - + " \001(\t\022\014\n\004unit\030\006 \001(\t\022\\\n\023validation_counter" - + "s\030\007 \003(\0132?.org.datacommons.proto.StatVali" - + 
"dationResult.StatValidationEntry\032\234\001\n\023Sta" - + "tValidationEntry\022\023\n\013counter_key\030\001 \001(\t\0228\n" - + "\016problem_points\030\002 \003(\0132 .org.datacommons." - + "proto.DataPoint\022\032\n\022additional_details\030\003 " - + "\001(\t\022\032\n\022percent_difference\030\004 \001(\001" + + "ecks\030\004 \001(\010\022\025\n\rsample_places\030\005 \003(\t\022\031\n\021obs" + + "ervation_about\030\006 \001(\010\"\200\001\n\016ResolutionMode\022" + + "\037\n\033RESOLUTION_MODE_UNSPECIFIED\020\000\022\030\n\024RESO" + + "LUTION_MODE_NONE\020\001\022\031\n\025RESOLUTION_MODE_LO" + + "CAL\020\002\022\030\n\024RESOLUTION_MODE_FULL\020\003\"\251\001\n\tData" + + "Point\022\014\n\004date\030\001 \001(\t\022:\n\006values\030\002 \003(\0132*.or" + + "g.datacommons.proto.DataPoint.DataValue\032" + + "R\n\tDataValue\022\r\n\005value\030\001 \001(\001\0226\n\tlocations" + + "\030\002 \003(\0132#.org.datacommons.proto.Log.Locat" + + "ion\"\234\003\n\024StatValidationResult\022\022\n\nplace_dc" + + "id\030\001 \001(\t\022\025\n\rstat_var_dcid\030\002 \001(\t\022\032\n\022measu" + + "rement_method\030\003 \001(\t\022\032\n\022observation_perio" + + "d\030\004 \001(\t\022\026\n\016scaling_factor\030\005 \001(\t\022\014\n\004unit\030" + + "\006 \001(\t\022\\\n\023validation_counters\030\007 \003(\0132?.org" + + ".datacommons.proto.StatValidationResult." 
+ + "StatValidationEntry\032\234\001\n\023StatValidationEn" + + "try\022\023\n\013counter_key\030\001 \001(\t\0228\n\016problem_poin" + + "ts\030\002 \003(\0132 .org.datacommons.proto.DataPoi" + + "nt\022\032\n\022additional_details\030\003 \001(\t\022\032\n\022percen" + + "t_difference\030\004 \001(\001" }; descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom( @@ -12737,7 +12840,12 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( internal_static_org_datacommons_proto_CommandArgs_descriptor, new java.lang.String[] { - "ExistenceChecks", "Resolution", "NumThreads", "StatChecks", "SamplePlaces", + "ExistenceChecks", + "Resolution", + "NumThreads", + "StatChecks", + "SamplePlaces", + "ObservationAbout", }); internal_static_org_datacommons_proto_DataPoint_descriptor = getDescriptor().getMessageTypes().get(2); diff --git a/util/src/main/java/org/datacommons/util/McfChecker.java b/util/src/main/java/org/datacommons/util/McfChecker.java index 23109256..b8b4e7a1 100644 --- a/util/src/main/java/org/datacommons/util/McfChecker.java +++ b/util/src/main/java/org/datacommons/util/McfChecker.java @@ -53,15 +53,28 @@ public class McfChecker { boolean nodeFailure = false; // Failure of a specific node being processed. private ExistenceChecker existenceChecker; private StatVarState svState; + boolean shouldCheckObservationAbout; // Should check for observationAbout existence on SVObs // Argument |graph| may be Instance or Template MCF. 
public static boolean check( Mcf.McfGraph graph, ExistenceChecker existenceChecker, StatVarState svState, + boolean shouldCheckObservationAbout, LogWrapper logCtx) throws IOException, InterruptedException { - return new McfChecker(graph, null, existenceChecker, svState, logCtx).check(); + return new McfChecker( + graph, null, existenceChecker, svState, shouldCheckObservationAbout, logCtx) + .check(); + } + + public static boolean check( + Mcf.McfGraph graph, + ExistenceChecker existenceChecker, + StatVarState svState, + LogWrapper logCtx) + throws IOException, InterruptedException { + return check(graph, existenceChecker, svState, false, logCtx); } // Used to check a single node from TMcfCsvParser. @@ -71,14 +84,14 @@ public static boolean checkNode( Mcf.McfGraph.Builder nodeGraph = Mcf.McfGraph.newBuilder(); nodeGraph.setType(mcfType); nodeGraph.putNodes(nodeId, node); - return new McfChecker(nodeGraph.build(), null, null, null, logCtx).check(); + return new McfChecker(nodeGraph.build(), null, null, null, false, logCtx).check(); } // Used with Template MCF when there are columns from CSV header. public static boolean checkTemplate( Mcf.McfGraph graph, Set columns, ExistenceChecker existenceChecker, LogWrapper logCtx) throws IOException, InterruptedException { - return new McfChecker(graph, columns, existenceChecker, null, logCtx).check(); + return new McfChecker(graph, columns, existenceChecker, null, false, logCtx).check(); } private McfChecker( @@ -86,12 +99,14 @@ private McfChecker( Set columns, ExistenceChecker existenceChecker, StatVarState svState, + boolean shouldCheckObservationAbout, LogWrapper logCtx) { this.graph = graph; this.columns = columns; this.logCtx = logCtx; this.existenceChecker = existenceChecker; this.svState = svState; + this.shouldCheckObservationAbout = shouldCheckObservationAbout; } // Returns true if there was no sanity error found. 
@@ -244,8 +259,9 @@ private void checkSVObs(String nodeId, Mcf.McfGraph.PropertyValues node) throws IOException, InterruptedException { checkRequiredSingleValueProp( nodeId, node, Vocabulary.STAT_VAR_OBSERVATION_TYPE, Vocabulary.VARIABLE_MEASURED); - checkRequiredSingleValueProp( - nodeId, node, Vocabulary.STAT_VAR_OBSERVATION_TYPE, Vocabulary.OBSERVATION_ABOUT); + String observationAbout = + checkRequiredSingleValueProp( + nodeId, node, Vocabulary.STAT_VAR_OBSERVATION_TYPE, Vocabulary.OBSERVATION_ABOUT); String obsDate = checkRequiredSingleValueProp( nodeId, node, Vocabulary.STAT_VAR_OBSERVATION_TYPE, Vocabulary.OBSERVATION_DATE); @@ -269,6 +285,15 @@ private void checkSVObs(String nodeId, Mcf.McfGraph.PropertyValues node) node, Vocabulary.STAT_VAR_OBSERVATION_TYPE, Vocabulary.GENERIC_VALUE); + + if (shouldCheckObservationAbout && existenceChecker != null) { + LogCb logCb = + new LogCb(logCtx, Debug.Log.Level.LEVEL_ERROR, node) + .setDetail(LogCb.PREF_KEY, Vocabulary.OBSERVATION_ABOUT) + .setDetail(LogCb.NODE_KEY, observationAbout) + .setCounterSuffix(Vocabulary.OBSERVATION_ABOUT); + existenceChecker.submitNodeCheck(observationAbout, logCb); + } } private void checkLegacyPopulation(String nodeId, Mcf.McfGraph.PropertyValues node) { diff --git a/util/src/main/proto/Debug.proto b/util/src/main/proto/Debug.proto index bba78b95..4b66d4e5 100644 --- a/util/src/main/proto/Debug.proto +++ b/util/src/main/proto/Debug.proto @@ -83,6 +83,7 @@ message CommandArgs { optional int32 num_threads = 3; optional bool stat_checks = 4; repeated string sample_places = 5; + optional bool observation_about = 6; } message DataPoint {