From c39d2b4b211464dd2392530d59d22ca4fe6642bc Mon Sep 17 00:00:00 2001 From: Keyur Shah Date: Fri, 21 Jul 2023 17:00:01 -0700 Subject: [PATCH 1/6] Create a resolver to resolve nodes with coordinates. --- .../datacommons/util/CoordinatesResolver.java | 112 ++++++++++++++++++ .../util/CoordinatesResolverTest.java | 57 +++++++++ 2 files changed, 169 insertions(+) create mode 100644 util/src/main/java/org/datacommons/util/CoordinatesResolver.java create mode 100644 util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java diff --git a/util/src/main/java/org/datacommons/util/CoordinatesResolver.java b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java new file mode 100644 index 00000000..674bf2be --- /dev/null +++ b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java @@ -0,0 +1,112 @@ +package org.datacommons.util; + +import java.io.IOException; +import java.util.LinkedHashSet; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; +import org.datacommons.proto.Mcf.McfGraph.Values; +import org.datacommons.proto.Mcf.ValueType; +import org.datacommons.proto.Recon.ResolveCoordinatesRequest; +import org.datacommons.proto.Recon.ResolveCoordinatesRequest.Coordinate; +import org.datacommons.proto.Recon.ResolveCoordinatesResponse; + +/** Resolves nodes with lat-lngs by calling the DC coordinates resolution API. */ +public class CoordinatesResolver { + private final AtomicBoolean resolved = new AtomicBoolean(false); + + private final Set resolveCoordinates = ConcurrentHashMap.newKeySet(); + + private final ConcurrentHashMap> resolvedCoordinates = + new ConcurrentHashMap<>(); + + private final ReconClient client; + + public CoordinatesResolver(ReconClient client) { + this.client = client; + } + + public void submitNode(PropertyValues node) { + if (resolved.get()) { + throw new IllegalStateException("submitNode called after remote resolution."); + } + getCoordinate(node).ifPresent(resolveCoordinates::add); + } + + // TODO: Pick the ID based on a preferred list. + public String resolveNode(PropertyValues node) { + return getCoordinate(node) + .filter(resolvedCoordinates::containsKey) + .flatMap(coordinate -> resolvedCoordinates.get(coordinate).stream().findFirst()) + .orElse(""); + } + + // TODO: Support chunking in batches of max size 500. + public void remoteResolve() throws IOException, InterruptedException { + if (resolved.get()) { + throw new IllegalStateException("remoteResolve called after remote resolution."); + } + resolved.set(true); + + if (resolveCoordinates.isEmpty()) { + return; + } + + ResolveCoordinatesRequest request = + ResolveCoordinatesRequest.newBuilder().addAllCoordinates(resolveCoordinates).build(); + ResolveCoordinatesResponse response = client.resolveCoordinates(request); + populateResolvedCandidates(response); + } + + boolean isResolved() { + return resolved.get(); + } + + private void populateResolvedCandidates(ResolveCoordinatesResponse response) { + response + .getPlaceCoordinatesList() + .forEach( + placeCoordinate -> { + if (placeCoordinate.getPlaceDcidsCount() > 0) { + resolvedCoordinates.put( + Coordinate.newBuilder() + .setLatitude(placeCoordinate.getLatitude()) + .setLongitude(placeCoordinate.getLongitude()) + .build(), + new LinkedHashSet<>(placeCoordinate.getPlaceDcidsList())); + } + }); + } + + private static Optional getCoordinate(PropertyValues node) { + if (node.containsPvs(Vocabulary.LATITUDE) && node.containsPvs(Vocabulary.LONGITUDE)) { + + Optional optLat = getDoubleValue(node.getPvsMap().get(Vocabulary.LATITUDE)); + Optional optLng = getDoubleValue(node.getPvsMap().get(Vocabulary.LONGITUDE)); + + if (optLat.isPresent() && optLng.isPresent()) { + double lat = optLat.get(); + double lng = optLng.get(); + + if (!Double.isNaN(lat) && !Double.isNaN(lng)) { + return Optional.of(Coordinate.newBuilder().setLatitude(lat).setLongitude(lng).build()); + } + } + } + + return Optional.empty(); + } + + // TODO: Add support for other formats of values (e.g. 12.34N, 45.56W, etc.) + private static Optional getDoubleValue(Values prop) { + return prop.getTypedValuesList().stream() + .filter( + typedValue -> + ValueType.NUMBER.equals(typedValue.getType()) + || ValueType.TEXT.equals(typedValue.getType())) + .findFirst() + .map(typedValue -> Double.parseDouble(typedValue.getValue())); + } +} diff --git a/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java b/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java new file mode 100644 index 00000000..da6fa540 --- /dev/null +++ b/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java @@ -0,0 +1,57 @@ +package org.datacommons.util; + +import static com.google.common.truth.Truth.assertThat; +import static java.net.http.HttpClient.newHttpClient; +import static org.datacommons.util.Vocabulary.LATITUDE; +import static org.datacommons.util.Vocabulary.LONGITUDE; + +import java.util.List; +import java.util.Map; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; +import org.datacommons.proto.Mcf.ValueType; +import org.junit.Test; + +public class CoordinatesResolverTest { + private static final PropertyValues SF = + newNode("City", Map.of(LATITUDE, "37.77493", LONGITUDE, "-122.41942")); + private static final String SF_ZIP_DCID = "zip/94103"; + + private static final PropertyValues BIG_BEN = + newNode("Place", Map.of(LATITUDE, "51.510357", LONGITUDE, "-0.116773")); + private static final String BIG_BEN_NUTS_DCID = "nuts/UKI32"; + + private static final PropertyValues UNSUBMITTED_NODE = + newNode("City", Map.of(LATITUDE, "12.34", LONGITUDE, "56.78")); + + private static final List TEST_NODES = List.of(SF, BIG_BEN); + + @Test + public void endToEnd() throws Exception { + CoordinatesResolver resolver = new CoordinatesResolver(new ReconClient(newHttpClient())); + + assertThat(resolver.isResolved()).isFalse(); + + for (PropertyValues node : TEST_NODES) { + resolver.submitNode(node); + } + + assertThat(resolver.isResolved()).isFalse(); + + resolver.remoteResolve(); + + assertThat(resolver.isResolved()).isTrue(); + + assertThat(resolver.resolveNode(SF)).isEqualTo(SF_ZIP_DCID); + assertThat(resolver.resolveNode(BIG_BEN)).isEqualTo(BIG_BEN_NUTS_DCID); + assertThat(resolver.resolveNode(UNSUBMITTED_NODE)).isEmpty(); + } + + private static PropertyValues newNode(String typeOf, Map props) { + PropertyValues.Builder node = PropertyValues.newBuilder(); + node.putPvs(Vocabulary.TYPE_OF, McfUtil.newValues(ValueType.RESOLVED_REF, typeOf)); + for (var pv : props.entrySet()) { + node.putPvs(pv.getKey(), McfUtil.newValues(ValueType.TEXT, pv.getValue())); + } + return node.build(); + } +} From 1429921056abc8b38b9f3c0feb87279ba1b851f9 Mon Sep 17 00:00:00 2001 From: Keyur Shah Date: Wed, 2 Aug 2023 12:53:04 -0700 Subject: [PATCH 2/6] Resolve chunks of 500 in parallel. --- .../java/org/datacommons/proto/Recon.java | 1164 ++++++++++++++++- .../datacommons/util/CoordinatesResolver.java | 73 +- .../org/datacommons/util/ReconClient.java | 29 +- util/src/main/proto/recon.proto | 87 +- .../util/CoordinatesResolverTest.java | 29 +- .../org/datacommons/util/ReconClientTest.java | 2 +- 6 files changed, 1302 insertions(+), 82 deletions(-) diff --git a/util/src/main/java/org/datacommons/proto/Recon.java b/util/src/main/java/org/datacommons/proto/Recon.java index adf1a100..134f1ed0 100644 --- a/util/src/main/java/org/datacommons/proto/Recon.java +++ b/util/src/main/java/org/datacommons/proto/Recon.java @@ -11439,6 +11439,743 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { org.datacommons.proto.Recon.ResolveCoordinatesResponse.Builder.class); } + public interface PlaceOrBuilder + extends + // @@protoc_insertion_point(interface_extends:org.datacommons.proto.ResolveCoordinatesResponse.Place) + com.google.protobuf.MessageOrBuilder { + + /** + * string dcid = 1; + * + * @return The dcid. + */ + java.lang.String getDcid(); + /** + * string dcid = 1; + * + * @return The bytes for dcid. + */ + com.google.protobuf.ByteString getDcidBytes(); + + /** + * string dominant_type = 2; + * + * @return The dominantType. + */ + java.lang.String getDominantType(); + /** + * string dominant_type = 2; + * + * @return The bytes for dominantType. + */ + com.google.protobuf.ByteString getDominantTypeBytes(); + } + /** Protobuf type {@code org.datacommons.proto.ResolveCoordinatesResponse.Place} */ + public static final class Place extends com.google.protobuf.GeneratedMessageV3 + implements + // @@protoc_insertion_point(message_implements:org.datacommons.proto.ResolveCoordinatesResponse.Place) + PlaceOrBuilder { + private static final long serialVersionUID = 0L; + // Use Place.newBuilder() to construct. + private Place(com.google.protobuf.GeneratedMessageV3.Builder builder) { + super(builder); + } + + private Place() { + dcid_ = ""; + dominantType_ = ""; + } + + @java.lang.Override + @SuppressWarnings({"unused"}) + protected java.lang.Object newInstance(UnusedPrivateParameter unused) { + return new Place(); + } + + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet getUnknownFields() { + return this.unknownFields; + } + + private Place( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + this(); + if (extensionRegistry == null) { + throw new java.lang.NullPointerException(); + } + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + case 10: + { + java.lang.String s = input.readStringRequireUtf8(); + + dcid_ = s; + break; + } + case 18: + { + java.lang.String s = input.readStringRequireUtf8(); + + dominantType_ = s; + break; + } + default: + { + if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) { + done = true; + } + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException(e) + .setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { + return org.datacommons.proto.Recon + .internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_descriptor; + } + + @java.lang.Override + protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.datacommons.proto.Recon + .internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.class, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder.class); + } + + public static final int DCID_FIELD_NUMBER = 1; + private volatile java.lang.Object dcid_; + /** + * string dcid = 1; + * + * @return The dcid. + */ + public java.lang.String getDcid() { + java.lang.Object ref = dcid_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + dcid_ = s; + return s; + } + } + /** + * string dcid = 1; + * + * @return The bytes for dcid. + */ + public com.google.protobuf.ByteString getDcidBytes() { + java.lang.Object ref = dcid_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + dcid_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int DOMINANT_TYPE_FIELD_NUMBER = 2; + private volatile java.lang.Object dominantType_; + /** + * string dominant_type = 2; + * + * @return The dominantType. + */ + public java.lang.String getDominantType() { + java.lang.Object ref = dominantType_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + dominantType_ = s; + return s; + } + } + /** + * string dominant_type = 2; + * + * @return The bytes for dominantType. + */ + public com.google.protobuf.ByteString getDominantTypeBytes() { + java.lang.Object ref = dominantType_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + dominantType_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private byte memoizedIsInitialized = -1; + + @java.lang.Override + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized == 1) return true; + if (isInitialized == 0) return false; + + memoizedIsInitialized = 1; + return true; + } + + @java.lang.Override + public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException { + if (!getDcidBytes().isEmpty()) { + com.google.protobuf.GeneratedMessageV3.writeString(output, 1, dcid_); + } + if (!getDominantTypeBytes().isEmpty()) { + com.google.protobuf.GeneratedMessageV3.writeString(output, 2, dominantType_); + } + unknownFields.writeTo(output); + } + + @java.lang.Override + public int getSerializedSize() { + int size = memoizedSize; + if (size != -1) return size; + + size = 0; + if (!getDcidBytes().isEmpty()) { + size += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, dcid_); + } + if (!getDominantTypeBytes().isEmpty()) { + size += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, dominantType_); + } + size += unknownFields.getSerializedSize(); + memoizedSize = size; + return size; + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place)) { + return super.equals(obj); + } + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place other = + (org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place) obj; + + if (!getDcid().equals(other.getDcid())) return false; + if (!getDominantType().equals(other.getDominantType())) return false; + if (!unknownFields.equals(other.unknownFields)) return false; + return true; + } + + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptor().hashCode(); + hash = (37 * hash) + DCID_FIELD_NUMBER; + hash = (53 * hash) + getDcid().hashCode(); + hash = (37 * hash) + DOMINANT_TYPE_FIELD_NUMBER; + hash = (53 * hash) + getDominantType().hashCode(); + hash = (29 * hash) + unknownFields.hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + java.nio.ByteBuffer data) throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + byte[] data) throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + java.io.InputStream input) throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseDelimitedFrom( + java.io.InputStream input) throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(PARSER, input); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseDelimitedFrom( + java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException( + PARSER, input, extensionRegistry); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + com.google.protobuf.CodedInputStream input) throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + @java.lang.Override + public Builder newBuilderForType() { + return newBuilder(); + } + + public static Builder newBuilder() { + return DEFAULT_INSTANCE.toBuilder(); + } + + public static Builder newBuilder( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place prototype) { + return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype); + } + + @java.lang.Override + public Builder toBuilder() { + return this == DEFAULT_INSTANCE ? new Builder() : new Builder().mergeFrom(this); + } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** Protobuf type {@code org.datacommons.proto.ResolveCoordinatesResponse.Place} */ + public static final class Builder + extends com.google.protobuf.GeneratedMessageV3.Builder + implements + // @@protoc_insertion_point(builder_implements:org.datacommons.proto.ResolveCoordinatesResponse.Place) + org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { + return org.datacommons.proto.Recon + .internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_descriptor; + } + + @java.lang.Override + protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.datacommons.proto.Recon + .internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.class, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder.class); + } + + // Construct using org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) {} + } + + @java.lang.Override + public Builder clear() { + super.clear(); + dcid_ = ""; + + dominantType_ = ""; + + return this; + } + + @java.lang.Override + public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { + return org.datacommons.proto.Recon + .internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_descriptor; + } + + @java.lang.Override + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place + getDefaultInstanceForType() { + return org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.getDefaultInstance(); + } + + @java.lang.Override + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place build() { + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + @java.lang.Override + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place buildPartial() { + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place result = + new org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place(this); + result.dcid_ = dcid_; + result.dominantType_ = dominantType_; + onBuilt(); + return result; + } + + @java.lang.Override + public Builder clone() { + return super.clone(); + } + + @java.lang.Override + public Builder setField( + com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { + return super.setField(field, value); + } + + @java.lang.Override + public Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field) { + return super.clearField(field); + } + + @java.lang.Override + public Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof) { + return super.clearOneof(oneof); + } + + @java.lang.Override + public Builder setRepeatedField( + com.google.protobuf.Descriptors.FieldDescriptor field, + int index, + java.lang.Object value) { + return super.setRepeatedField(field, index, value); + } + + @java.lang.Override + public Builder addRepeatedField( + com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { + return super.addRepeatedField(field, value); + } + + @java.lang.Override + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place) { + return mergeFrom((org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place) other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place other) { + if (other + == org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.getDefaultInstance()) + return this; + if (!other.getDcid().isEmpty()) { + dcid_ = other.dcid_; + onChanged(); + } + if (!other.getDominantType().isEmpty()) { + dominantType_ = other.dominantType_; + onChanged(); + } + this.mergeUnknownFields(other.unknownFields); + onChanged(); + return this; + } + + @java.lang.Override + public final boolean isInitialized() { + return true; + } + + @java.lang.Override + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = + (org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place) + e.getUnfinishedMessage(); + throw e.unwrapIOException(); + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + + private java.lang.Object dcid_ = ""; + /** + * string dcid = 1; + * + * @return The dcid. + */ + public java.lang.String getDcid() { + java.lang.Object ref = dcid_; + if (!(ref instanceof java.lang.String)) { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + dcid_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * string dcid = 1; + * + * @return The bytes for dcid. + */ + public com.google.protobuf.ByteString getDcidBytes() { + java.lang.Object ref = dcid_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + dcid_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * string dcid = 1; + * + * @param value The dcid to set. + * @return This builder for chaining. + */ + public Builder setDcid(java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + + dcid_ = value; + onChanged(); + return this; + } + /** + * string dcid = 1; + * + * @return This builder for chaining. + */ + public Builder clearDcid() { + + dcid_ = getDefaultInstance().getDcid(); + onChanged(); + return this; + } + /** + * string dcid = 1; + * + * @param value The bytes for dcid to set. + * @return This builder for chaining. + */ + public Builder setDcidBytes(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + checkByteStringIsUtf8(value); + + dcid_ = value; + onChanged(); + return this; + } + + private java.lang.Object dominantType_ = ""; + /** + * string dominant_type = 2; + * + * @return The dominantType. + */ + public java.lang.String getDominantType() { + java.lang.Object ref = dominantType_; + if (!(ref instanceof java.lang.String)) { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + dominantType_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * string dominant_type = 2; + * + * @return The bytes for dominantType. + */ + public com.google.protobuf.ByteString getDominantTypeBytes() { + java.lang.Object ref = dominantType_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + dominantType_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * string dominant_type = 2; + * + * @param value The dominantType to set. + * @return This builder for chaining. + */ + public Builder setDominantType(java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + + dominantType_ = value; + onChanged(); + return this; + } + /** + * string dominant_type = 2; + * + * @return This builder for chaining. + */ + public Builder clearDominantType() { + + dominantType_ = getDefaultInstance().getDominantType(); + onChanged(); + return this; + } + /** + * string dominant_type = 2; + * + * @param value The bytes for dominantType to set. + * @return This builder for chaining. + */ + public Builder setDominantTypeBytes(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + checkByteStringIsUtf8(value); + + dominantType_ = value; + onChanged(); + return this; + } + + @java.lang.Override + public final Builder setUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return super.setUnknownFields(unknownFields); + } + + @java.lang.Override + public final Builder mergeUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return super.mergeUnknownFields(unknownFields); + } + + // @@protoc_insertion_point(builder_scope:org.datacommons.proto.ResolveCoordinatesResponse.Place) + } + + // @@protoc_insertion_point(class_scope:org.datacommons.proto.ResolveCoordinatesResponse.Place) + private static final org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place + DEFAULT_INSTANCE; + + static { + DEFAULT_INSTANCE = new org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place(); + } + + public static org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place + getDefaultInstance() { + return DEFAULT_INSTANCE; + } + + private static final com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + @java.lang.Override + public Place parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new Place(input, extensionRegistry); + } + }; + + public static com.google.protobuf.Parser parser() { + return PARSER; + } + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + @java.lang.Override + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place + getDefaultInstanceForType() { + return DEFAULT_INSTANCE; + } + } + public interface PlaceCoordinateOrBuilder extends // @@protoc_insertion_point(interface_extends:org.datacommons.proto.ResolveCoordinatesResponse.PlaceCoordinate) @@ -11484,6 +12221,30 @@ public interface PlaceCoordinateOrBuilder * @return The bytes of the placeDcids at the given index. */ com.google.protobuf.ByteString getPlaceDcidsBytes(int index); + + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + java.util.List getPlacesList(); + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place getPlaces(int index); + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + int getPlacesCount(); + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + java.util.List< + ? extends org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder> + getPlacesOrBuilderList(); + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder getPlacesOrBuilder( + int index); } /** Protobuf type {@code org.datacommons.proto.ResolveCoordinatesResponse.PlaceCoordinate} */ public static final class PlaceCoordinate extends com.google.protobuf.GeneratedMessageV3 @@ -11498,6 +12259,7 @@ private PlaceCoordinate(com.google.protobuf.GeneratedMessageV3.Builder builde private PlaceCoordinate() { placeDcids_ = com.google.protobuf.LazyStringArrayList.EMPTY; + places_ = java.util.Collections.emptyList(); } @java.lang.Override @@ -11550,6 +12312,20 @@ private PlaceCoordinate( placeDcids_.add(s); break; } + case 34: + { + if (!((mutable_bitField0_ & 0x00000002) != 0)) { + places_ = + new java.util.ArrayList< + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place>(); + mutable_bitField0_ |= 0x00000002; + } + places_.add( + input.readMessage( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.parser(), + extensionRegistry)); + break; + } default: { if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) { @@ -11568,6 +12344,9 @@ private PlaceCoordinate( if (((mutable_bitField0_ & 0x00000001) != 0)) { placeDcids_ = placeDcids_.getUnmodifiableView(); } + if (((mutable_bitField0_ & 0x00000002) != 0)) { + places_ = java.util.Collections.unmodifiableList(places_); + } this.unknownFields = unknownFields.build(); makeExtensionsImmutable(); } @@ -11648,6 +12427,43 @@ public com.google.protobuf.ByteString getPlaceDcidsBytes(int index) { return placeDcids_.getByteString(index); } + public static final int PLACES_FIELD_NUMBER = 4; + private java.util.List places_; + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public java.util.List + getPlacesList() { + return places_; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public java.util.List< + ? extends org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder> + getPlacesOrBuilderList() { + return places_; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public int getPlacesCount() { + return places_.size(); + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place getPlaces(int index) { + return places_.get(index); + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder + getPlacesOrBuilder(int index) { + return places_.get(index); + } + private byte memoizedIsInitialized = -1; @java.lang.Override @@ -11671,6 +12487,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io for (int i = 0; i < placeDcids_.size(); i++) { com.google.protobuf.GeneratedMessageV3.writeString(output, 3, placeDcids_.getRaw(i)); } + for (int i = 0; i < places_.size(); i++) { + output.writeMessage(4, places_.get(i)); + } unknownFields.writeTo(output); } @@ -11694,6 +12513,9 @@ public int getSerializedSize() { size += dataSize; size += 1 * getPlaceDcidsList().size(); } + for (int i = 0; i < places_.size(); i++) { + size += com.google.protobuf.CodedOutputStream.computeMessageSize(4, places_.get(i)); + } size += unknownFields.getSerializedSize(); memoizedSize = size; return size; @@ -11716,6 +12538,7 @@ public boolean equals(final java.lang.Object obj) { if (java.lang.Double.doubleToLongBits(getLongitude()) != java.lang.Double.doubleToLongBits(other.getLongitude())) return false; if (!getPlaceDcidsList().equals(other.getPlaceDcidsList())) return false; + if (!getPlacesList().equals(other.getPlacesList())) return false; if (!unknownFields.equals(other.unknownFields)) return false; return true; } @@ -11741,6 +12564,10 @@ public int hashCode() { hash = (37 * hash) + PLACE_DCIDS_FIELD_NUMBER; hash = (53 * hash) + getPlaceDcidsList().hashCode(); } + if (getPlacesCount() > 0) { + hash = (37 * hash) + PLACES_FIELD_NUMBER; + hash = (53 * hash) + getPlacesList().hashCode(); + } hash = (29 * hash) + unknownFields.hashCode(); memoizedHashCode = hash; return hash; @@ -11885,7 +12712,9 @@ private Builder(com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { } private void maybeForceBuilderInitialization() { - if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) {} + if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) { + getPlacesFieldBuilder(); + } } @java.lang.Override @@ -11897,6 +12726,12 @@ public Builder clear() { placeDcids_ = com.google.protobuf.LazyStringArrayList.EMPTY; bitField0_ = (bitField0_ & ~0x00000001); + if (placesBuilder_ == null) { + places_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000002); + } else { + placesBuilder_.clear(); + } return this; } @@ -11936,6 +12771,15 @@ public org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceCoordinate bu bitField0_ = (bitField0_ & ~0x00000001); } result.placeDcids_ = placeDcids_; + if (placesBuilder_ == null) { + if (((bitField0_ & 0x00000002) != 0)) { + places_ = java.util.Collections.unmodifiableList(places_); + bitField0_ = (bitField0_ & ~0x00000002); + } + result.places_ = places_; + } else { + result.places_ = placesBuilder_.build(); + } onBuilt(); return result; } @@ -12008,6 +12852,33 @@ public Builder mergeFrom( } onChanged(); } + if (placesBuilder_ == null) { + if (!other.places_.isEmpty()) { + if (places_.isEmpty()) { + places_ = other.places_; + bitField0_ = (bitField0_ & ~0x00000002); + } else { + ensurePlacesIsMutable(); + places_.addAll(other.places_); + } + onChanged(); + } + } else { + if (!other.places_.isEmpty()) { + if (placesBuilder_.isEmpty()) { + placesBuilder_.dispose(); + placesBuilder_ = null; + places_ = other.places_; + bitField0_ = (bitField0_ & ~0x00000002); + placesBuilder_ = + com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders + ? getPlacesFieldBuilder() + : null; + } else { + placesBuilder_.addAllMessages(other.places_); + } + } + } this.mergeUnknownFields(other.unknownFields); onChanged(); return this; @@ -12222,6 +13093,268 @@ public Builder addPlaceDcidsBytes(com.google.protobuf.ByteString value) { return this; } + private java.util.List + places_ = java.util.Collections.emptyList(); + + private void ensurePlacesIsMutable() { + if (!((bitField0_ & 0x00000002) != 0)) { + places_ = + new java.util.ArrayList< + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place>(places_); + bitField0_ |= 0x00000002; + } + } + + private com.google.protobuf.RepeatedFieldBuilderV3< + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder> + placesBuilder_; + + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public java.util.List + getPlacesList() { + if (placesBuilder_ == null) { + return java.util.Collections.unmodifiableList(places_); + } else { + return placesBuilder_.getMessageList(); + } + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public int getPlacesCount() { + if (placesBuilder_ == null) { + return places_.size(); + } else { + return placesBuilder_.getCount(); + } + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place getPlaces(int index) { + if (placesBuilder_ == null) { + return places_.get(index); + } else { + return placesBuilder_.getMessage(index); + } + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder setPlaces( + int index, org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place value) { + if (placesBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensurePlacesIsMutable(); + places_.set(index, value); + onChanged(); + } else { + placesBuilder_.setMessage(index, value); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder setPlaces( + int index, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder builderForValue) { + if (placesBuilder_ == null) { + ensurePlacesIsMutable(); + places_.set(index, builderForValue.build()); + onChanged(); + } else { + placesBuilder_.setMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder addPlaces( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place value) { + if (placesBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensurePlacesIsMutable(); + places_.add(value); + onChanged(); + } else { + placesBuilder_.addMessage(value); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder addPlaces( + int index, org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place value) { + if (placesBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensurePlacesIsMutable(); + places_.add(index, value); + onChanged(); + } else { + placesBuilder_.addMessage(index, value); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder addPlaces( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder builderForValue) { + if (placesBuilder_ == null) { + ensurePlacesIsMutable(); + places_.add(builderForValue.build()); + onChanged(); + } else { + placesBuilder_.addMessage(builderForValue.build()); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder addPlaces( + int index, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder builderForValue) { + if (placesBuilder_ == null) { + ensurePlacesIsMutable(); + places_.add(index, builderForValue.build()); + onChanged(); + } else { + placesBuilder_.addMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder addAllPlaces( + java.lang.Iterable< + ? extends org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place> + values) { + if (placesBuilder_ == null) { + ensurePlacesIsMutable(); + com.google.protobuf.AbstractMessageLite.Builder.addAll(values, places_); + onChanged(); + } else { + placesBuilder_.addAllMessages(values); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder clearPlaces() { + if (placesBuilder_ == null) { + places_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000002); + onChanged(); + } else { + placesBuilder_.clear(); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public Builder removePlaces(int index) { + if (placesBuilder_ == null) { + ensurePlacesIsMutable(); + places_.remove(index); + onChanged(); + } else { + placesBuilder_.remove(index); + } + return this; + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder + getPlacesBuilder(int index) { + return getPlacesFieldBuilder().getBuilder(index); + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder + getPlacesOrBuilder(int index) { + if (placesBuilder_ == null) { + return places_.get(index); + } else { + return placesBuilder_.getMessageOrBuilder(index); + } + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public java.util.List< + ? extends org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder> + getPlacesOrBuilderList() { + if (placesBuilder_ != null) { + return placesBuilder_.getMessageOrBuilderList(); + } else { + return java.util.Collections.unmodifiableList(places_); + } + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder + addPlacesBuilder() { + return getPlacesFieldBuilder() + .addBuilder( + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place + .getDefaultInstance()); + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder + addPlacesBuilder(int index) { + return getPlacesFieldBuilder() + .addBuilder( + index, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place + .getDefaultInstance()); + } + /** + * repeated .org.datacommons.proto.ResolveCoordinatesResponse.Place places = 4; + */ + public java.util.List + getPlacesBuilderList() { + return getPlacesFieldBuilder().getBuilderList(); + } + + private com.google.protobuf.RepeatedFieldBuilderV3< + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder> + getPlacesFieldBuilder() { + if (placesBuilder_ == null) { + placesBuilder_ = + new com.google.protobuf.RepeatedFieldBuilderV3< + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place.Builder, + org.datacommons.proto.Recon.ResolveCoordinatesResponse.PlaceOrBuilder>( + places_, ((bitField0_ & 0x00000002) != 0), getParentForChildren(), isClean()); + places_ = null; + } + return placesBuilder_; + } + @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { @@ -13102,6 +14235,10 @@ public org.datacommons.proto.Recon.ResolveCoordinatesResponse getDefaultInstance internal_static_org_datacommons_proto_ResolveCoordinatesResponse_descriptor; private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internal_static_org_datacommons_proto_ResolveCoordinatesResponse_fieldAccessorTable; + private static final com.google.protobuf.Descriptors.Descriptor + internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_descriptor; + private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_fieldAccessorTable; private static final com.google.protobuf.Descriptors.Descriptor internal_static_org_datacommons_proto_ResolveCoordinatesResponse_PlaceCoordinate_descriptor; private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable @@ -13147,12 +14284,15 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { + "dinatesRequest\022P\n\013coordinates\030\001 \003(\0132;.or" + "g.datacommons.proto.ResolveCoordinatesRe" + "quest.Coordinate\0321\n\nCoordinate\022\020\n\010latitu" - + "de\030\001 \001(\001\022\021\n\tlongitude\030\002 \001(\001\"\307\001\n\032ResolveC" + + "de\030\001 \001(\001\022\021\n\tlongitude\030\002 \001(\001\"\277\002\n\032ResolveC" + "oordinatesResponse\022\\\n\021place_coordinates\030" + "\001 \003(\0132A.org.datacommons.proto.ResolveCoo" - + "rdinatesResponse.PlaceCoordinate\032K\n\017Plac" - + "eCoordinate\022\020\n\010latitude\030\001 \001(\001\022\021\n\tlongitu" - + "de\030\002 \001(\001\022\023\n\013place_dcids\030\003 \003(\tb\006proto3" + + "rdinatesResponse.PlaceCoordinate\032,\n\005Plac" + + "e\022\014\n\004dcid\030\001 \001(\t\022\025\n\rdominant_type\030\002 \001(\t\032\224" + + "\001\n\017PlaceCoordinate\022\020\n\010latitude\030\001 \001(\001\022\021\n\t" + + "longitude\030\002 \001(\001\022\023\n\013place_dcids\030\003 \003(\t\022G\n\006" + + "places\030\004 \003(\01327.org.datacommons.proto.Res" + + "olveCoordinatesResponse.Placeb\006proto3" }; descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom( @@ -13280,15 +14420,25 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { new java.lang.String[] { "PlaceCoordinates", }); - internal_static_org_datacommons_proto_ResolveCoordinatesResponse_PlaceCoordinate_descriptor = + internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_descriptor = internal_static_org_datacommons_proto_ResolveCoordinatesResponse_descriptor .getNestedTypes() .get(0); + internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_fieldAccessorTable = + new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( + internal_static_org_datacommons_proto_ResolveCoordinatesResponse_Place_descriptor, + new java.lang.String[] { + "Dcid", "DominantType", + }); + internal_static_org_datacommons_proto_ResolveCoordinatesResponse_PlaceCoordinate_descriptor = + internal_static_org_datacommons_proto_ResolveCoordinatesResponse_descriptor + .getNestedTypes() + .get(1); internal_static_org_datacommons_proto_ResolveCoordinatesResponse_PlaceCoordinate_fieldAccessorTable = new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( internal_static_org_datacommons_proto_ResolveCoordinatesResponse_PlaceCoordinate_descriptor, new java.lang.String[] { - "Latitude", "Longitude", "PlaceDcids", + "Latitude", "Longitude", "PlaceDcids", "Places", }); org.datacommons.proto.Mcf.getDescriptor(); } diff --git a/util/src/main/java/org/datacommons/util/CoordinatesResolver.java b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java index 674bf2be..ab2d4bff 100644 --- a/util/src/main/java/org/datacommons/util/CoordinatesResolver.java +++ b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java @@ -1,9 +1,11 @@ package org.datacommons.util; +import static java.util.stream.Collectors.toList; + +import com.google.common.collect.Lists; import java.io.IOException; -import java.util.LinkedHashSet; -import java.util.Optional; -import java.util.Set; +import java.util.*; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import org.datacommons.proto.Mcf.McfGraph.PropertyValues; @@ -12,9 +14,20 @@ import org.datacommons.proto.Recon.ResolveCoordinatesRequest; import org.datacommons.proto.Recon.ResolveCoordinatesRequest.Coordinate; import org.datacommons.proto.Recon.ResolveCoordinatesResponse; - -/** Resolves nodes with lat-lngs by calling the DC coordinates resolution API. */ +import org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place; + +/** + * Resolves nodes with lat-lngs by calling the DC coordinates resolution API. + * + *

The resolver should be called in 3 phases: + *

  • 1. submitNode - submit nodes to be resolved in this phase. + *
  • 2. resolve - nodes will be resolved by invoking the recon API in this phase. + *
  • 3. getResolvedNode - query the resolver to get the resolved DCID in this phase. + */ public class CoordinatesResolver { + private static final int DEFAULT_CHUNK_SIZE = 500; + + private final int chunkSize; private final AtomicBoolean resolved = new AtomicBoolean(false); private final Set resolveCoordinates = ConcurrentHashMap.newKeySet(); @@ -24,8 +37,13 @@ public class CoordinatesResolver { private final ReconClient client; - public CoordinatesResolver(ReconClient client) { + public CoordinatesResolver(ReconClient client, int chunkSize) { this.client = client; + this.chunkSize = chunkSize; + } + + public CoordinatesResolver(ReconClient client) { + this(client, DEFAULT_CHUNK_SIZE); } public void submitNode(PropertyValues node) { @@ -36,28 +54,42 @@ public void submitNode(PropertyValues node) { } // TODO: Pick the ID based on a preferred list. - public String resolveNode(PropertyValues node) { + public String getResolvedNode(PropertyValues node) { return getCoordinate(node) .filter(resolvedCoordinates::containsKey) .flatMap(coordinate -> resolvedCoordinates.get(coordinate).stream().findFirst()) .orElse(""); } - // TODO: Support chunking in batches of max size 500. - public void remoteResolve() throws IOException, InterruptedException { - if (resolved.get()) { - throw new IllegalStateException("remoteResolve called after remote resolution."); + public CompletableFuture resolve() { + if (resolved.getAndSet(true)) { + throw new IllegalStateException("execute called after remote resolution."); } - resolved.set(true); if (resolveCoordinates.isEmpty()) { - return; + return CompletableFuture.completedFuture(null); } - ResolveCoordinatesRequest request = - ResolveCoordinatesRequest.newBuilder().addAllCoordinates(resolveCoordinates).build(); - ResolveCoordinatesResponse response = client.resolveCoordinates(request); - populateResolvedCandidates(response); + List> chunks = Lists.partition(new ArrayList<>(resolveCoordinates), chunkSize); + return CompletableFuture.allOf( + chunks.stream() + .map( + chunk -> { + try { + return client + .resolveCoordinates( + ResolveCoordinatesRequest.newBuilder().addAllCoordinates(chunk).build()) + .thenApply( + response -> { + populateResolvedCandidates(response); + return null; + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }) + .collect(toList()) + .toArray(new CompletableFuture[0])); } boolean isResolved() { @@ -69,13 +101,16 @@ private void populateResolvedCandidates(ResolveCoordinatesResponse response) { .getPlaceCoordinatesList() .forEach( placeCoordinate -> { - if (placeCoordinate.getPlaceDcidsCount() > 0) { + if (placeCoordinate.getPlacesCount() > 0) { resolvedCoordinates.put( Coordinate.newBuilder() .setLatitude(placeCoordinate.getLatitude()) .setLongitude(placeCoordinate.getLongitude()) .build(), - new LinkedHashSet<>(placeCoordinate.getPlaceDcidsList())); + new LinkedHashSet<>( + placeCoordinate.getPlacesList().stream() + .map(Place::getDcid) + .collect(toList()))); } }); } diff --git a/util/src/main/java/org/datacommons/util/ReconClient.java b/util/src/main/java/org/datacommons/util/ReconClient.java index 7c7d3d72..66fb1e6e 100644 --- a/util/src/main/java/org/datacommons/util/ReconClient.java +++ b/util/src/main/java/org/datacommons/util/ReconClient.java @@ -2,6 +2,7 @@ import static java.net.http.HttpClient.Version.HTTP_1_1; +import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Message; import com.google.protobuf.util.JsonFormat; import java.io.IOException; @@ -10,6 +11,7 @@ import java.net.http.HttpRequest; import java.net.http.HttpRequest.BodyPublishers; import java.net.http.HttpResponse.BodyHandlers; +import java.util.concurrent.CompletableFuture; import org.datacommons.proto.Recon.ResolveCoordinatesRequest; import org.datacommons.proto.Recon.ResolveCoordinatesResponse; @@ -28,24 +30,31 @@ public ReconClient(HttpClient httpClient) { this.httpClient = httpClient; } - public ResolveCoordinatesResponse resolveCoordinates(ResolveCoordinatesRequest request) - throws IOException, InterruptedException { + public CompletableFuture resolveCoordinates( + ResolveCoordinatesRequest request) throws IOException { return callApi( RESOLVE_COORDINATES_API_URL, request, ResolveCoordinatesResponse.getDefaultInstance()); } - private T callApi( - String apiUrl, Message requestMessage, T responseDefaultInstance) - throws IOException, InterruptedException { - var request = + private CompletableFuture callApi( + String apiUrl, Message requestMessage, T responseDefaultInstance) throws IOException { + HttpRequest request = HttpRequest.newBuilder(URI.create(apiUrl)) .version(HTTP_1_1) .header("accept", "application/json") .POST(BodyPublishers.ofString(StringUtil.msgToJson(requestMessage))) .build(); - var response = httpClient.send(request, BodyHandlers.ofString()); - var responseMessageBuilder = responseDefaultInstance.newBuilderForType(); - JsonFormat.parser().merge(response.body().trim(), responseMessageBuilder); - return (T) responseMessageBuilder.build(); + return httpClient + .sendAsync(request, BodyHandlers.ofString()) + .thenApply( + response -> { + Message.Builder responseMessageBuilder = responseDefaultInstance.newBuilderForType(); + try { + JsonFormat.parser().merge(response.body().trim(), responseMessageBuilder); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException(e); + } + return (T) responseMessageBuilder.build(); + }); } } diff --git a/util/src/main/proto/recon.proto b/util/src/main/proto/recon.proto index 613cbe56..93607fbb 100644 --- a/util/src/main/proto/recon.proto +++ b/util/src/main/proto/recon.proto @@ -23,73 +23,78 @@ package org.datacommons.proto; import "Mcf.proto"; message IdWithProperty { - string prop = 1; - string val = 2; + string prop = 1; + string val = 2; } message EntityIds { - repeated IdWithProperty ids = 1; + repeated IdWithProperty ids = 1; } // An entity is represented by a subgraph, which contains itself and its neighbors. message EntitySubGraph { - // REQUIRED: source_id must be a key within `sub_graph.nodes`, or one of the `ids`. - string source_id = 1; - oneof graph_representation { - McfGraph sub_graph = 2; - EntityIds entity_ids = 3; - } + // REQUIRED: source_id must be a key within `sub_graph.nodes`, or one of the `ids`. + string source_id = 1; + oneof graph_representation { + McfGraph sub_graph = 2; + EntityIds entity_ids = 3; + } } message EntityPair { - EntitySubGraph entity_one = 1; - EntitySubGraph entity_two = 2; + EntitySubGraph entity_one = 1; + EntitySubGraph entity_two = 2; } message CompareEntitiesRequest { - repeated EntityPair entity_pairs = 1; + repeated EntityPair entity_pairs = 1; } message CompareEntitiesResponse { - message Comparison { - // Must have two source_ids, one for each entity. - repeated string source_ids = 1; - double probability = 2; - } - repeated Comparison comparisons = 1; + message Comparison { + // Must have two source_ids, one for each entity. + repeated string source_ids = 1; + double probability = 2; + } + repeated Comparison comparisons = 1; } message ResolveEntitiesRequest { - repeated EntitySubGraph entities = 1; - // The properties of IDs to find. If empty, all known IDs are returned. - repeated string wanted_id_properties = 2; + repeated EntitySubGraph entities = 1; + // The properties of IDs to find. If empty, all known IDs are returned. + repeated string wanted_id_properties = 2; } message ResolveEntitiesResponse { - message ResolvedId { - repeated IdWithProperty ids = 1; - double probability = 2; - } - message ResolvedEntity { - string source_id = 1; - repeated ResolvedId resolved_ids = 2; - } - repeated ResolvedEntity resolved_entities = 1; + message ResolvedId { + repeated IdWithProperty ids = 1; + double probability = 2; + } + message ResolvedEntity { + string source_id = 1; + repeated ResolvedId resolved_ids = 2; + } + repeated ResolvedEntity resolved_entities = 1; } message ResolveCoordinatesRequest { - message Coordinate { - double latitude = 1; - double longitude = 2; - } - repeated Coordinate coordinates = 1; + message Coordinate { + double latitude = 1; + double longitude = 2; + } + repeated Coordinate coordinates = 1; } message ResolveCoordinatesResponse { - message PlaceCoordinate { - double latitude = 1; - double longitude = 2; - repeated string place_dcids = 3; - } - repeated PlaceCoordinate place_coordinates = 1; + message Place { + string dcid = 1; + string dominant_type = 2; + } + message PlaceCoordinate { + double latitude = 1; + double longitude = 2; + repeated string place_dcids = 3; + repeated Place places = 4; + } + repeated PlaceCoordinate place_coordinates = 1; } diff --git a/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java b/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java index da6fa540..9efcc58e 100644 --- a/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java +++ b/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java @@ -37,13 +37,34 @@ public void endToEnd() throws Exception { assertThat(resolver.isResolved()).isFalse(); - resolver.remoteResolve(); + resolver.resolve().get(); assertThat(resolver.isResolved()).isTrue(); - assertThat(resolver.resolveNode(SF)).isEqualTo(SF_ZIP_DCID); - assertThat(resolver.resolveNode(BIG_BEN)).isEqualTo(BIG_BEN_NUTS_DCID); - assertThat(resolver.resolveNode(UNSUBMITTED_NODE)).isEmpty(); + assertThat(resolver.getResolvedNode(SF)).isEqualTo(SF_ZIP_DCID); + assertThat(resolver.getResolvedNode(BIG_BEN)).isEqualTo(BIG_BEN_NUTS_DCID); + assertThat(resolver.getResolvedNode(UNSUBMITTED_NODE)).isEmpty(); + } + + @Test + public void endToEnd_chunked() throws Exception { + CoordinatesResolver resolver = new CoordinatesResolver(new ReconClient(newHttpClient()), 1); + + assertThat(resolver.isResolved()).isFalse(); + + for (PropertyValues node : TEST_NODES) { + resolver.submitNode(node); + } + + assertThat(resolver.isResolved()).isFalse(); + + resolver.resolve().get(); + + assertThat(resolver.isResolved()).isTrue(); + + assertThat(resolver.getResolvedNode(SF)).isEqualTo(SF_ZIP_DCID); + assertThat(resolver.getResolvedNode(BIG_BEN)).isEqualTo(BIG_BEN_NUTS_DCID); + assertThat(resolver.getResolvedNode(UNSUBMITTED_NODE)).isEmpty(); } private static PropertyValues newNode(String typeOf, Map props) { diff --git a/util/src/test/java/org/datacommons/util/ReconClientTest.java b/util/src/test/java/org/datacommons/util/ReconClientTest.java index 879c03aa..b76eac01 100644 --- a/util/src/test/java/org/datacommons/util/ReconClientTest.java +++ b/util/src/test/java/org/datacommons/util/ReconClientTest.java @@ -21,7 +21,7 @@ public void resolveCoordinates_endToEndApiCall() throws Exception { .build()) .build(); - var result = client.resolveCoordinates(request); + var result = client.resolveCoordinates(request).get(); assertThat(result.getPlaceCoordinatesCount()).isEqualTo(1); assertThat(result.getPlaceCoordinates(0).getPlaceDcidsList()).contains("country/USA"); From 9f7921b4cf8bce4743d17302fe0a23131515f433 Mon Sep 17 00:00:00 2001 From: Keyur Shah Date: Mon, 7 Aug 2023 10:21:42 -0700 Subject: [PATCH 3/6] Create Resolver hierarchy and chunk calls in ReconClient. --- util/pom.xml | 6 + .../datacommons/util/CoordinatesResolver.java | 75 +++--------- .../org/datacommons/util/FuturesUtil.java | 13 +++ .../org/datacommons/util/ReconClient.java | 93 ++++++++++++--- .../java/org/datacommons/util/Resolver.java | 58 ++++++++++ .../java/org/datacommons/util/StringUtil.java | 16 +++ .../util/CoordinatesResolverTest.java | 40 +++---- .../org/datacommons/util/ReconClientTest.java | 109 +++++++++++++++--- 8 files changed, 298 insertions(+), 112 deletions(-) create mode 100644 util/src/main/java/org/datacommons/util/FuturesUtil.java create mode 100644 util/src/main/java/org/datacommons/util/Resolver.java diff --git a/util/pom.xml b/util/pom.xml index 56b06e2f..07cd8397 100644 --- a/util/pom.xml +++ b/util/pom.xml @@ -73,6 +73,12 @@ 0.46 test + + com.google.truth.extensions + truth-java8-extension + 0.46 + test + com.google.truth.extensions truth-proto-extension diff --git a/util/src/main/java/org/datacommons/util/CoordinatesResolver.java b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java index ab2d4bff..372e2c00 100644 --- a/util/src/main/java/org/datacommons/util/CoordinatesResolver.java +++ b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java @@ -2,12 +2,9 @@ import static java.util.stream.Collectors.toList; -import com.google.common.collect.Lists; -import java.io.IOException; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicBoolean; import org.datacommons.proto.Mcf.McfGraph.PropertyValues; import org.datacommons.proto.Mcf.McfGraph.Values; import org.datacommons.proto.Mcf.ValueType; @@ -16,20 +13,8 @@ import org.datacommons.proto.Recon.ResolveCoordinatesResponse; import org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place; -/** - * Resolves nodes with lat-lngs by calling the DC coordinates resolution API. - * - *

    The resolver should be called in 3 phases: - *

  • 1. submitNode - submit nodes to be resolved in this phase. - *
  • 2. resolve - nodes will be resolved by invoking the recon API in this phase. - *
  • 3. getResolvedNode - query the resolver to get the resolved DCID in this phase. - */ -public class CoordinatesResolver { - private static final int DEFAULT_CHUNK_SIZE = 500; - - private final int chunkSize; - private final AtomicBoolean resolved = new AtomicBoolean(false); - +/** Resolves nodes with lat-lngs by calling the DC coordinates resolution API. */ +public class CoordinatesResolver extends Resolver { private final Set resolveCoordinates = ConcurrentHashMap.newKeySet(); private final ConcurrentHashMap> resolvedCoordinates = @@ -37,63 +22,33 @@ public class CoordinatesResolver { private final ReconClient client; - public CoordinatesResolver(ReconClient client, int chunkSize) { + public CoordinatesResolver(ReconClient client) { this.client = client; - this.chunkSize = chunkSize; } - public CoordinatesResolver(ReconClient client) { - this(client, DEFAULT_CHUNK_SIZE); - } - - public void submitNode(PropertyValues node) { - if (resolved.get()) { - throw new IllegalStateException("submitNode called after remote resolution."); - } - getCoordinate(node).ifPresent(resolveCoordinates::add); + @Override + protected boolean submit(PropertyValues node) { + return getCoordinate(node).map(resolveCoordinates::add).map(unused -> true).orElse(false); } // TODO: Pick the ID based on a preferred list. - public String getResolvedNode(PropertyValues node) { + @Override + protected Optional getResolved(PropertyValues node) { return getCoordinate(node) .filter(resolvedCoordinates::containsKey) - .flatMap(coordinate -> resolvedCoordinates.get(coordinate).stream().findFirst()) - .orElse(""); + .flatMap(coordinate -> resolvedCoordinates.get(coordinate).stream().findFirst()); } - public CompletableFuture resolve() { - if (resolved.getAndSet(true)) { - throw new IllegalStateException("execute called after remote resolution."); - } - + @Override + protected CompletableFuture resolve() { if (resolveCoordinates.isEmpty()) { return CompletableFuture.completedFuture(null); } - List> chunks = Lists.partition(new ArrayList<>(resolveCoordinates), chunkSize); - return CompletableFuture.allOf( - chunks.stream() - .map( - chunk -> { - try { - return client - .resolveCoordinates( - ResolveCoordinatesRequest.newBuilder().addAllCoordinates(chunk).build()) - .thenApply( - response -> { - populateResolvedCandidates(response); - return null; - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - }) - .collect(toList()) - .toArray(new CompletableFuture[0])); - } - - boolean isResolved() { - return resolved.get(); + return client + .resolveCoordinates( + ResolveCoordinatesRequest.newBuilder().addAllCoordinates(resolveCoordinates).build()) + .thenAccept(this::populateResolvedCandidates); } private void populateResolvedCandidates(ResolveCoordinatesResponse response) { diff --git a/util/src/main/java/org/datacommons/util/FuturesUtil.java b/util/src/main/java/org/datacommons/util/FuturesUtil.java new file mode 100644 index 00000000..a3b52334 --- /dev/null +++ b/util/src/main/java/org/datacommons/util/FuturesUtil.java @@ -0,0 +1,13 @@ +package org.datacommons.util; + +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; + +/** Utilities related to futures. */ +final class FuturesUtil { + static CompletableFuture> toFutureOfList(List> futures) { + return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])) + .thenApply(v -> futures.stream().map(CompletableFuture::join).collect(Collectors.toList())); + } +} diff --git a/util/src/main/java/org/datacommons/util/ReconClient.java b/util/src/main/java/org/datacommons/util/ReconClient.java index 66fb1e6e..67d9af09 100644 --- a/util/src/main/java/org/datacommons/util/ReconClient.java +++ b/util/src/main/java/org/datacommons/util/ReconClient.java @@ -1,11 +1,12 @@ package org.datacommons.util; +import static com.google.common.collect.Lists.partition; import static java.net.http.HttpClient.Version.HTTP_1_1; +import static java.util.stream.Collectors.toList; +import static org.datacommons.util.FuturesUtil.toFutureOfList; +import static org.datacommons.util.StringUtil.toJson; -import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; -import java.io.IOException; import java.net.URI; import java.net.http.HttpClient; import java.net.http.HttpRequest; @@ -14,6 +15,8 @@ import java.util.concurrent.CompletableFuture; import org.datacommons.proto.Recon.ResolveCoordinatesRequest; import org.datacommons.proto.Recon.ResolveCoordinatesResponse; +import org.datacommons.proto.Recon.ResolveEntitiesRequest; +import org.datacommons.proto.Recon.ResolveEntitiesResponse; /** * Client to the DC resolution APIs. @@ -24,36 +27,98 @@ public class ReconClient { private static final String RESOLVE_COORDINATES_API_URL = "https://api.datacommons.org/v1/recon/resolve/coordinate"; + private static final String RESOLVE_ENTITIES_API_URL = + "https://api.datacommons.org/v1/recon/entity/resolve"; + + static final String NUM_API_CALLS_COUNTER = "ReconClient_NumApiCalls"; + + private static final int DEFAULT_CHUNK_SIZE = 500; + + private final int chunkSize; + private final HttpClient httpClient; - public ReconClient(HttpClient httpClient) { + private final LogWrapper logWrapper; + + public ReconClient(HttpClient httpClient, LogWrapper logWrapper) { + this(httpClient, logWrapper, DEFAULT_CHUNK_SIZE); + } + + public ReconClient(HttpClient httpClient, LogWrapper logWrapper, int chunkSize) { this.httpClient = httpClient; + this.logWrapper = logWrapper; + this.chunkSize = chunkSize; } public CompletableFuture resolveCoordinates( - ResolveCoordinatesRequest request) throws IOException { - return callApi( - RESOLVE_COORDINATES_API_URL, request, ResolveCoordinatesResponse.getDefaultInstance()); + ResolveCoordinatesRequest request) { + ResolveCoordinatesResponse defaultResponse = ResolveCoordinatesResponse.getDefaultInstance(); + if (request.getCoordinatesCount() < 1) { + return CompletableFuture.completedFuture(defaultResponse); + } + + return toFutureOfList( + partition(request.getCoordinatesList(), chunkSize).stream() + .map( + chunk -> + request.toBuilder().clearCoordinates().addAllCoordinates(chunk).build()) + .map( + chunkedRequest -> + callApi(RESOLVE_COORDINATES_API_URL, chunkedRequest, defaultResponse)) + .collect(toList())) + .thenApply( + chunkedResponses -> + ResolveCoordinatesResponse.newBuilder() + .addAllPlaceCoordinates( + chunkedResponses.stream() + .flatMap( + chunkedResponse -> + chunkedResponse.getPlaceCoordinatesList().stream()) + .collect(toList())) + .build()); + } + + public CompletableFuture resolveEntities( + ResolveEntitiesRequest request) { + ResolveEntitiesResponse defaultResponse = ResolveEntitiesResponse.getDefaultInstance(); + if (request.getEntitiesCount() < 1) { + return CompletableFuture.completedFuture(defaultResponse); + } + + return toFutureOfList( + partition(request.getEntitiesList(), chunkSize).stream() + .map(chunk -> request.toBuilder().clearEntities().addAllEntities(chunk).build()) + .map( + chunkedRequest -> + callApi(RESOLVE_ENTITIES_API_URL, chunkedRequest, defaultResponse)) + .collect(toList())) + .thenApply( + chunkedResponses -> + ResolveEntitiesResponse.newBuilder() + .addAllResolvedEntities( + chunkedResponses.stream() + .flatMap( + chunkedResponse -> + chunkedResponse.getResolvedEntitiesList().stream()) + .collect(toList())) + .build()); } private CompletableFuture callApi( - String apiUrl, Message requestMessage, T responseDefaultInstance) throws IOException { + String apiUrl, Message requestMessage, T responseDefaultInstance) { + logWrapper.incrementInfoCounterBy(NUM_API_CALLS_COUNTER, 1); HttpRequest request = HttpRequest.newBuilder(URI.create(apiUrl)) .version(HTTP_1_1) .header("accept", "application/json") - .POST(BodyPublishers.ofString(StringUtil.msgToJson(requestMessage))) + .POST(BodyPublishers.ofString(toJson(requestMessage))) .build(); return httpClient .sendAsync(request, BodyHandlers.ofString()) .thenApply( response -> { Message.Builder responseMessageBuilder = responseDefaultInstance.newBuilderForType(); - try { - JsonFormat.parser().merge(response.body().trim(), responseMessageBuilder); - } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(e); - } + StringUtil.fromJson(response.body().trim(), responseMessageBuilder); return (T) responseMessageBuilder.build(); }); } diff --git a/util/src/main/java/org/datacommons/util/Resolver.java b/util/src/main/java/org/datacommons/util/Resolver.java new file mode 100644 index 00000000..6d4d8032 --- /dev/null +++ b/util/src/main/java/org/datacommons/util/Resolver.java @@ -0,0 +1,58 @@ +package org.datacommons.util; + +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicBoolean; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; + +/** + * The base class for external resolvers. + * + *

    The resolver should be called in 3 phases: + * + *

      + *
    1. 1. submitNode - submit nodes to be resolved in this phase. + *
    2. 2. resolveNodes - nodes will be resolved by invoking the recon API in this phase (no-op for + * local resolvers). + *
    3. 3. getResolvedNode - query the resolver to get the resolved DCID in this phase. + *
    + * + *

    Concrete implementations are required to override the protected methods corresponding to the + * above. i.e. submit, resolve and getResolved respectively. + */ +abstract class Resolver { + private final AtomicBoolean resolved = new AtomicBoolean(false); + + protected abstract boolean submit(PropertyValues node); + + protected abstract CompletableFuture resolve(); + + protected abstract Optional getResolved(PropertyValues node); + + boolean submitNode(PropertyValues node) { + if (resolved.get()) { + throw new IllegalStateException("submitNode called after resolution."); + } + return submit(node); + } + + // TODO: Pick the ID based on a preferred list. + Optional getResolvedNode(PropertyValues node) { + if (!resolved.get()) { + throw new IllegalStateException("getResolvedNode called before resolution."); + } + return getResolved(node); + } + + CompletableFuture resolveNodes() { + if (resolved.getAndSet(true)) { + throw new IllegalStateException("resolveNodes called after resolution."); + } + + return resolve(); + } + + boolean isResolved() { + return resolved.get(); + } +} diff --git a/util/src/main/java/org/datacommons/util/StringUtil.java b/util/src/main/java/org/datacommons/util/StringUtil.java index 9dde67e2..83d43cb3 100644 --- a/util/src/main/java/org/datacommons/util/StringUtil.java +++ b/util/src/main/java/org/datacommons/util/StringUtil.java @@ -230,4 +230,20 @@ public static String msgToJson(Message msg) throws InvalidProtocolBufferExceptio // Without the un-escaping something like 'Node' shows up as \u0027Node\u0027 return StringEscapeUtils.unescapeJson(JsonFormat.printer().print(msg)); } + + public static String toJson(Message message) { + try { + return msgToJson(message); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException(e); + } + } + + public static void fromJson(String json, Message.Builder builder) { + try { + JsonFormat.parser().merge(json, builder); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException(e); + } + } } diff --git a/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java b/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java index 9efcc58e..9c8bd1bc 100644 --- a/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java +++ b/util/src/test/java/org/datacommons/util/CoordinatesResolverTest.java @@ -1,7 +1,9 @@ package org.datacommons.util; import static com.google.common.truth.Truth.assertThat; +import static com.google.common.truth.Truth8.assertThat; import static java.net.http.HttpClient.newHttpClient; +import static org.datacommons.util.TestUtil.newLogCtx; import static org.datacommons.util.Vocabulary.LATITUDE; import static org.datacommons.util.Vocabulary.LONGITUDE; @@ -20,14 +22,17 @@ public class CoordinatesResolverTest { newNode("Place", Map.of(LATITUDE, "51.510357", LONGITUDE, "-0.116773")); private static final String BIG_BEN_NUTS_DCID = "nuts/UKI32"; + private static final PropertyValues NON_LAT_LNG_NODE = newNode("Place", Map.of("isoCode", "IN")); + + private static final List TEST_NODES = List.of(SF, BIG_BEN, NON_LAT_LNG_NODE); + private static final PropertyValues UNSUBMITTED_NODE = newNode("City", Map.of(LATITUDE, "12.34", LONGITUDE, "56.78")); - private static final List TEST_NODES = List.of(SF, BIG_BEN); - @Test public void endToEnd() throws Exception { - CoordinatesResolver resolver = new CoordinatesResolver(new ReconClient(newHttpClient())); + CoordinatesResolver resolver = + new CoordinatesResolver(new ReconClient(newHttpClient(), newLogCtx())); assertThat(resolver.isResolved()).isFalse(); @@ -37,34 +42,23 @@ public void endToEnd() throws Exception { assertThat(resolver.isResolved()).isFalse(); - resolver.resolve().get(); + resolver.resolveNodes().get(); assertThat(resolver.isResolved()).isTrue(); - assertThat(resolver.getResolvedNode(SF)).isEqualTo(SF_ZIP_DCID); - assertThat(resolver.getResolvedNode(BIG_BEN)).isEqualTo(BIG_BEN_NUTS_DCID); + assertThat(resolver.getResolvedNode(SF)).hasValue(SF_ZIP_DCID); + assertThat(resolver.getResolvedNode(BIG_BEN)).hasValue(BIG_BEN_NUTS_DCID); assertThat(resolver.getResolvedNode(UNSUBMITTED_NODE)).isEmpty(); } @Test - public void endToEnd_chunked() throws Exception { - CoordinatesResolver resolver = new CoordinatesResolver(new ReconClient(newHttpClient()), 1); - - assertThat(resolver.isResolved()).isFalse(); + public void submitNode() { + CoordinatesResolver resolver = + new CoordinatesResolver(new ReconClient(newHttpClient(), newLogCtx())); - for (PropertyValues node : TEST_NODES) { - resolver.submitNode(node); - } - - assertThat(resolver.isResolved()).isFalse(); - - resolver.resolve().get(); - - assertThat(resolver.isResolved()).isTrue(); - - assertThat(resolver.getResolvedNode(SF)).isEqualTo(SF_ZIP_DCID); - assertThat(resolver.getResolvedNode(BIG_BEN)).isEqualTo(BIG_BEN_NUTS_DCID); - assertThat(resolver.getResolvedNode(UNSUBMITTED_NODE)).isEmpty(); + assertThat(resolver.submitNode(SF)).isTrue(); + assertThat(resolver.submitNode(BIG_BEN)).isTrue(); + assertThat(resolver.submitNode(NON_LAT_LNG_NODE)).isFalse(); } private static PropertyValues newNode(String typeOf, Map props) { diff --git a/util/src/test/java/org/datacommons/util/ReconClientTest.java b/util/src/test/java/org/datacommons/util/ReconClientTest.java index b76eac01..f76ed225 100644 --- a/util/src/test/java/org/datacommons/util/ReconClientTest.java +++ b/util/src/test/java/org/datacommons/util/ReconClientTest.java @@ -1,29 +1,108 @@ package org.datacommons.util; import static com.google.common.truth.Truth.assertThat; +import static java.util.stream.Collectors.toList; +import static org.datacommons.util.ReconClient.NUM_API_CALLS_COUNTER; +import static org.datacommons.util.TestUtil.getCounter; +import static org.datacommons.util.TestUtil.newLogCtx; import java.net.http.HttpClient; -import org.datacommons.proto.Recon; +import org.datacommons.proto.Recon.*; +import org.datacommons.proto.Recon.ResolveCoordinatesRequest.Coordinate; import org.junit.Test; public class ReconClientTest { + // India using isoCode + private static final EntitySubGraph INDIA_ENTITY = newEntity("isoCode", "IN"); + private static final String INDIA_SOURCE_ID = "isoCode:IN"; + private static final String INDIA_DCID = "country/IND"; + private static final IdWithProperty INDIA_RESOLVED_ID = + IdWithProperty.newBuilder().setProp("dcid").setVal(INDIA_DCID).build(); + private static final Coordinate SF_COORDINATES = + Coordinate.newBuilder().setLatitude(37.77493).setLongitude(-122.41942).build(); + private static final String USA_DCID = "country/USA"; + private static final Coordinate BIG_BEN_COORDINATES = + Coordinate.newBuilder().setLatitude(51.510357).setLongitude(-0.116773).build(); + private static final String GBR_DCID = "country/GBR"; + @Test - public void resolveCoordinates_endToEndApiCall() throws Exception { - var client = new ReconClient(HttpClient.newHttpClient()); - - // San Francisco coordinates - var request = - Recon.ResolveCoordinatesRequest.newBuilder() - .addCoordinates( - Recon.ResolveCoordinatesRequest.Coordinate.newBuilder() - .setLatitude(37.77493) - .setLongitude(-122.41942) - .build()) + public void resolveEntities() throws Exception { + LogWrapper logWrapper = newLogCtx(); + ReconClient client = new ReconClient(HttpClient.newHttpClient(), logWrapper); + + ResolveEntitiesRequest request = + ResolveEntitiesRequest.newBuilder().addEntities(INDIA_ENTITY).build(); + + ResolveEntitiesResponse result = client.resolveEntities(request).get(); + + assertThat(result.getResolvedEntitiesCount()).isEqualTo(1); + assertThat(result.getResolvedEntities(0).getSourceId()).isEqualTo(INDIA_SOURCE_ID); + assertThat(result.getResolvedEntities(0).getResolvedIdsCount()).isEqualTo(1); + assertThat(result.getResolvedEntities(0).getResolvedIds(0).getIdsList()) + .contains(INDIA_RESOLVED_ID); + assertThat(getCounter(logWrapper.getLog(), NUM_API_CALLS_COUNTER)).isEqualTo(1); + } + + @Test + public void resolveCoordinates() throws Exception { + LogWrapper logWrapper = newLogCtx(); + ReconClient client = new ReconClient(HttpClient.newHttpClient(), logWrapper); + + ResolveCoordinatesRequest request = + ResolveCoordinatesRequest.newBuilder() + .addCoordinates(SF_COORDINATES) + .addCoordinates(BIG_BEN_COORDINATES) .build(); - var result = client.resolveCoordinates(request).get(); + ResolveCoordinatesResponse result = client.resolveCoordinates(request).get(); + + assertThat(result.getPlaceCoordinatesCount()).isEqualTo(2); + assertThat( + result.getPlaceCoordinates(0).getPlacesList().stream() + .map(ResolveCoordinatesResponse.Place::getDcid) + .collect(toList())) + .contains(USA_DCID); + assertThat( + result.getPlaceCoordinates(1).getPlacesList().stream() + .map(ResolveCoordinatesResponse.Place::getDcid) + .collect(toList())) + .contains(GBR_DCID); + assertThat(getCounter(logWrapper.getLog(), NUM_API_CALLS_COUNTER)).isEqualTo(1); + } + + @Test + public void resolveCoordinates_chunked() throws Exception { + LogWrapper logWrapper = newLogCtx(); + ReconClient client = new ReconClient(HttpClient.newHttpClient(), logWrapper, 1); + + ResolveCoordinatesRequest request = + ResolveCoordinatesRequest.newBuilder() + .addCoordinates(SF_COORDINATES) + .addCoordinates(BIG_BEN_COORDINATES) + .build(); + + ResolveCoordinatesResponse result = client.resolveCoordinates(request).get(); + + assertThat(result.getPlaceCoordinatesCount()).isEqualTo(2); + assertThat( + result.getPlaceCoordinates(0).getPlacesList().stream() + .map(ResolveCoordinatesResponse.Place::getDcid) + .collect(toList())) + .contains(USA_DCID); + assertThat( + result.getPlaceCoordinates(1).getPlacesList().stream() + .map(ResolveCoordinatesResponse.Place::getDcid) + .collect(toList())) + .contains(GBR_DCID); + assertThat(getCounter(logWrapper.getLog(), NUM_API_CALLS_COUNTER)).isEqualTo(2); + } - assertThat(result.getPlaceCoordinatesCount()).isEqualTo(1); - assertThat(result.getPlaceCoordinates(0).getPlaceDcidsList()).contains("country/USA"); + private static EntitySubGraph newEntity(String property, String value) { + return EntitySubGraph.newBuilder() + .setSourceId(String.format("%s:%s", property, value)) + .setEntityIds( + EntityIds.newBuilder() + .addIds(IdWithProperty.newBuilder().setProp(property).setVal(value))) + .build(); } } From 4b5ceeede6a68bd1377d12a3cd9cb9dbabfdc2bb Mon Sep 17 00:00:00 2001 From: Keyur Shah Date: Mon, 7 Aug 2023 20:26:43 -0700 Subject: [PATCH 4/6] Add Resolver to resolve entities. --- .../datacommons/util/CoordinatesResolver.java | 2 +- .../org/datacommons/util/EntityResolver.java | 130 ++++++++++++++++++ .../java/org/datacommons/util/McfUtil.java | 20 +++ .../datacommons/util/EntityResolverTest.java | 96 +++++++++++++ .../org/datacommons/util/ReconClientTest.java | 62 ++++++--- 5 files changed, 293 insertions(+), 17 deletions(-) create mode 100644 util/src/main/java/org/datacommons/util/EntityResolver.java create mode 100644 util/src/test/java/org/datacommons/util/EntityResolverTest.java diff --git a/util/src/main/java/org/datacommons/util/CoordinatesResolver.java b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java index 372e2c00..9aa376c6 100644 --- a/util/src/main/java/org/datacommons/util/CoordinatesResolver.java +++ b/util/src/main/java/org/datacommons/util/CoordinatesResolver.java @@ -14,7 +14,7 @@ import org.datacommons.proto.Recon.ResolveCoordinatesResponse.Place; /** Resolves nodes with lat-lngs by calling the DC coordinates resolution API. */ -public class CoordinatesResolver extends Resolver { +final class CoordinatesResolver extends Resolver { private final Set resolveCoordinates = ConcurrentHashMap.newKeySet(); private final ConcurrentHashMap> resolvedCoordinates = diff --git a/util/src/main/java/org/datacommons/util/EntityResolver.java b/util/src/main/java/org/datacommons/util/EntityResolver.java new file mode 100644 index 00000000..7b588d88 --- /dev/null +++ b/util/src/main/java/org/datacommons/util/EntityResolver.java @@ -0,0 +1,130 @@ +package org.datacommons.util; + +import static java.util.stream.Collectors.toList; +import static org.datacommons.util.McfUtil.newIdWithProperty; + +import com.google.common.collect.ImmutableSet; +import java.util.*; +import java.util.AbstractMap.SimpleEntry; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import org.datacommons.proto.Debug.Log.Level; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; +import org.datacommons.proto.Mcf.McfGraph.TypedValue; +import org.datacommons.proto.Mcf.ValueType; +import org.datacommons.proto.Recon.IdWithProperty; +import org.datacommons.proto.Recon.ResolveEntitiesRequest; +import org.datacommons.proto.Recon.ResolveEntitiesResponse; +import org.datacommons.proto.Recon.ResolveEntitiesResponse.ResolvedEntity; +import org.datacommons.proto.Recon.ResolveEntitiesResponse.ResolvedId; + +/** Resolves nodes by calling the DC entity resolution API. */ +final class EntityResolver extends Resolver { + private final Set resolveEntities = ConcurrentHashMap.newKeySet(); + // Map of Entity (represented as IdWithProperty) to DCID. + private final ConcurrentHashMap resolvedEntities = + new ConcurrentHashMap<>(); + + private final ReconClient client; + + private final LogWrapper logWrapper; + + public EntityResolver(ReconClient client, LogWrapper logWrapper) { + this.client = client; + this.logWrapper = logWrapper; + } + + @Override + protected boolean submit(PropertyValues node) { + Set entities = getEntities(node); + if (!entities.isEmpty()) { + resolveEntities.addAll(entities); + return true; + } + return false; + } + + @Override + protected CompletableFuture resolve() { + if (resolveEntities.isEmpty()) { + return CompletableFuture.completedFuture(null); + } + + return client + .resolveEntities( + ResolveEntitiesRequest.newBuilder() + .addAllEntities( + resolveEntities.stream().map(McfUtil::newEntitySubGraph).collect(toList())) + .build()) + .thenAccept(this::populateResolvedEntities); + } + + @Override + protected Optional getResolved(PropertyValues node) { + Set externalEntities = getEntities(node); + Set dcids = + new LinkedHashSet<>( + externalEntities.stream() + .filter(resolvedEntities::containsKey) + .map(resolvedEntities::get) + .collect(toList())); + + if (dcids.isEmpty()) { + return Optional.empty(); + } + + if (dcids.size() > 1) { + logWrapper.addEntry( + Level.LEVEL_ERROR, + "Resolution_DivergingDcidsForExternalIds", + String.format("Divergence found. dcids = %s, external ids = %s", dcids, externalEntities), + node.getLocationsList()); + return Optional.empty(); + } + + return dcids.stream().findFirst(); + } + + private void populateResolvedEntities(ResolveEntitiesResponse response) { + for (ResolvedEntity entity : response.getResolvedEntitiesList()) { + fromResolvedEntity(entity) + .ifPresent(entry -> resolvedEntities.put(entry.getKey(), entry.getValue())); + } + } + + private static Set getEntities(PropertyValues node) { + ImmutableSet.Builder builder = ImmutableSet.builder(); + + for (String prop : Vocabulary.PLACE_RESOLVABLE_AND_ASSIGNABLE_IDS) { + if (node.containsPvs(prop)) { + for (TypedValue typedValue : node.getPvsMap().get(prop).getTypedValuesList()) { + if (typedValue.getType() == ValueType.TEXT || typedValue.getType() == ValueType.NUMBER) { + builder.add(newIdWithProperty(prop, typedValue.getValue())); + } + } + } + } + + return builder.build(); + } + + private static Optional> fromResolvedEntity( + ResolvedEntity entity) { + String[] propVal = entity.getSourceId().split(":"); + if (propVal.length != 2) { + return Optional.empty(); + } + IdWithProperty key = newIdWithProperty(propVal[0], propVal[1]); + + List resolvedIds = entity.getResolvedIdsList(); + if (resolvedIds.isEmpty()) { + return Optional.empty(); + } + + return resolvedIds.get(0).getIdsList().stream() + .filter(idWithProperty -> idWithProperty.getProp().equals(Vocabulary.DCID)) + .findFirst() + .map(IdWithProperty::getVal) + .map(dcid -> new SimpleEntry<>(key, dcid)); + } +} diff --git a/util/src/main/java/org/datacommons/util/McfUtil.java b/util/src/main/java/org/datacommons/util/McfUtil.java index e3a68c69..d5afb527 100644 --- a/util/src/main/java/org/datacommons/util/McfUtil.java +++ b/util/src/main/java/org/datacommons/util/McfUtil.java @@ -19,6 +19,9 @@ import org.datacommons.proto.Mcf; import org.datacommons.proto.Mcf.McfGraph; import org.datacommons.proto.Mcf.ValueType; +import org.datacommons.proto.Recon.EntityIds; +import org.datacommons.proto.Recon.EntitySubGraph; +import org.datacommons.proto.Recon.IdWithProperty; // A container class of MCF related utilities. public class McfUtil { @@ -180,6 +183,23 @@ public static boolean isSvObWithNumberValue(McfGraph.PropertyValues node) { && nodeValues.getTypedValues(0).getType() == ValueType.NUMBER; } + // Returns a new EntitySubGraph with sourceId = ":" and a single entity ID with + // the specified property and value. + public static EntitySubGraph newEntitySubGraph(String property, String value) { + return newEntitySubGraph(newIdWithProperty(property, value)); + } + + public static EntitySubGraph newEntitySubGraph(IdWithProperty idWithProperty) { + return EntitySubGraph.newBuilder() + .setSourceId(String.format("%s:%s", idWithProperty.getProp(), idWithProperty.getVal())) + .setEntityIds(EntityIds.newBuilder().addIds(idWithProperty)) + .build(); + } + + public static IdWithProperty newIdWithProperty(String property, String value) { + return IdWithProperty.newBuilder().setProp(property).setVal(value).build(); + } + private static String getValue(Mcf.McfGraph.TypedValue typedValue) { if (typedValue.getType() == Mcf.ValueType.TEXT) { return "\"" + typedValue.getValue() + "\""; diff --git a/util/src/test/java/org/datacommons/util/EntityResolverTest.java b/util/src/test/java/org/datacommons/util/EntityResolverTest.java new file mode 100644 index 00000000..55e91eb4 --- /dev/null +++ b/util/src/test/java/org/datacommons/util/EntityResolverTest.java @@ -0,0 +1,96 @@ +package org.datacommons.util; + +import static com.google.common.truth.Truth.assertThat; +import static com.google.common.truth.Truth8.assertThat; +import static org.datacommons.util.McfUtil.newValues; +import static org.datacommons.util.TestUtil.newLogCtx; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.http.HttpClient; +import java.util.List; +import java.util.Map; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; +import org.datacommons.proto.Mcf.ValueType; +import org.junit.Test; + +public class EntityResolverTest { + // This includes 6 nodes with 7 external IDs. + + // India using isoCode + PropertyValues in = buildNode("Place", Map.of("isoCode", "IN")); + String inDcid = "country/IND"; + // CA, but the type is not a valid place type. + PropertyValues ca = buildNode("USState", Map.of("geoId", "06")); + String caDcid = "geoId/06"; + // SF using wikidataId + PropertyValues sf = buildNode("City", Map.of("wikidataId", "Q62")); + String sfDcid = "geoId/0667000"; + // Venezia using nuts + PropertyValues vz = buildNode("Place", Map.of("nutsCode", "ITH35")); + String vzDcid = "nuts/ITH35"; + // Unknown country + PropertyValues unk = buildNode("Country", Map.of("isoCode", "ZZZ")); + // Tamil Nadu / Karnataka using diverging IDs + PropertyValues tn = buildNode("Place", Map.of("isoCode", "IN-KA", "wikidataId", "Q1445")); + + List testPlaceNodes = List.of(in, ca, sf, vz, unk, tn); + + @Test + public void endToEndWithApiCalls() throws Exception { + LogWrapper lw = newLogCtx(); + + EntityResolver resolver = + new EntityResolver(new ReconClient(HttpClient.newHttpClient(), lw), lw); + for (PropertyValues node : testPlaceNodes) { + resolver.submitNode(node); + } + + // Submit 20 more SF nodes. This should result in NO more external resolutions since the + // entities are maintained in a Set. + for (int i = 0; i < 20; i++) { + resolver.submitNode(sf); + } + + resolver.resolveNodes().get(); + + testAssertionSuiteOnResolverInstance(resolver, lw); + } + + // Runs assertions on the place constants as defined in the class constants. + // These assertions are factored out of individual tests to allow testing different + // input methods (API, addLocalGraph) have the same local behavior with the same input + // Does NOT test I/O related assertions, which are left to the individual test functions. + private void testAssertionSuiteOnResolverInstance(EntityResolver resolver, LogWrapper lw) + throws IOException, InterruptedException { + assertThat(resolver.getResolvedNode(in)).hasValue(inDcid); + + // CA type is not valid. At this level, the resolver does not care about the type, so it will + // resolve. + // However, when resolving from the resolver controller (i.e. ExternalIdController), it will not + // resolve. + assertThat(resolver.getResolvedNode(ca)).hasValue(caDcid); + assertThat(lw.getLog().getEntriesList()).isEmpty(); + + // SF and Venezia get mapped. + assertThat(resolver.getResolvedNode(sf)).hasValue(sfDcid); + assertThat(resolver.getResolvedNode(vz)).hasValue(vzDcid); + + assertThat(resolver.getResolvedNode(unk)).isEmpty(); + + // We provided external IDs that map to diverging DCIDs. + assertThat(resolver.getResolvedNode(tn)).isEmpty(); + assertTrue( + TestUtil.checkLog( + lw.getLog(), "Resolution_DivergingDcidsForExternalIds", "Divergence found.")); + } + + PropertyValues buildNode(String typeOf, Map extIds) { + PropertyValues.Builder node = PropertyValues.newBuilder(); + node.putPvs(Vocabulary.TYPE_OF, newValues(ValueType.RESOLVED_REF, typeOf)); + for (Map.Entry pv : extIds.entrySet()) { + node.putPvs(pv.getKey(), newValues(ValueType.TEXT, pv.getValue())); + } + return node.build(); + } +} diff --git a/util/src/test/java/org/datacommons/util/ReconClientTest.java b/util/src/test/java/org/datacommons/util/ReconClientTest.java index f76ed225..11fa9d1d 100644 --- a/util/src/test/java/org/datacommons/util/ReconClientTest.java +++ b/util/src/test/java/org/datacommons/util/ReconClientTest.java @@ -2,6 +2,7 @@ import static com.google.common.truth.Truth.assertThat; import static java.util.stream.Collectors.toList; +import static org.datacommons.util.McfUtil.newEntitySubGraph; import static org.datacommons.util.ReconClient.NUM_API_CALLS_COUNTER; import static org.datacommons.util.TestUtil.getCounter; import static org.datacommons.util.TestUtil.newLogCtx; @@ -12,18 +13,24 @@ import org.junit.Test; public class ReconClientTest { - // India using isoCode - private static final EntitySubGraph INDIA_ENTITY = newEntity("isoCode", "IN"); - private static final String INDIA_SOURCE_ID = "isoCode:IN"; private static final String INDIA_DCID = "country/IND"; + private static final String USA_DCID = "country/USA"; + private static final String GBR_DCID = "country/GBR"; + private static final String CA_DCID = "geoId/06"; + private static final String INDIA_SOURCE_ID = "isoCode:IN"; + private static final String CA_SOURCE_ID = "geoId:06"; private static final IdWithProperty INDIA_RESOLVED_ID = IdWithProperty.newBuilder().setProp("dcid").setVal(INDIA_DCID).build(); + private static final IdWithProperty CA_RESOLVED_ID = + IdWithProperty.newBuilder().setProp("dcid").setVal(CA_DCID).build(); + // India using isoCode + private static final EntitySubGraph INDIA_ENTITY = newEntitySubGraph("isoCode", "IN"); + // CA using geo ID + private static final EntitySubGraph CA_ENTITY = newEntitySubGraph("geoId", "06"); private static final Coordinate SF_COORDINATES = Coordinate.newBuilder().setLatitude(37.77493).setLongitude(-122.41942).build(); - private static final String USA_DCID = "country/USA"; private static final Coordinate BIG_BEN_COORDINATES = Coordinate.newBuilder().setLatitude(51.510357).setLongitude(-0.116773).build(); - private static final String GBR_DCID = "country/GBR"; @Test public void resolveEntities() throws Exception { @@ -31,18 +38,50 @@ public void resolveEntities() throws Exception { ReconClient client = new ReconClient(HttpClient.newHttpClient(), logWrapper); ResolveEntitiesRequest request = - ResolveEntitiesRequest.newBuilder().addEntities(INDIA_ENTITY).build(); + ResolveEntitiesRequest.newBuilder() + .addEntities(INDIA_ENTITY) + .addEntities(CA_ENTITY) + .build(); ResolveEntitiesResponse result = client.resolveEntities(request).get(); - assertThat(result.getResolvedEntitiesCount()).isEqualTo(1); + assertThat(result.getResolvedEntitiesCount()).isEqualTo(2); assertThat(result.getResolvedEntities(0).getSourceId()).isEqualTo(INDIA_SOURCE_ID); assertThat(result.getResolvedEntities(0).getResolvedIdsCount()).isEqualTo(1); assertThat(result.getResolvedEntities(0).getResolvedIds(0).getIdsList()) .contains(INDIA_RESOLVED_ID); + assertThat(result.getResolvedEntities(1).getSourceId()).isEqualTo(CA_SOURCE_ID); + assertThat(result.getResolvedEntities(1).getResolvedIdsCount()).isEqualTo(1); + assertThat(result.getResolvedEntities(1).getResolvedIds(0).getIdsList()) + .contains(CA_RESOLVED_ID); assertThat(getCounter(logWrapper.getLog(), NUM_API_CALLS_COUNTER)).isEqualTo(1); } + @Test + public void resolveEntities_chunked() throws Exception { + LogWrapper logWrapper = newLogCtx(); + ReconClient client = new ReconClient(HttpClient.newHttpClient(), logWrapper, 1); + + ResolveEntitiesRequest request = + ResolveEntitiesRequest.newBuilder() + .addEntities(INDIA_ENTITY) + .addEntities(CA_ENTITY) + .build(); + + ResolveEntitiesResponse result = client.resolveEntities(request).get(); + + assertThat(result.getResolvedEntitiesCount()).isEqualTo(2); + assertThat(result.getResolvedEntities(0).getSourceId()).isEqualTo(INDIA_SOURCE_ID); + assertThat(result.getResolvedEntities(0).getResolvedIdsCount()).isEqualTo(1); + assertThat(result.getResolvedEntities(0).getResolvedIds(0).getIdsList()) + .contains(INDIA_RESOLVED_ID); + assertThat(result.getResolvedEntities(1).getSourceId()).isEqualTo(CA_SOURCE_ID); + assertThat(result.getResolvedEntities(1).getResolvedIdsCount()).isEqualTo(1); + assertThat(result.getResolvedEntities(1).getResolvedIds(0).getIdsList()) + .contains(CA_RESOLVED_ID); + assertThat(getCounter(logWrapper.getLog(), NUM_API_CALLS_COUNTER)).isEqualTo(2); + } + @Test public void resolveCoordinates() throws Exception { LogWrapper logWrapper = newLogCtx(); @@ -96,13 +135,4 @@ public void resolveCoordinates_chunked() throws Exception { .contains(GBR_DCID); assertThat(getCounter(logWrapper.getLog(), NUM_API_CALLS_COUNTER)).isEqualTo(2); } - - private static EntitySubGraph newEntity(String property, String value) { - return EntitySubGraph.newBuilder() - .setSourceId(String.format("%s:%s", property, value)) - .setEntityIds( - EntityIds.newBuilder() - .addIds(IdWithProperty.newBuilder().setProp(property).setVal(value))) - .build(); - } } From 81382af3f4d06aeb209c7f2492c84f183e19d82f Mon Sep 17 00:00:00 2001 From: Keyur Shah Date: Mon, 7 Aug 2023 21:21:00 -0700 Subject: [PATCH 5/6] Create Resolver that resolves nodes locally. --- .../org/datacommons/util/EntityResolver.java | 44 +------ .../org/datacommons/util/LocalResolver.java | 55 +++++++++ .../java/org/datacommons/util/McfUtil.java | 49 ++++++++ .../datacommons/util/EntityResolverTest.java | 10 -- .../datacommons/util/LocalResolverTest.java | 109 ++++++++++++++++++ 5 files changed, 215 insertions(+), 52 deletions(-) create mode 100644 util/src/main/java/org/datacommons/util/LocalResolver.java create mode 100644 util/src/test/java/org/datacommons/util/LocalResolverTest.java diff --git a/util/src/main/java/org/datacommons/util/EntityResolver.java b/util/src/main/java/org/datacommons/util/EntityResolver.java index 7b588d88..4a8a7a81 100644 --- a/util/src/main/java/org/datacommons/util/EntityResolver.java +++ b/util/src/main/java/org/datacommons/util/EntityResolver.java @@ -1,17 +1,14 @@ package org.datacommons.util; import static java.util.stream.Collectors.toList; +import static org.datacommons.util.McfUtil.getEntities; import static org.datacommons.util.McfUtil.newIdWithProperty; -import com.google.common.collect.ImmutableSet; import java.util.*; import java.util.AbstractMap.SimpleEntry; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import org.datacommons.proto.Debug.Log.Level; import org.datacommons.proto.Mcf.McfGraph.PropertyValues; -import org.datacommons.proto.Mcf.McfGraph.TypedValue; -import org.datacommons.proto.Mcf.ValueType; import org.datacommons.proto.Recon.IdWithProperty; import org.datacommons.proto.Recon.ResolveEntitiesRequest; import org.datacommons.proto.Recon.ResolveEntitiesResponse; @@ -61,28 +58,7 @@ protected CompletableFuture resolve() { @Override protected Optional getResolved(PropertyValues node) { - Set externalEntities = getEntities(node); - Set dcids = - new LinkedHashSet<>( - externalEntities.stream() - .filter(resolvedEntities::containsKey) - .map(resolvedEntities::get) - .collect(toList())); - - if (dcids.isEmpty()) { - return Optional.empty(); - } - - if (dcids.size() > 1) { - logWrapper.addEntry( - Level.LEVEL_ERROR, - "Resolution_DivergingDcidsForExternalIds", - String.format("Divergence found. dcids = %s, external ids = %s", dcids, externalEntities), - node.getLocationsList()); - return Optional.empty(); - } - - return dcids.stream().findFirst(); + return McfUtil.getResolved(node, resolvedEntities, logWrapper); } private void populateResolvedEntities(ResolveEntitiesResponse response) { @@ -92,22 +68,6 @@ private void populateResolvedEntities(ResolveEntitiesResponse response) { } } - private static Set getEntities(PropertyValues node) { - ImmutableSet.Builder builder = ImmutableSet.builder(); - - for (String prop : Vocabulary.PLACE_RESOLVABLE_AND_ASSIGNABLE_IDS) { - if (node.containsPvs(prop)) { - for (TypedValue typedValue : node.getPvsMap().get(prop).getTypedValuesList()) { - if (typedValue.getType() == ValueType.TEXT || typedValue.getType() == ValueType.NUMBER) { - builder.add(newIdWithProperty(prop, typedValue.getValue())); - } - } - } - } - - return builder.build(); - } - private static Optional> fromResolvedEntity( ResolvedEntity entity) { String[] propVal = entity.getSourceId().split(":"); diff --git a/util/src/main/java/org/datacommons/util/LocalResolver.java b/util/src/main/java/org/datacommons/util/LocalResolver.java new file mode 100644 index 00000000..993aeb83 --- /dev/null +++ b/util/src/main/java/org/datacommons/util/LocalResolver.java @@ -0,0 +1,55 @@ +package org.datacommons.util; + +import static org.apache.commons.lang3.StringUtils.isEmpty; +import static org.datacommons.util.McfUtil.getEntities; +import static org.datacommons.util.McfUtil.getPropVal; + +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; +import org.datacommons.proto.Recon.IdWithProperty; + +/** Resolves nodes locally. */ +final class LocalResolver extends Resolver { + // Map of Entity (represented as IdWithProperty) to DCID. + private final ConcurrentHashMap resolvedEntities = + new ConcurrentHashMap<>(); + + private final LogWrapper logWrapper; + + public LocalResolver(LogWrapper logWrapper) { + this.logWrapper = logWrapper; + } + + @Override + protected boolean submit(PropertyValues node) { + Set entities = getEntities(node); + String dcid = getPropVal(node, Vocabulary.DCID); + if (isEmpty(dcid)) { + // Even if a DCID is not present in this node, but if one with the same entities was + // previously added locally and can be already be resolved, then return true. + return getResolved(node).map(unused -> true).orElse(false); + } + + if (!entities.isEmpty()) { + for (IdWithProperty entity : entities) { + resolvedEntities.put(entity, dcid); + } + return true; + } + return false; + } + + @Override + protected CompletableFuture resolve() { + // Nothing to resolve externally, so return immediately. + return CompletableFuture.completedFuture(null); + } + + @Override + protected Optional getResolved(PropertyValues node) { + return McfUtil.getResolved(node, resolvedEntities, logWrapper); + } +} diff --git a/util/src/main/java/org/datacommons/util/McfUtil.java b/util/src/main/java/org/datacommons/util/McfUtil.java index d5afb527..1244b3da 100644 --- a/util/src/main/java/org/datacommons/util/McfUtil.java +++ b/util/src/main/java/org/datacommons/util/McfUtil.java @@ -14,10 +14,16 @@ package org.datacommons.util; +import static java.util.stream.Collectors.toList; + +import com.google.common.collect.ImmutableSet; import java.util.*; +import org.datacommons.proto.Debug.Log.Level; import org.datacommons.proto.LogLocation; import org.datacommons.proto.Mcf; import org.datacommons.proto.Mcf.McfGraph; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; +import org.datacommons.proto.Mcf.McfGraph.TypedValue; import org.datacommons.proto.Mcf.ValueType; import org.datacommons.proto.Recon.EntityIds; import org.datacommons.proto.Recon.EntitySubGraph; @@ -209,4 +215,47 @@ private static String getValue(Mcf.McfGraph.TypedValue typedValue) { return typedValue.getValue(); } } + + // Gets place resolvable and assignable ids with property from the specified node. + static Set getEntities(PropertyValues node) { + ImmutableSet.Builder builder = ImmutableSet.builder(); + + for (String prop : Vocabulary.PLACE_RESOLVABLE_AND_ASSIGNABLE_IDS) { + if (node.containsPvs(prop)) { + for (TypedValue typedValue : node.getPvsMap().get(prop).getTypedValuesList()) { + if (typedValue.getType() == ValueType.TEXT || typedValue.getType() == ValueType.NUMBER) { + builder.add(newIdWithProperty(prop, typedValue.getValue())); + } + } + } + } + + return builder.build(); + } + + static Optional getResolved( + PropertyValues node, Map resolvedEntities, LogWrapper logWrapper) { + Set externalEntities = getEntities(node); + Set dcids = + new LinkedHashSet<>( + externalEntities.stream() + .filter(resolvedEntities::containsKey) + .map(resolvedEntities::get) + .collect(toList())); + + if (dcids.isEmpty()) { + return Optional.empty(); + } + + if (dcids.size() > 1) { + logWrapper.addEntry( + Level.LEVEL_ERROR, + "Resolution_DivergingDcidsForExternalIds", + String.format("Divergence found. dcids = %s, external ids = %s", dcids, externalEntities), + node.getLocationsList()); + return Optional.empty(); + } + + return dcids.stream().findFirst(); + } } diff --git a/util/src/test/java/org/datacommons/util/EntityResolverTest.java b/util/src/test/java/org/datacommons/util/EntityResolverTest.java index 55e91eb4..f371dca5 100644 --- a/util/src/test/java/org/datacommons/util/EntityResolverTest.java +++ b/util/src/test/java/org/datacommons/util/EntityResolverTest.java @@ -6,7 +6,6 @@ import static org.datacommons.util.TestUtil.newLogCtx; import static org.junit.Assert.assertTrue; -import java.io.IOException; import java.net.http.HttpClient; import java.util.List; import java.util.Map; @@ -54,15 +53,6 @@ public void endToEndWithApiCalls() throws Exception { resolver.resolveNodes().get(); - testAssertionSuiteOnResolverInstance(resolver, lw); - } - - // Runs assertions on the place constants as defined in the class constants. - // These assertions are factored out of individual tests to allow testing different - // input methods (API, addLocalGraph) have the same local behavior with the same input - // Does NOT test I/O related assertions, which are left to the individual test functions. - private void testAssertionSuiteOnResolverInstance(EntityResolver resolver, LogWrapper lw) - throws IOException, InterruptedException { assertThat(resolver.getResolvedNode(in)).hasValue(inDcid); // CA type is not valid. At this level, the resolver does not care about the type, so it will diff --git a/util/src/test/java/org/datacommons/util/LocalResolverTest.java b/util/src/test/java/org/datacommons/util/LocalResolverTest.java new file mode 100644 index 00000000..dcb10a9d --- /dev/null +++ b/util/src/test/java/org/datacommons/util/LocalResolverTest.java @@ -0,0 +1,109 @@ +package org.datacommons.util; + +import static com.google.common.truth.Truth.assertThat; +import static com.google.common.truth.Truth8.assertThat; +import static org.datacommons.util.McfUtil.newValues; +import static org.datacommons.util.TestUtil.newLogCtx; +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.Map; +import org.datacommons.proto.Mcf; +import org.datacommons.proto.Mcf.McfGraph.PropertyValues; +import org.datacommons.proto.Mcf.ValueType; +import org.junit.Test; + +public class LocalResolverTest { + // This includes 6 nodes with 7 external IDs. + + // India using isoCode + PropertyValues in = buildNode("Place", Map.of("isoCode", "IN")); + String inDcid = "country/IND"; + // CA, but the type is not a valid place type. + PropertyValues ca = buildNode("USState", Map.of("geoId", "06")); + String caDcid = "geoId/06"; + // SF using wikidataId + PropertyValues sf = buildNode("City", Map.of("wikidataId", "Q62")); + String sfDcid = "geoId/0667000"; + // Venezia using nuts + PropertyValues vz = buildNode("Place", Map.of("nutsCode", "ITH35")); + String vzDcid = "nuts/ITH35"; + // Unknown country + PropertyValues unk = buildNode("Country", Map.of("isoCode", "ZZZ")); + // Tamil Nadu / Karnataka using diverging IDs + PropertyValues tn = buildNode("Place", Map.of("isoCode", "IN-KA", "wikidataId", "Q1445")); + + List testPlaceNodes = List.of(in, ca, sf, vz, unk, tn); + + @Test + public void withAllPhases() throws Exception { + LogWrapper lw = newLogCtx(); + LocalResolver resolver = new LocalResolver(lw); + + List nodesWithoutDcid = List.of(in, sf, vz); + + for (PropertyValues node : nodesWithoutDcid) { + // assert that test nodes without DCID are initially NOT resolvable by this resolver. + assertThat(resolver.submitNode(node)).isFalse(); + } + + // Construct input side MCF where we also provide the DCIDs of the nodes + var inWithDcid = addDcidToNode(in, inDcid); + var sfWithDcid = addDcidToNode(sf, sfDcid); + var vzWithDcid = addDcidToNode(vz, vzDcid); + + // Used for test where resolving an input node with diverging "external" + // (loaded from local graph) throws an error + var tamilNaduWithDcid = + addDcidToNode(buildNode("Place", Map.of("isoCode", "IN-KA")), "wikidataId/Q1445"); + var karnatakaWithDcid = + addDcidToNode(buildNode("Place", Map.of("wikidataId", "Q1445")), "wikidataId/Q1185"); + + resolver.submitNode(inWithDcid); + resolver.submitNode(sfWithDcid); + resolver.submitNode(vzWithDcid); + resolver.submitNode(tamilNaduWithDcid); + resolver.submitNode(karnatakaWithDcid); + + for (PropertyValues node : nodesWithoutDcid) { + // assert that the same nodes without DCID are now resolvable by this resolver. + assertThat(resolver.submitNode(node)).isTrue(); + } + + resolver.resolveNodes().get(); + + // India, SF and Venezia are mapped. + assertThat(resolver.getResolvedNode(in)).hasValue(inDcid); + + // SF and Venezia get mapped. + assertThat(resolver.getResolvedNode(sf)).hasValue(sfDcid); + assertThat(resolver.getResolvedNode(vz)).hasValue(vzDcid); + + // CA and unknown are not mapped. + assertThat(resolver.getResolvedNode(ca)).isEmpty(); + assertThat(resolver.getResolvedNode(unk)).isEmpty(); + assertThat(lw.getLog().getEntriesList()).isEmpty(); + + // We provided external IDs that map to diverging DCIDs. + assertThat(resolver.getResolvedNode(tn)).isEmpty(); + assertTrue( + TestUtil.checkLog( + lw.getLog(), "Resolution_DivergingDcidsForExternalIds", "Divergence found.")); + } + + PropertyValues buildNode(String typeOf, Map extIds) { + PropertyValues.Builder node = PropertyValues.newBuilder(); + node.putPvs(Vocabulary.TYPE_OF, newValues(ValueType.RESOLVED_REF, typeOf)); + for (Map.Entry pv : extIds.entrySet()) { + node.putPvs(pv.getKey(), newValues(ValueType.TEXT, pv.getValue())); + } + return node.build(); + } + + // Given a node, returns a copy of the node with the given dcid added as a PV + private static PropertyValues addDcidToNode(PropertyValues node, String dcid) { + return PropertyValues.newBuilder(node) + .putPvs("dcid", newValues(Mcf.ValueType.TEXT, dcid)) + .build(); + } +} From f1ea39bb201b8335a234518a99d613dbdb3863a6 Mon Sep 17 00:00:00 2001 From: Keyur Shah Date: Mon, 7 Aug 2023 21:32:06 -0700 Subject: [PATCH 6/6] Remove non-applicable comment from Resolver. --- util/src/main/java/org/datacommons/util/Resolver.java | 1 - 1 file changed, 1 deletion(-) diff --git a/util/src/main/java/org/datacommons/util/Resolver.java b/util/src/main/java/org/datacommons/util/Resolver.java index 6d4d8032..34ee3898 100644 --- a/util/src/main/java/org/datacommons/util/Resolver.java +++ b/util/src/main/java/org/datacommons/util/Resolver.java @@ -36,7 +36,6 @@ boolean submitNode(PropertyValues node) { return submit(node); } - // TODO: Pick the ID based on a preferred list. Optional getResolvedNode(PropertyValues node) { if (!resolved.get()) { throw new IllegalStateException("getResolvedNode called before resolution.");