diff --git a/parser/build.gradle b/parser/build.gradle index 6a24f483a..2ae5e1291 100644 --- a/parser/build.gradle +++ b/parser/build.gradle @@ -14,8 +14,10 @@ dependencies { //json-p under its new home @ https://projects.eclipse.org/projects/ee4j.jsonp api "jakarta.json:jakarta.json-api:2.0.1" implementation "org.glassfish:jakarta.json:2.0.1" +// implementation project(path: ':helper:helper', configuration: 'shadow') testImplementation project(":test-common") + testImplementation project(path: ':helper:helper', configuration: 'shadow') testImplementation "org.apache.avro:avro:1.10.2" testImplementation "org.skyscreamer:jsonassert:1.5.0" } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroArrayLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroArrayLiteral.java index 9f323f427..f98220d71 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroArrayLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroArrayLiteral.java @@ -22,6 +22,10 @@ public AvroArrayLiteral(AvroArraySchema schema, CodeLocation codeLocation, List< this.value = value; } + public List getValue() { + return value; + } + @Override public String toString() { StringJoiner csv = new StringJoiner(", "); diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroBooleanLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroBooleanLiteral.java index b03b690e5..167713f71 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroBooleanLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroBooleanLiteral.java @@ -15,10 +15,14 @@ public AvroBooleanLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation, } @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.BOOLEAN; } + public boolean getValue() { + return value; + } + @Override public String toString() { return Boolean.toString(value); diff --git 
a/parser/src/main/java/com/linkedin/avroutil1/model/AvroBytesLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroBytesLiteral.java index 64b3e2d37..5888c4650 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroBytesLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroBytesLiteral.java @@ -14,8 +14,12 @@ public AvroBytesLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation, b this.value = value; } + public byte[] getValue() { + return value; + } + @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.BYTES; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroDoubleLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroDoubleLiteral.java index 251465eec..183f1b5f6 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroDoubleLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroDoubleLiteral.java @@ -14,8 +14,12 @@ public AvroDoubleLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation, this.value = value; } + public double getValue() { + return value; + } + @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.DOUBLE; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumLiteral.java index c4f15e20c..e10fbe86c 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumLiteral.java @@ -22,6 +22,10 @@ public AvroEnumLiteral(AvroEnumSchema schema, CodeLocation codeLocation, String this.value = value; } + public String getValue() { + return value; + } + @Override public String toString() { return value; diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumSchema.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumSchema.java index 
d8a3149a1..51c384e41 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumSchema.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroEnumSchema.java @@ -16,14 +16,14 @@ public class AvroEnumSchema extends AvroNamedSchema { public AvroEnumSchema( CodeLocation codeLocation, - String simpleName, - String namespace, + AvroName name, + List aliases, String doc, List symbols, String defaultSymbol, JsonPropertiesContainer props ) { - super(codeLocation, simpleName, namespace, doc, props); + super(codeLocation, name, aliases, doc, props); //TODO - check for dup symbols, same-name-different-case, etc //TODO - check default (if exists) is a symbol this.symbols = Collections.unmodifiableList(new ArrayList<>(symbols)); diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedLiteral.java index a24bc8117..98bf7e441 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedLiteral.java @@ -22,6 +22,10 @@ public AvroFixedLiteral(AvroFixedSchema schema, CodeLocation codeLocation, byte[ this.value = value; } + public byte[] getValue() { + return value; + } + @Override public String toString() { //TODO - hex encode diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedSchema.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedSchema.java index d5b64c2a8..bb306bc54 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedSchema.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroFixedSchema.java @@ -6,22 +6,25 @@ package com.linkedin.avroutil1.model; +import java.util.List; + + public class AvroFixedSchema extends AvroNamedSchema { private final int size; private final AvroLogicalType logicalType; public AvroFixedSchema( CodeLocation codeLocation, - String simpleName, - String namespace, + AvroName name, + List 
aliases, String doc, int size, AvroLogicalType logicalType, JsonPropertiesContainer props ) { - super(codeLocation, simpleName, namespace, doc, props); + super(codeLocation, name, aliases, doc, props); if (logicalType != null && !logicalType.getParentTypes().contains(type())) { - throw new IllegalArgumentException(type() + " " + simpleName + " at " + codeLocation + throw new IllegalArgumentException(type() + " " + getSimpleName() + " at " + codeLocation + " cannot have a logical type of " + logicalType + " (which can only be a logical type of " + logicalType.getParentTypes() + ")"); } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroFloatLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroFloatLiteral.java index aa1cfa960..242417af8 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroFloatLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroFloatLiteral.java @@ -14,8 +14,12 @@ public AvroFloatLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation, f this.value = value; } + public float getValue() { + return value; + } + @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.FLOAT; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroIntegerLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroIntegerLiteral.java index e3aaecd5b..5dc17d3e1 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroIntegerLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroIntegerLiteral.java @@ -14,8 +14,12 @@ public AvroIntegerLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation, this.value = value; } + public int getValue() { + return value; + } + @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.INT; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroJavaStringRepresentation.java 
b/parser/src/main/java/com/linkedin/avroutil1/model/AvroJavaStringRepresentation.java index ce116e728..de9a2e1e5 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroJavaStringRepresentation.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroJavaStringRepresentation.java @@ -6,40 +6,53 @@ package com.linkedin.avroutil1.model; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + + /** * represents the possible ways avro code generation may represent String fields * in generated java code. * specified by setting the "avro.java.string" property on string types in schemas */ public enum AvroJavaStringRepresentation { - CHAR_SEQUENCE, STRING, UTF8; + CHAR_SEQUENCE("CharSequence"), STRING("String"), UTF8("Utf8"); + private final String jsonValue; - public String getJsonValue() { - switch (this) { - case CHAR_SEQUENCE: - return "CharSequence"; - case STRING: - return "String"; - case UTF8: - return "Utf8"; + AvroJavaStringRepresentation(String jsonValue) { + if (jsonValue == null || jsonValue.isEmpty()) { + throw new IllegalArgumentException("jsonValue required"); + } + this.jsonValue = jsonValue; + } + + private final static List LEGAL_JSON_VALUES; + static { + List temp = new ArrayList<>(values().length); + for (AvroJavaStringRepresentation rep : values()) { + temp.add(rep.getJsonValue()); } - throw new IllegalStateException("unhandled: " + this); + LEGAL_JSON_VALUES = Collections.unmodifiableList(temp); + } + + public String getJsonValue() { + return jsonValue; } public static AvroJavaStringRepresentation fromJson(String jsonStringRepStr) { if (jsonStringRepStr == null || jsonStringRepStr.isEmpty()) { return null; } - switch (jsonStringRepStr) { - //CharSequence, String, Utf8 - case "CharSequence": - return CHAR_SEQUENCE; - case "String": - return STRING; - case "Utf8": - return UTF8; - default: - return null; + for (AvroJavaStringRepresentation candidate : values()) { + if 
(candidate.jsonValue.equals(jsonStringRepStr)) { + return candidate; + } } + return null; + } + + public static List legalJsonValues() { + return LEGAL_JSON_VALUES; } } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroLiteral.java index c203259ce..5a50bc23c 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroLiteral.java @@ -26,6 +26,10 @@ public AvroLiteral(AvroSchema schema, CodeLocation codeLocation) { this.codeLocation = codeLocation; } + public AvroSchema getSchema() { + return schema; + } + public AvroType type() { return schema.type(); } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroLongLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroLongLiteral.java index 1d109827d..86749cf03 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroLongLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroLongLiteral.java @@ -14,8 +14,12 @@ public AvroLongLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation, lo this.value = value; } + public long getValue() { + return value; + } + @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.LONG; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroName.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroName.java new file mode 100644 index 000000000..01a230aa2 --- /dev/null +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroName.java @@ -0,0 +1,80 @@ +/* + * Copyright 2022 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. 
+ */ + +package com.linkedin.avroutil1.model; + +import java.util.Objects; + + +public class AvroName { + private final static String NO_NAMESPACE = ""; + + private final String simpleName; + private final String namespace; + private final String fullname; + + public AvroName(String simpleName, String namespace) { + if (simpleName == null || simpleName.isEmpty()) { + throw new IllegalArgumentException("simple name required"); + } + if (simpleName.contains(".")) { + throw new IllegalArgumentException("simple name must be simple: " + simpleName); + } + this.simpleName = simpleName; + if (namespace != null && !namespace.isEmpty()) { + this.namespace = namespace; + this.fullname = namespace + "." + simpleName; + } else { + this.namespace = NO_NAMESPACE; + this.fullname = this.simpleName; + } + } + + public String getSimpleName() { + return simpleName; + } + + public String getNamespace() { + return namespace; + } + + public String getFullname() { + return fullname; + } + + public boolean hasNamespace() { + return !NO_NAMESPACE.equals(namespace); + } + + public String qualified(String contextNamespace) { + if (!namespace.equals(contextNamespace)) { //also works if argument is null + return fullname; + } + return simpleName; + } + + @Override + public String toString() { + return fullname; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + AvroName avroName = (AvroName) o; + return Objects.equals(fullname, avroName.fullname); + } + + @Override + public int hashCode() { + return Objects.hash(fullname); + } +} diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroNamedSchema.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroNamedSchema.java index cb2ae2dae..77c2bb0df 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroNamedSchema.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroNamedSchema.java @@ -6,41 +6,52 
@@ package com.linkedin.avroutil1.model; +import java.util.List; + + /** * parent class for all avro named types: records, enums and fixed */ public abstract class AvroNamedSchema extends AvroSchema { - protected final String simpleName; - protected final String namespace; + protected final AvroName name; + protected final List aliases; //as fullnames protected final String doc; - public AvroNamedSchema(CodeLocation codeLocation, String simpleName, String namespace, String doc, JsonPropertiesContainer props) { + public AvroNamedSchema( + CodeLocation codeLocation, + AvroName name, + List aliases, + String doc, + JsonPropertiesContainer props + ) { super(codeLocation, props); - if (simpleName == null || simpleName.isEmpty()) { - throw new IllegalArgumentException("simpleName cannot be null or empty"); + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); } - if (namespace == null) { - throw new IllegalArgumentException("namespace for " + simpleName + " cannot be null"); //can be empty - } - this.simpleName = simpleName; - this.namespace = namespace; + this.name = name; + this.aliases = aliases; this.doc = doc; } + public AvroName getName() { + return name; + } + public String getSimpleName() { - return simpleName; + return name.getSimpleName(); } public String getNamespace() { - return namespace; + return name.getNamespace(); } public String getFullName() { - if (namespace.isEmpty()) { - return simpleName; - } - return namespace + "." 
+ simpleName; + return name.getFullname(); + } + + public List getAliases() { + return aliases; } public String getDoc() { diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroNullLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroNullLiteral.java index 9324af1a9..6c0451d6d 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroNullLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroNullLiteral.java @@ -13,7 +13,7 @@ public AvroNullLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation) { } @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.NULL; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroPrimitiveLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroPrimitiveLiteral.java index 2b160b9ee..ecaadccf9 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroPrimitiveLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroPrimitiveLiteral.java @@ -10,11 +10,11 @@ public abstract class AvroPrimitiveLiteral extends AvroLiteral { public AvroPrimitiveLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation) { super(schema, codeLocation); - if (!expectedType().equals(schema.type())) { + if (!primitiveType().equals(schema.type())) { throw new IllegalArgumentException("schema " + schema + " is not a " - + expectedType() + " schema but rather a " + schema.type()); + + primitiveType() + " schema but rather a " + schema.type()); } } - protected abstract AvroType expectedType(); + protected abstract AvroType primitiveType(); } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroRecordSchema.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroRecordSchema.java index 7f1a60a98..d197d0c70 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroRecordSchema.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroRecordSchema.java @@ -19,8 +19,14 
@@ public class AvroRecordSchema extends AvroNamedSchema { */ private List fields; - public AvroRecordSchema(CodeLocation codeLocation, String simpleName, String namespace, String doc, JsonPropertiesContainer props) { - super(codeLocation, simpleName, namespace, doc, props); + public AvroRecordSchema( + CodeLocation codeLocation, + AvroName name, + List aliases, + String doc, + JsonPropertiesContainer props + ) { + super(codeLocation, name, aliases, doc, props); } public void setFields(List fields) { diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroSchemaField.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroSchemaField.java index c1293e798..652ce29a5 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroSchemaField.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroSchemaField.java @@ -79,6 +79,10 @@ public String getName() { return name; } + public boolean hasDoc() { + return doc != null; //empty string counts + } + public String getDoc() { return doc; } @@ -102,6 +106,10 @@ public SchemaOrRef getSchemaOrRef() { return schema; } + public boolean hasDefaultValue() { + return defaultValue != null; + } + public AvroLiteral getDefaultValue() { return defaultValue; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroStringLiteral.java b/parser/src/main/java/com/linkedin/avroutil1/model/AvroStringLiteral.java index 36e3e88c4..152b2c5a9 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroStringLiteral.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroStringLiteral.java @@ -14,8 +14,12 @@ public AvroStringLiteral(AvroPrimitiveSchema schema, CodeLocation codeLocation, this.value = value; } + public String getValue() { + return value; + } + @Override - protected AvroType expectedType() { + protected AvroType primitiveType() { return AvroType.STRING; } diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/AvroType.java 
b/parser/src/main/java/com/linkedin/avroutil1/model/AvroType.java index 3303d155b..8ea9b19dc 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/AvroType.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/AvroType.java @@ -81,7 +81,11 @@ public boolean isCollection() { return collection; } - public static AvroType fromJson(String jsonTypeStr) { + public String toTypeName() { + return name().toLowerCase(Locale.ROOT); + } + + public static AvroType fromTypeName(String jsonTypeStr) { //todo - optimize to not rely on exception try { return valueOf(jsonTypeStr.toUpperCase(Locale.ROOT)); diff --git a/parser/src/main/java/com/linkedin/avroutil1/model/JsonPropertiesContainer.java b/parser/src/main/java/com/linkedin/avroutil1/model/JsonPropertiesContainer.java index 91bb448f2..1d8e72042 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/model/JsonPropertiesContainer.java +++ b/parser/src/main/java/com/linkedin/avroutil1/model/JsonPropertiesContainer.java @@ -42,6 +42,11 @@ public String toString() { } }; + default boolean hasProperties() { + Set propertyNames = propertyNames(); + return propertyNames != null && !propertyNames.isEmpty(); + } + /** * @return returns all the "extra" (non core) properties defined * on an avro schema part, in order of definition in the source diff --git a/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscIssues.java b/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscIssues.java index 22c8dc8bf..222801179 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscIssues.java +++ b/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscIssues.java @@ -6,6 +6,7 @@ package com.linkedin.avroutil1.parser.avsc; +import com.linkedin.avroutil1.model.AvroJavaStringRepresentation; import com.linkedin.avroutil1.model.AvroLogicalType; import com.linkedin.avroutil1.model.AvroType; import com.linkedin.avroutil1.model.CodeLocation; @@ -127,7 +128,8 @@ public static AvscIssue 
unknownJavaStringRepresentation( return new AvscIssue( valueLocation, IssueSeverity.WARNING, - "unknown string representation \"" + badValue + "\" at " + valueLocation.getStart(), + "unknown string representation \"" + badValue + "\" at " + valueLocation.getStart() + + ". expected values: " + AvroJavaStringRepresentation.legalJsonValues(), null ); } @@ -192,4 +194,16 @@ public static AvscIssue badPropertyType( null ); } + + public static AvscIssue duplicateAlias( + String fullAlias, + CodeLocation aliasLocation + ) { + return new AvscIssue( + aliasLocation, + IssueSeverity.WARNING, + "alias " + fullAlias + " at " + aliasLocation + " is a duplicate", + null + ); + } } diff --git a/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscParser.java b/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscParser.java index fc1eaf416..ec88718dd 100644 --- a/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscParser.java +++ b/parser/src/main/java/com/linkedin/avroutil1/parser/avsc/AvscParser.java @@ -49,6 +49,7 @@ import com.linkedin.avroutil1.parser.jsonpext.JsonStringExt; import com.linkedin.avroutil1.parser.jsonpext.JsonValueExt; import com.linkedin.avroutil1.util.Util; +import com.linkedin.avroutil1.model.AvroName; import jakarta.json.JsonValue; import jakarta.json.stream.JsonParsingException; @@ -124,11 +125,11 @@ public class AvscParser { public AvscParseResult parse(String avsc) { JsonReaderExt jsonReader = new JsonReaderWithLocations(new StringReader(avsc), null); - JsonObjectExt root; + JsonValueExt root; AvscFileParseContext context = new AvscFileParseContext(avsc); AvscParseResult result = new AvscParseResult(); try { - root = jsonReader.readObject(); + root = jsonReader.readValue(); } catch (JsonParsingException e) { Throwable rootCause = Util.rootCause(e); String message = rootCause.getMessage(); @@ -146,7 +147,7 @@ public AvscParseResult parse(String avsc) { } try { - parseSchemaDeclOrRef(root, context, true); + SchemaOrRef schemaOrRef = 
parseSchemaDeclOrRef(root, context, true); context.resolveReferences(); result.recordParseComplete(context); } catch (Exception parseIssue) { @@ -173,7 +174,7 @@ private SchemaOrRef parseSchemaDeclOrRef ( JsonValue.ValueType nodeType = node.getValueType(); switch (nodeType) { case STRING: //primitive or ref - return parseSimplePrimitiveOrRef((JsonStringExt) node, context); + return parseSimplePrimitiveOrRef((JsonStringExt) node, context, topLevel); case OBJECT: //record/enum/fixed/array/map/error or a simpler type with extra props thrown-in return parseComplexSchema((JsonObjectExt) node, context, topLevel); case ARRAY: //union @@ -186,11 +187,12 @@ private SchemaOrRef parseSchemaDeclOrRef ( private SchemaOrRef parseSimplePrimitiveOrRef( JsonStringExt stringNode, - AvscFileParseContext context + AvscFileParseContext context, + boolean topLevel ) { CodeLocation codeLocation = locationOf(context.getUri(), stringNode); String typeString = stringNode.getString(); - AvroType avroType = AvroType.fromJson(typeString); + AvroType avroType = AvroType.fromTypeName(typeString); //TODO - screen for reserved words?? if (avroType == null) { //assume it's a ref @@ -198,15 +200,11 @@ private SchemaOrRef parseSimplePrimitiveOrRef( } if (avroType.isPrimitive()) { //no logical type information, string representation or props in the schema if we got here - return new SchemaOrRef(codeLocation, AvroPrimitiveSchema.forType( - codeLocation, - avroType, - null, - null, - 0, - 0, - JsonPropertiesContainer.EMPTY - )); + AvroPrimitiveSchema primitiveSchema = AvroPrimitiveSchema.forType( + codeLocation, avroType, null, null, 0, 0, JsonPropertiesContainer.EMPTY + ); + context.defineSchema(primitiveSchema, topLevel); + return new SchemaOrRef(codeLocation, primitiveSchema); } //if we got here it means we found something like "record" as a type literal. which is not valid syntax throw new AvroSyntaxException("Illegal avro type \"" + typeString + "\" at " + codeLocation.getStart() + ". 
" @@ -280,7 +278,7 @@ private SchemaOrRef parseComplexSchema( ) { CodeLocation codeLocation = locationOf(context.getUri(), objectNode); Located typeStr = getRequiredString(objectNode, "type", () -> "it is a schema declaration"); - AvroType avroType = AvroType.fromJson(typeStr.getValue()); + AvroType avroType = AvroType.fromTypeName(typeStr.getValue()); if (avroType == null) { throw new AvroSyntaxException("unknown avro type \"" + typeStr.getValue() + "\" at " + typeStr.getLocation() + ". expecting \"record\", \"enum\" or \"fixed\""); @@ -312,46 +310,18 @@ private AvroNamedSchema parseNamedSchema( CodeLocation codeLocation, JsonPropertiesContainer extraProps ) { - Located nameStr = getRequiredString(objectNode, "name", () -> avroType + " is a named type"); - Located namespaceStr = getOptionalString(objectNode, "namespace"); + AvroName schemaName = parseSchemaName(objectNode, context, avroType); + List aliases = parseAliases(objectNode, context, avroType, schemaName); + //technically the avro spec does not allow "doc" on type fixed, but screw that Located docStr = getOptionalString(objectNode, "doc"); - - String name = nameStr.getValue(); - String namespace = namespaceStr != null ? namespaceStr.getValue() : null; String doc = docStr != null ? 
docStr.getValue() : null; - String schemaSimpleName; - String schemaNamespace; - if (name.contains(".")) { - //the name specified is a full name (namespace included) - context.addIssue(AvscIssues.useOfFullName( - new CodeLocation(context.getUri(), nameStr.getLocation(), nameStr.getLocation()), - avroType, name)); - if (namespace != null) { - //namespace will be ignored, but it's confusing to even list it - context.addIssue(AvscIssues.ignoredNamespace( - new CodeLocation(context.getUri(), namespaceStr.getLocation(), namespaceStr.getLocation()), - avroType, namespace, name)); - } - //TODO - validate names (no ending in dot, no spaces, etc) - int lastDot = name.lastIndexOf('.'); - schemaSimpleName = name.substring(lastDot + 1); - schemaNamespace = name.substring(0, lastDot); - } else { - schemaSimpleName = name; - schemaNamespace = namespace; - } - - String contextNamespace = context.getCurrentNamespace(); // != null boolean namespaceChanged = false; //check if context namespace changed - if (schemaNamespace != null) { - if (!contextNamespace.equals(schemaNamespace)) { - context.pushNamespace(schemaNamespace); - namespaceChanged = true; - contextNamespace = schemaNamespace; - } + if (!context.getCurrentNamespace().equals(schemaName.getNamespace())) { + context.pushNamespace(schemaName.getNamespace()); + namespaceChanged = true; } AvroNamedSchema namedSchema; @@ -360,8 +330,8 @@ private AvroNamedSchema parseNamedSchema( case RECORD: AvroRecordSchema recordSchema = new AvroRecordSchema( codeLocation, - schemaSimpleName, - contextNamespace, + schemaName, + aliases, doc, extraProps ); @@ -371,7 +341,7 @@ private AvroNamedSchema parseNamedSchema( JsonValueExt fieldDeclNode = (JsonValueExt) fieldsNode.get(fieldNum); //!=null JsonValue.ValueType fieldNodeType = fieldDeclNode.getValueType(); if (fieldNodeType != JsonValue.ValueType.OBJECT) { - throw new AvroSyntaxException("field " + fieldNum + " for record " + schemaSimpleName + " at " + throw new AvroSyntaxException("field 
" + fieldNum + " for record " + schemaName.getSimpleName() + " at " + fieldDeclNode.getStartLocation() + " expected to be an OBJECT, not a " + JsonPUtil.describe(fieldNodeType) + " (" + fieldDeclNode + ")"); } @@ -412,7 +382,7 @@ private AvroNamedSchema parseNamedSchema( JsonValueExt symbolNode = (JsonValueExt) symbolsNode.get(ordinal); JsonValue.ValueType symbolNodeType = symbolNode.getValueType(); if (symbolNodeType != JsonValue.ValueType.STRING) { - throw new AvroSyntaxException("symbol " + ordinal + " for enum " + schemaSimpleName + " at " + throw new AvroSyntaxException("symbol " + ordinal + " for enum " + schemaName.getSimpleName() + " at " + symbolNode.getStartLocation() + " expected to be a STRING, not a " + JsonPUtil.describe(symbolNodeType) + " (" + symbolNode + ")"); } @@ -424,15 +394,15 @@ private AvroNamedSchema parseNamedSchema( defaultSymbol = defaultStr.getValue(); if (!symbols.contains(defaultSymbol)) { context.addIssue(AvscIssues.badEnumDefaultValue(locationOf(context.getUri(), defaultStr), - defaultSymbol, schemaSimpleName, symbols)); + defaultSymbol, schemaName.getSimpleName(), symbols)); //TODO - support "fixing" by selecting 1st symbol as default? 
defaultSymbol = null; } } namedSchema = new AvroEnumSchema( codeLocation, - schemaSimpleName, - contextNamespace, + schemaName, + aliases, doc, symbols, defaultSymbol, @@ -442,7 +412,7 @@ private AvroNamedSchema parseNamedSchema( case FIXED: JsonValueExt sizeNode = getRequiredNode(objectNode, "size", () -> "fixed types must have a size property"); if (sizeNode.getValueType() != JsonValue.ValueType.NUMBER || !(((JsonNumberExt) sizeNode).isIntegral())) { - throw new AvroSyntaxException("size for fixed " + schemaSimpleName + " at " + throw new AvroSyntaxException("size for fixed " + schemaName.getSimpleName() + " at " + sizeNode.getStartLocation() + " expected to be an INTEGER, not a " + JsonPUtil.describe(sizeNode.getValueType()) + " (" + sizeNode + ")"); } @@ -453,8 +423,8 @@ private AvroNamedSchema parseNamedSchema( } namedSchema = new AvroFixedSchema( codeLocation, - schemaSimpleName, - contextNamespace, + schemaName, + aliases, doc, fixedSize, logicalTypeResult.getData(), @@ -510,6 +480,79 @@ private SchemaOrRef parseUnionSchema( return new SchemaOrRef(codeLocation, unionSchema); } + private AvroName parseSchemaName( + JsonObjectExt objectNode, + AvscFileParseContext context, + AvroType avroType + ) { + Located nameStr = getRequiredString(objectNode, "name", () -> avroType + " is a named type"); + Located namespaceStr = getOptionalString(objectNode, "namespace"); + + String name = nameStr.getValue(); + String namespace = namespaceStr != null ? 
namespaceStr.getValue() : null; + + AvroName schemaName; + if (name.contains(".")) { + //the name specified is a full name (namespace included) + context.addIssue(AvscIssues.useOfFullName( + new CodeLocation(context.getUri(), nameStr.getLocation(), nameStr.getLocation()), + avroType, name)); + if (namespace != null) { + //namespace will be ignored, but it's confusing to even list it + context.addIssue(AvscIssues.ignoredNamespace( + new CodeLocation(context.getUri(), namespaceStr.getLocation(), namespaceStr.getLocation()), + avroType, namespace, name)); + } + //TODO - validate names (no ending in dot, no spaces, etc) + int lastDot = name.lastIndexOf('.'); + schemaName = new AvroName(name.substring(lastDot + 1), name.substring(0, lastDot)); + } else { + String inheritedNamespace = namespace != null ? namespace : context.getCurrentNamespace(); + schemaName = new AvroName(name, inheritedNamespace); + } + return schemaName; + } + + private List parseAliases( + JsonObjectExt objectNode, + AvscFileParseContext context, + AvroType avroType, + AvroName name + ) { + JsonArrayExt aliasesArray = getOptionalArray(objectNode, "aliases"); + if (aliasesArray == null || aliasesArray.isEmpty()) { + return null; + } + List aliases = new ArrayList<>(aliasesArray.size()); + for (int i = 0; i < aliasesArray.size(); i++) { + JsonValueExt aliasNode = (JsonValueExt) aliasesArray.get(i); //!=null + JsonValue.ValueType fieldNodeType = aliasNode.getValueType(); + if (fieldNodeType != JsonValue.ValueType.STRING) { + throw new AvroSyntaxException("alias " + i + " for " + name.getSimpleName() + " at " + + aliasNode.getStartLocation() + " expected to be a STRING, not a " + + JsonPUtil.describe(fieldNodeType) + " (" + aliasNode + ")"); + } + String aliasStr = ((JsonStringExt)aliasNode).getString(); + AvroName alias; + if (aliasStr.contains(".")) { + int lastDot = aliasStr.lastIndexOf('.'); + alias = new AvroName(aliasStr.substring(lastDot + 1), aliasStr.substring(0, lastDot)); + } else { + alias = 
new AvroName(aliasStr, name.getNamespace()); + } + + if (aliases.contains(alias)) { + TextLocation fieldStartLocation = Util.convertLocation(aliasNode.getStartLocation()); + TextLocation fieldEndLocation = Util.convertLocation(aliasNode.getEndLocation()); + CodeLocation aliasCodeLocation = new CodeLocation(context.getUri(), fieldStartLocation, fieldEndLocation); + context.addIssue(AvscIssues.duplicateAlias(alias.getFullname(), aliasCodeLocation)); + } else { + aliases.add(alias); + } + } + return aliases; + } + private LiteralOrIssue parseLiteral( JsonValueExt literalNode, AvroSchema schema, diff --git a/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvroSchemaWriter.java b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvroSchemaWriter.java new file mode 100644 index 000000000..a12ff94a7 --- /dev/null +++ b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvroSchemaWriter.java @@ -0,0 +1,27 @@ +/* + * Copyright 2022 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. + */ + +package com.linkedin.avroutil1.writer.avsc; + +import com.linkedin.avroutil1.model.AvroSchema; +import java.util.List; + + +/** + * writes {@link com.linkedin.avroutil1.model.AvroSchema}s out to file(s) + */ +interface AvroSchemaWriter { + + List write(AvroSchema schema, AvscWriterConfig config); + + default AvscFile writeSingle(AvroSchema schema) { + List list = write(schema, AvscWriterConfig.CORRECT_MITIGATED); + if (list == null || list.size() != 1) { + throw new IllegalStateException("expecting single schema, got " + (list == null ? 
0 : list.size())); + } + return list.get(0); + } +} diff --git a/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscFile.java b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscFile.java new file mode 100644 index 000000000..e8f6de85d --- /dev/null +++ b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscFile.java @@ -0,0 +1,37 @@ +/* + * Copyright 2022 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. + */ + +package com.linkedin.avroutil1.writer.avsc; + +import com.linkedin.avroutil1.model.AvroSchema; +import java.nio.file.Path; + + +public class AvscFile { + public static final String SUFFIX = "avsc"; + + private AvroSchema topLevelSchema; + private Path pathFromRoot; + private String contents; + + public AvscFile(AvroSchema topLevelSchema, Path pathFromRoot, String contents) { + this.topLevelSchema = topLevelSchema; + this.pathFromRoot = pathFromRoot; + this.contents = contents; + } + + public AvroSchema getTopLevelSchema() { + return topLevelSchema; + } + + public Path getPathFromRoot() { + return pathFromRoot; + } + + public String getContents() { + return contents; + } +} diff --git a/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscSchemaWriter.java b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscSchemaWriter.java new file mode 100644 index 000000000..ecbe468e2 --- /dev/null +++ b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscSchemaWriter.java @@ -0,0 +1,400 @@ +/* + * Copyright 2022 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. 
+ */ + +package com.linkedin.avroutil1.writer.avsc; + +import com.linkedin.avroutil1.model.AvroArrayLiteral; +import com.linkedin.avroutil1.model.AvroArraySchema; +import com.linkedin.avroutil1.model.AvroBooleanLiteral; +import com.linkedin.avroutil1.model.AvroBytesLiteral; +import com.linkedin.avroutil1.model.AvroDoubleLiteral; +import com.linkedin.avroutil1.model.AvroEnumLiteral; +import com.linkedin.avroutil1.model.AvroEnumSchema; +import com.linkedin.avroutil1.model.AvroFixedLiteral; +import com.linkedin.avroutil1.model.AvroFixedSchema; +import com.linkedin.avroutil1.model.AvroFloatLiteral; +import com.linkedin.avroutil1.model.AvroIntegerLiteral; +import com.linkedin.avroutil1.model.AvroLiteral; +import com.linkedin.avroutil1.model.AvroLongLiteral; +import com.linkedin.avroutil1.model.AvroMapSchema; +import com.linkedin.avroutil1.model.AvroName; +import com.linkedin.avroutil1.model.AvroNamedSchema; +import com.linkedin.avroutil1.model.AvroNullLiteral; +import com.linkedin.avroutil1.model.AvroPrimitiveSchema; +import com.linkedin.avroutil1.model.AvroRecordSchema; +import com.linkedin.avroutil1.model.AvroSchema; +import com.linkedin.avroutil1.model.AvroSchemaField; +import com.linkedin.avroutil1.model.AvroStringLiteral; +import com.linkedin.avroutil1.model.AvroType; +import com.linkedin.avroutil1.model.AvroUnionSchema; +import com.linkedin.avroutil1.model.JsonPropertiesContainer; +import com.linkedin.avroutil1.model.SchemaOrRef; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonReader; +import jakarta.json.JsonValue; +import jakarta.json.JsonWriter; +import jakarta.json.stream.JsonGenerator; +import java.io.StringReader; +import java.io.StringWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; 
+import java.util.Set; + + +public class AvscSchemaWriter implements AvroSchemaWriter { + + @Override + public List write(AvroSchema schema, AvscWriterConfig config) { + String avsc = generateAvsc(schema, AvscWriterConfig.CORRECT_MITIGATED); + Path relativeFileName = pathForSchema(schema); //null of not a named schema + AvscFile file = new AvscFile(schema, relativeFileName, avsc); + return Collections.singletonList(file); + } + + protected Path pathForSchema(AvroSchema maybeNamed) { + if (!(maybeNamed instanceof AvroNamedSchema)) { + return null; + } + AvroNamedSchema namedSchema = (AvroNamedSchema) maybeNamed; + AvroName name = namedSchema.getName(); + + if (!name.hasNamespace()) { + return Paths.get(name.getSimpleName() + "." + AvscFile.SUFFIX); + } + String fullname = name.getFullname(); + String[] parts = fullname.split("\\."); + String[] pathParts = new String[parts.length - 1]; + for (int i = 1; i < parts.length; i++) { + if (i == parts.length - 1) { + pathParts[i - 1] = parts[i] + "." 
+ AvscFile.SUFFIX; + } else { + pathParts[i -1] = parts[i]; + } + } + + return Paths.get(parts[0], pathParts); + } + + protected String generateAvsc(AvroSchema schema, AvscWriterConfig config) { + AvscWriterContext context = new AvscWriterContext(); + Map jsonConfig = new HashMap<>(); + if (config.isPretty()) { + jsonConfig.put(JsonGenerator.PRETTY_PRINTING, "true"); + } + StringWriter stringWriter = new StringWriter(); + JsonValue dom = writeSchema(schema, context, config); + JsonWriter writer = Json.createWriterFactory(jsonConfig).createWriter(stringWriter); + writer.write(dom); + return stringWriter.toString(); + } + + protected JsonValue writeSchema(AvroSchema schema, AvscWriterContext context, AvscWriterConfig config) { + AvroType type = schema.type(); + JsonObjectBuilder definitionBuilder; + switch (type) { + case ENUM: + case FIXED: + case RECORD: + return writeNamedSchema((AvroNamedSchema) schema, context, config); + case ARRAY: + AvroArraySchema arraySchema = (AvroArraySchema) schema; + definitionBuilder = Json.createObjectBuilder(); + definitionBuilder.add("type", "array"); + definitionBuilder.add("items", writeSchema(arraySchema.getValueSchema(), context, config)); + emitJsonProperties(schema, context, config, definitionBuilder); + return definitionBuilder.build(); + case MAP: + AvroMapSchema mapSchema = (AvroMapSchema) schema; + definitionBuilder = Json.createObjectBuilder(); + definitionBuilder.add("type", "map"); + definitionBuilder.add("values", writeSchema(mapSchema.getValueSchema(), context, config)); + emitJsonProperties(schema, context, config, definitionBuilder); + return definitionBuilder.build(); + case UNION: + AvroUnionSchema unionSchema = (AvroUnionSchema) schema; + JsonArrayBuilder unionBuilder = Json.createArrayBuilder(); + for (SchemaOrRef unionBranch : unionSchema.getTypes()) { + AvroSchema branchSchema = unionBranch.getSchema(); //will throw if unresolved ref + unionBuilder.add(writeSchema(branchSchema, context, config)); + } + return 
unionBuilder.build(); + default: + AvroPrimitiveSchema primitiveSchema = (AvroPrimitiveSchema) schema; + if (!primitiveSchema.hasProperties()) { + return Json.createValue(primitiveSchema.type().name().toLowerCase(Locale.ROOT)); + } + definitionBuilder = Json.createObjectBuilder(); + definitionBuilder.add("type", primitiveSchema.type().toTypeName()); + emitJsonProperties(primitiveSchema, context, config, definitionBuilder); + return definitionBuilder.build(); + } + } + + protected JsonValue writeNamedSchema(AvroNamedSchema schema, AvscWriterContext context, AvscWriterConfig config) { + boolean seenBefore = context.schemaEncountered(schema); + if (seenBefore) { + return writeSchemaRef(schema, context, config); + } + //common parts to all named schemas + JsonObjectBuilder definitionBuilder = Json.createObjectBuilder(); + AvroName extraAlias = emitSchemaName(schema, context, config, definitionBuilder); + emitSchemaAliases(schema, context, config, extraAlias, definitionBuilder); + if (schema.getDoc() != null) { + definitionBuilder.add("doc", Json.createValue(schema.getDoc())); + } + + AvroType type = schema.type(); + switch (type) { + case ENUM: + AvroEnumSchema enumSchema = (AvroEnumSchema) schema; + definitionBuilder.add("type", "enum"); + List symbols = enumSchema.getSymbols(); + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + for (String symbol : symbols) { + arrayBuilder.add(symbol); + } + definitionBuilder.add("symbols", arrayBuilder); + String defaultSymbol = enumSchema.getDefaultSymbol(); + if (defaultSymbol != null) { + definitionBuilder.add("default", Json.createValue(defaultSymbol)); + } + break; + case FIXED: + AvroFixedSchema fixedSchema = (AvroFixedSchema) schema; + definitionBuilder.add("type", "fixed"); + definitionBuilder.add("size", Json.createValue(fixedSchema.getSize())); + break; + case RECORD: + AvroRecordSchema recordSchema = (AvroRecordSchema) schema; + definitionBuilder.add("type", "record"); //TODO - support error types? 
+ emitRecordFields(recordSchema, context, config, definitionBuilder); + break; + default: + throw new IllegalStateException("not expecting " + type); + } + emitJsonProperties(schema, context, config, definitionBuilder); + context.popNamingContext(); + return definitionBuilder.build(); + } + + /** + * writes out a reference (either a full name of a simple name, if context namespace permits) to a named schema + * @param schema a schema to write a reference to + * @param context avsc generation context + */ + protected JsonValue writeSchemaRef(AvroNamedSchema schema, AvscWriterContext context, AvscWriterConfig config) { + if (config.isAlwaysEmitNamespace()) { + //emit fullname always + return Json.createValue(schema.getFullName()); + } + //figure out what the context namespace is + String contextNamespace = config.isUsePreAvro702Logic() ? + context.getAvro702ContextNamespace() : context.getCorrectContextNamespace(); + String qualified = schema.getName().qualified(contextNamespace); + return Json.createValue(qualified); + } + + protected AvroName emitSchemaName(AvroNamedSchema schema, AvscWriterContext context, AvscWriterConfig config, JsonObjectBuilder output) { + + //before we get to actually writing anything we need to do some accounting of what horrible old avro would do for 702 + + AvroName schemaName = schema.getName(); + + //what would ancient avro do? + String contextNamespaceAfter702; + boolean shouldEmitNSPre702 = shouldEmitNamespace(schemaName, context.getAvro702ContextNamespace()); + if (shouldEmitNSPre702) { + contextNamespaceAfter702 = schema.getNamespace(); + } else { + contextNamespaceAfter702 = context.getAvro702ContextNamespace(); + } + + //what would modern avro do? 
+ String contextNamespaceAfter; + boolean shouldEmitNSNormally = shouldEmitNamespace(schemaName, context.getCorrectContextNamespace()); + if (shouldEmitNSNormally) { + contextNamespaceAfter = schema.getNamespace(); + } else { + contextNamespaceAfter = context.getCorrectContextNamespace(); + } + + //how will Schema.parse() read the output of ancient and modern avro? + AvroName fullnameWhenParsedUnder702 = new AvroName(schemaName.getSimpleName(), contextNamespaceAfter702); + AvroName fullnameWhenParsed = new AvroName(schemaName.getSimpleName(), contextNamespaceAfter); + + AvroName extraAlias = null; + if (!fullnameWhenParsed.equals(fullnameWhenParsedUnder702)) { + if (config.isUsePreAvro702Logic()) { + extraAlias = fullnameWhenParsed; + } else { + extraAlias = fullnameWhenParsedUnder702; + } + } + + if (config.isAlwaysEmitNamespace()) { + if (config.isEmitNamespacesSeparately() || schemaName.getNamespace().isEmpty()) { + //there's no way to build a fullname for something in the empty namespace + //so for those we always need to emit an empty namespace prop. + output.add("namespace", schemaName.getNamespace()); + output.add("name", schemaName.getSimpleName()); + } else { + output.add("name", schemaName.getFullname()); + } + } else { + boolean emitNS = config.isUsePreAvro702Logic() ? shouldEmitNSPre702 : shouldEmitNSNormally; + if (emitNS) { + output.add("namespace", schemaName.getNamespace()); + } + output.add("name", schemaName.getSimpleName()); + } + + context.pushNamingContext(schema, contextNamespaceAfter, contextNamespaceAfter702); + + return extraAlias; + } + + protected void emitSchemaAliases( + AvroNamedSchema schema, + AvscWriterContext context, + AvscWriterConfig config, + AvroName extraAlias, + JsonObjectBuilder output + ) { + List aliases = schema.getAliases(); + int numAliases = (extraAlias != null ? 1 : 0) + (aliases != null ? 
aliases.size() : 0); + if (numAliases == 0) { + return; + } + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + if (aliases != null) { + for (AvroName alias : aliases) { + arrayBuilder.add(alias.getFullname()); + } + } + if (extraAlias != null) { + arrayBuilder.add(extraAlias.getFullname()); + } + output.add("aliases", arrayBuilder); + } + + protected void emitJsonProperties( + JsonPropertiesContainer fieldOrSchema, + AvscWriterContext context, + AvscWriterConfig config, + JsonObjectBuilder output + ) { + Set propNames = fieldOrSchema.propertyNames(); + if (propNames == null || propNames.isEmpty()) { + return; + } + for (String propName : propNames) { + String json = fieldOrSchema.getPropertyAsJsonLiteral(propName); + JsonReader reader = Json.createReader(new StringReader(json)); + JsonValue propValue = reader.readValue(); + output.add(propName, propValue); + } + } + + protected void emitRecordFields(AvroRecordSchema schema, AvscWriterContext context, AvscWriterConfig config, JsonObjectBuilder output) { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + List fields = schema.getFields(); + for (AvroSchemaField field : fields) { + JsonObjectBuilder fieldBuilder = Json.createObjectBuilder(); + fieldBuilder.add("name", field.getName()); + if (field.hasDoc()) { + fieldBuilder.add("doc", field.getDoc()); + } + AvroSchema fieldSchema = field.getSchema(); + fieldBuilder.add("type", writeSchema(fieldSchema, context, config)); + if (field.hasDefaultValue()) { + AvroLiteral defaultValue = field.getDefaultValue(); + JsonValue defaultValueLiteral = writeDefaultValue(fieldSchema, defaultValue); + fieldBuilder.add("default", defaultValueLiteral); + } + //TODO - order + //TODO - aliases + arrayBuilder.add(fieldBuilder); + } + output.add("fields", arrayBuilder); + } + + protected JsonValue writeDefaultValue(AvroSchema fieldSchema, AvroLiteral literal) { + AvroType type = fieldSchema.type(); + String temp; + switch (type) { + case NULL: + //noinspection unused 
(kept as a sanity check) + AvroNullLiteral nullLiteral = (AvroNullLiteral) literal; + return JsonValue.NULL; + case BOOLEAN: + AvroBooleanLiteral boolLiteral = (AvroBooleanLiteral) literal; + return boolLiteral.getValue() ? JsonValue.TRUE : JsonValue.FALSE; + case INT: + AvroIntegerLiteral intLiteral = (AvroIntegerLiteral) literal; + return Json.createValue(intLiteral.getValue()); + case LONG: + AvroLongLiteral longLiteral = (AvroLongLiteral) literal; + return Json.createValue(longLiteral.getValue()); + case FLOAT: + AvroFloatLiteral floatLiteral = (AvroFloatLiteral) literal; + return Json.createValue(floatLiteral.getValue()); + case DOUBLE: + AvroDoubleLiteral doubleLiteral = (AvroDoubleLiteral) literal; + return Json.createValue(doubleLiteral.getValue()); + case STRING: + AvroStringLiteral stringLiteral = (AvroStringLiteral) literal; + return Json.createValue(stringLiteral.getValue()); + case BYTES: + AvroBytesLiteral bytesLiteral = (AvroBytesLiteral) literal; + //spec says "values for bytes and fixed fields are JSON strings, where Unicode code points + //0-255 are mapped to unsigned 8-bit byte values 0-255", and this is how its done + temp = new String(bytesLiteral.getValue(), StandardCharsets.ISO_8859_1); + return Json.createValue(temp); + case ENUM: + AvroEnumLiteral enumLiteral = (AvroEnumLiteral) literal; + return Json.createValue(enumLiteral.getValue()); + case FIXED: + AvroFixedLiteral fixedLiteral = (AvroFixedLiteral) literal; + //spec says "values for bytes and fixed fields are JSON strings, where Unicode code points + //0-255 are mapped to unsigned 8-bit byte values 0-255", and this is how its done + temp = new String(fixedLiteral.getValue(), StandardCharsets.ISO_8859_1); + return Json.createValue(temp); + case ARRAY: + AvroArrayLiteral arrayLiteral = (AvroArrayLiteral) literal; + List array = arrayLiteral.getValue(); + AvroArraySchema arraySchema = (AvroArraySchema) arrayLiteral.getSchema(); + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); 
+ for (AvroLiteral element : array) { + arrayBuilder.add(writeDefaultValue(arraySchema.getValueSchema(), element)); + } + return arrayBuilder.build(); + default: + throw new UnsupportedOperationException("writing default values for " + type + " not implemented yet"); + } + } + + /** + * checks if vanilla avro would emit a "namespace" json property given a context namespace and a schema name + * @param name schema name, required + * @param contextNamespace context namespace, can be null. + * @return true if vanilla avro would emit a "namespace" json property + */ + private boolean shouldEmitNamespace(AvroName name, String contextNamespace) { + if (contextNamespace == null) { + return name.getNamespace() != null && !name.getNamespace().isEmpty(); + } + //name.namespace could be "" and sometimes need to be emitted explicitly still + return !contextNamespace.equals(name.getNamespace()); + } +} diff --git a/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscWriterConfig.java b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscWriterConfig.java new file mode 100644 index 000000000..577b9e7fe --- /dev/null +++ b/parser/src/main/java/com/linkedin/avroutil1/writer/avsc/AvscWriterConfig.java @@ -0,0 +1,99 @@ +/* + * Copyright 2022 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. + */ + +package com.linkedin.avroutil1.writer.avsc; + +public class AvscWriterConfig { + private final boolean pretty; + /** + * true to print horrible schemas recreating ancient avro behaviour from + * before AVRO-702 + */ + private final boolean usePreAvro702Logic; + /** + * true to add aliases to all avro-702-impacted schemas. + * aliases are either to the correct or the bad fullname, + * depending on value of {@link #usePreAvro702Logic}. 
/**
 * immutable bag of switches controlling how avsc is generated.
 */
public class AvscWriterConfig {

  /**
   * not pretty-printed, correct (post avro-702) namespace logic with avro-702
   * aliases added, namespace only emitted where required and as its own
   * property, all nested schemas inlined.
   */
  public static final AvscWriterConfig CORRECT_MITIGATED = new AvscWriterConfig(
      false, false, true, false, true, true
  );

  /**
   * true to pretty-print the generated avsc
   */
  private final boolean pretty;
  /**
   * true to reproduce the (bad) output of ancient avro, from before AVRO-702
   */
  private final boolean usePreAvro702Logic;
  /**
   * true to add aliases to all schemas impacted by avro-702. the alias is to
   * the bad fullname when writing correct avsc and to the correct fullname
   * when writing pre-702 avsc (see {@link #usePreAvro702Logic}).
   */
  private final boolean addAvro702Aliases;
  /**
   * by specification a schema without a namespace inherits the namespace of
   * its parent, so namespace is not emitted for schemas (and references)
   * sharing their parent's namespace.
   *
   * true overrides that and emits namespace always, required or not.
   */
  private final boolean alwaysEmitNamespace;
  /**
   * true to emit separate "name" and "namespace" json properties.
   * false to emit a single "name" property holding either the simple
   * name or the fullname, as required.
   */
  private final boolean emitNamespacesSeparately;
  /**
   * true to define all nested schemas inline ("exploded schema"). this is
   * the only legal form by the avro specification (which knows nothing of
   * imports/references) and the only legal value for SCHEMA$ fields of
   * generated classes.
   *
   * false emits nested schemas as references (plain fullname strings),
   * which is non-standard but allows reuse of common schemas in large
   * codebases.
   */
  private final boolean inlineAllNestedSchemas;

  /**
   * @param pretty see {@link #isPretty()}
   * @param usePreAvro702Logic see {@link #isUsePreAvro702Logic()}
   * @param addAvro702Aliases see {@link #isAddAvro702Aliases()}
   * @param alwaysEmitNamespace see {@link #isAlwaysEmitNamespace()}
   * @param emitNamespacesSeparately see {@link #isEmitNamespacesSeparately()}
   * @param inlineAllNestedSchemas see {@link #isInlineAllNestedSchemas()}
   * @throws IllegalArgumentException if both usePreAvro702Logic and alwaysEmitNamespace are true
   */
  public AvscWriterConfig(
      boolean pretty,
      boolean usePreAvro702Logic,
      boolean addAvro702Aliases,
      boolean alwaysEmitNamespace,
      boolean emitNamespacesSeparately,
      boolean inlineAllNestedSchemas
  ) {
    boolean conflictingFlags = usePreAvro702Logic && alwaysEmitNamespace;
    if (conflictingFlags) {
      throw new IllegalArgumentException("cant specify usePreAvro702Logic and alwaysEmitNamespace together");
    }
    this.pretty = pretty;
    this.usePreAvro702Logic = usePreAvro702Logic;
    this.addAvro702Aliases = addAvro702Aliases;
    this.alwaysEmitNamespace = alwaysEmitNamespace;
    this.emitNamespacesSeparately = emitNamespacesSeparately;
    this.inlineAllNestedSchemas = inlineAllNestedSchemas;
  }

  /**
   * @return true if avsc is to be pretty-printed
   */
  public boolean isPretty() {
    return pretty;
  }

  /**
   * @return true if avsc is to be written the way pre avro-702 avro would
   */
  public boolean isUsePreAvro702Logic() {
    return usePreAvro702Logic;
  }

  /**
   * @return true if aliases are to be added to avro-702-impacted schemas
   */
  public boolean isAddAvro702Aliases() {
    return addAvro702Aliases;
  }

  /**
   * @return true if namespace is to be emitted even where not required
   */
  public boolean isAlwaysEmitNamespace() {
    return alwaysEmitNamespace;
  }

  /**
   * @return true if namespace is to be emitted as its own json property
   */
  public boolean isEmitNamespacesSeparately() {
    return emitNamespacesSeparately;
  }

  /**
   * @return true if all nested schemas are to be defined inline
   */
  public boolean isInlineAllNestedSchemas() {
    return inlineAllNestedSchemas;
  }
}
+ */ + +package com.linkedin.avroutil1.writer.avsc; + +import com.linkedin.avroutil1.model.AvroName; +import com.linkedin.avroutil1.model.AvroNamedSchema; +import com.linkedin.avroutil1.model.AvroSchema; +import java.util.ArrayDeque; +import java.util.HashMap; +import java.util.Map; + + +public class AvscWriterContext { + + /** + * updates every time we "enter" into and exit out of a named type, this + * keeps track of the namespace vs which any "unqualified" (==not full) + * name(s) would be resolved. + * to implement avro-702 mitigation it also keeps track of how 702-afflicted + * records will be parsed + */ + private final ArrayDeque contextStack = new ArrayDeque<>(1); + /** + * latches to the 1st non-null namespace encountered, never pop()ed or reset + */ + private String avro702ContextNamespace = null; + + + private final Map known = new HashMap<>(); //by their fullname + + public AvscWriterContext() { + //context starts at the "root" namespace + contextStack.push(new NamingContext(null, "", "")); + } + + /** + * @return the correct namespace at this current inner-most context + */ + public String getCorrectContextNamespace() { + assert contextStack.peek() != null; + return contextStack.peek().correcttNamespace; + } + + /** + * @return the namespace that old pre-702 avro logic would evaluate the current context namespace to be + */ + public String getAvro702ContextNamespace() { + return avro702ContextNamespace; + } + + /** + * registers a (possibly known) named schema + * @param schema a name schema encountered during avsc generation + * @return true if schema has already been encountered (and hence defined) before, false if schema is new (seen 1st time) + */ + public boolean schemaEncountered(AvroNamedSchema schema) { + String fullName = schema.getFullName(); + AvroSchema alreadySeen = known.get(fullName); + if (alreadySeen == null) { + known.put(fullName, schema); + return false; + } + //make sure we dont have a different redefinition + if 
(!alreadySeen.equals(schema)) { + throw new IllegalStateException("schema " + fullName + " at " + schema.getCodeLocation() + + " already seen (and different) at " + alreadySeen.getCodeLocation()); + } + return true; + } + + public void pushNamingContext(AvroNamedSchema root, String correctNamespace, String pre702ParsedNamespace) { + contextStack.push(new NamingContext(root.getName(), correctNamespace, pre702ParsedNamespace)); + } + + public void popNamingContext() { + contextStack.pop(); //will throw if empty + } + + private static class NamingContext { + /** + * the (always correct) named type at the root of this context (if any) + */ + private final AvroName root; + /** + * the correct namespace of this context, as defined by the avro specification + */ + private final String correcttNamespace; + /** + * what would be the context namespace here on parsing if the schema was + * written under pre-702 (incorrect) logic + */ + private final String pre702ParsedNamespace; + + public NamingContext(AvroName root, String correctNamespace, String pre702ParsedNamespace) { + this.root = root; + this.correcttNamespace = correctNamespace; + this.pre702ParsedNamespace = pre702ParsedNamespace; + } + } +} diff --git a/parser/src/test/java/com/linkedin/avroutil1/writer/avsc/AvscSchemaWriterTest.java b/parser/src/test/java/com/linkedin/avroutil1/writer/avsc/AvscSchemaWriterTest.java new file mode 100644 index 000000000..db39b6fe7 --- /dev/null +++ b/parser/src/test/java/com/linkedin/avroutil1/writer/avsc/AvscSchemaWriterTest.java @@ -0,0 +1,115 @@ +/* + * Copyright 2022 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. 
+ */ + +package com.linkedin.avroutil1.writer.avsc; + +import com.linkedin.avroutil1.compatibility.HelperConsts; +import com.linkedin.avroutil1.model.AvroSchema; +import com.linkedin.avroutil1.parser.avsc.AvscIssue; +import com.linkedin.avroutil1.parser.avsc.AvscParseResult; +import com.linkedin.avroutil1.parser.avsc.AvscParser; +import java.io.File; +import java.util.List; +import org.apache.avro.Schema; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class AvscSchemaWriterTest { + + @Test + public void testWritingPrimitiveSchemas() throws Exception { + + //simplest + + testParsingCycle("\"null\""); + testParsingCycle("\"boolean\""); + testParsingCycle("\"int\""); + testParsingCycle("\"long\""); + testParsingCycle("\"float\""); + testParsingCycle("\"double\""); + testParsingCycle("\"string\""); + testParsingCycle("\"bytes\""); + + //"fatter" + + testParsingCycle("{\"type\": \"null\"}"); + testParsingCycle("{\"type\": \"boolean\"}"); + testParsingCycle("{\"type\": \"int\"}"); + testParsingCycle("{\"type\": \"long\"}"); + testParsingCycle("{\"type\": \"float\"}"); + testParsingCycle("{\"type\": \"double\"}"); + testParsingCycle("{\"type\": \"string\"}"); + testParsingCycle("{\"type\": \"bytes\"}"); + + //with props and logical types + + testParsingCycle("{\"type\": \"null\", \"prop1\": null}"); + testParsingCycle("{\"type\": \"boolean\", \"prop2\": 45.6}"); + testParsingCycle("{\"type\": \"int\", \"logicalType\": \"time-millis\"}"); + testParsingCycle("{\"type\": \"long\", \"logicalType\": \"timestamp-micros\"}"); + testParsingCycle("{\"type\": \"float\", \"prop3\": \"string value\"}"); + testParsingCycle("{\"type\": \"double\", \"prop4\": [1, null, false, \"str\"]}"); + testParsingCycle("{\"type\": \"string\", \"avro.java.string\": \"String\"}"); + testParsingCycle("{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2}"); + } + + @Test + public void testWritingContainerSchemas() throws Exception { + 
testParsingCycle("{\"type\": \"array\", \"items\": \"string\"}"); + testParsingCycle("{\"type\": \"map\", \"values\": \"long\"}"); + testParsingCycle("[\"int\", \"null\"]"); + testParsingCycle("[{\"type\": \"array\", \"items\": {\"type\": \"map\", \"values\": \"long\"}}, {\"type\": \"map\", \"values\": \"bytes\"}, \"string\"]"); + } + + @Test + public void testWritingSimpleNamedSchemas() throws Exception { + testParsingCycle("{\"type\": \"enum\", \"namespace\": \"ns\", \"name\": \"MyEnum\", \"doc\": \"doc\", \"symbols\": [\"A\", \"B\"], \"default\": \"B\"}"); + testParsingCycle("{\"type\": \"fixed\", \"name\": \"MyFixed\", \"size\": 7, \"aliases\": [\"some.OtherName\"]}"); + testParsingCycle( + "{\n" + + " \"type\": \"record\",\n" + + " \"name\": \"SimpleRecord\",\n" + + " \"fields\": [\n" + + " {\"name\": \"f1\", \"type\": \"int\", \"default\": 42}\n" + + " ]\n" + + "}" + ); + } + + /** + * given an avsc, parses and re-prints it using our code + * and compares the result to vanilla avro. 
+ * @param avsc + */ + private void testParsingCycle(String avsc) { + Schema reference = Schema.parse(avsc); + + AvscParser parser = new AvscParser(); + AvscParseResult parseResults = parser.parse(avsc); + List parseIssues = parseResults.getIssues(); + Assert.assertTrue(parseIssues == null || parseIssues.isEmpty(), "parse issues: " + parseIssues); + AvroSchema parsed = parseResults.getTopLevelSchema(); + Assert.assertNotNull(parsed); + AvscSchemaWriter writer = new AvscSchemaWriter(); + AvscFile file = writer.writeSingle(parsed); + Assert.assertNotNull(file); + if (HelperConsts.NAMED_TYPES.contains(reference.getType())){ + //for named schemas the file path is determined by schema name + Assert.assertNotNull(file.getPathFromRoot()); + String expectedFileName = reference.getFullName().replaceAll("\\.", File.separator) + ".avsc"; + Assert.assertEquals(file.getPathFromRoot().toString(), expectedFileName); + } else { + //cant auto-name files containing other schema types + Assert.assertNull(file.getPathFromRoot()); + } + String avsc2 = file.getContents(); + + Schema afterCycle = Schema.parse(avsc2); + + Assert.assertEquals(reference, afterCycle); + } +}