Skip to content

Commit

Permalink
[WIP] Avro 1.9 perf regression - fix MODEL$ and add some tests (#138)
Browse files Browse the repository at this point in the history
Avro 1.9 perf regression - fix MODEL$ and add some tests

Co-authored-by: Navina Ramesh <[email protected]>
  • Loading branch information
Navina Ramesh and Navina Ramesh authored Apr 14, 2021
1 parent e0e3a01 commit 8e15d64
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public class CodeTransformations {
private static final Pattern IMPORT_CODECS_PATTERN = Pattern.compile("import org\\.apache\\.avro\\.message\\.([\\w.]+);");
private static final Pattern AVROGENERATED_ANNOTATION_PATTERN = Pattern.compile(Pattern.quote("@org.apache.avro.specific.AvroGenerated"));
private static final Pattern MODEL_DECL_PATTERN = Pattern.compile(Pattern.quote("private static SpecificData MODEL$ = new SpecificData();"));
private static final Pattern MODEL_ADD_TYPE_CONVERSION_PATTERN = Pattern.compile("MODEL\\$\\.addLogicalTypeConversion\\(.*\\);");
private static final String MODEL_DECL_REPLACEMENT = "private static org.apache.avro.specific.SpecificData MODEL$ = org.apache.avro.specific.SpecificData.get();";
private static final Pattern GET_SPECIFICDATA_METHOD_PATTERN = Pattern.compile("public\\s*org\\.apache\\.avro\\.specific\\.SpecificData\\s*getSpecificData\\s*\\(\\s*\\)\\s*\\{\\s*return\\s*MODEL\\$\\s*;\\s*}");
private static final Pattern WRITER_DOLLAR_DECL = Pattern.compile("WRITER\\$\\s*=\\s*([^;]+);");
private static final String WRITER_DOLLAR_DECL_REPLACEMENT = Matcher.quoteReplacement("WRITER$ = new org.apache.avro.specific.SpecificDatumWriter<>(SCHEMA$);");
Expand Down Expand Up @@ -76,6 +78,10 @@ public class CodeTransformations {

private static final int MAX_STRING_LITERAL_SIZE = 65000; //just under 64k

private CodeTransformations() {

}

/**
* applies all transformations to a java class generated by avro
* @param code raw java code for a class generated by avro
Expand Down Expand Up @@ -548,6 +554,51 @@ public static String removeAvroGeneratedAnnotation(String code, AvroVersion minS
return AVROGENERATED_ANNOTATION_PATTERN.matcher(code).replaceAll("");
}

/**
* MODEL$ was introduced as a static field in the generated class, starting 1.8, which indicates the conversions
* applicable for logical types in a specific record. An example of generated code looks like:
* <pre>
* {@code
* private static SpecificData MODEL$ = new SpecificData();
* static {
* MODEL$.addLogicalTypeConversion(new org.apache.avro.data.TimeConversions.TimestampMillisConversion());
* }
* }
* </pre>
* However, starting 1.9, avro uses reflection to look up this
* field, which will throw a {@link ReflectiveOperationException} exception for records generated from older version.
* This results in performance degradation.
* Moreover, older avro runtime will not have classes used in these conversions.
*
* This methods avoids the exception by introducing this field in older versions of the generated record.
*
* @param code avro generated code that may or may not have the MODEL$ declaration
* @param minSupportedVersion lowest avro version under which the generated code should work
* @param maxSupportedVersion highest avro version under which the generated code should work
* @return code where MODEL$ exists for avro versions expecting it at runtime (&gt;= 1.9)
*/
public static String pacifyModel$Delcaration(String code, AvroVersion minSupportedVersion,
AvroVersion maxSupportedVersion) {
Matcher match = MODEL_DECL_PATTERN.matcher(code);
if (match.find()) {
if (minSupportedVersion.earlierThan(AvroVersion.AVRO_1_8)) { // minAvro < 1.8
// replace MODEL$ with specificdata.get and remove any static block after that contains any
code = match.replaceAll(Matcher.quoteReplacement(MODEL_DECL_REPLACEMENT));
return MODEL_ADD_TYPE_CONVERSION_PATTERN.matcher(code).replaceAll("");
}
return code;
} else {
if (maxSupportedVersion.equals(AvroVersion.AVRO_1_9) || maxSupportedVersion.laterThan(AvroVersion.AVRO_1_9)) { // maxAvro >= 1.9
// add no-op MODEL$ to the end of the generate schema$ string
int schema$EndPosition = findEndOfSchemaDeclaration(code);
return code.substring(0, schema$EndPosition) + "\n " + MODEL_DECL_REPLACEMENT + "\n " + code.substring(
schema$EndPosition);
}
// do nothing - not intended for use under avro that cares about MODEL$
return code;
}
}

/**
* {@link org.apache.avro.specific.SpecificRecordBase} implements {@link java.io.Externalizable} starting with avro 1.8+
* trying to compile code generated by 1.8+ under older avro will result in compilation errors as the {@link Override}
Expand Down Expand Up @@ -591,8 +642,8 @@ public static String removeAvroGeneratedAnnotation(String code, AvroVersion minS
* @return code where the externalizable support still exists but is compatible with earlier avro at runtime
*/
public static String transformExternalizableSupport(String code, AvroVersion minSupportedVersion, AvroVersion maxSupportedVersion) {
//strip out MODEL$ completely
String codeWithoutModel = MODEL_DECL_PATTERN.matcher(code).replaceAll("");
//handle MODEL$ based on supported versions
String codeWithoutModel = pacifyModel$Delcaration(code, minSupportedVersion, maxSupportedVersion);
//then strip out the getSpecificData() method that returns MODEL$ under avro 1.9+
String codeWithoutGetSpecificData = GET_SPECIFICDATA_METHOD_PATTERN.matcher(codeWithoutModel).replaceAll("");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,18 @@ public void testTransformAvro110Externalizable() throws Exception {
Assert.assertNotNull(transformedClass);
}

@Test
public void testPacifyModel$Declaration() throws Exception {
String avsc = TestUtil.load("RecordWithLogicalTypes.avsc");
Schema schema = AvroCompatibilityHelper.parse(avsc);
String originalCode = runNativeCodegen(schema);

String transformedCode = CodeTransformations.pacifyModel$Delcaration(originalCode, AvroVersion.earliest(), AvroVersion.latest());
Class<?> transformedClass = CompilerUtils.CACHED_COMPILER.loadFromJava(schema.getFullName(), transformedCode);
Assert.assertNotNull(transformedClass);
Assert.assertNotNull(transformedClass.getDeclaredField("MODEL$"));
}

private String runNativeCodegen(Schema schema) throws Exception {
File outputRoot = Files.createTempDirectory(null).toFile();
SpecificCompiler compiler = new SpecificCompiler(schema);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ public void testTransformAvro14RecordWithMultilineDoc() throws Exception {
Assert.assertNotNull(transformedClass);
}

@Test
public void testPacifyModel$Declaration() throws Exception {
String avsc = TestUtil.load("RecordWithLogicalTypes.avsc");
Schema schema = AvroCompatibilityHelper.parse(avsc);
String originalCode = runNativeCodegen(schema);

String transformedCode = CodeTransformations.pacifyModel$Delcaration(originalCode, AvroVersion.earliest(), AvroVersion.latest());
Class<?> transformedClass = CompilerUtils.CACHED_COMPILER.loadFromJava(schema.getFullName(), transformedCode);
Assert.assertNotNull(transformedClass);
Assert.assertNotNull(transformedClass.getDeclaredField("MODEL$"));
}

private String runNativeCodegen(Schema schema) throws Exception {
Method compilerCompileToDestinationMethod = SpecificCompiler.class.getDeclaredMethod("compileToDestination", File.class, File.class);
compilerCompileToDestinationMethod.setAccessible(true); //private
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,39 @@ public void testRecordGeneratedUnderAvro16HasGetClassSchema() throws Exception {
public void testRecordGeneratedUnderAvro17HasGetClassSchema() throws Exception {
Assert.assertEquals(under17.SimpleRecord.getClassSchema(), under17.SimpleRecord.SCHEMA$);
}

@Test
public void testRecordGeneratedUnderAvro14HasModel$Declaration() throws NoSuchFieldException {
Assert.assertNotNull(under14.SimpleRecord.class.getDeclaredField("MODEL$"));
}

@Test
public void testRecordGeneratedUnderAvro15HasModel$Declaration() throws NoSuchFieldException {
Assert.assertNotNull(under15.SimpleRecord.class.getDeclaredField("MODEL$"));
}

@Test
public void testRecordGeneratedUnderAvro16HasModel$Declaration() throws NoSuchFieldException {
Assert.assertNotNull(under16.SimpleRecord.class.getDeclaredField("MODEL$"));
}

@Test
public void testRecordGeneratedUnderAvro17HasModel$Declaration() throws NoSuchFieldException {
Assert.assertNotNull(under17.SimpleRecord.class.getDeclaredField("MODEL$"));
}

@Test
public void testRecordGeneratedUnderAvro18HasModel$Declaration() throws NoSuchFieldException {
Assert.assertNotNull(under18.SimpleRecord.class.getDeclaredField("MODEL$"));
}

@Test
public void testRecordGeneratedByAvro19HasModel$Declaration() throws NoSuchFieldException {
Assert.assertNotNull(under19.SimpleRecord.class.getDeclaredField("MODEL$"));
}

@Test
public void testRecordGeneratedByAvro110HasModel$Declaration() throws NoSuchFieldException {
Assert.assertNotNull(under110.SimpleRecord.class.getDeclaredField("MODEL$"));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"type": "record",
"namespace": "com.acme",
"name": "RecordWithLogicalTypes",
"fields": [
{
"name": "uuidFiled",
"type": {
"type": "string",
"logicalType": "uuid"
}
},
{
"name": "timestampField",
"type": {
"type": "long",
"logicalType": "timestamp-millis"
}
}
]
}

0 comments on commit 8e15d64

Please sign in to comment.