Skip to content

Commit

Permalink
Support era and string numbers (#17)
Browse files Browse the repository at this point in the history
* Support Era and string Numbers

* Update Readme

* Modify exception formatting

* Added infinity support for double type

* Merge date and datetime formatters into one

* Added onValidStringNumber

* Support base64 decode

* Added test to verify Infinity and NaN is not supported for avro int type
  • Loading branch information
suhomud authored Dec 12, 2022
1 parent 95f4099 commit a003e28
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 49 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ The original [json-avro-converter](https://github.com/allegro/json-avro-converte
## CHANGELOG

| Version | Description |
| ------- | ----------- |
|---------| ----------- |
| 1.0.2 | Support era and string numbers |
| 1.0.1 | Fix publication to JitPack. |
| 1.0.0 | Publish to JitPack. |
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,27 @@
import org.apache.avro.AvroTypeException;

/**
 * Static factory methods that build {@link AvroTypeException} instances with
 * uniformly worded messages for the converter's type-mismatch cases.
 *
 * NOTE(review): in this listing several methods contain two consecutive
 * {@code return} statements (a StringBuilder form followed by a String.format
 * form). They appear to be the removed and added sides of a diff shown
 * together; presumably only the String.format form is meant to remain - confirm.
 */
class AvroTypeExceptions {

/**
 * Builds the exception for a value that is not one of an enum's symbols.
 *
 * @param fieldPath       path to the offending field, rendered via PathsPrinter.print
 * @param expectedSymbols textual list of the accepted enum symbols
 * @param offendingValue  the value that was actually supplied
 * @return exception whose message names the field, the accepted symbols and the value
 */
static AvroTypeException enumException(Deque<String> fieldPath, String expectedSymbols, Object offendingValue) {
// StringBuilder form of the message.
return new AvroTypeException(new StringBuilder()
.append("Field ")
.append(PathsPrinter.print(fieldPath))
.append(" is expected to be of enum type and be one of ")
.append(expectedSymbols)
.append(", but it is: ")
.append(offendingValue)
.toString());
// String.format form carrying the same wording.
return new AvroTypeException(
String.format("Field %s is expected to be of enum type and be one of %s, but it is: %s",
PathsPrinter.print(fieldPath), expectedSymbols, offendingValue));
}

/**
 * Builds the exception for a value that matches none of a union's branches.
 *
 * @param fieldName      name of the union field
 * @param expectedTypes  textual list of the types the union accepts
 * @param offendingPath  path to the nested field that failed, rendered via PathsPrinter.print
 * @param offendingValue value appended after "adheres to schema: " in the message
 * @return exception whose message names the field, the accepted types and the offending path
 */
static AvroTypeException unionException(String fieldName, String expectedTypes, Deque<String> offendingPath, Object offendingValue) {
// StringBuilder form of the message.
return new AvroTypeException(new StringBuilder()
.append("Could not evaluate union, field ")
.append(fieldName)
.append(" is expected to be one of these: ")
.append(expectedTypes)
.append(". If this is a complex type, check if offending field (path: ")
.append(PathsPrinter.print(offendingPath))
.append(") adheres to schema: ")
.append(offendingValue)
.toString());
// String.format form carrying the same wording.
return new AvroTypeException(
String.format("Could not evaluate union, field %s is expected to be one of these: %s. "
+ "If this is a complex type, check if offending field (path: %s) adheres to schema: %s",
fieldName, expectedTypes, PathsPrinter.print(offendingPath), offendingValue));
}

/**
 * Builds the exception for a value whose type differs from the expected one.
 *
 * @param fieldPath      path to the offending field, rendered via PathsPrinter.print
 * @param expectedType   name of the type the field should have
 * @param offendingValue the value that was actually supplied
 * @return exception whose message names the field, the expected type and the value
 */
static AvroTypeException typeException(Deque<String> fieldPath, String expectedType, Object offendingValue) {
// StringBuilder form of the message.
return new AvroTypeException(new StringBuilder()
.append("Field ")
.append(PathsPrinter.print(fieldPath))
.append(" is expected to be type: ")
.append(expectedType)
.append(", but it is: ")
.append(offendingValue)
.toString());
// String.format form carrying the same wording.
return new AvroTypeException(
String.format("Field %s is expected to be type: %s, but it is: %s", PathsPrinter.print(fieldPath), expectedType, offendingValue));
}

/**
 * Builds the exception for a value that is expected to be in number format but is not.
 *
 * @param fieldPath      path to the offending field, rendered via PathsPrinter.print
 * @param offendingValue the value that was actually supplied
 * @return exception whose message names the field and the value
 */
static AvroTypeException numberFormatException(Deque<String> fieldPath, Object offendingValue) {
return new AvroTypeException(
String.format("Field %s is expected to be Number format, but it is: %s", PathsPrinter.print(fieldPath), offendingValue));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static tech.allegro.schema.json2avro.converter.AdditionalPropertyField.DEFAULT_AVRO_FIELD_NAME;
import static tech.allegro.schema.json2avro.converter.AdditionalPropertyField.DEFAULT_JSON_FIELD_NAMES;
import static tech.allegro.schema.json2avro.converter.AvroTypeExceptions.enumException;
import static tech.allegro.schema.json2avro.converter.AvroTypeExceptions.numberFormatException;
import static tech.allegro.schema.json2avro.converter.AvroTypeExceptions.typeException;
import static tech.allegro.schema.json2avro.converter.AvroTypeExceptions.unionException;

Expand All @@ -31,6 +32,7 @@
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecordBuilder;
import tech.allegro.schema.json2avro.converter.util.DateTimeUtils;
import tech.allegro.schema.json2avro.converter.util.StringUtil;

public class JsonGenericRecordReader {

Expand Down Expand Up @@ -201,7 +203,9 @@ private Object read(Schema.Field field, Schema schema, Object value, Deque<Strin
if (logicalType != null && logicalType.equals(LogicalTypes.date())) {
result = onValidType(value, String.class, path, silently, DateTimeUtils::getEpochDay);
} else {
result = onValidNumber(value, path, silently, Number::intValue);
result = value instanceof String valueString? // implicit cast to String
onValidStringNumber(valueString, path, silently, Integer::parseInt) :
onValidNumber(value, path, silently, Number::intValue);
}
break;
case LONG:
Expand All @@ -213,14 +217,20 @@ private Object read(Schema.Field field, Schema schema, Object value, Deque<Strin
} else if (logicalType != null && logicalType.equals(LogicalTypes.timeMicros())) {
result = onValidType(value, String.class, path, silently, DateTimeUtils::getMicroSeconds);
} else {
result = onValidNumber(value, path, silently, Number::longValue);
result = value instanceof String stringValue ? // implicit cast to String
onValidStringNumber(stringValue, path, silently, Long::parseLong) :
onValidNumber(value, path, silently, Number::longValue);
}
break;
case FLOAT:
result = onValidNumber(value, path, silently, Number::floatValue);
result = value instanceof String stringValue ? // implicit cast to String
onValidStringNumber(stringValue, path, silently, Float::parseFloat) :
onValidNumber(value, path, silently, Number::floatValue);
break;
case DOUBLE:
result = onValidNumber(value, path, silently, Number::doubleValue);
result = value instanceof String stringValue ? // implicit cast to String
onValidStringNumber(stringValue, path, silently, Double::parseDouble) :
onValidNumber(value, path, silently, Number::doubleValue);
break;
case BOOLEAN:
result = onValidType(value, Boolean.class, path, silently, bool -> bool);
Expand All @@ -236,7 +246,7 @@ private Object read(Schema.Field field, Schema schema, Object value, Deque<Strin
}
break;
case BYTES:
result = onValidType(value, String.class, path, silently, string -> bytesForString(string));
result = onValidType(value, String.class, path, silently, this::bytesForString);
break;
case NULL:
result = value == null ? value : INCOMPATIBLE;
Expand Down Expand Up @@ -307,9 +317,17 @@ private Object ensureEnum(Schema schema, Object value, Deque<String> path) {
}

/**
 * Converts a JSON string into the buffer stored for an Avro {@code bytes} field.
 *
 * <p>When the text passes {@code StringUtil.isBase64} it is Base64-decoded and the
 * decoded bytes are wrapped directly. The decoded bytes are deliberately NOT
 * round-tripped through a {@code String}: decoding arbitrary binary as UTF-8 and
 * re-encoding it replaces every malformed sequence with U+FFFD and corrupts the
 * payload. Any other text is stored as its UTF-8 encoding.
 *
 * @param string the JSON string value; must not be null
 * @return a buffer wrapping either the Base64-decoded bytes or the UTF-8 bytes of {@code string}
 */
private ByteBuffer bytesForString(String string) {
  if (StringUtil.isBase64(string)) {
    // isBase64 has already validated alphabet and padding, so decode() is not expected to reject the input.
    return ByteBuffer.wrap(java.util.Base64.getDecoder().decode(string));
  }
  return ByteBuffer.wrap(string.getBytes(StandardCharsets.UTF_8));
}

/**
* converted value based on passed function
*
* @throws AvroTypeException if type class != value class
*/
@SuppressWarnings("unchecked")
public <T> Object onValidType(Object value, Class<T> type, Deque<String> path, boolean silently, Function<T, Object> function)
throws AvroTypeException {
Expand All @@ -318,15 +336,32 @@ public <T> Object onValidType(Object value, Class<T> type, Deque<String> path, b
Object result = function.apply((T) value);
return result == null ? INCOMPATIBLE : result;
} else {
if (silently) {
return INCOMPATIBLE;
} else {
throw typeException(path, type.getTypeName(), value);
}
return processException(silently, typeException(path, type.getTypeName(), value));
}
}

/**
 * Converts a numeric string by handing it to {@code onValidType} with the supplied parser.
 *
 * <p>A {@link NumberFormatException} raised by the parser is translated through
 * {@code processException} into a number-format {@code AvroTypeException}.
 *
 * @param value    the string expected to hold a number
 * @param path     path to the field being read, used in the error message
 * @param silently passed to {@code onValidType} and {@code processException}
 * @param function parser applied to the string, e.g. {@code Integer::parseInt}
 * @return whatever {@code onValidType} or {@code processException} returns
 * @throws AvroTypeException if value is not numeric
 */
public Object onValidStringNumber(String value, Deque<String> path, boolean silently, Function<String, Object> function)
    throws AvroTypeException {
  Object converted;
  try {
    converted = onValidType(value, String.class, path, silently, function);
  } catch (NumberFormatException notNumeric) {
    converted = processException(silently, numberFormatException(path, value));
  }
  return converted;
}

/**
 * Converts a value that must be a {@link Number} by delegating to {@code onValidType}.
 *
 * @param value    the candidate value
 * @param path     path to the field being read, used in the error message
 * @param silently passed through to {@code onValidType}
 * @param function conversion applied to the number, e.g. {@code Number::intValue}
 * @return whatever {@code onValidType} returns for these arguments
 */
public Object onValidNumber(Object value, Deque<String> path, boolean silently, Function<Number, Object> function) {
  final Class<Number> expectedType = Number.class;
  return onValidType(value, expectedType, path, silently, function);
}

/**
 * Either throws the given exception or returns the {@code INCOMPATIBLE} marker.
 *
 * @param silently when true the exception is discarded and {@code INCOMPATIBLE} is returned
 * @param ex       the already-built exception to throw when not silent
 * @return {@code INCOMPATIBLE} when {@code silently} is true
 * @throws AvroTypeException {@code ex} itself, when {@code silently} is false
 */
private Object processException(boolean silently, AvroTypeException ex) throws AvroTypeException {
  if (!silently) {
    throw ex;
  }
  return INCOMPATIBLE;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,27 @@

public class DateTimeUtils {

// NOTE(review): formatter/timeFormatter and DATE_TIME_FORMATTER/TIME_FORMATTER look like the
// removed and added sides of a diff shown together - confirm which pair is live.

// Date / date-time pattern built from optional sections: year, month and day with optional
// separators, then an optional time part with optional fraction and zone/offset sections.
private static final DateTimeFormatter formatter =
DateTimeFormatter.ofPattern("[yyyy][yy]['-']['/']['.'][' '][MMM][MM][M]['-']['/']['.'][' '][dd][d]" +
"[[' ']['T']HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS][' '][z][zzz][Z][O][x][XXX][XX][X]]]");
// Time-only pattern: HH:mm with optional seconds and fraction, no zone sections.
private static final DateTimeFormatter timeFormatter =
DateTimeFormatter.ofPattern("HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS]]");
// Same layout as the date / date-time pattern above, with an optional era section ([' '][G])
// added after the day and again after the zone/offset sections.
private static final DateTimeFormatter DATE_TIME_FORMATTER =
DateTimeFormatter.ofPattern("[yyyy][yy]['-']['/']['.'][' '][MMM][MM][M]['-']['/']['.'][' '][dd][d][[' '][G]][[' ']['T']HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS][' '][z][zzz][Z][O][x][XXX][XX][X][[' '][G]]]]");
// Time-only pattern extended with the optional zone/offset sections inside the seconds group.
private static final DateTimeFormatter TIME_FORMATTER =
DateTimeFormatter.ofPattern("HH:mm[':'ss[.][SSSSSS][SSSSS][SSSS][SSS][' '][z][zzz][Z][O][x][XXX][XX][X]]");

/**
* Parse the Json date-time logical type to an Avro long value.
* @return the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC.
*/
public static Long getEpochMicros(String jsonDateTime) {
jsonDateTime = cleanLineBreaks(jsonDateTime);
Instant instant = null;
if (jsonDateTime.matches("-?\\d+")) {
return Long.valueOf(jsonDateTime);
}
try {
ZonedDateTime zdt = ZonedDateTime.parse(jsonDateTime, formatter);
ZonedDateTime zdt = ZonedDateTime.parse(jsonDateTime, DATE_TIME_FORMATTER);
instant = zdt.toInstant();
} catch (DateTimeParseException e) {
try {
LocalDateTime dt = LocalDateTime.parse(jsonDateTime, formatter);
LocalDateTime dt = LocalDateTime.parse(jsonDateTime, DATE_TIME_FORMATTER);
instant = dt.toInstant(ZoneOffset.UTC);
} catch (DateTimeParseException ex) {
// no logging since it may generate too much noise
Expand All @@ -45,9 +45,10 @@ public static Long getEpochMicros(String jsonDateTime) {
* @return the number of days from the unix epoch, 1 January 1970 (ISO calendar).
*/
public static Integer getEpochDay(String jsonDate) {
jsonDate = cleanLineBreaks(jsonDate);
Integer epochDay = null;
try {
LocalDate date = LocalDate.parse(jsonDate, formatter);
LocalDate date = LocalDate.parse(jsonDate, DATE_TIME_FORMATTER);
epochDay = (int) date.toEpochDay();
} catch (DateTimeParseException e) {
// no logging since it may generate too much noise
Expand All @@ -60,21 +61,26 @@ public static Integer getEpochDay(String jsonDate) {
* @return the number of microseconds after midnight, 00:00:00.000000.
*/
public static Long getMicroSeconds(String jsonTime) {
// Strip line breaks before any matching or parsing.
jsonTime = cleanLineBreaks(jsonTime);
Long nanoOfDay = null;
// An optionally signed run of digits is returned as a Long as-is, with no unit conversion.
if (jsonTime.matches("-?\\d+")) {
return Long.valueOf(jsonTime);
}
try {
// NOTE(review): the paired parse lines below (timeFormatter / TIME_FORMATTER, and
// formatter / DATE_TIME_FORMATTER further down) look like the removed and added sides
// of a diff shown together; presumably only the upper-case constants remain - confirm.
// First attempt: the time-only formatter.
LocalTime time = LocalTime.parse(jsonTime, timeFormatter);
LocalTime time = LocalTime.parse(jsonTime, TIME_FORMATTER);
nanoOfDay = time.toNanoOfDay();
} catch (DateTimeParseException e) {
// Second attempt: the date-time formatter, keeping only the time-of-day part.
try {
LocalTime time = LocalTime.parse(jsonTime, formatter);
LocalTime time = LocalTime.parse(jsonTime, DATE_TIME_FORMATTER);
nanoOfDay = time.toNanoOfDay();
} catch (DateTimeParseException ex) {
// no logging since it may generate too much noise
}
}
// Nanoseconds to microseconds; null when neither formatter could parse the input.
return nanoOfDay == null ? null : nanoOfDay / 1000;
}

/**
 * Returns the input with every line feed and carriage return character removed.
 *
 * @param jsonDateTime text to clean; must not be null
 * @return the text without {@code \n} and {@code \r} characters
 */
private static String cleanLineBreaks(String jsonDateTime) {
  final int length = jsonDateTime.length();
  StringBuilder cleaned = new StringBuilder(length);
  for (int i = 0; i < length; i++) {
    char current = jsonDateTime.charAt(i);
    if (current != '\n' && current != '\r') {
      cleaned.append(current);
    }
  }
  return cleaned.toString();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package tech.allegro.schema.json2avro.converter.util;

import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.regex.Pattern;

/**
 * String helpers for recognising and decoding Base64 text.
 */
public class StringUtil {

  /**
   * Standard Base64 alphabet in groups of four characters, optionally ending in a
   * {@code =} or {@code ==} padded group. Compiled once instead of on every call.
   */
  private static final Pattern BASE64_PATTERN =
      Pattern.compile("^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)?$");

  /**
   * Tells whether the value is shaped like standard Base64.
   *
   * <p>This is a purely syntactic check: the empty string and ordinary words whose
   * length is a multiple of four (for example {@code "test"}) also match.
   *
   * @param value the text to inspect; may be null
   * @return true when {@code value} is non-null and matches the Base64 shape
   */
  public static boolean isBase64(String value) {
    return value != null && BASE64_PATTERN.matcher(value).matches();
  }

  /**
   * Decodes Base64 text into a string, interpreting the decoded bytes as UTF-8.
   *
   * @param string the text to decode; may be null
   * @return the decoded text when {@code string} passes {@link #isBase64(String)},
   *     otherwise {@code string} itself (including null)
   */
  public static String decodeBase64(String string) {
    if (!isBase64(string)) {
      return string;
    }
    byte[] decoded = Base64.getDecoder().decode(string);
    return new String(decoded, StandardCharsets.UTF_8);
  }
}
Loading

0 comments on commit a003e28

Please sign in to comment.