Skip to content

Commit

Permalink
[AVRO-XXXX][C++] Add big decimal support and update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
glywk committed Sep 4, 2024
1 parent 8979c37 commit ad54e58
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 5 deletions.
8 changes: 5 additions & 3 deletions doc/content/en/docs/++version++/Specification/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,8 @@ A logical type is always serialized using its underlying Avro type so that value
Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type.

### Decimal

#### Fixed precision
The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10<sup>-scale</sup>_.

A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute.
Expand All @@ -809,19 +811,19 @@ Scale must be zero or a positive integer less than or equal to the precision.

For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match.

**alternative**
#### Scalable precision

As it is not always possible to fix the scale and precision in advance for a decimal field, `big-decimal` is another `decimal` logical type, restricted to Avro _bytes_.

_Currently only available in Java and Rust_.
_Currently only available in C++, Java and Rust_.

```json
{
"type": "bytes",
"logicalType": "big-decimal"
}
```
Here, as the scale property is stored in the value itself, it needs more bytes than the preceding `decimal` type, but it allows more flexibility.
Here, the bytes array contains two serialized properties. The first part is an Avro byte array which is the two's-complement representation of the unscaled integer value in big-endian byte order. The second part is the scale property, stored as an Avro integer. Scale must be zero or a positive integer less than or equal to the precision. The value itself needs more bytes than the preceding `decimal` type, but it allows more flexibility.

### UUID

Expand Down
6 changes: 5 additions & 1 deletion lang/c++/impl/Compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,11 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) {
}

LogicalType::Type t = LogicalType::NONE;
if (typeField == "date")
if (typeField == "big-decimal"
&& !containsField(m, "precision")
&& !containsField(m, "scale"))
t = LogicalType::BIG_DECIMAL;
else if (typeField == "date")
t = LogicalType::DATE;
else if (typeField == "time-millis")
t = LogicalType::TIME_MILLIS;
Expand Down
3 changes: 3 additions & 0 deletions lang/c++/impl/LogicalType.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ void LogicalType::setScale(int32_t scale) {
void LogicalType::printJson(std::ostream &os) const {
switch (type_) {
case LogicalType::NONE: break;
case LogicalType::BIG_DECIMAL:
os << R"("logicalType": "big-decimal")";
break;
case LogicalType::DECIMAL:
os << R"("logicalType": "decimal")";
os << ", \"precision\": " << precision_;
Expand Down
7 changes: 7 additions & 0 deletions lang/c++/impl/Node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ void Node::setLogicalType(LogicalType logicalType) {
// Check that the logical type is applicable to the node type.
switch (logicalType.type()) {
case LogicalType::NONE: break;
case LogicalType::BIG_DECIMAL: {
if (type_ != AVRO_BYTES) {
throw Exception("BIG_DECIMAL logical type can annotate "
"only BYTES type");
}
break;
}
case LogicalType::DECIMAL: {
if (type_ != AVRO_BYTES && type_ != AVRO_FIXED) {
throw Exception("DECIMAL logical type can annotate "
Expand Down
1 change: 1 addition & 0 deletions lang/c++/include/avro/LogicalType.hh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class AVRO_DECL LogicalType {
public:
enum Type {
NONE,
BIG_DECIMAL,
DECIMAL,
DATE,
TIME_MILLIS,
Expand Down
20 changes: 19 additions & 1 deletion lang/c++/test/SchemaTests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ const char *roundTripSchemas[] = {
R"({"type":"fixed","name":"Test","size":1})",

// Logical types
R"({"type":"bytes","logicalType":"big-decimal"})",
R"({"type":"bytes","logicalType":"decimal","precision":12,"scale":6})",
R"({"type":"fixed","name":"test","size":16,"logicalType":"decimal","precision":38,"scale":9})",
R"({"type":"fixed","name":"test","size":129,"logicalType":"decimal","precision":310,"scale":155})",
Expand Down Expand Up @@ -240,6 +241,7 @@ const char *roundTripSchemas[] = {

const char *malformedLogicalTypes[] = {
// Wrong base type.
R"({"type":"long","logicalType": "big-decimal"})",
R"({"type":"long","logicalType": "decimal","precision": 10})",
R"({"type":"string","logicalType":"date"})",
R"({"type":"string","logicalType":"time-millis"})",
Expand All @@ -258,7 +260,12 @@ const char *malformedLogicalTypes[] = {
R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})",
R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})",
// Scale is larger than precision.
R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"};
R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})",
// Precision is not supported by the big-decimal logical type
// and scale is integrated in bytes.
R"({"type":"bytes","logicalType": "big-decimal","precision": 9})",
R"({"type":"bytes","logicalType": "big-decimal","scale": 2})",
R"({"type":"bytes","logicalType": "big-decimal","precision": 9,"scale": 2})"};
const char *schemasToCompact[] = {
// Schema without any whitespace
R"({"type":"record","name":"Test","fields":[]})",
Expand Down Expand Up @@ -335,6 +342,10 @@ static void testCompactSchemas() {
}

static void testLogicalTypes() {
const char *bytesBigDecimalType = "{\n\
\"type\": \"bytes\",\n\
\"logicalType\": \"big-decimal\"\n\
}";
const char *bytesDecimalType = "{\n\
\"type\": \"bytes\",\n\
\"logicalType\": \"decimal\",\n\
Expand Down Expand Up @@ -390,6 +401,13 @@ static void testLogicalTypes() {
const char *unionType = "[\n\
{\"type\":\"string\", \"logicalType\":\"uuid\"},\"null\"\n\
]";
{
BOOST_TEST_CHECKPOINT(bytesBigDecimalType);
ValidSchema schema = compileJsonSchemaFromString(bytesBigDecimalType);
BOOST_CHECK(schema.root()->type() == AVRO_BYTES);
LogicalType logicalType = schema.root()->logicalType();
BOOST_CHECK(logicalType.type() == LogicalType::BIG_DECIMAL);
}
{
BOOST_TEST_CHECKPOINT(bytesDecimalType);
ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType);
Expand Down

0 comments on commit ad54e58

Please sign in to comment.