Skip to content

Commit

Permalink
Merge pull request #18918 from ilhan2316/HPCC-32238
Browse files Browse the repository at this point in the history
HPCC-32238 Add large_list data type to Parquet Plugin

Reviewed-By: Jack Del Vecchio
Reviewed-By: Dan S. Camper <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Jul 31, 2024
2 parents cc911f1 + 4aa2f85 commit 5202761
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 6 deletions.
30 changes: 24 additions & 6 deletions plugins/parquet/parquetembed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ std::shared_ptr<arrow::NestedType> ParquetWriter::makeChildRecord(const RtlField
const RtlFieldInfo childFieldInfo = RtlFieldInfo("", "", child);
std::vector<std::shared_ptr<arrow::Field>> childField;
reportIfFailure(fieldToNode(&childFieldInfo, childField));
return std::make_shared<arrow::ListType>(childField[0]);
return std::make_shared<arrow::LargeListType>(childField[0]);
}
}

Expand Down Expand Up @@ -845,8 +845,8 @@ void ParquetWriter::beginSet(const RtlFieldInfo *field)
arrow::FieldPath match = getNestedFieldBuilder(field, childBuilder);
fieldBuilderStack.push_back(std::make_shared<ArrayBuilderTracker>(field, childBuilder, CPNTSet, std::move(match)));

arrow::ListBuilder *listBuilder = static_cast<arrow::ListBuilder *>(childBuilder);
reportIfFailure(listBuilder->Append());
arrow::LargeListBuilder *largeListBuilder = static_cast<arrow::LargeListBuilder *>(childBuilder);
reportIfFailure(largeListBuilder->Append());
}

/**
Expand Down Expand Up @@ -946,7 +946,7 @@ arrow::ArrayBuilder *ParquetWriter::getFieldBuilder(const RtlFieldInfo *field)
return recordBatchBuilder->GetField(schema->GetFieldIndex(fieldName.str()));
}
else if (fieldBuilderStack.back()->nodeType == CPNTSet)
return static_cast<arrow::ListBuilder *>(fieldBuilderStack.back()->structPtr)->value_builder();
return static_cast<arrow::LargeListBuilder *>(fieldBuilderStack.back()->structPtr)->value_builder();
else
return fieldBuilderStack.back()->structPtr->child(fieldBuilderStack.back()->childrenProcessed++);
}
Expand Down Expand Up @@ -1463,6 +1463,12 @@ void ParquetRowBuilder::processBeginSet(const RtlFieldInfo *field, bool &isAll)
newPathNode.childCount = arrayVisitor->listArr->value_slice(currentRow)->length();
pathStack.push_back(newPathNode);
}
else if (arrayVisitor->type == LargeListType)
{
ParquetColumnTracker newPathNode(field, arrayVisitor->largeListArr, CPNTSet);
newPathNode.childCount = arrayVisitor->largeListArr->value_slice(currentRow)->length();
pathStack.push_back(newPathNode);
}
else
{
failx("Error reading nested set with name %s.", field->name);
Expand Down Expand Up @@ -1585,8 +1591,20 @@ void ParquetRowBuilder::nextFromStruct(const RtlFieldInfo *field)
}
else if (pathStack.back().nodeType == CPNTSet)
{
auto child = arrayVisitor->listArr->value_slice(currentRow);
reportIfFailure(child->Accept(arrayVisitor.get()));
if (arrayVisitor->type == ListType)
{
auto child = arrayVisitor->listArr->value_slice(currentRow);
reportIfFailure(child->Accept(arrayVisitor.get()));
}
else if (arrayVisitor->type == LargeListType)
{
auto child = arrayVisitor->largeListArr->value_slice(currentRow);
reportIfFailure(child->Accept(arrayVisitor.get()));
}
else
{
failx("Unexpected type in CPNTSet: neither ListType nor LargeListType");
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions plugins/parquet/parquetembed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ enum ParquetArrayType
LargeBinaryType,
DecimalType,
ListType,
LargeListType,
StructType,
RealType
};
Expand Down Expand Up @@ -314,6 +315,12 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor
type = ListType;
return arrow::Status::OK();
}
/**
 * @brief Visitor overload invoked for arrow::LargeListArray values.
 *
 * Marks the visitor's current type as LargeListType and stores the address
 * of the visited array in largeListArr.
 *
 * @param array The large list array being visited.
 * @return arrow::Status::OK() unconditionally.
 */
arrow::Status Visit(const arrow::LargeListArray &array)
{
    type = LargeListType;
    largeListArr = &array;
    return arrow::Status::OK();
}
arrow::Status Visit(const arrow::StructArray &array)
{
structArr = &array;
Expand Down Expand Up @@ -348,6 +355,7 @@ class ParquetArrayVisitor : public arrow::ArrayVisitor
const arrow::Decimal128Array *decArr = nullptr;
const arrow::Decimal256Array *largeDecArr = nullptr;
const arrow::ListArray *listArr = nullptr;
const arrow::LargeListArray *largeListArr = nullptr;
const arrow::StructArray *structArr = nullptr;
};

Expand Down

0 comments on commit 5202761

Please sign in to comment.