Skip to content

Commit

Permalink
[DF] add validation for the number of column names imposed on the CSV source
Browse files Browse the repository at this point in the history
  • Loading branch information
silverweed committed Dec 20, 2024
1 parent a4a9b0e commit dc4a5f3
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 103 deletions.
3 changes: 2 additions & 1 deletion tree/dataframe/inc/ROOT/RCsvDS.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ public:
/// Note that the comment character must not be part of the data, e.g. in strings.
char fComment = '\0';
/// Impose column names. This can be used if a header is missing or if the header has unparsable or
/// unwanted column names.
/// unwanted column names. If this list is not empty, it must contain exactly as many elements as
/// there are columns in the CSV file; otherwise a std::runtime_error is thrown.
std::vector<std::string> fColumnNames;
/// Specify custom column types, accepts an unordered map with keys being column name, values being type alias
/// ('O' for boolean, 'D' for double, 'L' for Long64_t, 'T' for std::string)
Expand Down
17 changes: 14 additions & 3 deletions tree/dataframe/src/RCsvDS.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,18 @@ void RCsvDS::RewindToData()

void RCsvDS::FillHeaders(const std::string &line)
{
const auto columns = ParseColumns(line);

if (!fOptions.fColumnNames.empty()) {
if (fOptions.fColumnNames.size() != columns.size()) {
auto msg = std::string("Error: passed ") + std::to_string(fOptions.fColumnNames.size()) +
" column names for a CSV file containing " + std::to_string(columns.size()) + " columns!";
throw std::runtime_error(msg);
}
std::swap(fHeaders, fOptions.fColumnNames);
return;
}

auto columns = ParseColumns(line);
fHeaders.reserve(columns.size());
for (auto &col : columns) {
fHeaders.emplace_back(col);
Expand Down Expand Up @@ -222,6 +228,11 @@ void RCsvDS::FillRecord(const std::string &line, Record_t &record)
void RCsvDS::GenerateHeaders(size_t size)
{
if (!fOptions.fColumnNames.empty()) {
if (fOptions.fColumnNames.size() != size) {
auto msg = std::string("Error: passed ") + std::to_string(fOptions.fColumnNames.size()) +
" column names for a CSV file containing " + std::to_string(size) + " columns!";
throw std::runtime_error(msg);
}
std::swap(fHeaders, fOptions.fColumnNames);
return;
}
Expand Down Expand Up @@ -671,6 +682,6 @@ RDataFrame FromCSV(std::string_view fileName, bool readHeaders, char delimiter,
return rdf;
}

} // ns RDF
} // namespace RDF

} // ns ROOT
} // namespace ROOT
Loading

0 comments on commit dc4a5f3

Please sign in to comment.