Skip to content

Commit

Permalink
[import] invalid CSV header row is user error (#30376)
Browse files Browse the repository at this point in the history
correctly classify errors while parsing CSV header rows.

Tested against a local backend using a CSV whose header row contains invalid UTF-8 ('\xc3\x28'). Before this change, the import got stuck while parsing. After this change, the error is returned so the user can see it.

GitOrigin-RevId: 2846ec9c681ce7a179475429df751ba657e012df
  • Loading branch information
ldanilek authored and Convex, Inc. committed Oct 5, 2024
1 parent 8d7be63 commit 1d6add9
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions crates/application/src/snapshot_import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,27 @@ fn map_zip_error(e: ZipError) -> anyhow::Error {
}
}

/// Converts a `csv_async::Error` into an `anyhow::Error`, wrapping the kinds
/// that stem from malformed CSV input in the matching `ImportError` variant.
///
/// * `Utf8` becomes `ImportError::CsvInvalidRow`, carrying the line number and
///   the original error.
/// * `UnequalLengths` becomes `ImportError::CsvRowMissingFields`, carrying the
///   line number.
/// * Every other kind is converted into `anyhow::Error` unchanged.
///
/// The line number is taken from the position attached to the error, and is
/// reported as 0 when the error carries no position.
fn map_csv_error(e: csv_async::Error) -> anyhow::Error {
    /// Line number recorded in `pos`, or 0 when no position is available.
    fn line_of(pos: &Option<csv_async::Position>) -> usize {
        match pos {
            Some(p) => p.line() as usize,
            None => 0,
        }
    }

    match e.kind() {
        csv_async::ErrorKind::Utf8 { pos, .. } => {
            // Read the line number first: `e` is moved into the variant below.
            let line = line_of(pos);
            ImportError::CsvInvalidRow(line, e).into()
        },
        csv_async::ErrorKind::UnequalLengths { pos, .. } => {
            let line = line_of(pos);
            ImportError::CsvRowMissingFields(line).into()
        },
        // Failures of the underlying stream rather than of the CSV content.
        csv_async::ErrorKind::Io(_) | csv_async::ErrorKind::Seek => e.into(),
        // CSV parsing here does not go through serde, so these are unexpected.
        csv_async::ErrorKind::Serialize(_) | csv_async::ErrorKind::Deserialize { .. } => {
            e.into()
        },
        // Any other error kind is passed through unclassified.
        _ => e.into(),
    }
}

/// Parse and stream units from the imported file, starting with a NewTable
/// for each table and then Objects for each object to import into the table.
/// stream_body returns the file as streamed bytes. stream_body() can be called
Expand Down Expand Up @@ -930,7 +951,7 @@ async fn parse_objects<'a, Fut>(
anyhow::bail!(ImportError::CsvMissingHeaders);
}
let field_names = {
let headers = reader.headers().await?;
let headers = reader.headers().await.map_err(map_csv_error)?;
headers
.iter()
.map(|s| {
Expand All @@ -945,7 +966,7 @@ async fn parse_objects<'a, Fut>(
while let Some((i, row_r)) = enumerate_rows.next().await {
let lineno = i + 1;
let parsed_row = row_r
.map_err(|e| ImportError::CsvInvalidRow(lineno, e))?
.map_err(map_csv_error)?
.iter()
.map(parse_csv_cell)
.collect::<Vec<JsonValue>>();
Expand Down

0 comments on commit 1d6add9

Please sign in to comment.