From d9fae01a32a6a1725efe678890ade5df32cb4e3b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 19 Nov 2023 22:27:20 -0500 Subject: [PATCH] Fix ragged-CSV auto-pad --- pkg/input/record_reader_csv.go | 39 ++++++++------------ test/cases/io-multi/0045/expout | 3 +- test/cases/io-ragged-non-rfc-csv/0001/expout | 1 - 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/pkg/input/record_reader_csv.go b/pkg/input/record_reader_csv.go index b2c2f18531..e7135e2fce 100644 --- a/pkg/input/record_reader_csv.go +++ b/pkg/input/record_reader_csv.go @@ -249,20 +249,22 @@ func (reader *RecordReaderCSV) getRecordBatch( ) errorChannel <- err return - } else { - i := int64(0) - n := lib.IntMin2(nh, nd) - for i = 0; i < n; i++ { - key := reader.header[i] - value := mlrval.FromDeferredType(csvRecord[i]) - _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) - if err != nil { - errorChannel <- err - return - } + } + + i := int64(0) + n := lib.IntMin2(nh, nd) + for i = 0; i < n; i++ { + key := reader.header[i] + value := mlrval.FromDeferredType(csvRecord[i]) + _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) + if err != nil { + errorChannel <- err + return } - if nh < nd { - // if header shorter than data: use 1-up itoa keys + } + if nh < nd { + // if header shorter than data: use 1-up itoa keys + for i = nh; i < nd; i++ { key := strconv.FormatInt(i+1, 10) value := mlrval.FromDeferredType(csvRecord[i]) _, err := record.PutReferenceMaybeDedupe(key, value, dedupeFieldNames) @@ -271,17 +273,8 @@ func (reader *RecordReaderCSV) getRecordBatch( return } } - if nh > nd { - // if header longer than data: use "" values - for i = nd; i < nh; i++ { - _, err := record.PutReferenceMaybeDedupe(reader.header[i], mlrval.VOID.Copy(), dedupeFieldNames) - if err != nil { - errorChannel <- err - return - } - } - } } + // if nh > nd: leave it short. This is a job for unsparsify. } context.UpdateForInputRecord() diff --git a/test/cases/io-multi/0045/expout b/test/cases/io-multi/0045/expout index d641fc4fec..7fc515260d 100644 --- a/test/cases/io-multi/0045/expout +++ b/test/cases/io-multi/0045/expout @@ -6,8 +6,7 @@ }, { "a": 4, - "b": 5, - "c": "" + "b": 5 }, { "a": 6, diff --git a/test/cases/io-ragged-non-rfc-csv/0001/expout b/test/cases/io-ragged-non-rfc-csv/0001/expout index d83d53baa0..46e0a667a2 100644 --- a/test/cases/io-ragged-non-rfc-csv/0001/expout +++ b/test/cases/io-ragged-non-rfc-csv/0001/expout @@ -4,7 +4,6 @@ c 3 a 4 b 5 -c a 6 b 7