From 6dc659c7ed9635ceba306d89fea40b5e4faa1cdf Mon Sep 17 00:00:00 2001 From: Phil Lee Date: Wed, 6 Nov 2024 16:40:41 +0000 Subject: [PATCH] scrub illegal characters from CSV upload --- app/models/eligible_fe_providers_importer.rb | 4 ++-- lib/csv_importer/base.rb | 3 ++- .../files/eligible_fe_providers_illegal_encoding.csv | 11 +++++++++++ spec/models/eligible_fe_providers_importer_spec.rb | 10 ++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 spec/fixtures/files/eligible_fe_providers_illegal_encoding.csv diff --git a/app/models/eligible_fe_providers_importer.rb b/app/models/eligible_fe_providers_importer.rb index b710a4a3bb..816599b660 100644 --- a/app/models/eligible_fe_providers_importer.rb +++ b/app/models/eligible_fe_providers_importer.rb @@ -31,8 +31,8 @@ def delete_all_scope def row_to_hash(row) { ukprn: row.fetch("ukprn"), - max_award_amount: row.fetch("max_award_amount").gsub(/£|,/, ""), - lower_award_amount: row.fetch("lower_award_amount").gsub(/£|,/, ""), + max_award_amount: row.fetch("max_award_amount").gsub(/£|,|�/, ""), + lower_award_amount: row.fetch("lower_award_amount").gsub(/£|,|�/, ""), primary_key_contact_email_address: row.fetch("primary_key_contact_email_address"), academic_year: } diff --git a/lib/csv_importer/base.rb b/lib/csv_importer/base.rb index 95621a1d81..0fd894f71c 100644 --- a/lib/csv_importer/base.rb +++ b/lib/csv_importer/base.rb @@ -70,7 +70,8 @@ def parse_csv_file(file) errors.append("Select a file") nil else - CSV.read(file.to_io, headers: parse_headers, encoding: "BOM|UTF-8") + string = File.read(file.path, encoding: "BOM|UTF-8").scrub + CSV.parse(string, headers: parse_headers) end rescue CSV::MalformedCSVError errors.append("The selected file must be a CSV") diff --git a/spec/fixtures/files/eligible_fe_providers_illegal_encoding.csv b/spec/fixtures/files/eligible_fe_providers_illegal_encoding.csv new file mode 100644 index 0000000000..ca363483bb --- /dev/null +++ 
b/spec/fixtures/files/eligible_fe_providers_illegal_encoding.csv @@ -0,0 +1,11 @@ +ukprn,max_award_amount,lower_award_amount,primary_key_contact_email_address +10083728,"£4,000","£2,000",asd@example.com +10000055,"£6,000","£3,000",asd@example.com +10004927,"£6,000","£3,000",asd@example.com +10057981,"£6,000","£3,000",asd@example.com +10000330,"£5,000","£2,500",asd@example.com +10082366,"£6,000","£3,000",asd@example.com +10000415,"£6,000","£3,000",asd@example.com +10000528,"£6,000","£3,000",asd@example.com +10000533,"£6,000","£3,000",asd@example.com +10000536,"£6,000","£3,000",asd@example.com diff --git a/spec/models/eligible_fe_providers_importer_spec.rb b/spec/models/eligible_fe_providers_importer_spec.rb index deb73f54e0..0e81deefec 100644 --- a/spec/models/eligible_fe_providers_importer_spec.rb +++ b/spec/models/eligible_fe_providers_importer_spec.rb @@ -143,5 +143,15 @@ def to_row(hash) end end end + + context "when file has illegal encoding" do + let(:file) { File.open(file_fixture("eligible_fe_providers_illegal_encoding.csv")) } + + it "ignores superfluous characters and imports new records" do + expect { subject.run }.to change { EligibleFeProvider.count }.by(10) + expect(EligibleFeProvider.pluck(:max_award_amount).uniq.sort).to eql([4_000, 5_000, 6_000]) + expect(EligibleFeProvider.pluck(:lower_award_amount).uniq.sort).to eql([2_000, 2_500, 3_000]) + end + end end end