Skip to content

Commit

Permalink
Ignore non-printable characters (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
samplackett authored Dec 19, 2024
1 parent 79aa218 commit 76b8585
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 9 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "ffc-pay-etl-framework",
"version": "1.1.3",
"version": "1.1.4",
"publisher": "Defra",
"main": "dist/cjs/index.js",
"private": false,
Expand Down
11 changes: 9 additions & 2 deletions src/loaders/csvloader.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
const fs = require("fs")
const { Transform } = require("stream")
const { parse } = require("csv-parse")
const { stdout, stderr } = require("process")

/**
*
Expand All @@ -24,7 +23,15 @@ function CSVLoader(options){
/**
 * Per-row transform step: tags the incoming row with metadata, strips
 * control characters from its fields and forwards the row downstream.
 *
 * @param {*} chunk - row being processed; its fields are read and rewritten by numeric index
 * @param {*} _ - unused
 * @param {Function} callback - called as callback(null, chunk) to pass the row on
 */
transform(chunk, _, callback){
// attach the configured column list and the current line counter to the row
chunk["_columns"] = options.columns
chunk["_linecount"] = lineCount
// NOTE(review): the two increments below look like the removed and added
// sides of a single diff line rendered together; confirm that the real
// file contains only one, otherwise the counter advances by 2 per row.
lineCount +=1
lineCount += 1

// remove non-printable characters
// Only positions 0..options.columns.length-1 are visited, so fields beyond
// the configured column count are left untouched.
options.columns.forEach((_column, index) => {
// falsy fields (undefined, empty string) are skipped
// NOTE(review): assumes every truthy field is a string; .replace would
// throw on a number or object. Confirm against the parser settings.
if (chunk[index]) {
// strips U+0000-U+001F and U+007F-U+009F, which also removes tab, CR
// and LF characters inside a field
chunk[index] = chunk[index].replace(/[\x00-\x1F\x7F-\x9F]/g, '')
}
})

// no error: hand the (mutated) row to the next stage
callback(null, chunk)
}
})
Expand Down
47 changes: 41 additions & 6 deletions test/loaders/csvLoader.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
const { CSVLoader } = require("../../src/loaders/csvloader")
const { Readable, PassThrough } = require("node:stream")
const fs = require("fs")
const { expect } = require("@jest/globals")
const fs = require('fs')
const { PassThrough } = require('stream')
const { CSVLoader } = require('../../src/loaders/csvloader')

jest.mock('fs')

Expand All @@ -14,7 +13,7 @@ describe('csvLoader tests', () => {
]
let lineCount = 1
const testPath = "someRandomPath"
fs.__setMockFileContent(testPath, testData)
fs.__setMockFileContent(testPath, testData.join(''))
const uut = CSVLoader({ path: testPath, columns: ["a","b","c"]})
uut
.pump(uut)
Expand All @@ -29,8 +28,8 @@ describe('csvLoader tests', () => {
}
}))
})

it('should count csv file lines', (done) => {
jest.setTimeout(10000)
const testData = [
"column1, column2, column3\n",
"1,2,3\n",
Expand All @@ -53,4 +52,40 @@ describe('csvLoader tests', () => {
}
}))
})

it('should remove non-printable characters from CSV data', (done) => {
  // Mocked file content: a header line followed by two rows that contain
  // control characters inside individual fields.
  const dirtyRows = [
    "column1,column2,column3\n",
    "1,\x00\x1F2,3\n",
    "4,5,\x7F\x9F6\n"
  ]
  // The same rows as they should look after the loader has cleaned them.
  const cleanRows = [
    "column1,column2,column3\n",
    "1,2,3\n",
    "4,5,6\n"
  ]
  const finalIndex = cleanRows.length - 1
  const mockPath = "someRandomPath"
  fs.__setMockFileContent(mockPath, dirtyRows.join(''))

  const loader = CSVLoader({ path: mockPath, columns: ["column1", "column2", "column3"] })

  // Comparison starts at index 1, so cleanRows[0] is never checked.
  let rowIndex = 1

  // Compares one emitted row against its expected text; any thrown error
  // (including a failed expectation) is reported through done().
  const checkRow = (row, _encoding, next) => {
    try {
      const actual = row.join(",")
      const wanted = cleanRows[rowIndex].replace(/\n/,"")
      expect(actual).toEqual(wanted)
      // finish the test once the last expected row has matched
      if (rowIndex === finalIndex) {
        done()
      }
      rowIndex += 1
      next(null, row)
    } catch (failure) {
      done(failure)
    }
  }

  loader
    .pump(loader)
    .pipe(new PassThrough({
      objectMode: true,
      transform(chunk, _, callback) {
        checkRow(chunk, _, callback)
      }
    }))
}, 10000)
})

0 comments on commit 76b8585

Please sign in to comment.