Skip to content

Commit

Permalink
Don't pad too much (#43)
Browse files Browse the repository at this point in the history
* Tidy up padding algorithm, used "undefined" rather than "null" for missing elements as that's what the underlying library does

* Don't ask for one more column than we have
  • Loading branch information
alaric-rd authored Sep 4, 2024
1 parent b9c9696 commit 6a8c2b9
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 16 deletions.
36 changes: 21 additions & 15 deletions lib/importer/backend.js
Original file line number Diff line number Diff line change
Expand Up @@ -182,21 +182,27 @@ exports.SessionSuggestDataRange = (sid, headerRange, footerRange) => {
}

// Take a slice from an array, but if the array isn't long enough to reach
// the end, append undefineds to make the result the correct length.
//
// row:   the array to slice (it is not modified)
// start: index of the first element wanted (inclusive)
// end:   index just past the last element wanted (exclusive)
//
// Returns a new array. Whenever end-start exceeds the number of elements
// slice() could supply, the result is exactly end-start elements long.
function sliceAndPad(row, start, end) {
  // Slice row; but this may result in fewer elements than we want if the row
  // wasn't that long to begin with
  const sliced = row.slice(start, end);
  const wantedLength = end - start;

  if (sliced.length < wantedLength) {
    // Pad by the shortfall only. Sizing the padding from `end` rather than
    // from the wanted length over-pads whenever start > 0.
    const padding = new Array(wantedLength - sliced.length).fill(undefined);
    return sliced.concat(padding);
  }

  return sliced;
}

// Apply sliceAndPad to every row in rows, using the same start and end
// column bounds for each, and return the resulting rows as a new array.
function sliceAndPadRows(rows, start, end) {
  const trimRow = (cells) => sliceAndPad(cells, start, end);
  return rows.map(trimRow);
}

// Returns a sample of rows in a range. range is of the form {sheet: 'Foo', start:{row: X, column: Y}, end:{row: X, column: Y}}.

// Returns three arrays - one with startCount rows from the top of the range,
Expand All @@ -216,7 +222,8 @@ exports.SessionGetInputSampleRows = (sid, range, startCount, middleCount, endCou
let data = sessionStore.get(sid).sheets.get(range.sheet);

// Extract initial rows
let startRows = data.slice(range.start.row, range.start.row+startCount);
let startRows = sliceAndPadRows(data.slice(range.start.row, range.start.row+startCount),
range.start.column, range.end.column+1);

// Extract random sample of middle rows
let middleStart = range.start.row + startCount; // First row eligible for middle sample
Expand All @@ -226,11 +233,12 @@ exports.SessionGetInputSampleRows = (sid, range, startCount, middleCount, endCou
// Extract the rows with those indexes
let middleRows = new Array();
for(let i=0; i<sortedMiddleIndexes.length; i++) {
middleRows.push(data[sortedMiddleIndexes[i]]);
middleRows.push(sliceAndPad(data[sortedMiddleIndexes[i]], range.start.column, range.end.column+1));
}

// Extract final rows
let endRows = data.slice(range.end.row-endCount+1, range.end.row+1);
let endRows = sliceAndPadRows(data.slice(range.end.row-endCount+1, range.end.row+1),
range.start.column, range.end.column+1);

// FIXME: Work out how the xlsx library represents styles and
// rowspan/colspan and make sure that what we return does something useful
Expand All @@ -239,10 +247,9 @@ exports.SessionGetInputSampleRows = (sid, range, startCount, middleCount, endCou
// form, and because styling information might be a significant part of the
// data.

// Slice out only desired columns from these rows, and return the results
return [startRows.map((row) => sliceAndPad(row, range.start.column, range.end.column+1)),
middleRows.map((row) => sliceAndPad(row, range.start.column, range.end.column+1)),
endRows.map((row) => sliceAndPad(row, range.start.column, range.end.column+1))];
return [startRows,
middleRows,
endRows];
};

// Return the unique values in each column in the range. Return no more than
Expand Down Expand Up @@ -346,9 +353,8 @@ exports.SessionPerformMappingJob = (sid, range, mapping) => {
const inputColumn = range.start.column + m;
if (inputColumn >= row.length) {
// If a row is missing values at the end, this may be
// represented as a "short" row array. Let's make it null rather
// than undefined.
record[attr] = null;
// represented as a "short" row array.
record[attr] = undefined;
} else {
record[attr] = row[range.start.column + m];
foundSomeValues = true;
Expand Down
18 changes: 18 additions & 0 deletions lib/importer/backend.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,24 @@ test('happy path', () => {
backend.SessionDelete(sid);
});

// Sampled rows should come back as wide as the requested column range.
// NOTE(review): presumably the fixture rows hold only three values, which is
// why a fourth, undefined cell is expected - confirm against test.xlsx.
test('pad narrow samples', () => {
  const sessionId = backend.CreateSession();
  backend.SessionSetFile(sessionId, '../../fixtures/test.xlsx');

  // Four columns (0 to 3) across rows 3 to 5 of the 'Cool Data' sheet.
  const range = {
    sheet: 'Cool Data',
    start: { row: 3, column: 0 },
    end: { row: 5, column: 3 },
  };

  // One row each for the start, middle and end samples.
  const expectedSamples = [
    [['Boris', 13, 'High', undefined]],
    [['Nelly', 14, 'High', undefined]],
    [['Sid', 10, 'Medium', undefined]],
  ];

  const actualSamples = backend.SessionGetInputSampleRows(sessionId, range, 1, 1, 1);
  expect(actualSamples).toMatchObject(expectedSamples);
});

test('suggest data range', () => {
const sid = backend.CreateSession();
backend.SessionSetFile(sid, '../../fixtures/test.xlsx');
Expand Down
2 changes: 1 addition & 1 deletion lib/importer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ exports.Initialise = (config, router, prototypeKit) => {
session.headerRange = {
sheet: session.sheet,
start: {row: 0, column: 0},
end: {row: 0, column: maxCol},
end: {row: 0, column: maxCol-1},
};

// Ensure the session is persisted. Currently in session, eventually another way
Expand Down

0 comments on commit 6a8c2b9

Please sign in to comment.