Skip to content

Commit

Permalink
Refactor dataset updater
Browse files: browse the repository at this point in the history
  • Loading branch information
ruchernchong committed Dec 18, 2023
1 parent cf480b0 commit 07456f3
Showing 1 changed file with 35 additions and 38 deletions.
73 changes: 35 additions & 38 deletions packages/core/src/datasets.ts
Original file line number Diff line number Diff line change
@@ -1,64 +1,61 @@
 export * as Datasets from "./datasets";
-import fs from "fs";
+import fs from "fs/promises";
 import AdmZip from "adm-zip";
 import * as d3 from "d3";
 import { downloadFile } from "./lib/downloadFile";
 import db from "../../config/db";
 
-export const updater = async () => {
-  const extractToPath = "/tmp";
-  const zipFileName = `Monthly New Registration of Cars by Make.zip`;
-  const zipFilePath = `${extractToPath}/${zipFileName}`;
-  const zipUrl = `https://datamall.lta.gov.sg/content/dam/datamall/datasets/Facts_Figures/Vehicle Registration/${zipFileName}`;
-
-  await downloadFile({
-    url: zipUrl,
-    destination: zipFilePath,
-  });
-
+// Constants
+const EXTRACT_PATH: string = "/tmp";
+const ZIP_FILE_NAME: string = `Monthly New Registration of Cars by Make.zip`;
+const ZIP_URL: string = `https://datamall.lta.gov.sg/content/dam/datamall/datasets/Facts_Figures/Vehicle Registration/${ZIP_FILE_NAME}`;
+const COLLECTION_NAME: string = "cars";
+
+// Helper function to extract ZIP file
+const extractZipFile = async (
+  zipFilePath: string,
+  extractToPath: string,
+): Promise<string> => {
   const zip = new AdmZip(zipFilePath);
-  zip.extractAllTo(`${extractToPath}`, true);
-  const zipEntries = zip.getEntries();
-
-  let destinationPath = extractToPath;
-  zipEntries.forEach((entry) => {
-    if (!entry.isDirectory) {
-      const entryName = entry.entryName;
-
-      destinationPath = `${extractToPath}/${entryName}`;
-
-      const content = entry.getData();
-      fs.writeFileSync(destinationPath, content);
-    }
-  });
+  zip.extractAllTo(extractToPath, true);
+  const entry = zip.getEntries().find((entry) => !entry.isDirectory);
+  return entry ? entry.entryName : "";
+};
 
-  console.log(destinationPath);
+export const updater = async (): Promise<{ message: string }> => {
+  try {
+    const zipFilePath = `${EXTRACT_PATH}/${ZIP_FILE_NAME}`;
+    await downloadFile({ url: ZIP_URL, destination: zipFilePath });
 
-  const csvData = fs.readFileSync(destinationPath, "utf-8");
-  const parsedData = d3.csvParse(csvData);
+    const extractedFileName = await extractZipFile(zipFilePath, EXTRACT_PATH);
+    const destinationPath = `${EXTRACT_PATH}/${extractedFileName}`;
+    console.log(`Destination path:`, destinationPath);
 
-  let message: string;
+    const csvData = await fs.readFile(destinationPath, "utf-8");
+    const parsedData = d3.csvParse(csvData);
 
-  const existingData = await db.collection("cars").find().toArray();
-  if (existingData.length === 0) {
-    const result = await db.collection("cars").insertMany(parsedData);
-    message = `${result.insertedCount} document(s) inserted`;
-  } else {
+    const existingData = await db.collection(COLLECTION_NAME).find().toArray();
     const existingDataMap = new Map(
       existingData.map((item) => [item.month, item]),
     );
     const newDataToInsert = parsedData.filter(
       (newItem) => !existingDataMap.has(newItem.month),
     );
 
+    let message: string;
     if (newDataToInsert.length > 0) {
-      const result = await db.collection("cars").insertMany(newDataToInsert);
+      const result = await db
+        .collection(COLLECTION_NAME)
+        .insertMany(newDataToInsert);
       message = `${result.insertedCount} document(s) inserted`;
     } else {
       message =
         "No new data to insert. The provided data matches the existing records.";
     }
-  }
 
-  return { message };
+    return { message };
+  } catch (error) {
+    console.error(`An error has occurred:`, error);
+    throw error;
+  }
 };

0 comments on commit 07456f3

Please sign in to comment.