Skip to content

Commit

Permalink
Add more descriptive return message to Scrape Sitting Report
Browse files Browse the repository at this point in the history
Also removes a large block of commented-out code
  • Loading branch information
limdingwen committed Jul 26, 2024
1 parent 4f91ef3 commit fa4194f
Showing 1 changed file with 10 additions and 94 deletions.
104 changes: 10 additions & 94 deletions supabase/functions/lib/shared-functions/scrape-sitting-report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ export default async function scrapeSittingReport(req: Request) {
`Inserted sitting metadata ${JSON.stringify(sittingReportMetadata)} with ID ${sittingId.id}.`,
);

let debateCount = 0;
let debateSpeechCount = 0;

let debateOrderNo = 0;
for (const sittingReportItem of sittingReport.takesSectionVOList) {
console.log("Scraping debate...");
Expand All @@ -103,6 +106,7 @@ export default async function scrapeSittingReport(req: Request) {
console.log(
`Inserted debate ${JSON.stringify(debateData)} with ID ${debateId.id}.`,
);
debateCount++;

console.log("Scraping debate speeches...");
const debateContentHtml = sittingReportItem.content;
Expand Down Expand Up @@ -131,7 +135,8 @@ export default async function scrapeSittingReport(req: Request) {
debateId.id,
contentBuffer,
);
debateSpeechOrderNo++;
debateSpeechCount++;

contentBuffer = "";
}

Expand All @@ -146,100 +151,11 @@ export default async function scrapeSittingReport(req: Request) {
debateId.id,
contentBuffer,
);
debateSpeechCount++;
}
}

return buildResponseProxy({ message: "done" });

//
// console.log("Parsing the downloaded HTML...");
// const billsIntroducedHtml = await billsIntroducedResponse.text();
// console.log(billsIntroducedHtml);
// const billsIntroducedDoc = new DOMParser().parseFromString(
// billsIntroducedHtml,
// "text/html",
// );
//
// let addCount = 0;
// let updateCount = 0;
//
// console.log("Scraping and uploading relevant data...");
// const billsIntroduced = billsIntroducedDoc.querySelectorAll(
// ".indv-bill",
// ) as Iterable<Element>;
// for (const billIntroduced of billsIntroduced) {
// const is_second_reading_next_available_seating = billIntroduced
// .querySelector(".indv-bill .row:nth-of-type(2) div:nth-of-type(2)")!
// .textContent.includes("Next Available Sitting");
// const passed_date: string | undefined = billIntroduced
// .querySelector(".indv-bill .row:nth-of-type(2) div:nth-of-type(3)")!
// .textContent.match(/(\d{2}\.\d{2}\.\d{4})/gm)?.[0];
//
// const scrapedData = {
// bill_no: billIntroduced
// .querySelector(".indv-bill .bill-title div:nth-of-type(2)")!
// .textContent.match(/(\d+\/\d{4})/gm)?.[0]!,
// name: billIntroduced.querySelector("a")?.getAttribute("title")!,
// date_introduced: toIsoDate(
// billIntroduced
// .querySelector(".indv-bill .row:nth-of-type(2) div:nth-of-type(1)")!
// .textContent.match(/(\d{2}\.\d{2}\.\d{4})/gm)?.[0]!,
// ),
// second_reading_date_type: is_second_reading_next_available_seating
// ? "next_available_seating"
// : "explicit",
// second_reading_date: is_second_reading_next_available_seating
// ? null
// : toIsoDate(
// billIntroduced
// .querySelector(
// ".indv-bill .row:nth-of-type(2) div:nth-of-type(2)",
// )!
// .textContent.match(/(\d{2}\.\d{2}\.\d{4})/gm)?.[0]!,
// ),
// is_passed: passed_date !== undefined,
// passed_date: passed_date ? toIsoDate(passed_date) : null,
// pdf_url: sanitiseUrl(
// billIntroduced.querySelector("a")!.getAttribute("href")!,
// ),
// original_text: null,
// summary: null,
// };
//
// const billExists =
// (await supabase
// .from("bill")
// .select("bill_no")
// .eq("bill_no", scrapedData.bill_no)
// .maybeSingle()) != null;
//
// if (billExists) {
// // Make sure that we don't overwrite the original_text and summary values
// const {
// original_text: _original_text,
// summary: _summary,
// ...scrapedDataToUpdate
// } = scrapedData;
// const { error } = await supabase
// .from("bill")
// .update(scrapedDataToUpdate)
// .eq("bill_no", scrapedData.bill_no);
// if (error) throw error;
// updateCount++;
// console.info(
// `Updated bill ${scrapedData.bill_no} with the following data: ${JSON.stringify(scrapedDataToUpdate)}`,
// );
// } else {
// const { error } = await supabase.from("bill").insert(scrapedData);
// if (error) throw error;
// addCount++;
// console.info(
// `Added bill ${scrapedData.bill_no} with the following data: ${JSON.stringify(scrapedData)}`,
// );
// }
// }
//
// return buildResponseProxy({
// message: `Added ${addCount} new bills and updated ${updateCount} existing bills.`,
// });
return buildResponseProxy({
message: `Inserted sitting ${JSON.stringify(sittingReportMetadata)} with ${debateCount} debates and ${debateSpeechCount} speeches.`,
});
}

0 comments on commit fa4194f

Please sign in to comment.