Skip to content

Commit

Permalink
Add speaker name linking w/o aliasing
Browse files (browse the repository at this point in the history)
  • Loading branch information
limdingwen committed Jul 26, 2024
1 parent fa4194f commit 8cc11f2
Showing 1 changed file with 36 additions and 3 deletions.
39 changes: 36 additions & 3 deletions supabase/functions/lib/shared-functions/scrape-sitting-report.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,16 +7,19 @@ import {
import buildResponseProxy from "../utils/build-response-proxy.ts";
import { format } from "https://deno.land/[email protected]/datetime/mod.ts";
import { SupabaseClient } from "https://esm.sh/v135/@supabase/[email protected]/dist/module/index.d.ts";
import removePrefix from "npm:[email protected]";

async function insertSpeech(
supabase: SupabaseClient,
orderNo: number,
debateId: number,
speakerId: number | null,
content: string,
) {
const debateSpeechData = {
order_no: orderNo,
debate_id: debateId,
speaker_id: speakerId,
content: content,
};
const { error: insertDebateSpeechError } = await supabase
Expand Down Expand Up @@ -123,23 +126,52 @@ export default async function scrapeSittingReport(req: Request) {
// We insert the content buffer one last time after the loop ends to ensure that the last speech is inserted.
// Also, beware not to insert an empty contentBuffer (e.g. if the first paragraph has a speaker's name).
let contentBuffer = "";
let speakerIdBuffer: number | null = null;
let debateSpeechOrderNo = 0;
for (const paragraph of paragraphs) {
const speaker = paragraph.querySelector("strong")?.textContent;
const speakerNameRaw = paragraph
.querySelector("strong")
?.textContent.trim();
const speakerName = speakerNameRaw
? removePrefix(speakerNameRaw, "Mr ", "Ms ", "Mx ", "Dr ")[0]
: null;
console.log(`Speaker name detected: ${speakerName}`);
const content = paragraph.textContent;
if (speaker && contentBuffer.trim() != "") {
console.log("Speaker name detected, inserting speech...");

if (speakerName && contentBuffer.trim() != "") {
console.log(
"Speaker name detected, inserting previously buffered speech...",
);
await insertSpeech(
supabase,
debateSpeechOrderNo,
debateId.id,
speakerIdBuffer,
contentBuffer,
);
debateSpeechCount++;

contentBuffer = "";
speakerIdBuffer = null;
debateSpeechOrderNo++;
}

if (speakerName) {
const { data: speakerIdData, error: speakerIdError } = await supabase
.from("mp")
.select("id")
.eq("full_name", speakerName)
.maybeSingle();
if (speakerIdError) throw speakerIdError;
if (speakerIdData) {
console.log(
`Speaker ${speakerName} found in database as ID ${speakerIdData.id}.`,
);
speakerIdBuffer = speakerIdData.id;
} else {
console.log(`Speaker ${speakerName} not found in database.`);
}
}
// If there's nothing, then don't insert the paragraph break
contentBuffer += (contentBuffer == "" ? "" : "\n\n") + content;
}
Expand All @@ -149,6 +181,7 @@ export default async function scrapeSittingReport(req: Request) {
supabase,
debateSpeechOrderNo,
debateId.id,
speakerIdBuffer,
contentBuffer,
);
debateSpeechCount++;
Expand Down

0 comments on commit 8cc11f2

Please sign in to comment.