From 31c1126af7c3d29fdb5eff51ccc742bf9e6fcfd3 Mon Sep 17 00:00:00 2001 From: justinsilvestre Date: Sun, 10 Dec 2023 12:27:00 +0100 Subject: [PATCH] Increase deploy script timeout, seed unihan data in batches --- fly.toml | 1 + prisma/external/seedKanjiDbVariants.ts | 44 +++++++++---------- prisma/external/seedUnihan14.ts | 2 +- prisma/external/seedUnihan15.ts | 13 +++--- prisma/kanjisense/inBatchesOf.ts | 11 +++-- .../seedKanjisenseFigureReadings.ts | 14 ------ prisma/seed.ts | 2 + prisma/seedScript.ts | 40 +++++++++++------ 8 files changed, 65 insertions(+), 62 deletions(-) diff --git a/fly.toml b/fly.toml index b6bf41a39..eade218b0 100644 --- a/fly.toml +++ b/fly.toml @@ -13,6 +13,7 @@ path = "/metrics" [deploy] release_command = "bash ./scripts/migrate.sh" +release_command_timeout = "30m" [experimental] allowed_public_ports = [ ] diff --git a/prisma/external/seedKanjiDbVariants.ts b/prisma/external/seedKanjiDbVariants.ts index 0cb44819b..d735c067d 100644 --- a/prisma/external/seedKanjiDbVariants.ts +++ b/prisma/external/seedKanjiDbVariants.ts @@ -42,25 +42,25 @@ function getKanjiDbVariantTmpId( return `${variant}@${base}@${variantType}`; } function registerVariant( - dbInput: Record, + dbInput: Map, variant: string, base: string, variantType: KanjiDbVariantType, ) { - dbInput[getKanjiDbVariantTmpId(variantType, variant, base)] = { + dbInput.set(getKanjiDbVariantTmpId(variantType, variant, base), { variant, base, variantType, - }; + }); } async function getOldStyleDbVariants(prisma: PrismaClient) { - const dbInput: Record = {}; + const dbInput = new Map(); await getkanjiDbOldStyleDbInput(dbInput); await getHyogaiDbInput(); await getJinmeiDbInput(dbInput); await prisma.kanjiDbVariant.createMany({ - data: Object.values(dbInput).map(({ variant, base, variantType }) => ({ + data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({ variant, base, variantType, @@ -69,7 +69,7 @@ async function getOldStyleDbVariants(prisma: PrismaClient) { } async function getKanjiDbBorrowedVariant(prisma: PrismaClient) { - const dbInput: Record = {}; + const dbInput = new Map(); await forEachLine(files.kanjiDbBorrowedInput, (line) => { if (!line || line.startsWith("#") || line.startsWith("jp")) return; @@ -80,7 +80,7 @@ async function getKanjiDbBorrowedVariant(prisma: PrismaClient) { }); await prisma.kanjiDbVariant.createMany({ - data: Object.values(dbInput).map(({ variant, base, variantType }) => ({ + data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({ variant, base, variantType, @@ -89,7 +89,7 @@ async function getKanjiDbBorrowedVariant(prisma: PrismaClient) { } async function getKanjiDbTwEduVariants(prisma: PrismaClient) { - const dbInput: Record = {}; + const dbInput = new Map(); await forEachLine(files.kanjiDbTwEduVariants, (line) => { if (!line || line.startsWith("#") || line.startsWith("tw")) return; @@ -100,7 +100,7 @@ async function getKanjiDbTwEduVariants(prisma: PrismaClient) { }); await prisma.kanjiDbVariant.createMany({ - data: Object.values(dbInput).map(({ variant, base, variantType }) => ({ + data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({ variant, base, variantType, @@ -109,7 +109,7 @@ async function getKanjiDbTwEduVariants(prisma: PrismaClient) { } async function getKanjiDbHanyuDaCidianVariants(prisma: PrismaClient) { - const dbInput: Record = {}; + const dbInput = new Map(); await forEachLine(files.kanjiDbHanyuDaCidianVariants, (line) => { if (!line || line.startsWith("#") || line.startsWith("hy")) return; @@ -135,7 +135,7 @@ async function getKanjiDbHanyuDaCidianVariants(prisma: PrismaClient) { }); await prisma.kanjiDbVariant.createMany({ - data: Object.values(dbInput).map(({ variant, base, variantType }) => ({ + data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({ variant, base, variantType, @@ -144,7 +144,7 @@ async function getKanjiDbHanyuDaCidianVariants(prisma: PrismaClient) { } async function getkanjiDbOldStyleDbInput( - dbInput: Record, + dbInput: Map, ) { await forEachLine(files.kanjiDbOldStyle, async (lineWithComments) => { if ( @@ -201,7 +201,7 @@ async function getkanjiDbOldStyleDbInput( } async function getHyogaiDbInput() { - const dbInput: Record = {}; + const dbInput = new Map(); await forEachLine(files.kanjiDbHyogaiVariants, (line) => { if (!line || line.startsWith("#") || line.startsWith("hyo")) return; @@ -214,7 +214,7 @@ async function getHyogaiDbInput() { } async function getJinmeiDbInput( - dbInput: Record, + dbInput: Map, ) { await forEachLine(files.kanjiDbJinmeiVariants, (line) => { if (!line || line.startsWith("#") || line.startsWith("jin")) return; @@ -227,7 +227,7 @@ async function getJinmeiDbInput( } function registerOldAndNewVariants( - dbInput: Record, + dbInput: Map, oldForm: string, newForm: string, ) { @@ -244,7 +244,7 @@ function registerOldAndNewVariants( } } function deregisterOldAndNewVariants( - dbInput: Record, + dbInput: Map, newForm: string, ) { const oldForms = Object.entries(dbInput).filter( @@ -252,11 +252,11 @@ function deregisterOldAndNewVariants( base === newForm && variantType === KanjiDbVariantType.OldStyle, ); for (const [, { variant: oldForm }] of oldForms) { - delete dbInput[ - getKanjiDbVariantTmpId(KanjiDbVariantType.OldStyle, oldForm, newForm) - ]; - delete dbInput[ - getKanjiDbVariantTmpId(KanjiDbVariantType.NewStyle, newForm, oldForm) - ]; + dbInput.delete( + getKanjiDbVariantTmpId(KanjiDbVariantType.OldStyle, oldForm, newForm), + ); + dbInput.delete( + getKanjiDbVariantTmpId(KanjiDbVariantType.NewStyle, newForm, oldForm), + ); } } diff --git a/prisma/external/seedUnihan14.ts b/prisma/external/seedUnihan14.ts index c0921863a..d586303d9 100644 --- a/prisma/external/seedUnihan14.ts +++ b/prisma/external/seedUnihan14.ts @@ -72,7 +72,7 @@ export async function seedUnihan14(prisma: PrismaClient, force = false) { registerVariant(dbInput, "倶", "kZVariant", "俱"); registerVariant(dbInput, "俱", "kZVariant", "倶"); - await inBatchesOf(1000, dbInput, async (batch) => { + await inBatchesOf(10000, dbInput, async (batch) => { await prisma.unihan14.createMany({ data: Array.from(batch, ([id, fields]) => ({ id, diff --git a/prisma/external/seedUnihan15.ts b/prisma/external/seedUnihan15.ts index d11bba7e3..b77aa75a2 100644 --- a/prisma/external/seedUnihan15.ts +++ b/prisma/external/seedUnihan15.ts @@ -13,8 +13,6 @@ export async function seedUnihan15(prisma: PrismaClient, force = false) { }); if (seeded && !force) console.log(`unihan15 already seeded. 🌱`); else { - console.log(`seeding unihan15...`); - await prisma.unihan15.deleteMany({}); const dbInput = new Map>(); @@ -65,7 +63,7 @@ export async function seedUnihan15(prisma: PrismaClient, force = false) { }); }); - await inBatchesOf(1000, dbInput, async (batch) => { + await inBatchesOf(10000, dbInput, async (batch) => { const data = Array.from(batch, ([id, fields]) => ({ id, kDefinition: fields.kDefinition || null, @@ -96,12 +94,11 @@ export async function seedUnihan15(prisma: PrismaClient, force = false) { .findMany({ select: { id: true } }) .then((x) => x.map((x) => x.id)); for (const readingId of allReadings) { - if (dbInput.get(readingId)) { - if (dbInput.get(readingId)!.kRSUnicode.includes(" ")) + const reading = dbInput.get(readingId); + if (reading) { + if (reading.kRSUnicode.includes(" ")) console.log( - `Found more than one radical for ${readingId}: ${ - dbInput.get(readingId)!.kRSUnicode - }`, + `Found more than one radical for ${readingId}: ${reading.kRSUnicode}`, ); await prisma.kanjisenseFigureReading.update({ where: { diff --git a/prisma/kanjisense/inBatchesOf.ts b/prisma/kanjisense/inBatchesOf.ts index 1adc77452..bf1031077 100644 --- a/prisma/kanjisense/inBatchesOf.ts +++ b/prisma/kanjisense/inBatchesOf.ts @@ -19,13 +19,16 @@ export async function inBatchesOf( const batchStartTime = Date.now() / 1000; await action(batch); console.log( - `batch ${batchIndex + 1} of ${totalBatches} done in ${ - Date.now() / 1000 - batchStartTime - }s`, + ` | batch ${batchIndex + 1} of ${totalBatches} done in ${( + Date.now() / 1000 - + batchStartTime + ).toFixed(2)}s`, ); batch = []; batchIndex++; } } - console.log(`all batches done in ${Date.now() / 1000 - totalStartTime}s`); + console.log( + `all batches done in ${(Date.now() / 1000 - totalStartTime).toFixed(2)}s`, + ); } diff --git a/prisma/kanjisense/seedKanjisenseFigureReadings.ts b/prisma/kanjisense/seedKanjisenseFigureReadings.ts index af853bebf..baf6c8d0c 100644 --- a/prisma/kanjisense/seedKanjisenseFigureReadings.ts +++ b/prisma/kanjisense/seedKanjisenseFigureReadings.ts @@ -267,20 +267,6 @@ export async function seedKanjisenseFigureReadings( } } - const charactersTojmdictOnyomi = new Map>(); - class JmDictOnyomi { - constructor(public onyomi: string) { - const existing = charactersTojmdictOnyomi.get(onyomi); - if (existing) { - existing.add(this); - } else { - charactersTojmdictOnyomi.set(onyomi, new Set([this])); - } - } - - static cache = new Map(); - } - const selectedOnReadings: string[] = []; if ( isSingleCharacter(readingFigureId) && diff --git a/prisma/seed.ts b/prisma/seed.ts index 3fbc4c4e4..6fc5da1d1 100644 --- a/prisma/seed.ts +++ b/prisma/seed.ts @@ -1,7 +1,9 @@ import { PrismaClient } from "@prisma/client"; import { seed } from "./seedScript"; + const prisma = new PrismaClient(); + seed(prisma) .catch((e) => { console.error(e); diff --git a/prisma/seedScript.ts b/prisma/seedScript.ts index 38bcdfc67..124f2b5c5 100644 --- a/prisma/seedScript.ts +++ b/prisma/seedScript.ts @@ -2,7 +2,6 @@ import { PrismaClient } from "@prisma/client"; import bcrypt from "bcryptjs"; import { seedKanjiDbComposition } from "./external/seedKanjiDbComposition"; -import { seedKanjiDbSbgyNotes } from "./external/seedKanjiDbSbgyNotes"; import { seedKanjiDbVariants } from "./external/seedKanjiDbVariants"; import { seedKanjidic } from "./external/seedKanjidic"; import { seedSbgy } from "./external/seedSbgy"; @@ -28,22 +27,32 @@ import { seedJMDict } from "./seedJMDict"; export async function seed(prisma: PrismaClient) { const startTime = Date.now(); try { - await executeAndLogTime("seeding kanjidic", () => seedKanjidic(prisma)); - await executeAndLogTime("seeding unihan15", () => seedUnihan15(prisma)); - await executeAndLogTime("seeding unihan14", () => seedUnihan14(prisma)); - await executeAndLogTime("seeding unihan12", () => seedUnihan12(prisma)); + console.log( + "disk usage before:", + await prisma.$queryRaw`SELECT datname as db_name, pg_size_pretty(pg_database_size(datname)) as db_usage FROM pg_database`, + ); + + await executeAndLogTime("seeding kanjidic", () => + seedKanjidic(prisma, false), + ); + await executeAndLogTime("seeding unihan15", () => + seedUnihan15(prisma, false), + ); + await executeAndLogTime("seeding unihan14", () => + seedUnihan14(prisma, false), + ); + await executeAndLogTime("seeding unihan12", () => + seedUnihan12(prisma, false), + ); await executeAndLogTime("seeding kanjiDB composition data", () => - seedKanjiDbComposition(prisma), + seedKanjiDbComposition(prisma, false), ); await executeAndLogTime("seeding kanjiDB variants", () => - seedKanjiDbVariants(prisma), - ); - await executeAndLogTime("seeding sbgynotes", () => - seedKanjiDbSbgyNotes(prisma), + seedKanjiDbVariants(prisma, false), ); - await executeAndLogTime("seeding sbgy", () => seedSbgy(prisma)); + await executeAndLogTime("seeding sbgy", () => seedSbgy(prisma, false)); await executeAndLogTime("seeding aozora frequencies", () => - seedScriptinAozoraFrequencies(prisma), + seedScriptinAozoraFrequencies(prisma, false), ); await executeAndLogTime("seeding kanjisense variant groups", () => seedKanjisenseVariantGroups(prisma, false), @@ -85,7 +94,12 @@ export async function seed(prisma: PrismaClient) { ); await executeAndLogTime("seeding kanjisense figure search properties", () => - seedFigureSearchProperties(prisma, 1000, true), + seedFigureSearchProperties(prisma, 1000, false), + ); + + console.log( + "disk usage after:", + await prisma.$queryRaw`SELECT datname as db_name, pg_size_pretty(pg_database_size(datname)) as db_usage FROM pg_database`, ); } catch (error) { console.log(`❌ ${(Date.now() - startTime) / 1000}s.`);