Skip to content

Commit

Permalink
Increase deploy script timeout, seed unihan data in batches
Browse files Browse the repository at this point in the history
  • Loading branch information
justinsilvestre committed Dec 10, 2023
1 parent ddcbcc9 commit 31c1126
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 62 deletions.
1 change: 1 addition & 0 deletions fly.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ path = "/metrics"

[deploy]
release_command = "bash ./scripts/migrate.sh"
release_command_timeout = "30m"

[experimental]
allowed_public_ports = [ ]
Expand Down
44 changes: 22 additions & 22 deletions prisma/external/seedKanjiDbVariants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,25 +42,25 @@ function getKanjiDbVariantTmpId(
return `${variant}@${base}@${variantType}`;
}
function registerVariant(
dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput>,
dbInput: Map<string, Prisma.KanjiDbVariantCreateManyInput>,
variant: string,
base: string,
variantType: KanjiDbVariantType,
) {
dbInput[getKanjiDbVariantTmpId(variantType, variant, base)] = {
dbInput.set(getKanjiDbVariantTmpId(variantType, variant, base), {
variant,
base,
variantType,
};
});
}

async function getOldStyleDbVariants(prisma: PrismaClient) {
const dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput> = {};
const dbInput = new Map<string, Prisma.KanjiDbVariantCreateManyInput>();
await getkanjiDbOldStyleDbInput(dbInput);
await getHyogaiDbInput();
await getJinmeiDbInput(dbInput);
await prisma.kanjiDbVariant.createMany({
data: Object.values(dbInput).map(({ variant, base, variantType }) => ({
data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({
variant,
base,
variantType,
Expand All @@ -69,7 +69,7 @@ async function getOldStyleDbVariants(prisma: PrismaClient) {
}

async function getKanjiDbBorrowedVariant(prisma: PrismaClient) {
const dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput> = {};
const dbInput = new Map<string, Prisma.KanjiDbVariantCreateManyInput>();
await forEachLine(files.kanjiDbBorrowedInput, (line) => {
if (!line || line.startsWith("#") || line.startsWith("jp")) return;

Expand All @@ -80,7 +80,7 @@ async function getKanjiDbBorrowedVariant(prisma: PrismaClient) {
});

await prisma.kanjiDbVariant.createMany({
data: Object.values(dbInput).map(({ variant, base, variantType }) => ({
data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({
variant,
base,
variantType,
Expand All @@ -89,7 +89,7 @@ async function getKanjiDbBorrowedVariant(prisma: PrismaClient) {
}

async function getKanjiDbTwEduVariants(prisma: PrismaClient) {
const dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput> = {};
const dbInput = new Map<string, Prisma.KanjiDbVariantCreateManyInput>();
await forEachLine(files.kanjiDbTwEduVariants, (line) => {
if (!line || line.startsWith("#") || line.startsWith("tw")) return;

Expand All @@ -100,7 +100,7 @@ async function getKanjiDbTwEduVariants(prisma: PrismaClient) {
});

await prisma.kanjiDbVariant.createMany({
data: Object.values(dbInput).map(({ variant, base, variantType }) => ({
data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({
variant,
base,
variantType,
Expand All @@ -109,7 +109,7 @@ async function getKanjiDbTwEduVariants(prisma: PrismaClient) {
}

async function getKanjiDbHanyuDaCidianVariants(prisma: PrismaClient) {
const dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput> = {};
const dbInput = new Map<string, Prisma.KanjiDbVariantCreateManyInput>();
await forEachLine(files.kanjiDbHanyuDaCidianVariants, (line) => {
if (!line || line.startsWith("#") || line.startsWith("hy")) return;

Expand All @@ -135,7 +135,7 @@ async function getKanjiDbHanyuDaCidianVariants(prisma: PrismaClient) {
});

await prisma.kanjiDbVariant.createMany({
data: Object.values(dbInput).map(({ variant, base, variantType }) => ({
data: Array.from(dbInput, ([, { variant, base, variantType }]) => ({
variant,
base,
variantType,
Expand All @@ -144,7 +144,7 @@ async function getKanjiDbHanyuDaCidianVariants(prisma: PrismaClient) {
}

async function getkanjiDbOldStyleDbInput(
dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput>,
dbInput: Map<string, Prisma.KanjiDbVariantCreateManyInput>,
) {
await forEachLine(files.kanjiDbOldStyle, async (lineWithComments) => {
if (
Expand Down Expand Up @@ -201,7 +201,7 @@ async function getkanjiDbOldStyleDbInput(
}

async function getHyogaiDbInput() {
const dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput> = {};
const dbInput = new Map<string, Prisma.KanjiDbVariantCreateManyInput>();
await forEachLine(files.kanjiDbHyogaiVariants, (line) => {
if (!line || line.startsWith("#") || line.startsWith("hyo")) return;

Expand All @@ -214,7 +214,7 @@ async function getHyogaiDbInput() {
}

async function getJinmeiDbInput(
dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput>,
dbInput: Map<string, Prisma.KanjiDbVariantCreateManyInput>,
) {
await forEachLine(files.kanjiDbJinmeiVariants, (line) => {
if (!line || line.startsWith("#") || line.startsWith("jin")) return;
Expand All @@ -227,7 +227,7 @@ async function getJinmeiDbInput(
}

function registerOldAndNewVariants(
dbInput: Record<string, Prisma.KanjiDbVariantCreateManyInput>,
dbInput: Map<string, Prisma.KanjiDbVariantCreateManyInput>,
oldForm: string,
newForm: string,
) {
Expand All @@ -244,19 +244,19 @@ function registerOldAndNewVariants(
}
}
/**
 * Removes every old-style/new-style pairing registered for `newForm`.
 *
 * Looks up each pending entry whose `base` is `newForm` and whose
 * `variantType` is `KanjiDbVariantType.OldStyle`, then deletes both that entry
 * and its mirrored `NewStyle` entry (variant = `newForm`, base = the old form).
 *
 * @param dbInput - Pending variant rows keyed by `getKanjiDbVariantTmpId`;
 *   mutated in place.
 * @param newForm - The new-style character whose pairings should be dropped.
 */
function deregisterOldAndNewVariants(
  dbInput: Map<string, Prisma.KanjiDbVariantCreateManyInput>,
  newForm: string,
) {
  // `Object.entries` only reports own enumerable properties, and a Map exposes
  // none, so it always produced [] once `dbInput` became a Map and nothing was
  // ever deregistered. Read the Map's values instead, copied into an array
  // before the deletions below.
  const oldStyleEntries = Array.from(dbInput.values()).filter(
    ({ base, variantType }) =>
      base === newForm && variantType === KanjiDbVariantType.OldStyle,
  );
  for (const { variant: oldForm } of oldStyleEntries) {
    dbInput.delete(
      getKanjiDbVariantTmpId(KanjiDbVariantType.OldStyle, oldForm, newForm),
    );
    dbInput.delete(
      getKanjiDbVariantTmpId(KanjiDbVariantType.NewStyle, newForm, oldForm),
    );
  }
}
2 changes: 1 addition & 1 deletion prisma/external/seedUnihan14.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ export async function seedUnihan14(prisma: PrismaClient, force = false) {
registerVariant(dbInput, "倶", "kZVariant", "俱");
registerVariant(dbInput, "俱", "kZVariant", "倶");

await inBatchesOf(1000, dbInput, async (batch) => {
await inBatchesOf(10000, dbInput, async (batch) => {
await prisma.unihan14.createMany({
data: Array.from(batch, ([id, fields]) => ({
id,
Expand Down
13 changes: 5 additions & 8 deletions prisma/external/seedUnihan15.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ export async function seedUnihan15(prisma: PrismaClient, force = false) {
});
if (seeded && !force) console.log(`unihan15 already seeded. 🌱`);
else {
console.log(`seeding unihan15...`);

await prisma.unihan15.deleteMany({});

const dbInput = new Map<string, Record<string, string>>();
Expand Down Expand Up @@ -65,7 +63,7 @@ export async function seedUnihan15(prisma: PrismaClient, force = false) {
});
});

await inBatchesOf(1000, dbInput, async (batch) => {
await inBatchesOf(10000, dbInput, async (batch) => {
const data = Array.from(batch, ([id, fields]) => ({
id,
kDefinition: fields.kDefinition || null,
Expand Down Expand Up @@ -96,12 +94,11 @@ export async function seedUnihan15(prisma: PrismaClient, force = false) {
.findMany({ select: { id: true } })
.then((x) => x.map((x) => x.id));
for (const readingId of allReadings) {
if (dbInput.get(readingId)) {
if (dbInput.get(readingId)!.kRSUnicode.includes(" "))
const reading = dbInput.get(readingId);
if (reading) {
if (reading.kRSUnicode.includes(" "))
console.log(
`Found more than one radical for ${readingId}: ${
dbInput.get(readingId)!.kRSUnicode
}`,
`Found more than one radical for ${readingId}: ${reading.kRSUnicode}`,
);
await prisma.kanjisenseFigureReading.update({
where: {
Expand Down
11 changes: 7 additions & 4 deletions prisma/kanjisense/inBatchesOf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@ export async function inBatchesOf<T, U>(
const batchStartTime = Date.now() / 1000;
await action(batch);
console.log(
`batch ${batchIndex + 1} of ${totalBatches} done in ${
Date.now() / 1000 - batchStartTime
}s`,
` | batch ${batchIndex + 1} of ${totalBatches} done in ${(
Date.now() / 1000 -
batchStartTime
).toFixed(2)}s`,
);
batch = [];
batchIndex++;
}
}
console.log(`all batches done in ${Date.now() / 1000 - totalStartTime}s`);
console.log(
`all batches done in ${(Date.now() / 1000 - totalStartTime).toFixed(2)}s`,
);
}
14 changes: 0 additions & 14 deletions prisma/kanjisense/seedKanjisenseFigureReadings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -267,20 +267,6 @@ export async function seedKanjisenseFigureReadings(
}
}

const charactersTojmdictOnyomi = new Map<string, Set<JmDictOnyomi>>();
class JmDictOnyomi {
constructor(public onyomi: string) {
const existing = charactersTojmdictOnyomi.get(onyomi);
if (existing) {
existing.add(this);
} else {
charactersTojmdictOnyomi.set(onyomi, new Set([this]));
}
}

static cache = new Map<string, JmDictOnyomi>();
}

const selectedOnReadings: string[] = [];
if (
isSingleCharacter(readingFigureId) &&
Expand Down
2 changes: 2 additions & 0 deletions prisma/seed.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import { PrismaClient } from "@prisma/client";

import { seed } from "./seedScript";

const prisma = new PrismaClient();

seed(prisma)
.catch((e) => {
console.error(e);
Expand Down
40 changes: 27 additions & 13 deletions prisma/seedScript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import { PrismaClient } from "@prisma/client";
import bcrypt from "bcryptjs";

import { seedKanjiDbComposition } from "./external/seedKanjiDbComposition";
import { seedKanjiDbSbgyNotes } from "./external/seedKanjiDbSbgyNotes";
import { seedKanjiDbVariants } from "./external/seedKanjiDbVariants";
import { seedKanjidic } from "./external/seedKanjidic";
import { seedSbgy } from "./external/seedSbgy";
Expand All @@ -28,22 +27,32 @@ import { seedJMDict } from "./seedJMDict";
export async function seed(prisma: PrismaClient) {
const startTime = Date.now();
try {
await executeAndLogTime("seeding kanjidic", () => seedKanjidic(prisma));
await executeAndLogTime("seeding unihan15", () => seedUnihan15(prisma));
await executeAndLogTime("seeding unihan14", () => seedUnihan14(prisma));
await executeAndLogTime("seeding unihan12", () => seedUnihan12(prisma));
console.log(
"disk usage before:",
await prisma.$queryRaw`SELECT datname as db_name, pg_size_pretty(pg_database_size(datname)) as db_usage FROM pg_database`,
);

await executeAndLogTime("seeding kanjidic", () =>
seedKanjidic(prisma, false),
);
await executeAndLogTime("seeding unihan15", () =>
seedUnihan15(prisma, false),
);
await executeAndLogTime("seeding unihan14", () =>
seedUnihan14(prisma, false),
);
await executeAndLogTime("seeding unihan12", () =>
seedUnihan12(prisma, false),
);
await executeAndLogTime("seeding kanjiDB composition data", () =>
seedKanjiDbComposition(prisma),
seedKanjiDbComposition(prisma, false),
);
await executeAndLogTime("seeding kanjiDB variants", () =>
seedKanjiDbVariants(prisma),
);
await executeAndLogTime("seeding sbgynotes", () =>
seedKanjiDbSbgyNotes(prisma),
seedKanjiDbVariants(prisma, false),
);
await executeAndLogTime("seeding sbgy", () => seedSbgy(prisma));
await executeAndLogTime("seeding sbgy", () => seedSbgy(prisma, false));
await executeAndLogTime("seeding aozora frequencies", () =>
seedScriptinAozoraFrequencies(prisma),
seedScriptinAozoraFrequencies(prisma, false),
);
await executeAndLogTime("seeding kanjisense variant groups", () =>
seedKanjisenseVariantGroups(prisma, false),
Expand Down Expand Up @@ -85,7 +94,12 @@ export async function seed(prisma: PrismaClient) {
);

await executeAndLogTime("seeding kanjisense figure search properties", () =>
seedFigureSearchProperties(prisma, 1000, true),
seedFigureSearchProperties(prisma, 1000, false),
);

console.log(
"disk usage after:",
await prisma.$queryRaw`SELECT datname as db_name, pg_size_pretty(pg_database_size(datname)) as db_usage FROM pg_database`,
);
} catch (error) {
console.log(`❌ ${(Date.now() - startTime) / 1000}s.`);
Expand Down

0 comments on commit 31c1126

Please sign in to comment.