Skip to content

Commit

Permalink
Add versioning fields, make migration including curator tables
Browse files: browse the repository at this point in the history
  • Loading branch information
justinsilvestre committed Mar 9, 2024
1 parent bafc3f7 commit d46e45e
Show file tree
Hide file tree
Showing 10 changed files with 238 additions and 86 deletions.
6 changes: 3 additions & 3 deletions app/features/curate/getCurationState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ export async function getCurationState(courseId: string, page: number) {
seenTexts
.flat()
.flatMap((t) =>
t.uniqueCharacters.flatMap((c) => c.figureId || []),
t.uniqueCharacters.flatMap((c) => c.character || []),
),
),
],
Expand Down Expand Up @@ -525,7 +525,7 @@ export async function getCurationState(courseId: string, page: number) {
.map((q) => ({ normalizedText: { contains: q } }))
: undefined,
},
figureId: {
character: {
notIn: charactersNotNeededAnymore,
in: course?.wantedCharacters.length
? course.wantedCharacters.split("")
Expand Down Expand Up @@ -635,7 +635,7 @@ export async function getCurationState(courseId: string, page: number) {
.map((q) => ({ normalizedText: { contains: q } }))
: undefined,
},
figureId: {
character: {
notIn: charactersNotNeededAnymore,
in: course?.wantedCharacters.length
? course.wantedCharacters.split("")
Expand Down
18 changes: 9 additions & 9 deletions app/isComponentFirstClass.ptest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,21 @@ describe("isComponentFirstClass", () => {
);

it("works with CDP-8CAB (left of 歸)", async () => {
const priorityFiguresIds = await prisma.kanjisenseFigure
const priorityFiguresKeys = await prisma.kanjisenseFigure
.findMany({
where: {
isPriority: true,
},
})
.then((fs) => fs.map((f) => f.id));
.then((fs) => fs.map((f) => f.key!));
const parent = "歸";
const component = "CDP-8CAB";
const componentsToDirectUsesPrimaryVariants = new Map<string, Set<string>>([
["CDP-8CAB", new Set(["帰"])],
]);
const figuresToVariantGroups = await getFiguresToVariantGroups(prisma);
const result = isComponentFirstClass(
new Set(priorityFiguresIds),
new Set(priorityFiguresKeys),
parent,
component,
componentsToDirectUsesPrimaryVariants,
Expand All @@ -76,7 +76,7 @@ describe("isComponentFirstClass", () => {
});

it("works with 𠚍", async () => {
const priorityFiguresIds = await prisma.kanjisenseFigure
const priorityFiguresKeys = await prisma.kanjisenseFigure
.findMany({
where: {
isPriority: true,
Expand All @@ -91,7 +91,7 @@ describe("isComponentFirstClass", () => {
]);
const figuresToVariantGroups = await getFiguresToVariantGroups(prisma);
const result = isComponentFirstClass(
new Set(priorityFiguresIds),
new Set(priorityFiguresKeys),
parent,
component,
componentsToDirectUsesPrimaryVariants,
Expand All @@ -102,7 +102,7 @@ describe("isComponentFirstClass", () => {
});

it("works with 旡", async () => {
const priorityFiguresIds = await prisma.kanjisenseFigure
const priorityFiguresKeys = await prisma.kanjisenseFigure
.findMany({
where: {
isPriority: true,
Expand All @@ -117,7 +117,7 @@ describe("isComponentFirstClass", () => {
]);
const figuresToVariantGroups = await getFiguresToVariantGroups(prisma);
const result = isComponentFirstClass(
new Set(priorityFiguresIds),
new Set(priorityFiguresKeys),
parent,
component,
componentsToDirectUsesPrimaryVariants,
Expand All @@ -128,7 +128,7 @@ describe("isComponentFirstClass", () => {
});

it("works with 卂", async () => {
const priorityFiguresIds = await prisma.kanjisenseFigure
const priorityFiguresKeys = await prisma.kanjisenseFigure
.findMany({
where: {
isPriority: true,
Expand All @@ -143,7 +143,7 @@ describe("isComponentFirstClass", () => {
]);
const figuresToVariantGroups = await getFiguresToVariantGroups(prisma);
const result = isComponentFirstClass(
new Set(priorityFiguresIds),
new Set(priorityFiguresKeys),
parent,
component,
componentsToDirectUsesPrimaryVariants,
Expand Down
8 changes: 8 additions & 0 deletions app/routes/_index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ const nichi: BadgeProps = {
id: "日",
image: {
id: "日",
key: "日",
version: 0,
type: "Kvg",
content: {
n: [
Expand Down Expand Up @@ -96,6 +98,8 @@ const getsu: BadgeProps = {
id: "月",
image: {
id: "月",
key: "月",
version: 0,
type: "Kvg",
content: {
n: [
Expand Down Expand Up @@ -125,6 +129,8 @@ const akarui: BadgeProps = {
id: "明",
image: {
id: "明",
key: "明",
version: 1,
type: "Kvg",
content: {
n: [
Expand Down Expand Up @@ -162,6 +168,8 @@ const mei: BadgeProps = {
id: "盟",
image: {
id: "盟",
key: "盟",
version: 1,
type: "Kvg",
content: {
n: [
Expand Down
36 changes: 24 additions & 12 deletions app/routes/curate.$courseId.tsx
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { writeFileSync } from "fs";

import type { BaseCorpusText, KanjisenseFigure } from "@prisma/client";
import { useActionData, useLoaderData, useSubmit } from "@remix-run/react";
import {
Expand Down Expand Up @@ -94,6 +96,16 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
? parseInt(queryStringParams.get("p")!)
: 1;

writeFileSync(
__dirname + "/curatorCoursesArchive.json",
JSON.stringify(
(await prisma.course.findMany()).map((c) => ({
id: c.id,
seenTexts: c.seenTexts,
})),
),
);
console.log(__dirname + "/curatorCoursesArchive.json");
const {
course,
seenTexts,
Expand Down Expand Up @@ -794,13 +806,13 @@ function TextUniqueComponents({
const newNonAtomic: React.ReactNode[] = [];

text.uniqueComponents.forEach((c) => {
const figure = getFigure(c.figureId);
const figure = getFigure(c.figureKey);
const badgeProps = figure && getBadgeProps(figure);
const newNode = !badgeProps ? null : (
<div key={c.figureId} className="inline-block align-middle">
<div key={c.figureKey} className="inline-block align-middle">
<FigureBadgeLink
width={3}
id={c.figureId}
id={c.figureKey}
newWindow
badgeProps={
figure.id.length === 1
Expand Down Expand Up @@ -878,7 +890,7 @@ function CopyYmlButton({
defaultTangReadings,
}: {
text: BaseCorpusText & {
uniqueCharacters: { figureId: string | null }[];
uniqueCharacters: { character: string | null }[];
};
defaultTangReadings?: string;
}) {
Expand Down Expand Up @@ -1084,7 +1096,7 @@ const useSeenTextsState = (
if (!text) return [t, oldSeenChars];
runningTotal = new Set([
...runningTotal,
...text.uniqueCharacters.flatMap((c) => c.figureId || []),
...text.uniqueCharacters.flatMap((c) => c.character || []),
]);
return [t, oldSeenChars];
});
Expand Down Expand Up @@ -1113,15 +1125,15 @@ const useSeenTextsState = (
const text = seenTextsFlat.find((t) => t.key === textKey);
if (!text) return;
text.uniqueCharacters.forEach((c) => {
if (!c.figureId) return;
if (seenSoFar.has(c.figureId)) return;
seenSoFar.add(c.figureId);
map.set(c.figureId, { textGroupIndex, textIndex, textKey });
if (!c.character) return;
if (seenSoFar.has(c.character)) return;
seenSoFar.add(c.character);
map.set(c.character, { textGroupIndex, textIndex, textKey });
});
text.uniqueComponents.forEach((c) => {
if (seenSoFar.has(c.figureId)) return;
seenSoFar.add(c.figureId);
map.set(c.figureId, { textGroupIndex, textIndex, textKey });
if (seenSoFar.has(c.figureKey)) return;
seenSoFar.add(c.figureKey);
map.set(c.figureKey, { textGroupIndex, textIndex, textKey });
});
});
});
Expand Down
8 changes: 4 additions & 4 deletions prisma/kanjisense/seedBaseCorpus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,11 @@ export async function seedCorpus(prisma: PrismaClient, corpusTextPath: string) {
data: batch.reduce(
(all, [, { uniqueChars }], i) => {
const uniqueComponents = uniquePriorityComponentsCache[i];
for (const figureId of uniqueComponents) {
for (const figureKey of uniqueComponents) {
all.push({
figureId,
figureKey,
baseCorpusTextId: hashCache[i],
frequencyScore: allFiguresFrequencyScores.get(figureId) ?? 0,
frequencyScore: allFiguresFrequencyScores.get(figureKey) ?? 0,
baseCorpusTextLength: lengthCache[i],
baseCorpusUniqueCharactersCount: uniqueChars.length,
baseCorpusUniqueComponentsCount: uniqueComponents.length,
Expand All @@ -207,7 +207,7 @@ export async function seedCorpus(prisma: PrismaClient, corpusTextPath: string) {
return all;
},
[] as {
figureId: string;
figureKey: string;
baseCorpusTextId: number;
frequencyScore: number;
baseCorpusTextLength: number;
Expand Down
7 changes: 6 additions & 1 deletion prisma/kanjisense/seedKanjisenseActiveSoundMarks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ export async function registerActiveSoundMarks(
function getPrimaryVariantId(id: string) {
return allVariantsToVariantGroupHead[id] || id;
}
for (const [id, tree] of componentsTrees.entries()) {
let visitedCount = 0;
for (const [id, tree] of componentsTrees) {
visitedCount++;
if (visitedCount % 500 === 0 || visitedCount === componentsTrees.size) {
console.log(`|| processed ${visitedCount} / ${componentsTrees.size}`);
}
const derivation =
(await prisma.kanjiDbCharacterDerivation.findUnique({
where: {
Expand Down
86 changes: 45 additions & 41 deletions prisma/kanjisense/seedKanjisenseFigures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ export async function seedKanjisenseFigures(
);
});

console.log("cleaning slate before creating figures");
await prisma.kanjisenseFigure.deleteMany({});
await executeAndLogTime("cleaning slate before creating figures", () =>
prisma.kanjisenseFigure.deleteMany({}),
);

await executeAndLogTime("seeding figures", async () => {
await inBatchesOf({
Expand Down Expand Up @@ -426,44 +427,49 @@ async function connectComponentsTreesEntries(
}
>,
) {
for (const [id, componentsTree] of componentsTreesInput) {
const figureUsesAsComponent = getComponentUses(id);
try {
const combinedAozoraAppearances =
(allAozoraCharacterFrequencies[id]?.appearances ?? 0) +
(figureUsesAsComponent
? setReduce(
figureUsesAsComponent,
(acc, parentId) =>
acc +
(allAozoraCharacterFrequencies[parentId]?.appearances ?? 0),
0,
)
: 0);

await prisma.kanjisenseFigure.update({
where: { id },
data: {
aozoraAppearances: combinedAozoraAppearances,
componentsTree: componentsTree.map((c) => c.toJSON()),
asComponent: figureUsesAsComponent?.size
? {
create: {
allUses: {
connect: Array.from(figureUsesAsComponent, (parentId) => ({
id: parentId,
})),
return await Promise.all(
Array.from(componentsTreesInput, async ([id, componentsTree]) => {
const figureUsesAsComponent = getComponentUses(id);
try {
const combinedAozoraAppearances =
(allAozoraCharacterFrequencies[id]?.appearances ?? 0) +
(figureUsesAsComponent
? setReduce(
figureUsesAsComponent,
(acc, parentId) =>
acc +
(allAozoraCharacterFrequencies[parentId]?.appearances ?? 0),
0,
)
: 0);

await prisma.kanjisenseFigure.update({
where: { id },
data: {
aozoraAppearances: combinedAozoraAppearances,
componentsTree: componentsTree.map((c) => c.toJSON()),
asComponent: figureUsesAsComponent?.size
? {
create: {
allUses: {
connect: Array.from(
figureUsesAsComponent,
(parentId) => ({
id: parentId,
}),
),
},
},
},
}
: undefined,
},
});
} catch (e) {
console.log({ id, componentsTree, figureUsesAsComponent });
throw e;
}
}
}
: undefined,
},
});
} catch (e) {
console.log({ id, componentsTree, figureUsesAsComponent });
throw e;
}
}),
);
}

async function prepareFiguresForMeaningAssignments(
Expand Down Expand Up @@ -681,10 +687,8 @@ async function getAllComponentsTrees(
visitedFigures++;
if (visitedFigures % 1000 === 0 || visitedFigures === figuresKeys.length) {
console.log(`|| ${visitedFigures} / ${figuresKeys.length} processed`);
console.dir(componentsTree);
}
});
console.log("WOP!");

return {
componentsTreesInput,
Expand Down
Loading

0 comments on commit d46e45e

Please sign in to comment.