Skip to content

Commit

Permalink
Setup basic component relations in KanjisenseFigure
Browse files Browse the repository at this point in the history
  • Loading branch information
justinsilvestre committed Nov 6, 2023
1 parent d284488 commit 0c332e0
Show file tree
Hide file tree
Showing 8 changed files with 374 additions and 180 deletions.
3 changes: 3 additions & 0 deletions app/lib/dic/componentsDictionary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3503,6 +3503,9 @@ GWS-U5BFD-G:
:
mnemonic: beat
standin:
𭃌:
mnemonic: engrave
reference:
#######
:
historical: tufts of hair
Expand Down
3 changes: 2 additions & 1 deletion app/lib/dic/kanjijumpSpecificVariants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -207,5 +207,6 @@ export const kanjijumpSpecificVariants = [
["徹", "GWS-U3054E-JV"],
["GWS-U7230-G", "爰"],
["歹", "歺", "𣦵"],
["GWS-U300EE", "𦥯"]
["GWS-U300EE", "𦥯"],
["臘","﨟","臈"],
]
3 changes: 2 additions & 1 deletion prisma/kanjisense/componentMeanings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export async function shouldComponentBeAssignedMeaning(
variantGroupId,
}: {
id: string;
/* includes self */
directUses: string[];
variantGroupId: string | null;
},
Expand All @@ -48,7 +49,7 @@ export async function shouldComponentBeAssignedMeaning(
directUses,
);
const isAtomic = false;
const minimumUsesInPriorityCandidates = isAtomic ? 1 : 2;
const minimumUsesInPriorityCandidates = isAtomic ? 2 : 3;

return Boolean(
usesInPriorityCandidates.size >= minimumUsesInPriorityCandidates,
Expand Down
70 changes: 59 additions & 11 deletions prisma/kanjisense/getFigureMeaningsText.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,73 @@
import { PrismaClient } from "@prisma/client";
import { KanjisenseFigureRelation, PrismaClient } from "@prisma/client";

const RADICAL_ENTRY_REGEX = /radical \(no\.|radical number/;
export async function getFigureMeaningsText(
prisma: PrismaClient,
figureId: string,
figure: KanjisenseFigureRelation,
componentsDictionaryEntry: ComponentMeaning | null,
) {
const unihanDefinition = prisma.unihan15.findUnique({
const figureId = figure.id;
const unihanDefinitionLookup = prisma.unihan15.findUnique({
where: { id: figureId },
select: { kDefinition: true },
});
const kanjidicEnglish = prisma.kanjidicEntry.findUnique({
const kanjidicEnglishLookup = prisma.kanjidicEntry.findUnique({
where: { id: figureId },
select: { definitions: true },
});

return {
unihanDefinitionText:
(await unihanDefinition)?.kDefinition?.join("; ") || null,
kanjidicEnglish:
(await kanjidicEnglish)?.definitions?.filter(
(e) => !RADICAL_ENTRY_REGEX.test(e),
) || [],
const mnemonicKeywords = componentsDictionaryEntry;
const historicalKeyword =
mnemonicKeywords?.historical && mnemonicKeywords.historical !== "(various)"
? mnemonicKeywords.historical
: null;
let mnemonicSource = "";
if (mnemonicKeywords?.reference)
mnemonicSource = ` {{cf. ${mnemonicKeywords.reference}}}`;
else if (mnemonicKeywords?.standin)
mnemonicSource = ` {{via ${mnemonicKeywords.standin}}}`;

const unihanDefinitionText =
(await unihanDefinitionLookup)?.kDefinition?.join("; ") || null;
const kanjidicEnglish =
(await kanjidicEnglishLookup)?.definitions?.filter(
(e) => !RADICAL_ENTRY_REGEX.test(e),
) || [];
const mnemonicKeyword = mnemonicKeywords?.mnemonic
? [mnemonicKeywords.mnemonic, mnemonicSource].join("")
: null;
const historicalKeywordOrDefinition =
(historicalKeyword ||
kanjidicEnglish?.[0] ||
unihanDefinitionText?.split("; ")?.[0]) ??
null;

const meaning = {
unihanDefinitionText,
kanjidicEnglish,
keyword: historicalKeywordOrDefinition || mnemonicKeyword,
mnemonicKeyword: !historicalKeywordOrDefinition ? mnemonicKeyword : null,
};

if (
!meaning.unihanDefinitionText &&
!meaning.kanjidicEnglish.length &&
!meaning.keyword &&
!meaning.mnemonicKeyword
)
return null;
return meaning;
}

/**
 * Keyword data for a single component, as passed to `getFigureMeaningsText`
 * through its `componentsDictionaryEntry` argument.
 * All fields are optional; an entry may carry any combination of them.
 */
export interface ComponentMeaning {
/**
 * Historical meaning of the component.
 * `getFigureMeaningsText` treats the literal value "(various)" as if no
 * historical meaning were given; otherwise this value is preferred over
 * dictionary definitions when choosing the figure's keyword.
 */
historical?: string;
/**
 * Mnemonic keyword, for when the historical meaning is absent or different.
 * `getFigureMeaningsText` only falls back to it when neither a historical
 * keyword nor a dictionary definition is available.
 */
mnemonic?: string;
/**
 * A common kanji containing this component, whose meaning the component
 * borrows for its mnemonic keyword.
 * Rendered as a ` {{via …}}` suffix on the mnemonic keyword, but only when
 * `reference` is not set.
 */
standin?: string;
/**
 * A common kanji using this component, from which the component derives its
 * mnemonic keyword.
 * Rendered as a ` {{cf. …}}` suffix on the mnemonic keyword; takes precedence
 * over `standin`.
 */
reference?: string;
/** For grouping components by meaning. */
tag?: string | null;
}
41 changes: 22 additions & 19 deletions prisma/kanjisense/seedKanjisenseFigureRelation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import { registerSeeded } from "../seedUtils";

import { inBatchesOf } from "./inBatchesOf";


export async function seedKanjisenseFigureRelation(
prisma: PrismaClient,
force = false,
Expand Down Expand Up @@ -123,12 +122,14 @@ export async function seedKanjisenseFigureRelation(
},
);

await prisma.kanjisenseFigure.deleteMany({});
await prisma.kanjisenseFigureRelation.deleteMany({});
await inBatchesOf(1000, [...dbInput.values()], async (batch) => {
await prisma.kanjisenseFigureRelation.createMany({
data: batch.map((r) => ({
id: r.id,
idsText: r.idsText,
selectedIdsComponents: r.selectedIdsComponents,
directUses: [...r.directUses],
listsAsComponent: [...r.listsAsComponent],
isPriorityCandidate: r.isPriorityCandidate,
Expand All @@ -155,12 +156,13 @@ class CreateFigureRelationInput {
constructor(
id: string,
idsText: string,
selectedIdsComponents: string[],
isPriorityCandidate: boolean,
variantGroupId: string | null,
) {
this.id = id;
this.idsText = idsText;
this.selectedIdsComponents = [];
this.selectedIdsComponents = selectedIdsComponents;
this.directUses = new Set();
this.listsAsComponent = new Set();
this.isPriorityCandidate = isPriorityCandidate;
Expand All @@ -179,7 +181,16 @@ async function analyzeFiguresRelations(
) {
for (const figureId of figureIds) {
const cached = cache.get(figureId);
const idsText = cached?.idsText ?? patchedIds.getIds(figureId);
const idsText = cached?.idsText ?? (await patchedIds.getIds(figureId));
const ids = parseIds(figureId, idsText);
const jLocaleIndex = ids.locales["J"];
if (verbose && !jLocaleIndex && ids.sequences.length > 1) {
console.log(`Arbitrarily choosing first sequence for ${figureId}`);
}
const selectedIdsComponents = getComponentsFromIds(
ids.sequences[jLocaleIndex ?? 0],
).filter((c) => c !== figureId);

const variantGroupId =
cached?.variantGroupId ??
variantGroups.find((v) => v.includes(figureId))?.[0] ??
Expand All @@ -189,7 +200,8 @@ async function analyzeFiguresRelations(
cached ||
new CreateFigureRelationInput(
figureId,
await idsText,
idsText,
selectedIdsComponents,
options.isPriority,
variantGroupId,
);
Expand All @@ -199,22 +211,11 @@ async function analyzeFiguresRelations(
}

if (!cached) {
const ids = parseIds(figureId, await idsText);
const jLocaleIndex = ids.locales["J"];
if (verbose && !jLocaleIndex && ids.sequences.length > 1) {
console.log(`Arbitrarily choosing first sequence for ${figureId}`);
}

const components = getComponentsFromIds(
ids.sequences[jLocaleIndex ?? 0],
).filter((c) => c !== figureId);

if (components.length > 1) {
figureRelation.selectedIdsComponents = components;
if (selectedIdsComponents.length > 1) {
await analyzeFiguresRelations(
prisma,
variantGroups,
components,
selectedIdsComponents,
cache,
patchedIds,
{
Expand All @@ -226,14 +227,16 @@ async function analyzeFiguresRelations(
},
);
}
for (const componentKey of components) {
for (const componentKey of selectedIdsComponents) {
cache.get(componentKey)!.directUses.add(figureId);
}
}
}
}

/** returns figure keys */
/** returns figure keys for NON-ATOMIC components,
* and empty array for atomic components
*/
function getComponentsFromIds(ids: ParseIds.IDS): string[] {
return ParseIds.flatten(ids).map((component) => {
const key = component.type === "html" ? component.code : component.char;
Expand Down
Loading

0 comments on commit 0c332e0

Please sign in to comment.