Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LaTeX text formatter fixes #87

Merged
merged 11 commits into from
Jun 27, 2021
Merged
63 changes: 32 additions & 31 deletions build/bundle.js

Large diffs are not rendered by default.

14 changes: 4 additions & 10 deletions css/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -177,17 +177,17 @@ td {
overflow-x: auto;
}

.textFormatButton {
.latexButton {
visibility: hidden;
margin-left: 3%
}

.timeBlock:hover .textFormatButton {
.timeBlock:hover .latexButton {
display: block;
visibility: visible;
}

.untimedBlock:hover .textFormatButton {
.untimedBlock:hover .latexButton {
display: block;
visibility: visible;
}
Expand All @@ -199,20 +199,14 @@ td {

.tierSelectionSection {
background : rgb(215, 233, 252);
display: grid;
grid-template-rows: 1fr 1fr;
padding-bottom: 5%;
padding-left: 5%;
padding-right: 5%;
padding-top: 5%;
}

.tierSelectionRow {
/* display: grid;
grid-template-columns: 1fr 7fr;
overflow-x: scroll; */
padding-bottom: 5%;
padding-top: 5%;
padding-bottom: 12px;
}

.formatResultSection {
sciepsilon marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
98 changes: 98 additions & 0 deletions jsx/App/Stories/Story/Display/Latex/LatexResultContainer.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import { TranslatableText } from "~./jsx/App/locale/TranslatableText.jsx";
import {
latexStoryTitleText,
latexStoryIDText,
latexSentenceURLText,
latexLibraryText,
} from "~./jsx/App/locale/LocaleConstants.jsx";

export const LatexResultContainer = ({ sentenceId, processedMaterial }) => {
// <pre> means that its content is pre-formatted text. Newlines are preserved.
return (
<div className="latexResultContainer" sentenceId={sentenceId}>
<pre>
<TranslatableText dictionary={latexStoryTitleText}/> {processedMaterial["title"].replace(/\_/g, " ") + "\n"}
<TranslatableText dictionary={latexStoryIDText}/> {processedMaterial["storyId"].replace(/_/g, "\\_") + "\n"}
<TranslatableText dictionary={latexSentenceURLText}/> {processedMaterial["sentenceUrl"].replace(/_/g, "\\_") + "\n"}
</pre>
<pre><TranslatableText dictionary={latexLibraryText} /></pre>
<pre>{convertToLatex(processedMaterial)}</pre>
</div>
);
};

/*
  Builds the gb4e-modified style LaTeX example for one sentence.
  The pieces are emitted in this order: \begin{exe} header, the sentence and
  morpheme lines, the gloss line, the translation line, the story title,
  and finally \end{exe}.
*/
function convertToLatex(material) {
  const morphemePart = getMorphemeLines(material["morphemes"]);
  const glossPart = getMorphologicalAnalysisLine(material["gloss"]);
  const translationPart = getSentenceTranslationLine(material["sentenceTranslation"]);
  // LaTeX only prints an underscore when it is written as \_
  const titlePart = material["title"].replace(/_/g, "\\_") + "\n";

  return `${"\\begin{exe} \n \\ex \\label{example} \n "}${morphemePart}${glossPart}${translationPart}${titlePart}${"\\end{exe} \n"}`;
}

/*
  Produces two lines: first the plain sentence (whole words separated by
  spaces), then the \gll line in which every word is written as the
  concatenation of its morphemes. The \gll line is closed with "\\".
*/
function getMorphemeLines(morphemes) {
  const plainWords = []; // The whole words, without any - or = decomposition.
  const gllTokens = ["\\gll"]; // One token per word: its root, suffixes and clitics glued together.

  Object.values(morphemes).forEach((entry) => {
    Object.keys(entry).forEach((wholeWord) => {
      plainWords.push(wholeWord);
      gllTokens.push(entry[wholeWord].join(""));
    });
  });
  gllTokens.push("\\\\ \n ");

  return `${plainWords.join(" ")} \n ${gllTokens.join(" ")}`;
}

/*
  Creates the line for the morpheme translations (the gloss line).

  `gloss` maps a word index to an object of the form { wholeWord: [glossItem, ...] }.
  For each word, its gloss items are concatenated; items recognised by isSuffix()
  are lower-cased and wrapped in \textsc{...}. The words are joined with single
  spaces and the line is closed with "\\".
*/
function getMorphologicalAnalysisLine(gloss) {
  const glossList = []; // One entry per word, then the line terminator.

  for (const entry of Object.values(gloss)) {
    for (const glossItems of Object.values(entry)) {
      // Only the suffixes and clitics need \textsc
      const glossForThisWord = Object.values(glossItems).map((glossItem) =>
        isSuffix(glossItem) ? "\\textsc{" + glossItem.toLowerCase() + "}" : glossItem
      );
      // Some glossed words are several words in the translation. The LaTeX package
      // treats every space as a boundary between glossed words, so EVERY space inside
      // one word's gloss is turned into \_ to keep the phrase grouped after rendering.
      // (A string pattern would only replace the first space, hence the global regex.)
      glossList.push(glossForThisWord.join("").replace(/ /g, "\\_"));
    }
  }
  glossList.push("\\\\ \n ");

  return glossList.join(" ");
}

/*
  Puts the sentence translation into LaTeX format: \glt `...' \\

  `sentenceObject` is the list of values of the selected translation tier; only
  the "value" of its first entry is used. When that list is empty or missing
  (for example because no tier matched the selection), the placeholder
  "Undefined" is emitted instead of throwing, so that the rest of the LaTeX
  code is still generated and the user can see that something needs editing.
*/
function getSentenceTranslationLine(sentenceObject) {
  const firstEntry = sentenceObject == null ? undefined : sentenceObject[0];
  const hasValue = firstEntry != null && firstEntry["value"] != null;
  const sentence = hasValue ? firstEntry["value"] : "Undefined";

  const translationStart = "\\glt `";
  const translationEnd = "' \\\\ \n ";
  return translationStart + sentence + translationEnd;
}

/*
  Tells whether a gloss item should be treated as a suffix or clitic:
  it starts with "=" or "-", or it contains no lower-case letters
  (i.e. it is unchanged by upper-casing).
*/
function isSuffix(item) {
  if (item.startsWith("=")) {
    return true;
  }
  if (item.startsWith("-")) {
    return true;
  }
  return item.toUpperCase() === item;
}
176 changes: 176 additions & 0 deletions jsx/App/Stories/Story/Display/Latex/LatexResultWindow.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import React from 'react';
import { LatexResultContainer } from "./LatexResultContainer.jsx";
import { TranslatableText } from "~./jsx/App/locale/TranslatableText.jsx";
import {
latexSentenceTierName,
latexMorphemesTierName,
latexMorphemeTranslationsTierName,
latexSentenceTranslationsTierName,
} from "~./jsx/App/locale/LocaleConstants.jsx";
var htmlEscape = require("html-es6cape");

/*
This window displays the LaTeX conversion format result, along with
some other metadata of the story.
*/
export default class LatexResultWindow extends React.Component {

constructor(props) {
super(props);
}

/*
Calls individual helper functions to gather the words,
morphemes, morpheme translation, and metadata
of the selected sentence.
*/
processSentences() {
let dependents = this.props.sentence["dependents"];

// Get each Latex section's corresponding tier name.
const wordTier = this.props.tierMap.sentence;
const morphemeTier = this.props.tierMap.morphemes;
const glossTier = this.props.tierMap.morphemeTranslations;
const sentenceTranslationTier = this.props.tierMap.sentenceTranslations;

let wordList = [];
let morphemeList = [];
let glossList = [];
let sentenceTranslation = [];

// Loop through dependents to match each Latex section's tier name to the actual content of that tier.
for (var idx in dependents) {
// The selected tier names from the tierMap should have been escaped,
// so here we also need to escape each original tier name so that the selected
// tier name can be matched to one of the original tier names.
const escapedTierName = htmlEscape(dependents[idx]["tier"]);
if (escapedTierName === wordTier) {
wordList = dependents[idx]["values"];
}
if (escapedTierName === morphemeTier) {
morphemeList = dependents[idx]["values"];
}
if (escapedTierName === glossTier) {
glossList = dependents[idx]["values"];
}
if (escapedTierName === sentenceTranslationTier) {
sentenceTranslation = dependents[idx]["values"];
}

}

const morphAndGloss = this.organizeWords(wordList, morphemeList, glossList);
const morphemeMap = morphAndGloss["morphemes"];
const glossMap = morphAndGloss["gloss"];

// Retrieves some metadata to be displayed later.
const title = this.getTitle();
const storyId = this.getStoryId();
const sentenceUrl = this.getSentenceUrl();

return {
storyId : storyId,
title : title,
sentenceUrl : sentenceUrl,
morphemes : morphemeMap,
gloss : glossMap,
sentenceTranslation : sentenceTranslation
};
}

/* Returns a map between each word and all of its sub-components (core and clitics, and gloss, etc.) */
organizeWords(wordList, morphemeList, glossList) {
let wordListCounter = 0;
let morphemeListIndex = 0;

let word2Morpheme = {};
let word2Gloss = {};

while (wordListCounter < wordList.length) {
word2Morpheme[wordListCounter] = {};
word2Gloss[wordListCounter] = {};

const wordEntry = wordList[wordListCounter];
const word = wordEntry["value"];
const wordStartSlot = wordEntry["start_slot"];
const wordEndSlot = wordEntry["end_slot"];

let morphemes = [];
let gloss = [];
let flag = true;
// Find the morphemes belonging to the current word, and add them and their gloss
// into a list.
while (flag && morphemeListIndex < morphemeList.length) {
// Add the "Undefined" strings just in case that some texts have glossing misaligned.
// For texts with aligned morphemes and glossing, "Undefined" shouldn't show up,
// but adding "Undefined here" avoids an error being thrown and is a way of letting the user know
// that something is off with this sentence so they should edit the generated LaTeX code.
const morphemeEntry = morphemeList[morphemeListIndex] || "Undefined";;
const glossEntry = glossList[morphemeListIndex] || "Undefined";
if (morphemeEntry["start_slot"] >= wordStartSlot && morphemeEntry["end_slot"] <= wordEndSlot) {
// If a morpheme item has the whole word, eg. "cundyi-'je='fa", we need to
// split the current morpheme on = or -, so that each root or suffix or clitic is on its own.
const morphemeValue = morphemeEntry["value"] || "Undefined";
const glossValue = glossEntry["value"] || "Undefined";
// First, add a space in front of = and - so that we can split on space later and preserve both = and -
const currentMorpheme = morphemeValue.replace("=", " =").replace("-", " -");
const currentGloss = glossValue.replace("=", " =").replace("-", " -");;
const currentMorphemeSplit = currentMorpheme.split(" ");
const currentGlossSplit = currentGloss.split(" ");

for (const e of currentMorphemeSplit) {
if (e !== "") {
morphemes.push(e);
}
}
for (const e of currentGlossSplit) {
if (e !== "") {
gloss.push(e);
}
}
morphemeListIndex += 1;
} else {
flag = false;
}
}
word2Morpheme[wordListCounter][word] = morphemes;
word2Gloss[wordListCounter][word] = gloss;
wordListCounter += 1;
}

return {
"morphemes" : word2Morpheme,
"gloss" : word2Gloss
};
}

/* Retrives the title of the story from metadata. */
getTitle() {
const title = this.props.metadata["title"]["_default"];
return title;
}

/* Retrives the story ID. */
getStoryId() {
return this.props.metadata["story ID"];
}

/* Retrives the sentence's URL. */
getSentenceUrl() {
const isStoryTimed = this.props.metadata["timed"];
const indexID = isStoryTimed ? (this.props.sentence["start_time_ms"]-1) : (this.props.sentence["sentence_id"]);
const url = window.location.href.replace(/\?.*$/,'') + `?${indexID}`;
return url;
}

render() {
const processedMaterial = this.processSentences();
return (
<LatexResultContainer
sentenceId={this.props.sentenceId}
processedMaterial={processedMaterial}
/>
);
};

}
65 changes: 65 additions & 0 deletions jsx/App/Stories/Story/Display/Latex/TierButtonList.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { TranslatableText } from "~./jsx/App/locale/TranslatableText.jsx";

const htmlEscape = require("html-es6cape");

/*
A list of radio buttons for all the tier names.
The tierNames passed in here have not been escaped. They may contain special characters.
*/
export const TierButtonList = ({ sentenceId, tierNames, latexSectionId, latexSectionName }) => {
return (
<div className="tierSelectionRow">
<b><TranslatableText dictionary={latexSectionName} /></b>
<TierRadioButtons sentenceId={sentenceId} tierNames={tierNames} latexSectionId={latexSectionId} latexSectionName={latexSectionName} />
</div>
);
};

const TierRadioButtons = ({ sentenceId, tierNames, latexSectionId, latexSectionName }) => {
const children = [];

// Iterate through tier names and create a list of radio buttons corresponding to each tier.
for (let i = 0; i < tierNames.length; i++) {
// Call escape function on tier names so that special characters can be used as HTML property names.
const tierName = tierNames[i];
const escapedTierName = htmlEscape(tierName);

const buttonId = `button-${sentenceId}-${escapedTierName}-for-${latexSectionId}`;

children.push(
<TierRadioButton
sentenceId={sentenceId}
escapedTierName={escapedTierName}
latexSectionId={latexSectionId}
buttonId={buttonId}
isChecked={i == 0}
/>
);
children.push(
<TierRadioButtonLabel
tierName={tierName}
buttonId={buttonId}
/>
);
}

return (<div>{children}</div>);
};

const TierRadioButton = ({ sentenceId, escapedTierName, latexSectionId, buttonId, isChecked }) => {
const groupName = `button-${sentenceId}-for-${latexSectionId}`;

return (
<input
type="radio"
id={buttonId}
value={escapedTierName}
name={groupName}
defaultChecked={isChecked}
/>
);
};

const TierRadioButtonLabel = ({ tierName, buttonId }) => {
return (<label for={buttonId}>{tierName}</label>);
};
Loading