From 413ecad2a2c604a777086f03d8c7e580415745f3 Mon Sep 17 00:00:00 2001 From: David Straub Date: Sun, 5 Nov 2023 22:23:11 +0100 Subject: [PATCH] UI for OCR (#302) --- src/GrampsJs.js | 1 + src/components/GrampsjsMediaObject.js | 65 +++- src/components/GrampsjsTextRecognition.js | 346 ++++++++++++++++++++++ src/icons.js | 3 +- src/lang/en.json | 4 +- src/strings.js | 1 + src/views/GrampsjsViewMedia.js | 9 + 7 files changed, 426 insertions(+), 3 deletions(-) create mode 100644 src/components/GrampsjsTextRecognition.js diff --git a/src/GrampsJs.js b/src/GrampsJs.js index b6a2905b..d68cd4fa 100644 --- a/src/GrampsJs.js +++ b/src/GrampsJs.js @@ -701,6 +701,7 @@ export class GrampsJs extends LitElement { grampsId="${this._pageId}" .strings="${this._strings}" ?canEdit="${this.canEdit}" + .dbInfo="${this._dbInfo}" > + + ${this.dbInfo?.server?.ocr + ? html` +

+ ${renderIconSvg( + mdiTextRecognition, + 'var(--mdc-theme-on-primary)', + 0, + 'icon' + )} + ${this._('Text Recognition')} +

+ ` + : ''} ` } + _handleOcrClick() { + this._ocr = true + } + + _handleCloseOcrClick() { + this._ocr = false + } + + _renderOcr() { + return html`
+ + + + +
` + } + _handleFaceDeselect(e) { this.selectedRect = {} this._drawing = false diff --git a/src/components/GrampsjsTextRecognition.js b/src/components/GrampsjsTextRecognition.js new file mode 100644 index 00000000..14814c96 --- /dev/null +++ b/src/components/GrampsjsTextRecognition.js @@ -0,0 +1,346 @@ +import {html, LitElement, css} from 'lit' +import {GrampsjsTranslateMixin} from '../mixins/GrampsjsTranslateMixin.js' +import {sharedStyles} from '../SharedStyles.js' +import {apiPost} from '../api.js' +import {fireEvent} from '../util.js' + +const tesseractLanguages = { + afr: 'Afrikaans', + amh: 'አማርኛ', + ara: 'العربية', + asm: 'অসমীয়া', + aze: 'Azərbaycanca', + aze_cyrl: 'Азәрбајҹан', + bel: 'Беларуская', + ben: 'বাঙালি', + bod: 'བོད་ཡིག', + bos: 'Bosanski', + bre: 'Brezhoneg', + bul: 'Български', + cat: 'Català', + ceb: 'Sinugboanon', + ces: 'Čeština', + chi_sim: '简体中文', + chi_tra: '繁體中文', + chr: 'ᏣᎳᎩ', + cos: 'Corsu', + cym: 'Cymraeg', + dan: 'Dansk', + dan_frak: 'Dansk - Fraktur', + deu: 'Deutsch', + deu_frak: 'Deutsch - Fraktur', + dzo: 'རྫོང་ཁ', + ell: 'Ελληνικά', + eng: 'English', + enm: 'English, Middle', + epo: 'Esperanto', + est: 'Eesti', + eus: 'Euskara', + fao: 'Føroyskt', + fas: 'فارسی', + fil: 'Wikang Filipino', + fin: 'Suomi', + fra: 'Français', + frk: 'Deutsch - Fraktur', + frm: 'Français, Moyen', + fry: 'Frysk', + gla: 'Gàidhlig', + gle: 'Gaeilge', + glg: 'Galego', + grc: 'Ἑλληνική', + guj: 'ગુજરાતી', + hat: 'Kreyòl Ayisyen', + heb: 'עברית', + hin: 'हिन्दी', + hrv: 'Hrvatski', + hun: 'Magyar', + hye: 'Հայերեն', + iku: 'ᐃᓄᒃᑎᑐᑦ', + ind: 'Bahasa Indonesia', + isl: 'Íslenska', + ita: 'Italiano', + ita_old: 'Italiano - Vecchio', + jav: 'Basa Jawa', + jpn: '日本語', + kan: 'ಕನ್ನಡ', + kat: 'ქართული', + kat_old: 'ქართული - ძველი', + kaz: 'Қазақ', + khm: 'ភាសាខ្មែរ', + kir: 'Кыргызча', + kmr: 'Kurmanji', + kor: '한국어', + kor_vert: '한국어 (수직쓰기)', + kur: 'كوردی', + lao: 'ລາວ', + lat: 'Latina', + lav: 'Latviešu', + lit: 'Lietuvių', + ltz: 'Lëtzebuergesch', + mal: 
'മലയാളം', + mar: 'मराठी', + mkd: 'Македонски', + mlt: 'Malti', + mon: 'Монгол', + mri: 'Te Reo Māori', + msa: 'Bahasa Malaysia', + mya: 'ဗမာ', + nep: 'नेपाली', + nld: 'Nederlands', + nor: 'Norsk', + oci: 'Occitan', + ori: 'ଓଡ଼ିଆ', + pan: 'ਪੰਜਾਬੀ', + pol: 'Polski', + por: 'Português', + pus: 'پښتو', + que: 'Runasimi', + ron: 'Română', + rus: 'Русский', + san: 'संस्कृतम्', + sin: 'සිංහල', + slk: 'Slovenčina', + slk_frak: 'Slovenčina - Fraktur', + slv: 'Slovenščina', + snd: 'سنڌي', + spa: 'Español', + spa_old: 'Español - Antiguo', + sqi: 'Shqip', + srp: 'Српски', + srp_latn: 'Srpski - Latinica', + sun: 'Basa Sunda', + swa: 'Kiswahili', + swe: 'Svenska', + syr: 'ܣܘܪܝ', + tam: 'தமிழ்', + tat: 'Татар', + tel: 'తెలుగు', + tgk: 'Тоҷикӣ', + tgl: 'Filipino', + tha: 'ภาษาไทย', + tir: 'ትግርኛ', + ton: 'lea faka-Tonga', + tur: 'Türkçe', + uig: 'ئۇيغۇرچە', + ukr: 'Українська', + urd: 'اردو', + uzb: 'Oʻzbek', + uzb_cyrl: 'Ўзбек', + vie: 'Tiếng Việt', + yid: 'ייִדיש', + yor: 'Yorùbá', +} + +// Gramps to tesseract language codes +const langMapping = { + ar: 'ara', + bg: 'bul', + br: 'bre', + ca: 'cat', + cs: 'ces', + da: 'dan', + de_AT: 'deu', + de: 'deu', + el: 'ell', + en_GB: 'eng', + en: 'eng', + eo: 'epo', + es: 'spa', + fi: 'fin', + fr: 'fra', + ga: 'gle', + he: 'heb', + hr: 'hrv', + hu: 'hun', + is: 'isl', + it: 'ita', + ja: 'jpn', + lt: 'lit', + lv: 'lav', + mk: 'mkd', + nb: 'nor', + nl: 'nld', + nn: 'nor', + pl: 'pol', + pt_BR: 'por', + pt_PT: 'por', + ro: 'ron', + ru: 'rus', + sk: 'slk', + sl: 'slv', + sq: 'sqi', + sr: 'srp', + sv: 'swe', + ta: 'tam', + tr: 'tur', + uk: 'ukr', + vi: 'vie', + zh_CN: 'chi_sim', + zh_HK: 'chi_tra', + zh_TW: 'chi_tra', +} + +export class GrampsjsTextRecognition extends GrampsjsTranslateMixin( + LitElement +) { + static get styles() { + return [ + sharedStyles, + css` + .result { + font-family: var( + --grampsjs-note-font-family, + var(--grampsjs-body-font-family) + ); + font-size: var(--grampsjs-note-font-size, 17px); + line-height: 
var(--grampsjs-note-line-height, 1.5em); + color: var(--grampsjs-note-color, #000000); + white-space: pre-wrap; + } + `, + ] + } + + static get properties() { + return { + handle: {type: String}, + options: {type: Object}, + languages: {type: Array}, + canEdit: {type: Boolean}, + _string: {type: String}, + } + } + + constructor() { + super() + this.options = {} + this.handle = '' + this.languages = [] + this.canEdit = false + this._string = '' + } + + render() { + return html` +

+ + ${this.languages.map( + lang => html` + ${tesseractLanguages[lang] || lang} + ` + )} + +

+

+ ${this._('Run')} + +

+

${this._string ?? ''}

+ ${this._string && this.canEdit + ? html` +

+ ${this._('Save as Note')} +

+ ` + : ''} + ` + } + + firstUpdated() { + this._setLangInitial() + } + + _setLangInitial() { + const locale = this.strings.__lang__ ?? '' + if (!locale) { + return + } + const lang = langMapping[locale] ?? '' + if (!lang) { + return + } + this.options = {...this.options, lang} + } + + _isDisabled() { + return !(this.options.lang && this.handle) + } + + _handleLangChange(e) { + this.options = {...this.options, lang: e.target.value} + } + + async _handleRun() { + const prog = this.renderRoot.querySelector('#indicator-ocr') + prog.reset() + prog.open = true + const queryParam = new URLSearchParams(this.options).toString() + const url = `/api/media/${this.handle}/ocr?${queryParam}` + const data = await apiPost(url) + if ('error' in data) { + prog.setError() + prog.errorMessage = data.error + } else if ('task' in data) { + // queued task + prog.taskId = data.task?.id || '' + } else { + // eagerly executed task + this._string = data.data || '' + prog.setComplete() + } + } + + _handleTaskComplete(e) { + const {status} = e.detail + const result = JSON.parse(status.result || {}) + this._string = result || '' + } + + async _handleSaveAsNote() { + const note = { + _class: 'Note', + type: {_class: 'NoteType', string: 'Transcript'}, + text: {_class: 'StyledText', string: this._string}, + } + const data = await apiPost('/api/notes/', note) + if ('error' in data) { + fireEvent(this, 'grampsjs:error', {message: data.error}) + } else { + const [txn] = data.data + const gid = txn?.new?.gramps_id + const handle = txn?.handle + if (gid && handle) { + fireEvent(this, 'edit:action', { + action: 'addNoteRef', + data: {data: [handle]}, + }) + } + } + } +} + +window.customElements.define( + 'grampsjs-text-recognition', + GrampsjsTextRecognition +) diff --git a/src/icons.js b/src/icons.js index 45a15f8a..96a04416 100644 --- a/src/icons.js +++ b/src/icons.js @@ -70,11 +70,12 @@ export function renderIcon(path, color = '#999999') { >` } -export function renderIconSvg(path, color = '#999999', 
rotate = 0) { +export function renderIconSvg(path, color = '#999999', rotate = 0, slot = '') { return html` diff --git a/src/lang/en.json b/src/lang/en.json index 41317c69..70d2d5b6 100644 --- a/src/lang/en.json +++ b/src/lang/en.json @@ -123,5 +123,7 @@ "Common ancestors": "Common ancestors", "Blocked": "Blocked", "Edit event type": "Edit event type", - "Edit place type": "Edit place type" + "Edit place type": "Edit place type", + "Text Recognition": "Text Recognition", + "Save as Note": "Save as Note" } \ No newline at end of file diff --git a/src/strings.js b/src/strings.js index 53ab9fab..ae9945fd 100644 --- a/src/strings.js +++ b/src/strings.js @@ -534,4 +534,5 @@ export const grampsStrings = [ 'Zoom', "Don't use call name", "Wives use husband's surname (from first family listed)", + 'Run', ] diff --git a/src/views/GrampsjsViewMedia.js b/src/views/GrampsjsViewMedia.js index 847f19dc..85c508f4 100644 --- a/src/views/GrampsjsViewMedia.js +++ b/src/views/GrampsjsViewMedia.js @@ -6,8 +6,15 @@ import {apiGet, apiPut} from '../api.js' import {objectTypeToEndpoint, arrayEqual} from '../util.js' export class GrampsjsViewMedia extends GrampsjsViewObject { + static get properties() { + return { + dbInfo: {type: Object}, + } + } + constructor() { super() + this.dbInfo = {} this._className = 'media' } @@ -22,6 +29,8 @@ export class GrampsjsViewMedia extends GrampsjsViewObject {