Skip to content

Commit

Permalink
chore: add more packages to conversion and language
Browse files Browse the repository at this point in the history
  • Loading branch information
bouassaba committed Jun 15, 2024
1 parent 778b06a commit fd84cc5
Show file tree
Hide file tree
Showing 3 changed files with 272 additions and 29 deletions.
264 changes: 247 additions & 17 deletions conversion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,42 +30,272 @@ RUN npm i -g @shopify/screenshot-glb
# Install the LibreOffice components (Writer, Calc, Impress, Draw, Math).
# Every package line except the last must end with a backslash; otherwise the
# RUN instruction ends early and the remaining names are parsed as instructions.
RUN apk add --no-cache \
    libreoffice-writer \
    libreoffice-calc \
    libreoffice-impress \
    libreoffice-draw \
    libreoffice-math

# Install the font packages made available to the conversion image.
# Each package is listed exactly once, in alphabetical order, so additions
# show up as clean one-line diffs.
# NOTE(review): font-manager*, font-viewer, font-util-dev and the *-doc
# packages look like GUI tools, development files and documentation rather
# than fonts — confirm they are needed here; they are kept to preserve the
# installed package set.
RUN apk add --no-cache \
    font-adobe-100dpi \
    font-adobe-100dpi-doc \
    font-adobe-75dpi \
    font-adobe-75dpi-doc \
    font-adobe-source-code-pro \
    font-adobe-utopia-100dpi \
    font-adobe-utopia-100dpi-doc \
    font-adobe-utopia-75dpi \
    font-adobe-utopia-75dpi-doc \
    font-adobe-utopia-type1 \
    font-adobe-utopia-type1-doc \
    font-alias \
    font-alias-doc \
    font-anonymous-pro-nerd \
    font-arabic-misc \
    font-arimo \
    font-arimo-nerd \
    font-awesome \
    font-awesome-brands \
    font-awesome-free \
    font-b612 \
    font-b612-mono \
    font-bakoma \
    font-bakoma-doc \
    font-bakoma-otf \
    font-bakoma-ttf \
    font-barlow \
    font-bh-100dpi \
    font-bh-100dpi-doc \
    font-bh-75dpi \
    font-bh-75dpi-doc \
    font-bitstream-100dpi \
    font-bitstream-100dpi-doc \
    font-bitstream-75dpi \
    font-bitstream-75dpi-doc \
    font-bitstream-type1 \
    font-bitstream-type1-doc \
    font-bitstrom-wera-sans-mono-nerd \
    font-cantarell \
    font-carlito \
    font-cascadia-code-nerd \
    font-comic-shanns-mono-nerd \
    font-cronyx-cyrillic \
    font-cronyx-cyrillic-doc \
    font-croscore \
    font-cursor-misc \
    font-dec-misc \
    font-degheest \
    font-dejavu \
    font-dejavu-sans-mono-nerd \
    font-droid \
    font-droid-nonlatin \
    font-droid-sans-mono-nerd \
    font-dseg \
    font-eb-garamond \
    font-fira-code-nerd \
    font-fira-mono-nerd \
    font-freefont \
    font-freefont-doc \
    font-go-mono-nerd \
    font-hack \
    font-hack-nerd \
    font-happy-times \
    font-hasklig-nerd \
    font-hermit-nerd \
    font-ia-writer-nerd \
    font-ibm-plex-mono-nerd \
    font-ibm-type1 \
    font-ibm-type1-doc \
    font-inconsolata \
    font-inconsolata-nerd \
    font-inter \
    font-iosevka \
    font-iosevka-aile \
    font-iosevka-base \
    font-iosevka-curly \
    font-iosevka-curly-slab \
    font-iosevka-slab \
    font-ipa \
    font-ipaex \
    font-isas-misc \
    font-isas-misc-doc \
    font-jetbrains-mono \
    font-jetbrains-mono-nerd \
    font-jetbrains-mono-nl \
    font-jetbrains-mono-vf \
    font-jis-misc \
    font-jis-misc-doc \
    font-karrik \
    font-liberation \
    font-liberation-mono-nerd \
    font-liberation-sans-narrow \
    font-linux-libertine \
    font-manager \
    font-manager-common \
    font-manager-doc \
    font-manager-lang \
    font-manager-nemo \
    font-manager-thunar \
    font-meslo-nerd \
    font-micro-misc \
    font-misc-cyrillic \
    font-misc-cyrillic-doc \
    font-misc-ethiopic \
    font-misc-misc \
    font-monofur-nerd \
    font-mononoki \
    font-mononoki-nerd \
    font-montserrat \
    font-mutt-misc \
    font-noto \
    font-noto-adlam \
    font-noto-ahom \
    font-noto-all \
    font-noto-arabic \
    font-noto-armenian \
    font-noto-balinese \
    font-noto-bamum \
    font-noto-bassa-vah \
    font-noto-batak \
    font-noto-bengali \
    font-noto-buginese \
    font-noto-buhid \
    font-noto-canadian-aboriginal \
    font-noto-chakma \
    font-noto-cham \
    font-noto-cherokee \
    font-noto-chorasmian \
    font-noto-cjk \
    font-noto-cjk-extra \
    font-noto-common \
    font-noto-coptic \
    font-noto-cypro-minoan \
    font-noto-devanagari \
    font-noto-dives-akuru \
    font-noto-duployan \
    font-noto-elbasan \
    font-noto-emoji \
    font-noto-ethiopic \
    font-noto-extra \
    font-noto-fangsong \
    font-noto-georgian \
    font-noto-grantha \
    font-noto-gujarati \
    font-noto-gunjala-gondi \
    font-noto-gurmukhi \
    font-noto-hanifi-rohingya \
    font-noto-hanunoo \
    font-noto-hebrew \
    font-noto-historical \
    font-noto-indic-siyaq-numbers \
    font-noto-javanese \
    font-noto-kaithi \
    font-noto-kannada \
    font-noto-kawi \
    font-noto-kayah-li \
    font-noto-khitan-small-script \
    font-noto-khmer \
    font-noto-khojki \
    font-noto-lao \
    font-noto-lepcha \
    font-noto-limbu \
    font-noto-lisu \
    font-noto-makasar \
    font-noto-malayalam \
    font-noto-masaram-gondi \
    font-noto-math \
    font-noto-mayan-numerals \
    font-noto-medefaidrin \
    font-noto-meetei-mayek \
    font-noto-mende-kikakui \
    font-noto-miao \
    font-noto-modi \
    font-noto-mongolian \
    font-noto-mro \
    font-noto-music \
    font-noto-myanmar \
    font-noto-nag-mundari \
    font-noto-nandinagari \
    font-noto-naskh-arabic \
    font-noto-nastaliq-urdu \
    font-noto-new-tai-lue \
    font-noto-newa \
    font-noto-nko \
    font-noto-nushu \
    font-noto-nyiakeng-puachue-hmong \
    font-noto-ol-chiki \
    font-noto-old-uyghur \
    font-noto-oriya \
    font-noto-osage \
    font-noto-ottoman-siyaq \
    font-noto-pahawh-hmong \
    font-noto-pau-cin-hau \
    font-noto-rashi-hebrew \
    font-noto-rejang \
    font-noto-samaritan \
    font-noto-saurashtra \
    font-noto-sharada \
    font-noto-signwriting \
    font-noto-sinhala \
    font-noto-sora-sompeng \
    font-noto-soyombo \
    font-noto-sundanese \
    font-noto-syloti-nagri \
    font-noto-symbols \
    font-noto-syriac \
    font-noto-tagbanwa \
    font-noto-tai \
    font-noto-tamil \
    font-noto-tangsa \
    font-noto-telugu \
    font-noto-test \
    font-noto-thaana \
    font-noto-thai \
    font-noto-tibetan \
    font-noto-tifinagh \
    font-noto-tirhuta \
    font-noto-toto \
    font-noto-vai \
    font-noto-vithkuqi \
    font-noto-wancho \
    font-noto-warang-citi \
    font-noto-yezidi \
    font-noto-yi \
    font-nunito \
    font-opensans \
    font-overpass \
    font-overpass-nerd \
    font-parisienne \
    font-roboto \
    font-roboto-flex \
    font-roboto-mono \
    font-schumacher-misc \
    font-screen-cyrillic \
    font-screen-cyrillic-doc \
    font-share-tech-mono-nerd \
    font-sligoil \
    font-sony-misc \
    font-source-code-pro-nerd \
    font-space-mono-nerd \
    font-sun-misc \
    font-terminus \
    font-terminus-doc \
    font-terminus-nerd \
    font-tinos-nerd \
    font-tlwg \
    font-ubuntu \
    font-ubuntu-mono-nerd \
    font-ubuntu-nerd \
    font-unifont \
    font-urw-base35 \
    font-util \
    font-util-dev \
    font-util-doc \
    font-uw-ttyp0 \
    font-victor-mono-nerd \
    font-viewer \
    font-vollkorn \
    font-winitzki-cyrillic \
    font-wqy-zenhei \
    font-xfree86-type1

RUN apk add --no-cache \
tesseract-ocr \
Expand Down
13 changes: 13 additions & 0 deletions language/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,21 @@ ENV PATH="/root/.local/bin:$PATH"
# Install pdm with pipx, telling pipx to use whichever `python` is first on PATH.
RUN pipx install pdm --python $(which python)

# Install the project's production dependencies only, as non-editable packages.
# NOTE(review): presumably this creates the .venv directory used by the later
# `.venv/bin/python3` steps — confirm against the pdm configuration.
RUN pdm install --prod --no-editable

# Bootstrap pip inside the project virtualenv.
# NOTE(review): presumably needed because `spacy download` installs pipelines
# through pip and the pdm-created venv ships without it — confirm.
RUN .venv/bin/python3 -m ensurepip

# Download every spaCy pipeline referenced by language/app.py in a single layer.
# `set -eu` aborts the build on the first failed download; --no-cache-dir is
# forwarded to pip so the downloaded wheels are not kept in the image.
RUN set -eu; \
    for model in \
        xx_ent_wiki_sm \
        zh_core_web_trf \
        de_core_news_lg \
        en_core_web_trf \
        fr_core_news_lg \
        it_core_news_lg \
        ja_core_news_trf \
        nl_core_news_lg \
        pt_core_news_lg \
        ru_core_news_lg \
        es_core_news_lg \
        sv_core_news_lg \
    ; do \
        .venv/bin/python3 -m spacy download "$model" --no-cache-dir; \
    done

# Start the Flask app through pdm, listening on all interfaces on port 8084.
# NOTE(review): `flask run` is Flask's built-in server — confirm it is the
# intended way to serve this container in production.
ENTRYPOINT ["pdm", "run", "flask", "run", "--host=0.0.0.0", "--port=8084"]

Expand Down
24 changes: 12 additions & 12 deletions language/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@
nlp = None
# Maps a language code to the name of the spaCy pipeline used for that language.
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last occurrence, which hides the value actually in effect.
# NOTE(review): "chi_sim" / "chi_tra" look like Tesseract language identifiers
# rather than ISO 639-3 codes — confirm against the callers.
iso_6393_to_model = {
    # No dedicated pipeline is listed for Arabic; it uses the multi-language model.
    "ara": "xx_ent_wiki_sm",
    "chi_sim": "zh_core_web_trf",
    "chi_tra": "zh_core_web_trf",
    "deu": "de_core_news_lg",
    "eng": "en_core_web_trf",
    "fra": "fr_core_news_lg",
    # No dedicated pipeline is listed for Hindi; it uses the multi-language model.
    "hin": "xx_ent_wiki_sm",
    "ita": "it_core_news_lg",
    "jpn": "ja_core_news_trf",
    "nld": "nl_core_news_lg",
    "por": "pt_core_news_lg",
    "rus": "ru_core_news_lg",
    "spa": "es_core_news_lg",
    "swe": "sv_core_news_lg",
}


Expand Down

0 comments on commit fd84cc5

Please sign in to comment.