From 8eaeb85d51e2f8e978056fccae756c568056fe2c Mon Sep 17 00:00:00 2001 From: Ho Kim Date: Sat, 31 Aug 2024 03:47:43 +0900 Subject: [PATCH] Initial commit --- .dockerignore | 18 +++++++++ .github/workflows/ci.yml | 76 +++++++++++++++++++++++++++++++++++++ .gitignore | 2 + Dockerfile | 34 +++++++++++++++++ LICENSE | 21 ++++++++++ README.md | 32 ++++++++++++++++ examples/latex-url.md | 26 +++++++++++++ md-translate.sh | 82 ++++++++++++++++++++++++++++++++++++++++ prompt.md | 11 ++++++ template.json | 9 +++++ 10 files changed, 311 insertions(+) create mode 100644 .dockerignore create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 examples/latex-url.md create mode 100755 md-translate.sh create mode 100644 prompt.md create mode 100644 template.json diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..9de86ea --- /dev/null +++ b/.dockerignore @@ -0,0 +1,18 @@ +# Dockerfile +/.dockerignore +/Dockerfile + +# Documents +/*.md +/docs + +# Git +/.git +/.github +/.gitignore + +# IDE +/.vscode + +# Prompts +!prompt.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..539f0d1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,76 @@ +--- +name: ci + +on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + push: + pull_request: + branches: + - master + +env: + DEBIAN_FRONTEND: noninteractive + + REGISTRY: quay.io + REGISTRY_USER: kerryeon + REGISTRY_REPOSITORY: ulagbulag + REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }} + IMAGE_NAME: md-translate + IMAGE_REPO: docker.io/library/debian + IMAGE_VERSION: latest + +jobs: + build-container-image: + if: ${{ github.ref == 'refs/heads/master' }} + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - 
name: Install dependencies + run: > + sudo apt-get update + && sudo apt-get install -y podman + && mkdir -p /home/runner/.docker/ + && echo '{"auths":{"quay.io":{}}}' >/home/runner/.docker/config.json + + # TODO: Wait for a buildah issue to be resolved: https://github.com/redhat-actions/buildah-build/issues/116 + - name: Disable container build cache + run: find ./ -name 'Dockerfile*' -exec sed -i '/--mount=type=cache[a-z0-9,=\/-]* \\$/ d' '{}' \; + + - name: Log in to ${{ env.REGISTRY }} + uses: redhat-actions/podman-login@v1 + with: + registry: ${{ env.REGISTRY }} + username: ${{ env.REGISTRY_USER }} + password: ${{ env.REGISTRY_PASSWORD }} + + - name: Build Image + id: build-and-push-image + uses: redhat-actions/buildah-build@v2 + with: + image: ${{ env.IMAGE_NAME }} + tags: latest + context: "." + containerfiles: | + ./Dockerfile + build-args: | + IMAGE_REPO=${{ env.IMAGE_REPO }} + IMAGE_VERSION=${{ env.IMAGE_VERSION }} + + - name: Push To ${{ env.REGISTRY }} + id: push-to-quay + if: ${{ github.repository == format('{0}/{1}', env.REGISTRY_REPOSITORY, env.IMAGE_NAME) }} + uses: redhat-actions/push-to-registry@v2 + with: + image: ${{ steps.build-and-push-image.outputs.image }} + tags: ${{ steps.build-and-push-image.outputs.tags }} + registry: ${{ env.REGISTRY }}/${{ env.REGISTRY_REPOSITORY }} + + - name: Print image url + run: echo "Image pushed to ${{ steps.push-to-quay.outputs.registry-paths }}" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..34f643e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# IDE +/.vscode diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7c4a40c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +# Copyright (c) 2024 Ho Kim (ho.kim@ulagbulag.io). All rights reserved. +# Use of this source code is governed by MIT license that can be +# found in the LICENSE file. 
+ +# Configure container image variables +ARG IMAGE_REPO="docker.io/library/debian" +ARG IMAGE_VERSION="latest" + +# Be ready for serving +FROM "${IMAGE_REPO}:${IMAGE_VERSION}" + +# Add License +COPY ./LICENSE /usr/local/share/licenses/md-translate/LICENSE + +# Install dependencies +RUN apt-get update && apt-get install -y \ + bash \ + coreutils \ + curl \ + jq \ + # Cleanup + && apt-get clean all \ + && rm -rf /var/lib/apt/lists/* + +# Install the package +COPY ./md-translate.sh /usr/local/bin/md-translate.sh +COPY ./prompt.md /usr/local/share/md-translate/prompt.md +COPY ./template.json /usr/local/share/md-translate/template.json +WORKDIR /usr/local/bin + +# Configure the entrypoint and its default file locations +ENV PROMPT_PATH="/usr/local/share/md-translate/prompt.md" +ENV TEMPLATE_PATH="/usr/local/share/md-translate/template.json" +ENTRYPOINT [ "/usr/bin/env", "/usr/local/bin/md-translate.sh" ] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..64e5ed7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Ho Kim + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..eada16c --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# Simple Markdown Translator + +Translate Markdown documents to your desired language conveniently using Docker and stdin pipes! + +## Usage + +```bash +cat examples/latex-url.md | ./md-translate.sh | tee output.md +``` + +### With Docker + +```bash +cat examples/latex-url.md | docker run --rm -i -e OPENAI_API_KEY quay.io/ulagbulag/md-translate:latest +``` + +## Configuration + +- **OPENAI_API_KEY**: See how to get API Key: https://help.openai.com/en/articles/7039783-how-can-i-access-the-chatgpt-api + +## Features + +- **TeX Live** markdown plugin support + +## License + +Please check the [LICENSE](/LICENSE) file. + +### External Resources + +- `examples`: Licenses are specified separately for each resource +- `prompt.md`: Derived from https://github.com/smikitky/chatgpt-md-translator (MIT) diff --git a/examples/latex-url.md b/examples/latex-url.md new file mode 100644 index 0000000..4883ea7 --- /dev/null +++ b/examples/latex-url.md @@ -0,0 +1,26 @@ +# Introduction + +이번에 소개할 분은 여러분들께서 정말로 기다리던 분들이십니다. +이 연극의 주인공, 본인 심영과 문예봉 동무를 소개합니다! +배우 황철 동무와 극작가이신 임선규 동무를 소개합니다! +참고로, 임선규 동무는 문예봉 동무의 남편이 되십니다. +그러니까 동무끼리 부부가 되겠습니다, 여러분! + +친애하는 학생, 시민 동지 여러분! +곧 이어서, 우리 공산주의 국가를 열렬히 찬양하는 애국 시민들의 늬우스를 전해 드리겠습니다! +그리고 곧 이어서, 사회주의 낙원을 건설하는 우리 모두의 염원을 연극에 담아 무대에서 보내 드리겠습니다! + +여러분, '님'이 무엇입니까? +언제나 그리운 이름입니다. +우리들의 가슴입니다. +우리가 사모하고 눈물 흘리며 오랜 세월을 목말라해 온 이름입니다. +'님'은 바로 사회주의 낙원을 말하는 것입니다, 여러분! + +오랫동안 기다리셨습니다. +이제 곧 늬우스를 상영하겠습니다! +기대해 주십시오. +오늘 여러분들은 그토록 고대하시던 여러분들의 님을 확실하게 만나고 확인하시게 될 것입니다, 여러분! 
+ +## References + +- Plot: \url{https://namu.wiki/w/%EB%82%B4%EA%B0%80%20%EA%B3%A0%EC%9E%90%EB%9D%BC%EB%8B%88/%EB%8C%80%EB%B3%B8#s-4.1} diff --git a/md-translate.sh b/md-translate.sh new file mode 100755 index 0000000..3d45168 --- /dev/null +++ b/md-translate.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# Copyright (c) 2024 Ho Kim (ho.kim@ulagbulag.io). All rights reserved. +# Use of this source code is governed by MIT license that can be +# found in the LICENSE file. + +# Abort on errors, including failures inside pipelines +set -e -o pipefail + +########################################################### +# Configuration # +########################################################### + +# Configure default environment variables +OPENAI_API_URL_DEFAULT="https://api.openai.com/v1/chat/completions" +PROMPT_PATH_DEFAULT="$(pwd)/prompt.md" +TEMPLATE_PATH_DEFAULT="$(pwd)/template.json" + +# Configure environment variables +OPENAI_API_URL="${OPENAI_API_URL:-$OPENAI_API_URL_DEFAULT}" +PROMPT_PATH="${PROMPT_PATH:-$PROMPT_PATH_DEFAULT}" +TEMPLATE_PATH="${TEMPLATE_PATH:-$TEMPLATE_PATH_DEFAULT}" + +########################################################### +# Check Environment Variables # +########################################################### + +function _assert_file() { + local key="$1" + + # fail if the variable named by $1 does not point to a regular file + if [ ! -f "${!key}" ]; then + echo "No such file (${key}): ${!key}" >&2 + exit 1 + fi +} + +function _assert_key() { + local key="$1" + + # fail if the variable named by $1 is unset (an empty value is accepted) + if [ -z "${!key+x}" ]; then + echo "Environment variable \"${key}\" not set" >&2 + exit 1 + fi +} + +_assert_file 'PROMPT_PATH' +_assert_file 'TEMPLATE_PATH' + +_assert_key 'OPENAI_API_KEY' + +########################################################### +# Main Function # +########################################################### + +function main() { + input_file="$(mktemp)" + output_file="$(mktemp)" + trap 'rm -f "${input_file}" "${output_file}"' EXIT # remove the temporary files on any exit path + # Read stdin + cat - >"${input_file}" + + # Call translator + cat "${TEMPLATE_PATH}" | + jq -e | + jq -e "( .messages // [] | . 
) += [{\"role\": \"user\", \"content\": \$input}]" --rawfile input "${PROMPT_PATH}" | + jq -e "( .messages // [] | . ) += [{\"role\": \"user\", \"content\": \$input}]" --rawfile input "${input_file}" | + curl -sS -X POST "${OPENAI_API_URL}" \ + -H "Authorization: Bearer ${OPENAI_API_KEY}" \ + -H "Content-Type: application/json" \ + -d @- >"${output_file}" + + # Report an API error if the response carries one, otherwise print the translation + if jq -e '.error.message' "${output_file}" >/dev/null; then + jq -e -r '.error.message' "${output_file}" >&2 + exit 1 + fi + jq -e -r '.choices[0].message.content' "${output_file}" +} + +# Execute main function +main "$@" diff --git a/prompt.md b/prompt.md new file mode 100644 index 0000000..6fd18b9 --- /dev/null +++ b/prompt.md @@ -0,0 +1,11 @@ +I am translating documentation for writing a journal paper. +Translate the Markdown content into English; I'll paste the result into the paper later. + +You must strictly follow the rules below. + +- Never change the Markdown markup structure. Don't add or remove links. Do not change any URL. +- Never change the contents of code blocks even if they appear to have a bug. +- Always preserve the original line breaks. Do not add or remove blank lines. +- Never touch the permalink such as `{/*examples*/}` at the end of each heading. +- Never touch HTML-like tags such as `<Notes>`. +- Never touch LaTeX commands such as `\begin{document}`. diff --git a/template.json b/template.json new file mode 100644 index 0000000..bb00046 --- /dev/null +++ b/template.json @@ -0,0 +1,9 @@ +{ + "max_tokens": 2048, + "messages": [], + "model": "gpt-4o", + "n": 1, + "stop": null, + "stream": false, + "temperature": 0.7 +}