From 1a8b92c5547fad793b073b5df5c915d6cfbfd2ac Mon Sep 17 00:00:00 2001 From: Mikhail Dzianishchyts Date: Wed, 25 Dec 2024 15:56:36 +0300 Subject: [PATCH] Merge Upstream Workflow pt. 2 (#1709) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Что этот PR делает Улучшения мерж воркфлоу. - [x] Перевод чейнджлога через GPT 4o. - [x] Проверка на существование мерж ПРа. Чья-то работа не будет случайно сброшена форс-пушем. - [x] Отключение буферизации вывода логов. - [x] Использование GitHub App в проверке чейнджлога (в крысу). ## Тестирование Проверил на форке. --- .github/workflows/check_changelog.yml | 13 +- .github/workflows/master_ancestor.yml | 30 +++ .github/workflows/merge_upstream.yml | 8 +- tools/merge-upstream/merge_upstream.py | 254 +++++++++++++------ tools/merge-upstream/translation_context.txt | 109 ++++++++ 5 files changed, 327 insertions(+), 87 deletions(-) create mode 100644 .github/workflows/master_ancestor.yml create mode 100644 tools/merge-upstream/translation_context.txt diff --git a/.github/workflows/check_changelog.yml b/.github/workflows/check_changelog.yml index c19d6fc7800a..91398c7d47ac 100644 --- a/.github/workflows/check_changelog.yml +++ b/.github/workflows/check_changelog.yml @@ -15,20 +15,31 @@ jobs: if: github.base_ref == 'master' && github.event.pull_request.draft == false steps: + - id: create_token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.APP_ID }} + private-key: ${{ secrets.PRIVATE_KEY }} + + - run: echo "GH_TOKEN=${{ steps.create_token.outputs.token }}" >> "$GITHUB_ENV" + - name: Downloading scripts run: | wget https://raw.githubusercontent.com/${{ github.repository }}/${{ github.base_ref }}/tools/changelog/changelog_utils.py wget https://raw.githubusercontent.com/${{ github.repository }}/${{ github.base_ref }}/tools/changelog/check_changelog.py wget https://raw.githubusercontent.com/${{ github.repository }}/${{ github.base_ref }}/tools/changelog/tags.yml + - name: Installing Python uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 with: python-version: '3.x' + - name: Installing deps run: | python -m pip install --upgrade pip pip install ruamel.yaml PyGithub + - name: Changelog validation env: - GITHUB_TOKEN: ${{ secrets.BOT_TOKEN }} + GITHUB_TOKEN: ${{ env.GH_TOKEN }} run: python check_changelog.py diff --git a/.github/workflows/master_ancestor.yml b/.github/workflows/master_ancestor.yml new file mode 100644 index 000000000000..716ed00891e4 --- /dev/null +++ b/.github/workflows/master_ancestor.yml @@ -0,0 +1,30 @@ +name: Check Master Merged + +on: + pull_request: + branches: + - master + +jobs: + check-master-merged: + runs-on: ubuntu-latest + if: github.head_ref == 'merge-upstream' + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ github.base_ref }} + + - name: Fetch head branch + run: git fetch origin ${{ github.head_ref }} + + - name: Check if master is merged + run: | + git checkout ${{ github.head_ref }} + if git merge-base --is-ancestor origin/${{ github.base_ref }} HEAD; then + exit 0 + else + echo "Base branch is NOT merged into the head branch. Merge base branch to keep CI checks relevant." + exit 1 + fi diff --git a/.github/workflows/merge_upstream.yml b/.github/workflows/merge_upstream.yml index 3f07d729d9c0..dfa49e4bf253 100644 --- a/.github/workflows/merge_upstream.yml +++ b/.github/workflows/merge_upstream.yml @@ -24,12 +24,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install PyGithub googletrans==4.0.0-rc1 + pip install PyGithub openai - name: Download the script run: | wget https://raw.githubusercontent.com/${{ github.repository }}/${{ github.ref_name }}/tools/changelog/changelog_utils.py wget https://raw.githubusercontent.com/${{ github.repository }}/${{ github.ref_name }}/tools/merge-upstream/merge_upstream.py + wget https://raw.githubusercontent.com/${{ github.repository }}/${{ github.ref_name }}/tools/merge-upstream/translation_context.txt - name: Run the script env: @@ -39,9 +40,10 @@ jobs: UPSTREAM_REPO: 'ParadiseSS13/Paradise' UPSTREAM_BRANCH: 'master' MERGE_BRANCH: 'merge-upstream' - TRANSLATE_CHANGES: 'true' CHANGELOG_AUTHOR: 'ParadiseSS13' + TRANSLATE_CHANGES: 'true' + OPENAI_API_KEY: ${{ secrets.ORG_EMPTY_TOKEN }} run: | git config --global user.email "action@github.com" git config --global user.name "Upstream Sync" - python3 merge_upstream.py + python3 -u merge_upstream.py diff --git a/tools/merge-upstream/merge_upstream.py b/tools/merge-upstream/merge_upstream.py index 62eb7690f241..0cb9101d7e7a 100644 --- a/tools/merge-upstream/merge_upstream.py +++ b/tools/merge-upstream/merge_upstream.py @@ -4,11 +4,19 @@ import subprocess import time import typing + from datetime import datetime -from concurrent.futures import ThreadPoolExecutor, as_completed +from concurrent.futures import ThreadPoolExecutor, Future, as_completed +from pathlib import Path +from re import Pattern +from subprocess import CompletedProcess + from github import Github +from github.PaginatedList import PaginatedList from github.PullRequest import PullRequest -from googletrans import Translator +from github.Repository import Repository +from openai import OpenAI +from openai.types.chat import ChatCompletion import changelog_utils @@ -23,12 +31,19 @@ class DownstreamLabel(str, enum.Enum): WIKI_CHANGE = ":page_with_curl: Требуется изменение WIKI" +class Change(typing.TypedDict): + tag: str + message: str + translated_message: typing.NotRequired[str] + pull: PullRequest + + class PullDetails(typing.TypedDict): - changelog: typing.Dict[str, list[str]] - merge_order: list[str] - config_changes: typing.Dict[str, PullRequest] - sql_changes: typing.Dict[str, PullRequest] - wiki_changes: typing.Dict[str, PullRequest] + changelog: typing.Dict[int, list[Change]] + merge_order: list[int] + config_changes: list[PullRequest] + sql_changes: list[PullRequest] + wiki_changes: list[PullRequest] LABEL_BLOCK_STYLE = { @@ -48,12 +63,17 @@ def check_env(): "UPSTREAM_BRANCH", "MERGE_BRANCH" ] + if TRANSLATE_CHANGES: + required_vars.append("OPENAI_API_KEY") missing_vars = [var for var in required_vars if not os.getenv(var)] if missing_vars: raise EnvironmentError(f"Missing required environment variables: {', '.join(missing_vars)}") # Environment variables +TRANSLATE_CHANGES = os.getenv("TRANSLATE_CHANGES", "False").lower() in ("true", "yes", "1") +CHANGELOG_AUTHOR = os.getenv("CHANGELOG_AUTHOR", "") + check_env() GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") TARGET_REPO = os.getenv("TARGET_REPO") @@ -61,14 +81,13 @@ def check_env(): UPSTREAM_REPO = os.getenv("UPSTREAM_REPO") UPSTREAM_BRANCH = os.getenv("UPSTREAM_BRANCH") MERGE_BRANCH = os.getenv("MERGE_BRANCH") -TRANSLATE_CHANGES = os.getenv("TRANSLATE_CHANGES", "False").lower() in ("true", "yes", "1") -CHANGELOG_AUTHOR = os.getenv("CHANGELOG_AUTHOR", "") +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") -def run_command(command) -> str: +def run_command(command: str) -> str: """Run a shell command and return its output.""" try: - result = subprocess.run(command, shell=True, capture_output=True, text=True) + result: CompletedProcess[str] = subprocess.run(command, shell=True, capture_output=True, text=True) result.check_returncode() return result.stdout.strip() except subprocess.CalledProcessError as e: @@ -90,7 +109,7 @@ def update_merge_branch(): print(f"Fetching branch {UPSTREAM_BRANCH} from upstream...") run_command(f"git fetch upstream {UPSTREAM_BRANCH}") run_command(f"git fetch origin") - all_branches = run_command("git branch -a").split() + all_branches: list[str] = run_command("git branch -a").split() if f"remotes/origin/{MERGE_BRANCH}" not in all_branches: print(f"Branch '{MERGE_BRANCH}' does not exist. Creating it from upstream/{UPSTREAM_BRANCH}...") @@ -109,109 +128,159 @@ def update_merge_branch(): def detect_commits() -> list[str]: """Detect commits from upstream not yet in downstream.""" print("Detecting new commits from upstream...") - commit_log = run_command(f"git log {TARGET_BRANCH}..{MERGE_BRANCH} --pretty=format:'%h %s'").split("\n") + commit_log: list[str] = run_command(f"git log {TARGET_BRANCH}..{MERGE_BRANCH} --pretty=format:'%h %s'").split("\n") commit_log.reverse() return commit_log -def fetch_pull(pull_id) -> PullRequest | None: +def fetch_pull(github: Github, pull_number: int) -> PullRequest | None: """Fetch the pull request from GitHub.""" - github = Github(GITHUB_TOKEN) - repo = github.get_repo(UPSTREAM_REPO) + upstream_repo: Repository = github.get_repo(UPSTREAM_REPO) max_retries = 3 for attempt in range(max_retries): try: - return repo.get_pull(int(pull_id)) + return upstream_repo.get_pull(int(pull_number)) except Exception as e: - print(f"Error fetching PR #{pull_id}: {e}") + print(f"Error fetching PR #{pull_number}: {e}") if attempt + 1 < max_retries: time.sleep(2) else: return None -def build_details(commit_log: list[str]) -> PullDetails: +def build_details(github: Github, commit_log: list[str], + translate: typing.Optional[typing.Callable[[typing.Dict[int, list[Change]]], None]]) -> PullDetails: """Generate data from parsed commits.""" print("Building details...") + pull_number_pattern: Pattern[str] = re.compile("#(?P\\d+)") details = PullDetails( changelog={}, - merge_order=[match.group()[1:] for c in commit_log if (match := re.search("#\\d+", c))], - config_changes={}, - sql_changes={}, - wiki_changes={} + merge_order=[int(match.group("id")) for c in commit_log if (match := re.search(pull_number_pattern, c))], + config_changes=[], + sql_changes=[], + wiki_changes=[] ) - pull_cache = {} - translator = Translator() + pull_cache: dict[int, str] = {} with ThreadPoolExecutor() as executor: - futures = {} + futures: dict[Future, int] = {} for commit in commit_log: - match = re.search("#\\d+", commit) + match = re.search(pull_number_pattern, commit) if not match: print(f"Skipping {commit}") continue - pull_id = match.group()[1:] + pull_number = int(match.group("id")) - if pull_id in pull_cache: + if pull_number in pull_cache: print( f"WARNING: pull duplicate found.\n" - f"1: {pull_cache[pull_id]}\n" + f"1: {pull_cache[pull_number]}\n" f"2: {commit}" ) print(f"Skipping {commit}") continue - pull_cache[pull_id] = commit - futures[executor.submit(fetch_pull, pull_id)] = pull_id + pull_cache[pull_number] = commit + futures[executor.submit(fetch_pull, github, pull_number)] = pull_number for future in as_completed(futures): - pull_id = futures[future] + pull_number = futures[future] pull: PullRequest | None = future.result() - labels = [label.name for label in pull.get_labels()] - pull_changes = [] if not pull: + print(f"Pull {pull_number} was not fetched. Skipping.") continue - try: - for label in labels: - if label == UpstreamLabel.CONFIG_CHANGE.value: - details["config_changes"][pull_id] = pull - elif label == UpstreamLabel.SQL_CHANGE.value: - details["sql_changes"][pull_id] = pull - elif label == UpstreamLabel.WIKI_CHANGE.value: - details["wiki_changes"][pull_id] = pull - - parsed = changelog_utils.parse_changelog(pull.body) - if parsed and parsed["changes"]: - for change in parsed["changes"]: - tag = change["tag"] - message = change["message"] - if TRANSLATE_CHANGES: - translated_message = translator.translate(message, src="en", dest="ru").text - change = f"{tag}: {translated_message} " - else: - change = f"{tag}: {message} " - pull_changes.append(change) - - if pull_changes: - details["changelog"][pull_id] = pull_changes - except Exception as e: - print( - f"An error occurred while processing {commit}\n" - f"URL: {pull.html_url}\n" - f"Body: {pull.body}" - ) - raise e + process_pull(details, pull) + + if translate: + translate(details["changelog"]) return details +def process_pull(details: PullDetails, pull: PullRequest): + """Handle fetched pull request data during details building.""" + pull_number: int = pull.number + labels: list[str] = [label.name for label in pull.get_labels()] + pull_changes: list[Change] = [] + try: + for label in labels: + if label == UpstreamLabel.CONFIG_CHANGE.value: + details["config_changes"].append(pull) + elif label == UpstreamLabel.SQL_CHANGE.value: + details["sql_changes"].append(pull) + elif label == UpstreamLabel.WIKI_CHANGE.value: + details["wiki_changes"].append(pull) + + parsed = changelog_utils.parse_changelog(pull.body) + if parsed and parsed["changes"]: + for change in parsed["changes"]: + pull_changes.append(Change( + tag=change["tag"], + message=change["message"], + pull=pull + )) + + if pull_changes: + details["changelog"][pull_number] = pull_changes + except Exception as e: + print( + f"An error occurred while processing {pull.html_url}\n" + f"Body: {pull.body}" + ) + raise e + + +def translate_changelog(changelog: typing.Dict[int, list[Change]]): + """Translate changelog using OpenAI API.""" + print("Translating changelog...") + if not changelog: + return + + changes: list[Change] = [change for changes in changelog.values() for change in changes] + if not changes: + return + + script_dir = Path(__file__).resolve().parent + with open(script_dir.joinpath("translation_context.txt"), encoding="utf-8") as f: + context = "\n".join(f.readlines()).strip() + text = "\n".join([change["message"] for change in changes]) + + client = OpenAI( + base_url="https://models.inference.ai.azure.com", + api_key=OPENAI_API_KEY, + ) + response: ChatCompletion = client.chat.completions.create( + messages=[ + {"role": "system", "content": context}, + {"role": "user", "content": text} + ], + temperature=1.0, + top_p=1.0, + model="gpt-4o", + ) + translated_text: str | None = response.choices[0].message.content + + if not translated_text: + print("WARNING: changelog translation failed!") + print(response) + return + + for change, translated_message in zip(changes, translated_text.split("\n"), strict=True): + change["translated_message"] = translated_message + + +def silence_pull_url(pull_url: str) -> str: + """Reformat HTTP URL with 'www' prefix to prevent pull request linking.""" + return re.sub("https?://", "www.", pull_url) + + def prepare_pull_body(details: PullDetails) -> str: """Build new pull request body from the generated changelog.""" - pull_body = ( + pull_body: str = ( f"This pull request merges upstream/{UPSTREAM_BRANCH}. " f"Resolve possible conflicts manually and make sure all the changes are applied correctly.\n" ) @@ -219,46 +288,51 @@ def prepare_pull_body(details: PullDetails) -> str: if not details: return pull_body - label_to_changes = { + label_to_pulls: dict[UpstreamLabel, list[PullRequest]] = { UpstreamLabel.CONFIG_CHANGE: details["config_changes"], UpstreamLabel.SQL_CHANGE: details["sql_changes"], UpstreamLabel.WIKI_CHANGE: details["wiki_changes"] } - for label, changes in label_to_changes.items(): - if not changes: + for label, fetched_pulls in label_to_pulls.items(): + if not fetched_pulls: continue pull_body += ( f"\n> [!{LABEL_BLOCK_STYLE[label]}]\n" f"> {label.value}:\n" ) - for _, pull in sorted(changes.items()): - pull_body += f"> {pull.html_url}\n" + for fetched_pull in fetched_pulls: + pull_body += f"> {silence_pull_url(fetched_pull.html_url)}\n" if not details["changelog"]: return pull_body pull_body += f"\n## Changelog\n" pull_body += f":cl: {CHANGELOG_AUTHOR}\n" if CHANGELOG_AUTHOR else ":cl:\n" - for pull_id in details["merge_order"]: - if pull_id not in details["changelog"]: + for pull_number in details["merge_order"]: + if pull_number not in details["changelog"]: continue - pull_body += f"{'\n'.join(details["changelog"][pull_id])}\n" + for change in details["changelog"][pull_number]: + tag: str = change["tag"] + message: str = change["message"] + translated_message: str | None = change.get("translated_message") + pull_url: str = silence_pull_url(change["pull"].html_url) + if translated_message: + pull_body += f"{tag}: {translated_message} \n" + else: + pull_body += f"{tag}: {message} \n" pull_body += "/:cl:\n" return pull_body -def create_pr(details: PullDetails): +def create_pr(repo: Repository, details: PullDetails): """Create a pull request with the processed changelog.""" - pull_body = prepare_pull_body(details) - + pull_body: str = prepare_pull_body(details) print("Creating pull request...") - github = Github(GITHUB_TOKEN) - repo = github.get_repo(TARGET_REPO) # Create the pull request - pull = repo.create_pull( + pull: PullRequest = repo.create_pull( title=f"Merge Upstream {datetime.today().strftime('%d.%m.%Y')}", body=pull_body, head=MERGE_BRANCH, @@ -271,14 +345,28 @@ def create_pr(details: PullDetails): print("Pull request created successfully.") +def check_pull_exists(target_repo: Repository, base: str, head: str): + """Check if the merge pull request already exist. In this case, fail the action.""" + print("Checking on existing pull request...") + existing_pulls: PaginatedList[PullRequest] = target_repo.get_pulls(state="open", base=base, head=head) + for pull in existing_pulls: + print(f"Pull request already exists. {pull.html_url}") + + if existing_pulls.totalCount: + exit(1) + if __name__ == "__main__": + github = Github(GITHUB_TOKEN) + target_repo: Repository = github.get_repo(TARGET_REPO) + + check_pull_exists(target_repo, TARGET_BRANCH, MERGE_BRANCH) setup_repo() update_merge_branch() - commit_log = detect_commits() + commit_log: list[str] = detect_commits() if commit_log: - details = build_details(commit_log) - create_pr(details) + details: PullDetails = build_details(github, commit_log, translate_changelog if TRANSLATE_CHANGES else None) + create_pr(target_repo, details) else: print(f"No changes detected from {UPSTREAM_REPO}/{UPSTREAM_BRANCH}. Skipping pull request creation.") diff --git a/tools/merge-upstream/translation_context.txt b/tools/merge-upstream/translation_context.txt new file mode 100644 index 000000000000..09f771b5074b --- /dev/null +++ b/tools/merge-upstream/translation_context.txt @@ -0,0 +1,109 @@ +Ты работаешь над переводом чейнджлога проекта Space Station 13 "Paradise". Каждая строка описывает отдельное изменение или улучшение, внесённое в проект. + +Инструкции для перевода: + +Дословность с ясностью: Перевод должен быть максимально точным и дословным, но допускается перефразирование для лучшего понимания текста на русском языке. +Терминология: Сохраняй технические термины и названия (например, названия предметов, функций, ролей или систем) в оригинальном виде, если они общеупотребимы в сообществе игры. Если термин можно перевести без потери смысла, адаптируй его. Если термин можно транслитерировать на русском, сделай это. +Стиль: Придерживайся официального и нейтрального стиля, но сохраняй читабельность и естественность текста. +Формат: Сохраняй структуру исходного текста: каждая строка перевода должна соответствовать ровно одной строке исходного текста, равно как и обратное. Нумерация или форматирование в виде списка недопустимы. Добавление знаков препинания в предложениях допустимо. + +Основные понятия: + +Tajaran – Таяр / таяра +Unathi – Унати / унатх +Skrell – Скрелл +Slime People – Слаймолюди +Vox – Вокс +Drask – Драск +Grey – Серый +Dionae – Диона +IPC – КПБ +Kidan – Кидан +Plasmamen – Плазмамен +Vulpkanin – Вульпканин / вульпа +Nianae – Ниан + +Captain – Капитан +Head of Personnel (HoP) – Глава персонала (ХОП/ГП) +Head of Security (HoS) – Глава службы безопасности (ХОС/ГСБ) +Chief Engineer (CE) – Главный инженер (СЕ) +Research Director (RD) – Директор исследований (РД) +Chief Medical Officer (CMO) – Главный врач (СМО) +Quartermaster (QM) – Квартирмейстер (КМ) +Blueshield - Офицер "Синий Щит" +Nanotrasen Representative - Представитель Нанотрейзен +Warden – Надзиратель +Detective – Детектив +Security Officer – Офицер службы безопасности +Forensic Technician – Судмедэксперт +Atmospheric Technician – Атмосферный техник +Station Engineer – Станционный инженер +Scientist – Ученый +Roboticist – Робототехник +Geneticist – Генетик +Xenobiologist – Ксенобиолог +Medical Doctor – Врач +Paramedic – Парамедик +Chemist – Химик +Virologist – Вирусолог +Psychiatrist – Психиатр +Cargo Technician – Грузчик +Miner – Шахтер +Bartender – Бармен +Chef – Шеф-повар +Botanist – Ботаник +Janitor – Уборщик +Clown – Клоун +Mime – Мим +Chaplain – Священник +Assistant – Ассистент +Internal Affairs Agent (IAA) – Агент внутренних дел (АВД) +Librarian – Библиотекарь + +Traitor – Предатель +Changeling – Генокрад +Wizard – Волшебник +Revenant – Ревенант +Revolutionary – Революционер +Cultist – Культист +Blob – Блоб +Mindflayer – Пожиратель разума +Nuclear Operative – Ядерный оперативник +Xenomorph – Ксеноморф +Abductor – Абдуктор +Alien – Ксеноморф +Terror Spider / Terror – Паук ужаса + +AI Core – Ядро ИИ +Holopad – Голопад +Cryo – Криокапсула +MedHUD – MedHUD +SecHUD – SecHUD +Nanotrasen – Нанотрейзен +Syndicate – Синдикат +Supermatter (SM) – Кристалл суперматерии (СМ) +Singularity Engine – Сингулярный двигатель +Tesla Engine / Tesla – Двигатель Теслы +Rapid Construction Device (RCD) – Устройство быстрой постройки (РЦД/УБП) +Personal Data Assistant (PDA) – Карманный персональный компьютер (ПДА/КПК) +Bluespace – Блюспейс +Bluespace Anomaly – Блюспейс-аномалия +Plasma Tank – Баллон с плазмой +Chem Dispenser – Химический раздатчик +Circuit Imprinter – Принтер плат +Autolathe – Автолат +Protolathe – Протолат +Cloning Pod – Капсула клонирования +Borg Charger – Зарядное устройство для боргов +APC (Area Power Controller) – Локальный контроллер питания (АПЦ/ЛКП) +SMES (Substation Energy Storage) – Энергетический накопитель (СМЕС) +Tcomms (Telecommunications) – Телекоммуникации +Bolts (Airlock Bolts) – Болты шлюза +Gravitational Singularity – Гравитационная сингулярность +Spacesuit – Космический скафандр +Cyborg Module – Модуль киборга +Mech (Mecha) – Мех +Exosuit – Экзокостюм +Death Alarm Implant – Имплант-сигнализатор смерти +Mindshield Implant – Имплант защиты разума (майндщилд) +Loadout / loadout gear – Лодаут / предметы лодаута