Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/2.4' into 2.5
Browse files Browse the repository at this point in the history
  • Loading branch information
daschuer committed Nov 30, 2024
2 parents 9209813 + 073acce commit bdd80f4
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 8 deletions.
13 changes: 5 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ repos:
- id: check-yaml
exclude: ^\.clang-format$
- id: end-of-file-fixer
exclude: ^.*(UTF-8-BOM\.txt|\.ts)$
exclude: ^(res/translations/.*\.ts|.*UTF-8-BOM\.txt)$
- id: mixed-line-ending
- id: trailing-whitespace
exclude: \.(c|cc|cxx|cpp|ts|frag|glsl|h|hpp|hxx|ih|ispc|ipp|java|js|m|mm|proto|tsv|vert)$
Expand All @@ -64,7 +64,7 @@ repos:
"\\W(?:m_p*(?=[A-Z])|m_(?=\\w)|pp*(?=[A-Z])|k(?=[A-Z])|s_(?=\\w))",
--write-changes,
]
exclude: ^(packaging\/wix\/LICENSE.rtf.in|src\/dialog\/dlgabout\.cpp|.*\.(?:pot?|(?<!\.d\.)ts|wxl|svg))$
exclude: ^(packaging/wix/LICENSE.rtf.in|src/dialog/dlgabout\.cpp|.*\.(?:pot?|(?<!\.d\.)ts|wxl|svg))$
- repo: https://github.com/pre-commit/mirrors-eslint
rev: v9.14.0
hooks:
Expand All @@ -82,7 +82,7 @@ repos:
- typescript-eslint
- eslint-plugin-jsdoc@^v50.4.3
- eslint-plugin-diff@^2.0.3
exclude: ^.*\.(?<!\.d\.)ts$
exclude: ^res/translations/.*\.ts$
- repo: local
hooks:
- id: clang-format
Expand Down Expand Up @@ -174,13 +174,10 @@ repos:
files: ^(CHANGELOG\.md|res/linux/org\.mixxx\.Mixxx\.metainfo.xml)$
- id: ts-source-copy
name: ts-source-copy
description: "Find translations literaly copied form the source language"
description: "Find translations literally copied from the source language"
entry: python tools/ts_source_copy_check.py
stages:
- commit
- manual
require_serial: true
language: python
additional_dependencies:
- lxml==4.9.3
- lxml==5.3.0
files: ^(res/translations/.*\.ts)$
197 changes: 197 additions & 0 deletions tools/ts_source_copy_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import logging
import os
import re
import sys
import typing
import fnmatch

from lxml import etree
import githelper

# Define the path to the allow list XML file
ALLOW_LIST_PATH = "res/translations/source_copy_allow_list.tsv"
PROPOSED_ALLOW_LIST_PATH = (
"res/translations/source_copy_allow_list_proposed.tsv"
)


def parse_line(line, info):
if not line.endswith("\n"):
print(f"Parse Error: No \\n at {info}")
return False
line = line[:-1]
try:
lang, source = line.split("\t", 1)
except Exception as e:
print(f"Parse Error at {info}")
print(e)
if lang is None or lang == "":
print(f"Parse Error: lang is empty at {info}")
return lang, source


def fn_match_comma_sep(language, pattern_cs):
for lang_pat in pattern_cs.split(","):
if fnmatch.fnmatchcase(language, lang_pat):
return True
return False


def is_untranstaled_allowed(source_text, language):
source_text = source_text.encode("unicode_escape").decode("utf-8")

if not os.path.exists(ALLOW_LIST_PATH):
return False

with open(ALLOW_LIST_PATH) as f:
for i, line in enumerate(f):
lang, source = parse_line(line, f"{ALLOW_LIST_PATH}:{i + 1}")
if source == source_text:
return fn_match_comma_sep(language, lang)
return False


def add_to_allow_list(source_text, language):
source_text = source_text.encode("unicode_escape").decode("utf-8")

allow_list_path = PROPOSED_ALLOW_LIST_PATH
if not os.path.exists(allow_list_path) and os.path.exists(ALLOW_LIST_PATH):
allow_list_path = ALLOW_LIST_PATH

tsv_lines = []
existing_txt = False
if os.path.exists(allow_list_path):
with open(allow_list_path) as f:
tsv_lines = f.readlines()

# Check if the source_text already exists
for i, line in enumerate(tsv_lines):
lang, source = parse_line(line, f"{allow_list_path}:{i + 1}")
if source == source_text:
if fn_match_comma_sep(language, lang):
return
lang = lang + "," + language
tsv_lines[i] = f"{lang}\t{source}\n"
existing_txt = True
break
else:
tsv_lines.append("lang\tsource\n")

if not existing_txt:
tsv_lines.append(f"{language}\t{source_text}\n")

with open(PROPOSED_ALLOW_LIST_PATH, "w", encoding="utf-8") as tsv_file:
tsv_file.write("".join(tsv_lines))


def check_copied_source_on_lines(rootdir, file_to_format, stylepath=None):
filename = os.path.join(rootdir, file_to_format.filename)

# Extract the language code from the filename
# (assuming format: mixxx_<lang_code>.ts)
match = re.search(r".*/mixxx_(\w+)\.ts", file_to_format.filename)
if not match:
return False

lang_code = match.group(1)

if lang_code.startswith("en_"):
# for Engish languages all source copies are allowed
return False

parser = etree.XMLParser(recover=True)

try:
tree = etree.parse(filename, parser)
except Exception as e:
print("XML parsing failed:")
print(e)
return False

root = tree.getroot()

found = False

for context in root.findall("context"):
for message in context.findall("message"):
source = message.find("source").text
translation_elem = message.find("translation")
if translation_elem is None:
continue
if translation_elem.get("type") == "unfinished":
continue
translation = translation_elem.text
if source != translation:
continue
line_number = translation_elem.sourceline
for start, end in file_to_format.lines:
if line_number >= start and line_number <= end:
if is_untranstaled_allowed(source, lang_code):
continue
print(
"Source copy found at: "
f"{file_to_format.filename}:{line_number}\n{source}",
file=sys.stderr,
)
add_to_allow_list(source, lang_code)
found = True
return found


def main(argv: typing.Optional[typing.List[str]] = None) -> int:
logging.basicConfig(
format="[%(levelname)s] %(message)s", level=logging.INFO
)

parser = argparse.ArgumentParser()
parser.add_argument("--from-ref", help="use changes changes since commit")
parser.add_argument("--to-ref", help="use changes until commit")
parser.add_argument("files", nargs="*", help="only check these files")
args = parser.parse_args(argv)

if not args.from_ref:
args.from_ref = os.getenv("PRE_COMMIT_FROM_REF") or os.getenv(
"PRE_COMMIT_SOURCE"
)

if not args.to_ref:
args.to_ref = os.getenv("PRE_COMMIT_TO_REF") or os.getenv(
"PRE_COMMIT_ORIGIN"
)

# Filter filenames
rootdir = githelper.get_toplevel_path()

files_with_added_lines = githelper.get_changed_lines_grouped(
from_ref=args.from_ref,
to_ref=args.to_ref,
filter_lines=lambda line: line.added,
include_files=args.files,
)

ret = 0
for changed_file in files_with_added_lines:
if check_copied_source_on_lines(rootdir, changed_file):
ret = 1

if ret:
print(
"\n" # For a distance to the list of findings
"All disallowed copied source translations need to be removed"
" at\n"
"https://app.transifex.com/mixxx-dj-software and pulled again.\n"
"If they are intended untranslated use:\n"
f"{PROPOSED_ALLOW_LIST_PATH}\n"
"to allow it.",
file=sys.stderr,
)

return ret


if __name__ == "__main__":
sys.exit(main())

0 comments on commit bdd80f4

Please sign in to comment.