From 5fd468fc5889e9e46c40c2965030ef623bb37996 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 24 Jan 2022 18:27:11 +0000 Subject: [PATCH 01/20] Import python binding sources --- MANIFEST.in | 2 + bindings/CMakeLists.txt | 1 + bindings/python/CMakeLists.txt | 9 ++ bindings/python/__init__.py | 23 ++++ bindings/python/__main__.py | 20 +++ bindings/python/bergamot.cpp | 227 ++++++++++++++++++++++++++++++++ bindings/python/cmds.py | 177 +++++++++++++++++++++++++ bindings/python/repository.py | 185 ++++++++++++++++++++++++++ bindings/python/typing_utils.py | 5 + bindings/python/utils.py | 52 ++++++++ doc/python.rst | 87 ++++++++++++ setup.py | 206 +++++++++++++++++++++++++++++ 12 files changed, 994 insertions(+) create mode 100644 MANIFEST.in create mode 100644 bindings/CMakeLists.txt create mode 100644 bindings/python/CMakeLists.txt create mode 100644 bindings/python/__init__.py create mode 100644 bindings/python/__main__.py create mode 100644 bindings/python/bergamot.cpp create mode 100644 bindings/python/cmds.py create mode 100644 bindings/python/repository.py create mode 100644 bindings/python/typing_utils.py create mode 100644 bindings/python/utils.py create mode 100644 doc/python.rst create mode 100644 setup.py diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..ab836ddbc --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include README.md LICENSE.txt + diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt new file mode 100644 index 000000000..8e5f91a37 --- /dev/null +++ b/bindings/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(python) diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt new file mode 100644 index 000000000..4a51b68fd --- /dev/null +++ b/bindings/python/CMakeLists.txt @@ -0,0 +1,9 @@ +find_package(Python COMPONENTS Interpreter Development REQUIRED) + +message("Using Python: " ${Python_EXECUTABLE}) + +# pybind11 method: +pybind11_add_module(_bergamot SHARED bergamot.cpp) 
+target_link_libraries(_bergamot PUBLIC pybind11::module pybind11::headers bergamot-translator) +target_include_directories(_bergamot PUBLIC ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src + ${CMAKE_BINARY_DIR}/3rd_party/bergamot-translator/src) diff --git a/bindings/python/__init__.py b/bindings/python/__init__.py new file mode 100644 index 000000000..d56b767f2 --- /dev/null +++ b/bindings/python/__init__.py @@ -0,0 +1,23 @@ +import typing + +try: + from ._bergamot import * # type: ignore + from .repository import Aggregator, TranslateLocallyLike + + REPOSITORY = Aggregator( + [ + TranslateLocallyLike( + "browsermt", "https://translatelocally.com/models.json" + ), + TranslateLocallyLike( + "opus", "https://object.pouta.csc.fi/OPUS-MT-models/app/models.json" + ), + ] + ) + """ + REPOSITORY is a global object that aggregates multiple model-providers to + provide a (model-provider: str, model-code: str) based query mechanism to + get models. + """ +except ImportError: + raise diff --git a/bindings/python/__main__.py b/bindings/python/__main__.py new file mode 100644 index 000000000..35014c099 --- /dev/null +++ b/bindings/python/__main__.py @@ -0,0 +1,20 @@ +import argparse +import sys +from argparse import ArgumentParser + +from .cmds import CMDS, make_parser + + +def main() -> None: + parser = make_parser() + args = parser.parse_args() + + if args.action in CMDS: + CMDS[args.action].execute(args) + else: + parser.print_help(sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/bindings/python/bergamot.cpp b/bindings/python/bergamot.cpp new file mode 100644 index 000000000..1f3061824 --- /dev/null +++ b/bindings/python/bergamot.cpp @@ -0,0 +1,227 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace py = pybind11; + +using marian::bergamot::AnnotatedText; +using marian::bergamot::ByteRange; +using marian::bergamot::ConcatStrategy; +using 
marian::bergamot::Response; +using marian::bergamot::ResponseOptions; +using Service = marian::bergamot::AsyncService; +using _Model = marian::bergamot::TranslationModel; +using Model = std::shared_ptr<_Model>; +using Alignment = std::vector>; +using Alignments = std::vector; + +PYBIND11_MAKE_OPAQUE(std::vector); +PYBIND11_MAKE_OPAQUE(std::vector); +PYBIND11_MAKE_OPAQUE(Alignments); + +class ServicePyAdapter { +public: + ServicePyAdapter(const Service::Config &config) + : service_(make_service(config)) {} + + std::shared_ptr<_Model> modelFromConfig(const std::string &config) { + auto parsedConfig = marian::bergamot::parseOptionsFromString(config); + return service_.createCompatibleModel(parsedConfig); + } + + std::shared_ptr<_Model> modelFromConfigPath(const std::string &configPath) { + auto config = marian::bergamot::parseOptionsFromFilePath(configPath); + return service_.createCompatibleModel(config); + } + + std::vector translate(Model model, std::vector &inputs, + const ResponseOptions &options) { + py::scoped_ostream_redirect outstream( + std::cout, // std::ostream& + py::module_::import("sys").attr("stdout") // Python output + ); + py::scoped_ostream_redirect errstream( + std::cerr, // std::ostream& + py::module_::import("sys").attr("stderr") // Python output + ); + + py::call_guard gil_guard; + + // Prepare promises, save respective futures. Have callback's in async set + // value to the promises. + std::vector> futures; + std::vector> promises; + promises.resize(inputs.size()); + + for (size_t i = 0; i < inputs.size(); i++) { + auto callback = [&promises, i](Response &&response) { + promises[i].set_value(std::move(response)); + }; + + service_.translate(model, std::move(inputs[i]), std::move(callback), + options); + + futures.push_back(std::move(promises[i].get_future())); + } + + // Wait on all futures to be ready. 
+ std::vector responses; + for (size_t i = 0; i < futures.size(); i++) { + futures[i].wait(); + responses.push_back(std::move(futures[i].get())); + } + + return responses; + } + + std::vector pivot(Model first, Model second, + std::vector &inputs, + const ResponseOptions &options) { + // Prepare promises, save respective futures. Have callback's in async set + // value to the promises. + std::vector> futures; + std::vector> promises; + promises.resize(inputs.size()); + + for (size_t i = 0; i < inputs.size(); i++) { + auto callback = [&promises, i](Response &&response) { + promises[i].set_value(std::move(response)); + }; + + service_.pivot(first, second, std::move(inputs[i]), std::move(callback), + options); + + futures.push_back(std::move(promises[i].get_future())); + } + + // Wait on all futures to be ready. + std::vector responses; + for (size_t i = 0; i < futures.size(); i++) { + futures[i].wait(); + responses.push_back(std::move(futures[i].get())); + } + + return responses; + } + +private /*functions*/: + static Service make_service(const Service::Config &config) { + py::scoped_ostream_redirect outstream( + std::cout, // std::ostream& + py::module_::import("sys").attr("stdout") // Python output + ); + py::scoped_ostream_redirect errstream( + std::cerr, // std::ostream& + py::module_::import("sys").attr("stderr") // Python output + ); + + py::call_guard gil_guard; + + return Service(config); + } + +private /*data*/: + Service service_; +}; + +PYBIND11_MODULE(_bergamot, m) { + m.doc() = "Bergamot pybind11 bindings"; + m.attr("__version__") = marian::bergamot::bergamotBuildVersion(); + py::class_(m, "ByteRange") + .def(py::init<>()) + .def_readonly("begin", &ByteRange::begin) + .def_readonly("end", &ByteRange::end) + .def("__repr__", [](const ByteRange &range) { + return "{" + std::to_string(range.begin) + ", " + + std::to_string(range.end) + "}"; + }); + + py::class_(m, "AnnotatedText") + .def(py::init<>()) + .def("numWords", &AnnotatedText::numWords) + 
.def("numSentences", &AnnotatedText::numSentences) + .def("word", + [](const AnnotatedText &annotatedText, size_t sentenceIdx, + size_t wordIdx) -> std::string { + auto view = annotatedText.word(sentenceIdx, wordIdx); + return std::string(view.data(), view.size()); + }) + .def("sentence", + [](const AnnotatedText &annotatedText, + size_t sentenceIdx) -> std::string { + auto view = annotatedText.sentence(sentenceIdx); + return std::string(view.data(), view.size()); + }) + .def("wordAsByteRange", &AnnotatedText::wordAsByteRange) + .def("sentenceAsByteRange", &AnnotatedText::sentenceAsByteRange) + .def_readonly("text", &AnnotatedText::text); + + py::class_(m, "Response") + .def(py::init<>()) + .def_readonly("source", &Response::source) + .def_readonly("target", &Response::target) + .def_readonly("alignments", &Response::alignments); + + py::bind_vector>(m, "VectorString"); + py::bind_vector>(m, "VectorResponse"); + + py::enum_(m, "ConcatStrategy") + .value("FAITHFUL", ConcatStrategy::FAITHFUL) + .value("SPACE", ConcatStrategy::SPACE) + .export_values(); + + py::class_(m, "ResponseOptions") + .def(py::init<>([](bool qualityScores, bool alignment, bool HTML, + bool sentenceMappings, ConcatStrategy strategy) { + return ResponseOptions{qualityScores, alignment, HTML, + sentenceMappings, strategy}; + }), + py::arg("qualityScores") = true, py::arg("alignment") = false, + py::arg("HTML") = false, py::arg("sentenceMappings") = true, + py::arg("concatStrategy") = ConcatStrategy::FAITHFUL) + .def_readwrite("qualityScores", &ResponseOptions::qualityScores) + .def_readwrite("HTML", &ResponseOptions::HTML) + .def_readwrite("alignment", &ResponseOptions::alignment) + .def_readwrite("concatStrategy", &ResponseOptions::concatStrategy) + .def_readwrite("sentenceMappings", &ResponseOptions::sentenceMappings); + + py::class_(m, "Service") + .def(py::init()) + .def("modelFromConfig", &ServicePyAdapter::modelFromConfig) + .def("modelFromConfigPath", &ServicePyAdapter::modelFromConfigPath) 
+ .def("translate", &ServicePyAdapter::translate) + .def("pivot", &ServicePyAdapter::pivot); + + py::class_(m, "ServiceConfig") + .def(py::init<>([](size_t numWorkers, bool cacheEnabled, size_t cacheSize, + size_t cacheMutexBuckets, std::string logging) { + Service::Config config; + config.numWorkers = numWorkers; + config.cacheEnabled = cacheEnabled; + config.cacheSize = cacheSize; + config.cacheMutexBuckets = cacheMutexBuckets; + config.logger.level = logging; + return config; + }), + py::arg("numWorkers") = 1, py::arg("cacheEnabled") = false, + py::arg("cacheSize") = 20000, py::arg("cacheMutexBuckets") = 1, + py::arg("logLevel") = "off") + .def_readwrite("numWorkers", &Service::Config::numWorkers) + .def_readwrite("cacheEnabled", &Service::Config::cacheEnabled) + .def_readwrite("cacheSize", &Service::Config::cacheSize) + .def_readwrite("cacheMutexBuckets", &Service::Config::cacheMutexBuckets); + + py::class_<_Model, std::shared_ptr<_Model>>(m, "TranslationModel"); +} diff --git a/bindings/python/cmds.py b/bindings/python/cmds.py new file mode 100644 index 000000000..5949adaca --- /dev/null +++ b/bindings/python/cmds.py @@ -0,0 +1,177 @@ +import argparse +import sys +from collections import Counter, defaultdict + +from . import REPOSITORY, ResponseOptions, Service, ServiceConfig, VectorString + +CMDS = {} + + +def _register_cmd(cmd: str): + """ + Convenience decorator function, which populates the dictionary above with + commands created in a declarative fashion. + """ + + def __inner(cls): + CMDS[cmd] = cls + return cls + + return __inner + + +@_register_cmd("translate") +class Translate: + @staticmethod + def embed_subparser(key: str, subparsers: argparse._SubParsersAction): + translate = subparsers.add_parser( + key, + description="translate using a given model. 
Multiple models mean pivoting", + ) + + translate.add_argument( + "-m", + "--model", + type=str, + nargs="+", + help="Path to model file(s) to use in forward or pivot translation", + required=True, + ) + + translate.add_argument( + "-r", + "--repository", + type=str, + help="Repository to download model from", + choices=REPOSITORY.available(), + default="browsermt", + ) + + translate.add_argument( + "--num-workers", + type=int, + help="Number of worker threads to use to translate", + default=4, + ) + + translate.add_argument( + "--log-level", + type=str, + default="off", + help="Set verbosity level of logging: trace, debug, info, warn, err(or), critical, off", + ) + + # Tweak response-options for quick HTML in out via commandline + options = translate.add_argument_group("response-options") + options.add_argument("--html", type=bool, default=False) + options.add_argument("--alignment", type=bool, default=False) + options.add_argument("--quality-scores", type=bool, default=False) + + @staticmethod + def execute(args: argparse.Namespace): + # Build service + + config = ServiceConfig(numWorkers=args.num_workers, logLevel=args.log_level) + service = Service(config) + + models = [ + service.modelFromConfigPath( + REPOSITORY.modelConfigPath(args.repository, model) + ) + for model in args.model + ] + + # Configure a few options which require how a Response is constructed + options = ResponseOptions( + alignment=args.alignment, qualityScores=args.quality_scores, HTML=args.html + ) + + source = sys.stdin.read() + responses = None + if len(models) == 1: + [model] = models + responses = service.translate(model, VectorString([source]), options) + else: + [first, second] = models + responses = service.pivot(first, second, VectorString([source]), options) + + for response in responses: + print(response.target.text, end="") + + +@_register_cmd("download") +class Download: + @staticmethod + def embed_subparser(key: str, subparsers: argparse._SubParsersAction): + download = 
subparsers.add_parser( + key, description="Download models from the web." + ) + + download.add_argument( + "-m", + "--model", + type=str, + required=False, + default=None, + help="Fetch model with given code. Use ls to list available models. Optional, if none supplied all models are downloaded.", + ) + + download.add_argument( + "-r", + "--repository", + type=str, + help="Repository to download model from", + choices=REPOSITORY.available(), + default="browsermt", + ) + + @staticmethod + def execute(args: argparse.Namespace): + if args.model is not None: + REPOSITORY.download(args.repository, args.model) + else: + for model in REPOSITORY.models(args.repository, filter_downloaded=False): + REPOSITORY.download(args.repository, model) + + +@_register_cmd("ls") +class List: + @staticmethod + def embed_subparser(key: str, subparsers: argparse._SubParsersAction): + ls = subparsers.add_parser(key, description="List available models.") + ls.add_argument( + "-r", + "--repository", + type=str, + help="Repository to list models from", + choices=REPOSITORY.available(), + default="browsermt", + ) + + @staticmethod + def execute(args: argparse.Namespace): + print("Available models: ") + for counter, identifier in enumerate( + REPOSITORY.models(args.repository, filter_downloaded=True), 1 + ): + model = REPOSITORY.model(args.repository, identifier) + print( + " {}.".format(str(counter).rjust(4)), + model["code"], + model["name"], + ) + print() + + +def make_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser("bergamot") + subparsers = parser.add_subparsers( + title="actions", + description="The following actions are available through the bergamot package", + help="To obtain help on how to run these actions supply -h.", + dest="action", + ) + + for key, cls in CMDS.items(): + cls.embed_subparser(key, subparsers) + return parser diff --git a/bindings/python/repository.py b/bindings/python/repository.py new file mode 100644 index 000000000..7f89035c8 --- /dev/null 
+++ b/bindings/python/repository.py @@ -0,0 +1,185 @@ +import json +import os +import tarfile +import typing as t +from abc import ABC, abstractmethod +from functools import partial +from urllib.parse import urlparse + +import requests +from appdirs import AppDirs + +from .typing_utils import URL, PathLike +from .utils import download_resource, patch_marian_for_bergamot + +APP = "bergamot" + + +class Repository(ABC): + """ + An interface for several repositories. Intended to enable interchangeable + use of translateLocally and Mozilla repositories for usage through python. + """ + + @property + @abstractmethod + def name(self): + pass + + @abstractmethod + def update(self): + """Updates the model list""" + pass + + @abstractmethod + def models(self) -> t.List[str]: + """returns identifiers for available models""" + pass + + @abstractmethod + def model(self, model_identifier: str) -> t.Any: + """returns the entry for the given model-identifier""" + pass + + @abstractmethod + def modelConfigPath(self, model_identifier: str) -> str: + """returns modelConfigPath for a given model-identifier""" + pass + + @abstractmethod + def download(self, model_identifier: str): + pass + + +class TranslateLocallyLike(Repository): + """ + This class implements Repository to fetch models from translateLocally. + AppDirs is used to standardize directories and further specialization + happens with translateLocally identifier.
+ """ + + def __init__(self, name, url): + self.url = url + self._name = name + appDir = AppDirs(APP) + f = lambda *args: os.path.join(*args, self._name) + self.dirs = { + "cache": f(appDir.user_cache_dir), + "config": f(appDir.user_config_dir), + "data": f(appDir.user_data_dir), + "archive": f(appDir.user_data_dir, "archives"), + "models": f(appDir.user_data_dir, "models"), + } + + for directory in self.dirs.values(): + os.makedirs(directory, exist_ok=True) + + self.models_file_path = os.path.join(self.dirs["config"], "models.json") + self.update() + + @property + def name(self) -> str: + return self._name + + def update(self) -> None: + inventory = requests.get(self.url).text + with open(self.models_file_path, "w+") as models_file: + models_file.write(inventory) + self.data = json.loads(inventory) + + # Update inverse lookup. + self.data_by_code = {} + for model in self.data["models"]: + self.data_by_code[model["code"]] = model + + def models(self, filter_downloaded: bool = True) -> t.List[str]: + codes = [] + for model in self.data["models"]: + if filter_downloaded: + fprefix = self._archive_name_without_extension(model["url"]) + model_dir = os.path.join(self.dirs["models"], fprefix) + if os.path.exists(model_dir): + codes.append(model["code"]) + else: + codes.append(model["code"]) + return codes + + def modelConfigPath(self, model_identifier: str) -> str: + model = self.model(model_identifier) + fprefix = self._archive_name_without_extension(model["url"]) + model_dir = os.path.join(self.dirs["models"], fprefix) + return os.path.join(model_dir, "config.bergamot.yml") + + def model(self, model_identifier: str) -> t.Any: + return self.data_by_code[model_identifier] + + def download(self, model_identifier: str): + # Download path + model = self.model(model_identifier) + model_archive = "{}.tar.gz".format(model["shortName"]) + save_location = os.path.join(self.dirs["archive"], model_archive) + download_resource(model["url"], save_location) + + with 
tarfile.open(save_location) as model_archive: + model_archive.extractall(self.dirs["models"]) + fprefix = self._archive_name_without_extension(model["url"]) + model_dir = os.path.join(self.dirs["models"], fprefix) + symlink = os.path.join(self.dirs["models"], model["code"]) + + print( + "Downloading and extracting {} into ... {}".format( + model["code"], model_dir + ), + end=" ", + ) + + if not os.path.exists(symlink): + os.symlink(model_dir, symlink) + + config_path = os.path.join(symlink, "config.intgemm8bitalpha.yml") + bergamot_config_path = os.path.join(symlink, "config.bergamot.yml") + + # Finally patch so we don't have to reload this again. + patch_marian_for_bergamot(config_path, bergamot_config_path) + + print("Done.") + + def _archive_name_without_extension(self, url: URL): + o = urlparse(url) + fname = os.path.basename(o.path) # something tar.gz. + fname_without_extension = fname.replace(".tar.gz", "") + return fname_without_extension + + +class Aggregator: + def __init__(self, repositories: t.List[Repository]): + self.repositories = {} + for repository in repositories: + if repository.name in self.repositories: + raise ValueError("Duplicate repository found.") + self.repositories[repository.name] = repository + + # Default is the first repository supplied; used when a name is unknown. + self.default_repository = repositories[0] + + def update(self, name: str) -> None: + self.repositories.get(name, self.default_repository).update() + + def modelConfigPath(self, name: str, code: str) -> PathLike: + return self.repositories.get(name, self.default_repository).modelConfigPath( + code + ) + + def models(self, name: str, filter_downloaded: bool = True) -> t.List[str]: + return self.repositories.get(name, self.default_repository).models(filter_downloaded=filter_downloaded) + + def model(self, name: str, model_identifier: str) -> t.Any: + return self.repositories.get(name, self.default_repository).model( + model_identifier + ) + + def available(self): + return list(self.repositories.keys()) + + def download(self, name: str, 
model_identifier: str) -> None: + self.repositories.get(name, self.default_repository).download(model_identifier) diff --git a/bindings/python/typing_utils.py b/bindings/python/typing_utils.py new file mode 100644 index 000000000..3e1682cff --- /dev/null +++ b/bindings/python/typing_utils.py @@ -0,0 +1,5 @@ +import pathlib +import typing as t + +PathLike = t.TypeVar("PathLike", str, pathlib.Path) +URL = str diff --git a/bindings/python/utils.py b/bindings/python/utils.py new file mode 100644 index 000000000..3164c171c --- /dev/null +++ b/bindings/python/utils.py @@ -0,0 +1,52 @@ +import os + +import requests +import yaml + +from .typing_utils import URL, PathLike + + +def download_resource(url: URL, save_location: PathLike, force_download=False): + """ + Downloads a resource from url into save_location, overwrites only if + force_download is true. + """ + if force_download or not os.path.exists(save_location): + response = requests.get(url, stream=True) + # Throw an error for bad status codes + response.raise_for_status() + with open(save_location, "wb") as handle: + for block in response.iter_content(1024): + handle.write(block) + + +def patch_marian_for_bergamot( + marian_config_path: PathLike, bergamot_config_path: PathLike, quality: bool = False +): + """ + Accepts path to a config-file from marian-training and following + quantization and adjusts parameters for use in bergamot. + """ + # Load marian_config_path + data = None + with open(marian_config_path) as fp: + data = yaml.load(fp, Loader=yaml.FullLoader) + + # Update a few entries. Things here are hardcoded. + data.update( + { + "ssplit-prefix-file": "", + "ssplit-mode": "paragraph", + "max-length-break": 128, + "mini-batch-words": 1024, + "workspace": 128, # shipped models use big workspaces. We'd prefer to keep it low. + "alignment": "soft", + } + ) + + if quality: + data.update({"quality": quality, "skip-cost": False}) + + # Write-out. 
+ with open(bergamot_config_path, "w") as output_file: + print(yaml.dump(data, sort_keys=False), file=output_file) diff --git a/doc/python.rst b/doc/python.rst new file mode 100644 index 000000000..4c8574dee --- /dev/null +++ b/doc/python.rst @@ -0,0 +1,87 @@ +.. Bergamot documentation master file, created by + sphinx-quickstart on Tue Jan 18 17:26:57 2022. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +bergamot-translator +==================================== + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + +This document describes python bindings from bergamot-translator and a +batteries included python package supplied for easy use. The library also +provides entry point via a command-line making it easier for the average user +to get started. + +As bergamot-translator is built on top of marian, the python API should also +work as python bindings for marian trained models, if they need to be +integrated into python code-bases. + +*Disclaimer*: The package is still in early stages and unstable. Functions and +classes might move around quite fast. Use at your own risk. + +Command Line Interface +---------------------- + +.. argparse:: + :ref: bergamot.cmds.make_parser + :prog: bergamot + + +Module Documentation +-------------------- + +.. automodule:: bergamot + :members: + :undoc-members: + +bergamot-translator ++++++++++++++++++++ + +The following components are exported from C++ via python-bindings and form +library primitives that can be used to build translation workflows. + +.. autoclass:: bergamot.ServiceConfig + :members: + :undoc-members: + +.. autoclass:: bergamot.Service + :members: + :undoc-members: + + +.. autoclass:: bergamot.TranslationModel + :members: + :undoc-members: + +.. autoclass:: bergamot.ResponseOptions + :members: + :undoc-members: + +Model Inventory ++++++++++++++++ + +.. autoclass:: bergamot.repository.Repository + :members: + :undoc-members: + +.. 
autoclass:: bergamot.repository.TranslateLocallyLike + :members: + :undoc-members: + +Utilities ++++++++++ + +.. autofunction:: bergamot.utils.patch_marian_for_bergamot + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..a8a593ba2 --- /dev/null +++ b/setup.py @@ -0,0 +1,206 @@ +import io +import os +import re +import subprocess +import sys + +from setuptools import Command, Extension, find_packages, setup +from setuptools.command.build_ext import build_ext +from setuptools.command.build_py import build_py as _build_py + +# Convert distutils Windows platform specifiers to CMake -A arguments +PLAT_TO_CMAKE = { + "win32": "Win32", + "win-amd64": "x64", + "win-arm32": "ARM", + "win-arm64": "ARM64", +} + +# A CMakeExtension needs a sourcedir instead of a file list. +# The name must be the _single_ output extension from the CMake build. +# If you need multiple extensions, see scikit-build. +class CMakeExtension(Extension): + def __init__(self, name, sourcedir=""): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + + +class CMakeBuild(build_ext): + def build_extension(self, ext): + extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + + # required for auto-detection & inclusion of auxiliary "native" libs + if not extdir.endswith(os.path.sep): + extdir += os.path.sep + + debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug + cfg = "Debug" if debug else "Release" + + # CMake lets you override the generator - we need to check this. + # Can be set with Conda-Build, for example. + cmake_generator = os.environ.get("CMAKE_GENERATOR", "") + build_arch = os.environ.get("BUILD_ARCH", "native") + + # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON + # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code + # from Python. 
+ cmake_args = [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}", + f"-DPYTHON_EXECUTABLE={sys.executable}", + f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm + f"-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", + f"-DCMAKE_C_COMPILER_LAUNCHER=ccache", + f"-DCOMPILE_PYTHON=ON", + f"-DSSPLIT_USE_INTERNAL_PCRE2=ON", + f"-DBUILD_ARCH={build_arch}", + ] + build_args = ["-t", "_bergamot"] + # Adding CMake arguments set as environment variable + # (needed e.g. to build for ARM OSx on conda-forge) + if "CMAKE_ARGS" in os.environ: + cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item] + + # In this example, we pass in the version to C++. You might not need to. + cmake_args += [f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"] + + if self.compiler.compiler_type != "msvc": + # Using Ninja-build since it a) is available as a wheel and b) + # multithreads automatically. MSVC would require all variables be + # exported for Ninja to pick it up, which is a little tricky to do. + # Users can override the generator with CMAKE_GENERATOR in CMake + # 3.15+. + if not cmake_generator: + try: + import ninja # noqa: F401 + + cmake_args += ["-GNinja"] + except ImportError: + pass + + else: + + # Single config generators are handled "normally" + single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) + + # CMake allows an arch-in-generator style for backward compatibility + contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) + + # Specify the arch if using MSVC generator, but only if it doesn't + # contain a backward-compatibility arch spec already in the + # generator name. 
+ if not single_config and not contains_arch: + cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] + + # Multi-config generators have a different way to specify configs + if not single_config: + cmake_args += [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}" + ] + build_args += ["--config", cfg] + + if sys.platform.startswith("darwin"): + # Cross-compile support for macOS - respect ARCHFLAGS if set + archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", "")) + if archs: + cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))] + + # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level + # across all generators. + if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: + # self.parallel is a Python 3 only way to set parallel jobs by hand + # using -j in the build_ext call, not supported by pip or PyPA-build. + if hasattr(self, "parallel") and self.parallel: + # CMake 3.12+ only. + build_args += [f"-j{self.parallel}"] + + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + + print("cmake", ext.sourcedir, " ".join(cmake_args)) + + subprocess.check_call( + ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp + ) + subprocess.check_call( + ["cmake", "--build", "."] + build_args, cwd=self.build_temp + ) + + +here = os.path.abspath(os.path.dirname(__file__)) + +# Import the README and use it as the long-description. +# Note: this will only work if 'README.md' is present in your MANIFEST.in file! +with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f: + long_description = "\n" + f.read() + +version = None +with open(os.path.join(here, "lemonade.version")) as f: + version = f.read().strip() + + +class UploadCommand(Command): + """Support setup.py upload.""" + + description = "Build and publish the package." 
+ user_options = [] + + @staticmethod + def status(s): + """Prints things in bold.""" + print("\033[1m{0}\033[0m".format(s)) + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + try: + self.status("Removing previous builds…") + rmtree(os.path.join(here, "dist")) + except OSError: + pass + + self.status("Building Source and Wheel (universal) distribution…") + os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable)) + + self.status("Pushing git tags…") + os.system("git push --tags") + + self.status("Uploading the package to PyPI via Twine…") + os.system("twine upload dist/*") + + sys.exit() + + +class build_py(_build_py): + def run(self): + self.run_command("build_ext") + return super().run() + + +# The information here can also be placed in setup.cfg - better separation of +# logic and declaration, and simpler if you include description/version in a file. +setup( + name="bergamot", + version=version, + author="Jerin Philip", + author_email="jerinphilip@live.in", + description="Bergamot translator python binding.", + long_description="", + ext_modules=[CMakeExtension("bergamot/_bergamot")], + cmdclass={"build_py": build_py, "build_ext": CMakeBuild}, + zip_safe=False, + extras_require={"test": ["pytest>=6.0"]}, + license_files=("LICENSE.txt",), + python_requires=">=3.6", + packages=["bergamot"], + package_dir={"bergamot": "bindings/python"}, + install_requires=["requests", "pyyaml", "appdirs"], + entry_points={ + "console_scripts": [ + "bergamot = bergamot.__main__:main", + ], + }, +) From 54066c54533f873cb34370ca40a50a3831a121b7 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 24 Jan 2022 18:28:31 +0000 Subject: [PATCH 02/20] Add python yml file --- .github/workflows/python.yml | 281 +++++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 .github/workflows/python.yml diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 
index 000000000..653964a8a --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,281 @@ +name: "Python Bindings" +'on': + push: + branches: + - main + tags: + - "v*.*.*" + pull_request: + branches: + - '**' +env: + qt_version: "6.2.1" # only used by build-macos + ccache_basedir: ${{ github.workspace }} + ccache_dir: "${{ github.workspace }}/.ccache" + ccache_compilercheck: content + ccache_compress: 'true' + ccache_compresslevel: 9 + ccache_maxsize: 200M + ccache_cmake: -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache + +jobs: + python-ubuntu: + strategy: + fail-fast: false + matrix: + include: + - name: "Ubuntu 18.04 / py3.6" + os: "ubuntu-18.04" + python-version: "3.6" + - name: "Ubuntu 18.04 / py3.7" + os: "ubuntu-18.04" + python-version: "3.7" + - name: "Ubuntu 20.04 / py3.8" + os: "ubuntu-20.04" + python-version: "3.8" + - name: "Ubuntu 20.04 / py3.9" + os: "ubuntu-20.04" + python-version: "3.9" + - name: "Ubuntu 20.04 / py3.10" + os: "ubuntu-20.04" + python-version: "3.10" + + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + + - name: Install Dependencies + run: |- + sudo apt-get update + sudo apt-get install -y \ + ccache libprotobuf-dev protobuf-compiler \ + python3-setuptools python3-pybind11 + + - name: Install MKL + run: |- + wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - + sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088 + + - name: Generate ccache_vars for ccache based on machine + shell: bash + id: ccache_vars + run: |- + echo 
"::set-output name=hash::$(echo ${{ env.ccache_compilercheck }})" + echo "::set-output name=timestamp::$(date '+%Y-%m-%dT%H.%M.%S')" + + - name: Cache-op for build-cache through ccache + uses: actions/cache@v2 + with: + path: ${{ env.ccache_dir }} + key: ccache-${{ matrix.name }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}-${{ steps.ccache_vars.outputs.timestamp }} + restore-keys: |- + ccache-${{ matrix.name }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }} + ccache-${{ matrix.name }}-${{ steps.ccache_vars.outputs.hash }} + ccache-${{ matrix.name }} + - name: ccache environment setup + run: |- + echo "CCACHE_COMPILER_CHECK=${{ env.ccache_compilercheck }}" >> $GITHUB_ENV + echo "CCACHE_BASEDIR=${{ env.ccache_basedir }}" >> $GITHUB_ENV + echo "CCACHE_COMPRESS=${{ env.ccache_compress }}" >> $GITHUB_ENV + echo "CCACHE_COMPRESSLEVEL=${{ env.ccache_compresslevel }}" >> $GITHUB_ENV + echo "CCACHE_DIR=${{ env.ccache_dir }}" >> $GITHUB_ENV + echo "CCACHE_MAXSIZE=${{ env.ccache_maxsize }}" >> $GITHUB_ENV + + - name: ccache prolog + run: |- + ccache -s # Print current cache stats + ccache -z # Zero cache entry + - name: setup.py + run: |- + python3 -m pip install wheel + BUILD_ARCH=core-avx-i python3 setup.py bdist_wheel --universal + + # We're happy with just compile for the moment, so cache gets some seeding. + - name: Install onto root python lib + run: |- + python3 -m pip install --ignore-installed dist/bergamot-*.whl + + - name: Fetch models from translateLocally repository. + run: |- + python3 -m bergamot download -m en-de-tiny + python3 -m bergamot download -m de-en-tiny + python3 -m bergamot ls + + - name: Fetch models from opus repository. 
+ run: |- + python3 -m bergamot download -m eng-fin-tiny -r opus + python3 -m bergamot ls -r opus + + - name: Run the sample python script shipped with module + run: |- + python3 -m bergamot translate --model en-de-tiny <<< "Hello World" + python3 -m bergamot translate --model en-de-tiny de-en-tiny <<< "Hello World" + python3 -m bergamot translate --model eng-fin-tiny --repository opus <<< "Hello World" + + - name: ccache epilog + run: 'ccache -s # Print current cache stats' + + - uses: actions/upload-artifact@v2 + with: + path: ${{github.workspace}}/dist/bergamot-*.whl + + + python-macos: + name: "MacOS 10.15 / py3.10" + runs-on: "macos-10.15" + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + - name: Install Dependencies + run: |- + brew update + brew install openblas protobuf ccache boost pybind11 + brew install coreutils findutils libarchive + + - name: Generate ccache_vars for ccache based on machine + shell: bash + id: ccache_vars + run: |- + echo "::set-output name=hash::$(echo ${{ env.ccache_compilercheck }})" + echo "::set-output name=timestamp::$(date '+%Y-%m-%dT%H.%M.%S')" + - name: Cache-op for build-cache through ccache + uses: actions/cache@v2 + with: + path: ${{ env.ccache_dir }} + key: ccache-${{ job.id }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}-${{ steps.ccache_vars.outputs.timestamp }} + restore-keys: |- + ccache-${{ job.id }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }} + ccache-${{ job.id }}-${{ steps.ccache_vars.outputs.hash }} + ccache-${{ job.id }} + + - name: ccache environment setup + run: |- + echo "CCACHE_COMPILER_CHECK=${{ env.ccache_compilercheck }}" >> $GITHUB_ENV + echo "CCACHE_BASEDIR=${{ env.ccache_basedir }}" >> $GITHUB_ENV + echo "CCACHE_COMPRESS=${{ env.ccache_compress }}" >> $GITHUB_ENV + echo "CCACHE_COMPRESSLEVEL=${{ env.ccache_compresslevel }}" >> $GITHUB_ENV + echo "CCACHE_DIR=${{ env.ccache_dir }}" >> $GITHUB_ENV + echo "CCACHE_MAXSIZE=${{ env.ccache_maxsize 
}}" >> $GITHUB_ENV + + - name: ccache prolog + run: |- + ccache -s # Print current cache stats + ccache -z # Zero cache entry + + - name: Apply required patches + run: |- + patch -p1 < patches/01-marian-fstream-for-macos.patch + + # Appears to be required per GitHub CI; + - name: Set MACOSX DEPLOYMENT TARGET via environment variable + run: |- + echo "MACOSX_DEPLOYMENT_TARGET=10.15" >> $GITHUB_ENV + + - name: setup.py + run: |- + python3 -m pip install --upgrade packaging wheel + BUILD_ARCH=core-avx-i python3 setup.py bdist_wheel --universal + + # We're happy with just compile for the moment, so cache gets some seeding. + - name: Install onto root python lib + run: |- + python3 -m pip install dist/bergamot-*.whl + + - name: Fetch models from translateLocally repository. + run: |- + python3 -m bergamot download -m en-de-tiny + python3 -m bergamot download -m de-en-tiny + + - name: Fetch models from opus repository. + run: |- + python3 -m bergamot download -m eng-fin-tiny -r opus + python3 -m bergamot ls -r opus + + - name: Run the sample python script shipped with module + run: |- + python3 -m bergamot translate --model en-de-tiny <<< "Hello World" + python3 -m bergamot translate --model en-de-tiny de-en-tiny <<< "Hello World" + python3 -m bergamot translate --model eng-fin-tiny --repository opus <<< "Hello World" + + - name: ccache epilog + run: 'ccache -s # Print current cache stats' + + - uses: actions/upload-artifact@v2 + with: + path: ${{github.workspace}}/dist/bergamot-*.whl + + # Try to upload a release using https://github.com/marvinpinto/actions/issues/177#issuecomment-917605585 as a model + release-latest: + name: Release Latest Build + runs-on: ubuntu-latest + needs: [python-ubuntu, python-macos] + if: github.ref == 'refs/heads/main' + steps: + - name: Download artifacts + uses: actions/download-artifact@v2 + + - name: Update GitHub prerelease + uses: marvinpinto/action-automatic-releases@latest + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + 
automatic_release_tag: latest + prerelease: true + title: "Latest Build" + files: | + ${{github.workspace}}/artifact/*.whl + + release-version: + name: Release version + runs-on: ubuntu-latest + needs: [python-ubuntu, python-macos] + permissions: + contents: "write" + packages: "write" + pull-requests: "read" + if: startsWith(github.ref, 'refs/tags/v') + steps: + - name: Download artifacts + uses: actions/download-artifact@v2 + + - name: Update GitHub release + uses: marvinpinto/action-automatic-releases@latest + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + automatic_release_tag: ${{ github.ref_name }} + prerelease: false + title: "${{ github.ref_name }}" + files: | + ${{github.workspace}}/artifact/*.whl + + python-checks: + name: "formatting and typechecks" + runs-on: "ubuntu-latest" + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + - name: Install Dependencies + run: |- + python3 -m pip install black isort pytype + - name: "Formatting checks: black, isort" + run: | + python3 -m black --check bindings/python/ setup.py + python3 -m isort --profile black --diff --check bindings/python setup.py + - name: "Static typing checks: pytype" + run: |- + python3 -m pytype bindings/python From 73e1ba3706393c3abc3951995230488701b013e1 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 24 Jan 2022 18:54:49 +0000 Subject: [PATCH 03/20] pybind11, CmakeLists and buildability --- .gitmodules | 3 +++ 3rd_party/CMakeLists.txt | 4 ++++ 3rd_party/pybind11 | 1 + CMakeLists.txt | 4 ++++ setup.py | 2 +- 5 files changed, 13 insertions(+), 1 deletion(-) create mode 160000 3rd_party/pybind11 diff --git a/.gitmodules b/.gitmodules index 8aa101494..cfedde289 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "bergamot-translator-tests"] path = bergamot-translator-tests url = https://github.com/browsermt/bergamot-translator-tests +[submodule "3rd_party/pybind11"] + path = 3rd_party/pybind11 + url = 
https://github.com/pybind/pybind11.git diff --git a/3rd_party/CMakeLists.txt b/3rd_party/CMakeLists.txt index b84a37b80..72a49e83a 100644 --- a/3rd_party/CMakeLists.txt +++ b/3rd_party/CMakeLists.txt @@ -23,3 +23,7 @@ get_directory_property(CMAKE_C_FLAGS DIRECTORY marian-dev DEFINITION CMAKE_C_FLA get_directory_property(CMAKE_CXX_FLAGS DIRECTORY marian-dev DEFINITION CMAKE_CXX_FLAGS) set(CMAKE_C_FLAGS ${CMAKE_C_FLAGS} PARENT_SCOPE) set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} PARENT_SCOPE) + +if(COMPILE_PYTHON) + add_subdirectory(pybind11) +endif(COMPILE_PYTHON) diff --git a/3rd_party/pybind11 b/3rd_party/pybind11 new file mode 160000 index 000000000..9ec1128c7 --- /dev/null +++ b/3rd_party/pybind11 @@ -0,0 +1 @@ +Subproject commit 9ec1128c7aac3d069a4ec2bd1dfc7f57c6526d1c diff --git a/CMakeLists.txt b/CMakeLists.txt index f8e50d4ac..bc698e5f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,3 +131,7 @@ else() add_subdirectory(app) endif(COMPILE_WASM) +if(COMPILE_PYTHON) + add_subdirectory(bindings/python) +endif(COMPILE_PYTHON) + diff --git a/setup.py b/setup.py index a8a593ba2..60cc086bd 100644 --- a/setup.py +++ b/setup.py @@ -134,7 +134,7 @@ def build_extension(self, ext): long_description = "\n" + f.read() version = None -with open(os.path.join(here, "lemonade.version")) as f: +with open(os.path.join(here, "BERGAMOT_VERSION")) as f: version = f.read().strip() From ec62842e73c506e68d6fa1bd19d71dbdef7aacc3 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 24 Jan 2022 19:07:11 +0000 Subject: [PATCH 04/20] MacOS patches --- patches/01-marian-fstream-for-macos.patch | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 patches/01-marian-fstream-for-macos.patch diff --git a/patches/01-marian-fstream-for-macos.patch b/patches/01-marian-fstream-for-macos.patch new file mode 100644 index 000000000..49fd8bddc --- /dev/null +++ b/patches/01-marian-fstream-for-macos.patch @@ -0,0 +1,13 @@ +diff --git a/src/3rd_party/zstr/strict_fstream.hpp 
b/src/3rd_party/zstr/strict_fstream.hpp +index 7b1173931df977e69021f3995fa064a492f89d38..948e91eaf99b6b29ce41cf793fba6717f3b5f5b5 100644 +--- a/3rd_party/bergamot-translator/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp ++++ b/3rd_party/bergamot-translator/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp +@@ -27,7 +27,7 @@ static std::string strerror() + { + buff = "Unknown error"; + } +-#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || __APPLE__) && ! _GNU_SOURCE ++#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || __APPLE__) + // XSI-compliant strerror_r() + if (strerror_r(errno, &buff[0], buff.size()) != 0) + { From 53bf43454f19e30e17ca00f0fb296d33d6f73b67 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 24 Jan 2022 19:14:45 +0000 Subject: [PATCH 05/20] Adapting patch to skip one repository --- patches/01-marian-fstream-for-macos.patch | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/patches/01-marian-fstream-for-macos.patch b/patches/01-marian-fstream-for-macos.patch index 49fd8bddc..5219227d9 100644 --- a/patches/01-marian-fstream-for-macos.patch +++ b/patches/01-marian-fstream-for-macos.patch @@ -1,7 +1,7 @@ -diff --git a/src/3rd_party/zstr/strict_fstream.hpp b/src/3rd_party/zstr/strict_fstream.hpp +diff --git a/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp b/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp index 7b1173931df977e69021f3995fa064a492f89d38..948e91eaf99b6b29ce41cf793fba6717f3b5f5b5 100644 ---- a/3rd_party/bergamot-translator/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp -+++ b/3rd_party/bergamot-translator/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp +--- a/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp ++++ b/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp @@ -27,7 +27,7 @@ static std::string strerror() { buff = "Unknown error"; From b12561d7be216344dc0db58f7cd4ca78cc483f32 Mon Sep 17 00:00:00 2001 
From: Jerin Philip Date: Mon, 24 Jan 2022 19:22:51 +0000 Subject: [PATCH 06/20] Skipping repository in CMakeLists.txt as well --- bindings/python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 4a51b68fd..70b1a2535 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -6,4 +6,4 @@ message("Using Python: " ${Python_EXECUTABLE}) pybind11_add_module(_bergamot SHARED bergamot.cpp) target_link_libraries(_bergamot PUBLIC pybind11::module pybind11::headers bergamot-translator) target_include_directories(_bergamot PUBLIC ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src - ${CMAKE_BINARY_DIR}/3rd_party/bergamot-translator/src) + ${CMAKE_BINARY_DIR}/src) From 49267342a22a96e617c6a65fe224b3c808490149 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 24 Jan 2022 21:16:48 +0000 Subject: [PATCH 07/20] Connecting C++ and Python sphinx (#311) * Remove doc for moving into python * Include python.rst in toc * Use 18.04 cp3.7 wheel * Integrating python documentation --- .github/workflows/doc.yml | 89 ------------------------------------ .github/workflows/python.yml | 86 ++++++++++++++++++++++++++++++++++ doc/conf.py | 79 +++++++++++++++++--------------- doc/index.rst | 1 + doc/python.rst | 4 +- doc/requirements.txt | 1 + 6 files changed, 132 insertions(+), 128 deletions(-) delete mode 100644 .github/workflows/doc.yml diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml deleted file mode 100644 index 3874822b8..000000000 --- a/.github/workflows/doc.yml +++ /dev/null @@ -1,89 +0,0 @@ -name: Documentation - -on: - push: - branches: [ main, ci-sandbox ] - tags: ['v[0-9]+.[0-9]+.[0-9]+'] - pull_request: - branches: [ '**' ] - -jobs: - api-documentation: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - # Runs javascript to extract push events from both tags and branch (only 
main, due to workflow trigger) - # converts refs/<>/ -> - # eg: - # refs/head/main -> main - # refs/tags/v0.1.0 -> v0.1.0 - # - - name: Extract tag name - id: tag - uses: actions/github-script@0.2.0 - if: ${{ github.event_name == 'push' }} - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const args = context.payload.ref.split("/"); - [refs, category, ...rest] = args; - return rest.join("/"); - - # Patches the BERGAMOT_VERSION file used by sphinx-docs at run time to - # obtain names like 'main' or 'ci-sandbox' to not confuse with version - # based documentation built separately. - - name: Deploy-time patch version - run: | - echo ${{steps.tag.outputs.result }} > BERGAMOT_VERSION - - - name: Set up Doxygen - run: sudo apt-get install -y doxygen - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - - name: Set up dependency cache - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('doc/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Install dependencies - working-directory: ./doc - run: python3 -m pip install -r requirements.txt - - - name: Build documentation - working-directory: ./doc - run: sphinx-build -b html ./ build/ - - - - name: Deploy 🚀 - uses: JamesIves/github-pages-deploy-action@4.1.3 - if: ${{ github.event_name == 'push' && github.repository == 'browsermt/bergamot-translator' }} - with: - repository-name: 'browsermt/docs' - branch: gh-pages # The branch the action should deploy to. - folder: './doc/build/' # The folder the action should deploy. - target-folder: '${{ steps.tag.outputs.result }}' - ssh-key: ${{ secrets.BERGAMOT_SSH_PRIVATE_KEY }} - - # This artifact contains the HTML output of Sphinx only. - # With index.html at the root of the produced zip file. - # For use for maintainers to download the zip and check render of - # documentation while generated at pull-request. 
- - name: Upload documentation - uses: actions/upload-artifact@v2 - if: ${{ github.event_name == 'pull_request'}} - with: - name: api-docs - path: ./doc/build/ - if-no-files-found: error - diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 653964a8a..ef18b3ffa 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -3,6 +3,7 @@ name: "Python Bindings" push: branches: - main + - ci-sandbox tags: - "v*.*.*" pull_request: @@ -279,3 +280,88 @@ jobs: - name: "Static typing checks: pytype" run: |- python3 -m pytype bindings/python + + docs: + runs-on: ubuntu-18.04 + needs: [python-ubuntu] + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + # Runs javascript to extract push events from both tags and branch (only main, due to workflow trigger) + # converts refs/<>/ -> + # eg: + # refs/head/main -> main + # refs/tags/v0.1.0 -> v0.1.0 + # + - name: Download artifacts + uses: actions/download-artifact@v2 + - name: Extract tag name + id: tag + uses: actions/github-script@0.2.0 + if: ${{ github.event_name == 'push' }} + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const args = context.payload.ref.split("/"); + [refs, category, ...rest] = args; + return rest.join("/"); + + # Patches the BERGAMOT_VERSION file used by sphinx-docs at run time to + # obtain names like 'main' or 'ci-sandbox' to not confuse with version + # based documentation built separately. 
+ - name: Deploy-time patch version + run: | + echo ${{steps.tag.outputs.result }} > BERGAMOT_VERSION + + - name: Set up Doxygen + run: sudo apt-get install -y doxygen + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + + - name: Set up dependency cache + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('doc/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + working-directory: ./doc + run: | + python3 -m pip install -r requirements.txt + python3 -m pip install ${{github.workspace}}/artifact/bergamot-*-cp37*.whl + + - name: Build documentation + working-directory: ./doc + run: sphinx-build -b html ./ build/ + + + - name: Deploy 🚀 + uses: JamesIves/github-pages-deploy-action@4.1.3 + if: ${{ github.event_name == 'push' && github.repository == 'browsermt/bergamot-translator' }} + with: + repository-name: 'browsermt/docs' + branch: gh-pages # The branch the action should deploy to. + folder: './doc/build/' # The folder the action should deploy. + target-folder: '${{ steps.tag.outputs.result }}' + ssh-key: ${{ secrets.BERGAMOT_SSH_PRIVATE_KEY }} + + # This artifact contains the HTML output of Sphinx only. + # With index.html at the root of the produced zip file. + # For use for maintainers to download the zip and check render of + # documentation while generated at pull-request. 
+ - name: Upload documentation + uses: actions/upload-artifact@v2 + if: ${{ github.event_name == 'pull_request'}} + with: + name: api-docs + path: ./doc/build/ + if-no-files-found: error + diff --git a/doc/conf.py b/doc/conf.py index 8a8f4224c..7c4bfd50c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -14,21 +14,23 @@ import datetime import sys -sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath(".")) # -- Project information ----------------------------------------------------- -project = 'Bergamot Translator' -copyright = '2021, Bergamot Translator Team' -author = 'Bergamot Translator Team' +project = "Bergamot Translator" +copyright = "2021, Bergamot Translator Team" +author = "Bergamot Translator Team" # The full version, including alpha/beta/rc tags # TODO: add GitHub commit hash to the version -version_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'BERGAMOT_VERSION') +version_file = os.path.join( + os.path.dirname(os.path.dirname(__file__)), "BERGAMOT_VERSION" +) with open(os.path.abspath(version_file)) as f: version = f.read().strip() -release = version + ' ' + str(datetime.date.today()) +release = version + " " + str(datetime.date.today()) # -- General configuration --------------------------------------------------- @@ -37,24 +39,26 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.mathjax', - 'sphinx.ext.todo', - 'breathe', - 'exhale', - 'recommonmark', + "sphinx.ext.mathjax", + "sphinx.ext.todo", + "breathe", + "exhale", + "recommonmark", + "sphinx.ext.autodoc", + "sphinxarg.ext", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
exclude_patterns = [ - 'build', - 'doxygen', - 'venv', - 'README.md', + "build", + "doxygen", + "venv", + "README.md", ] @@ -63,23 +67,23 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' -htmlhelp_basename = 'bergamot-translator' +html_theme = "sphinx_rtd_theme" +htmlhelp_basename = "bergamot-translator" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] -html_css_files = ['css/custom.css'] +html_static_path = ["_static"] +html_css_files = ["css/custom.css"] # The base URL which points to the root of the HTML documentation -html_baseurl = 'http://jerinphilip.github.io/bergamot-translator' +html_baseurl = "http://jerinphilip.github.io/bergamot-translator" # -- Extension configuration ------------------------------------------------- -breathe_projects = { 'bergamot-translator': './doxygen/xml' } -breathe_default_project = 'bergamot-translator' +breathe_projects = {"bergamot-translator": "./doxygen/xml"} +breathe_default_project = "bergamot-translator" doxygen_config = """ INPUT = ../src ../app @@ -94,27 +98,28 @@ """ exhale_args = { - 'containmentFolder' : './api', - 'rootFileName' : 'library_index.rst', - 'rootFileTitle' : 'Library API', - 'doxygenStripFromPath' : '..', - 'createTreeView' : True, - 'exhaleExecutesDoxygen' : True, - 'exhaleDoxygenStdin' : doxygen_config.strip(), + "containmentFolder": "./api", + "rootFileName": "library_index.rst", + "rootFileTitle": "Library API", + "doxygenStripFromPath": "..", + "createTreeView": True, + "exhaleExecutesDoxygen": True, + "exhaleDoxygenStdin": doxygen_config.strip(), } -primary_domain = 'cpp' -highlight_language = 'cpp' +primary_domain = "cpp" +highlight_language = "cpp" # A trick to include markdown files from outside 
the source directory using # 'mdinclude'. Warning: all other markdown files not included via 'mdinclude' # will be rendered using recommonmark as recommended by Sphinx from m2r import MdInclude + def setup(app): # from m2r to make `mdinclude` work - app.add_config_value('no_underscore_emphasis', False, 'env') - app.add_config_value('m2r_parse_relative_links', False, 'env') - app.add_config_value('m2r_anonymous_references', False, 'env') - app.add_config_value('m2r_disable_inline_math', False, 'env') - app.add_directive('mdinclude', MdInclude) + app.add_config_value("no_underscore_emphasis", False, "env") + app.add_config_value("m2r_parse_relative_links", False, "env") + app.add_config_value("m2r_anonymous_references", False, "env") + app.add_config_value("m2r_disable_inline_math", False, "env") + app.add_directive("mdinclude", MdInclude) diff --git a/doc/index.rst b/doc/index.rst index 5be3857a3..54dc1e8dc 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,6 +17,7 @@ This is developer documentation. marian-integration wasm-example api/library_index + python diff --git a/doc/python.rst b/doc/python.rst index 4c8574dee..0426f349f 100644 --- a/doc/python.rst +++ b/doc/python.rst @@ -3,8 +3,8 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -bergamot-translator -==================================== +Python +======= .. 
toctree:: :maxdepth: 3 diff --git a/doc/requirements.txt b/doc/requirements.txt index 28e6e70ca..d95cc684c 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -5,3 +5,4 @@ sphinx_rtd_theme mistune<2.0.0 recommonmark m2r +sphinx-argparse From 6e6739a85da05b4c904f207cd533e631b2890ed5 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 14:54:21 +0000 Subject: [PATCH 08/20] Adding URL with github link to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 60cc086bd..a6905b357 100644 --- a/setup.py +++ b/setup.py @@ -187,6 +187,7 @@ def run(self): version=version, author="Jerin Philip", author_email="jerinphilip@live.in", + url="https://github.com/browsermt/bergamot-translator/", description="Bergamot translator python binding.", long_description="", ext_modules=[CMakeExtension("bergamot/_bergamot")], From c153aa9c4c23e29d2fb9e1abd2a0742c5806e6bc Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 14:56:50 +0000 Subject: [PATCH 09/20] setup.py LICENSE.txt to LICENSE --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a6905b357..5b8f76c07 100644 --- a/setup.py +++ b/setup.py @@ -194,7 +194,7 @@ def run(self): cmdclass={"build_py": build_py, "build_ext": CMakeBuild}, zip_safe=False, extras_require={"test": ["pytest>=6.0"]}, - license_files=("LICENSE.txt",), + license_files=("LICENSE",), python_requires=">=3.6", packages=["bergamot"], package_dir={"bergamot": "bindings/python"}, From da699f327177d1663a8a6a8b8ac9369d02b566e2 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 14:57:16 +0000 Subject: [PATCH 10/20] MANIFEST.in: LICENSE.txt -> LICENSE --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index ab836ddbc..009fd4e31 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -include README.md LICENSE.txt +include README.md LICENSE From 
c51ae04b6264d9102635be909c2f7bc7fd83ab22 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 14:59:44 +0000 Subject: [PATCH 11/20] ServicePyAdapter::pivot - release GIL, redirect stdout/stderr --- bindings/python/bergamot.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/bindings/python/bergamot.cpp b/bindings/python/bergamot.cpp index 1f3061824..828d8bfc8 100644 --- a/bindings/python/bergamot.cpp +++ b/bindings/python/bergamot.cpp @@ -89,6 +89,16 @@ class ServicePyAdapter { std::vector pivot(Model first, Model second, std::vector &inputs, const ResponseOptions &options) { + py::scoped_ostream_redirect outstream( + std::cout, // std::ostream& + py::module_::import("sys").attr("stdout") // Python output + ); + py::scoped_ostream_redirect errstream( + std::cerr, // std::ostream& + py::module_::import("sys").attr("stderr") // Python output + ); + + py::call_guard gil_guard; // Prepare promises, save respective futures. Have callback's in async set // value to the promises. std::vector> futures; From 8fa9894d895405fbda6e79910459f03893570f83 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 15:02:22 +0000 Subject: [PATCH 12/20] Add bindings/python to adhere to *bergamot* cpp style --- .github/workflows/coding-styles.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coding-styles.yml b/.github/workflows/coding-styles.yml index 0bff2ec79..81bdf3361 100644 --- a/.github/workflows/coding-styles.yml +++ b/.github/workflows/coding-styles.yml @@ -26,7 +26,7 @@ jobs: - name: Run clang-format run: - python3 run-clang-format.py --style file -r src wasm + python3 run-clang-format.py --style file -r src wasm bindings/python - name: Prepare build, compilation database etc. From 933d64a83b77afe8140eec8c8f60ae0c1f55056c Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 15:06:24 +0000 Subject: [PATCH 13/20] __init__: Remove redundant try... 
except to bring code out --- bindings/python/__init__.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/bindings/python/__init__.py b/bindings/python/__init__.py index d56b767f2..9f4641150 100644 --- a/bindings/python/__init__.py +++ b/bindings/python/__init__.py @@ -1,23 +1,20 @@ import typing -try: - from ._bergamot import * # type: ignore - from .repository import Aggregator, TranslateLocallyLike +from ._bergamot import * # type: ignore +from .repository import Aggregator, TranslateLocallyLike - REPOSITORY = Aggregator( - [ - TranslateLocallyLike( - "browsermt", "https://translatelocally.com/models.json" - ), - TranslateLocallyLike( - "opus", "https://object.pouta.csc.fi/OPUS-MT-models/app/models.json" - ), - ] - ) - """ - REPOSITORY is a global object that aggregates multiple model-providers to - provide a (model-provider: str, model-code: str) based query mechanism to - get models. - """ -except ImportError: - raise +REPOSITORY = Aggregator( + [ + TranslateLocallyLike( + "browsermt", "https://translatelocally.com/models.json" + ), + TranslateLocallyLike( + "opus", "https://object.pouta.csc.fi/OPUS-MT-models/app/models.json" + ), + ] +) +""" +REPOSITORY is a global object that aggregates multiple model-providers to +provide a (model-provider: str, model-code: str) based query mechanism to +get models. 
+""" From ba83d68e936d20d244ea425cc4b6d6273789a407 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 15:10:33 +0000 Subject: [PATCH 14/20] clang-format bindings/python/bergamot.cpp --- bindings/python/bergamot.cpp | 91 ++++++++++++++---------------------- 1 file changed, 35 insertions(+), 56 deletions(-) diff --git a/bindings/python/bergamot.cpp b/bindings/python/bergamot.cpp index 828d8bfc8..97ae0614b 100644 --- a/bindings/python/bergamot.cpp +++ b/bindings/python/bergamot.cpp @@ -32,9 +32,8 @@ PYBIND11_MAKE_OPAQUE(std::vector); PYBIND11_MAKE_OPAQUE(Alignments); class ServicePyAdapter { -public: - ServicePyAdapter(const Service::Config &config) - : service_(make_service(config)) {} + public: + ServicePyAdapter(const Service::Config &config) : service_(make_service(config)) {} std::shared_ptr<_Model> modelFromConfig(const std::string &config) { auto parsedConfig = marian::bergamot::parseOptionsFromString(config); @@ -46,15 +45,12 @@ class ServicePyAdapter { return service_.createCompatibleModel(config); } - std::vector translate(Model model, std::vector &inputs, - const ResponseOptions &options) { - py::scoped_ostream_redirect outstream( - std::cout, // std::ostream& - py::module_::import("sys").attr("stdout") // Python output + std::vector translate(Model model, std::vector &inputs, const ResponseOptions &options) { + py::scoped_ostream_redirect outstream(std::cout, // std::ostream& + py::module_::import("sys").attr("stdout") // Python output ); - py::scoped_ostream_redirect errstream( - std::cerr, // std::ostream& - py::module_::import("sys").attr("stderr") // Python output + py::scoped_ostream_redirect errstream(std::cerr, // std::ostream& + py::module_::import("sys").attr("stderr") // Python output ); py::call_guard gil_guard; @@ -66,12 +62,9 @@ class ServicePyAdapter { promises.resize(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { - auto callback = [&promises, i](Response &&response) { - 
promises[i].set_value(std::move(response)); - }; + auto callback = [&promises, i](Response &&response) { promises[i].set_value(std::move(response)); }; - service_.translate(model, std::move(inputs[i]), std::move(callback), - options); + service_.translate(model, std::move(inputs[i]), std::move(callback), options); futures.push_back(std::move(promises[i].get_future())); } @@ -86,16 +79,13 @@ class ServicePyAdapter { return responses; } - std::vector pivot(Model first, Model second, - std::vector &inputs, + std::vector pivot(Model first, Model second, std::vector &inputs, const ResponseOptions &options) { - py::scoped_ostream_redirect outstream( - std::cout, // std::ostream& - py::module_::import("sys").attr("stdout") // Python output + py::scoped_ostream_redirect outstream(std::cout, // std::ostream& + py::module_::import("sys").attr("stdout") // Python output ); - py::scoped_ostream_redirect errstream( - std::cerr, // std::ostream& - py::module_::import("sys").attr("stderr") // Python output + py::scoped_ostream_redirect errstream(std::cerr, // std::ostream& + py::module_::import("sys").attr("stderr") // Python output ); py::call_guard gil_guard; @@ -106,12 +96,9 @@ class ServicePyAdapter { promises.resize(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { - auto callback = [&promises, i](Response &&response) { - promises[i].set_value(std::move(response)); - }; + auto callback = [&promises, i](Response &&response) { promises[i].set_value(std::move(response)); }; - service_.pivot(first, second, std::move(inputs[i]), std::move(callback), - options); + service_.pivot(first, second, std::move(inputs[i]), std::move(callback), options); futures.push_back(std::move(promises[i].get_future())); } @@ -126,15 +113,13 @@ class ServicePyAdapter { return responses; } -private /*functions*/: + private /*functions*/: static Service make_service(const Service::Config &config) { - py::scoped_ostream_redirect outstream( - std::cout, // std::ostream& - 
py::module_::import("sys").attr("stdout") // Python output + py::scoped_ostream_redirect outstream(std::cout, // std::ostream& + py::module_::import("sys").attr("stdout") // Python output ); - py::scoped_ostream_redirect errstream( - std::cerr, // std::ostream& - py::module_::import("sys").attr("stderr") // Python output + py::scoped_ostream_redirect errstream(std::cerr, // std::ostream& + py::module_::import("sys").attr("stderr") // Python output ); py::call_guard gil_guard; @@ -142,7 +127,7 @@ private /*functions*/: return Service(config); } -private /*data*/: + private /*data*/: Service service_; }; @@ -154,8 +139,7 @@ PYBIND11_MODULE(_bergamot, m) { .def_readonly("begin", &ByteRange::begin) .def_readonly("end", &ByteRange::end) .def("__repr__", [](const ByteRange &range) { - return "{" + std::to_string(range.begin) + ", " + - std::to_string(range.end) + "}"; + return "{" + std::to_string(range.begin) + ", " + std::to_string(range.end) + "}"; }); py::class_(m, "AnnotatedText") @@ -163,14 +147,12 @@ PYBIND11_MODULE(_bergamot, m) { .def("numWords", &AnnotatedText::numWords) .def("numSentences", &AnnotatedText::numSentences) .def("word", - [](const AnnotatedText &annotatedText, size_t sentenceIdx, - size_t wordIdx) -> std::string { + [](const AnnotatedText &annotatedText, size_t sentenceIdx, size_t wordIdx) -> std::string { auto view = annotatedText.word(sentenceIdx, wordIdx); return std::string(view.data(), view.size()); }) .def("sentence", - [](const AnnotatedText &annotatedText, - size_t sentenceIdx) -> std::string { + [](const AnnotatedText &annotatedText, size_t sentenceIdx) -> std::string { auto view = annotatedText.sentence(sentenceIdx); return std::string(view.data(), view.size()); }) @@ -193,14 +175,12 @@ PYBIND11_MODULE(_bergamot, m) { .export_values(); py::class_(m, "ResponseOptions") - .def(py::init<>([](bool qualityScores, bool alignment, bool HTML, - bool sentenceMappings, ConcatStrategy strategy) { - return ResponseOptions{qualityScores, alignment, 
HTML, - sentenceMappings, strategy}; - }), - py::arg("qualityScores") = true, py::arg("alignment") = false, - py::arg("HTML") = false, py::arg("sentenceMappings") = true, - py::arg("concatStrategy") = ConcatStrategy::FAITHFUL) + .def( + py::init<>([](bool qualityScores, bool alignment, bool HTML, bool sentenceMappings, ConcatStrategy strategy) { + return ResponseOptions{qualityScores, alignment, HTML, sentenceMappings, strategy}; + }), + py::arg("qualityScores") = true, py::arg("alignment") = false, py::arg("HTML") = false, + py::arg("sentenceMappings") = true, py::arg("concatStrategy") = ConcatStrategy::FAITHFUL) .def_readwrite("qualityScores", &ResponseOptions::qualityScores) .def_readwrite("HTML", &ResponseOptions::HTML) .def_readwrite("alignment", &ResponseOptions::alignment) @@ -215,8 +195,8 @@ PYBIND11_MODULE(_bergamot, m) { .def("pivot", &ServicePyAdapter::pivot); py::class_(m, "ServiceConfig") - .def(py::init<>([](size_t numWorkers, bool cacheEnabled, size_t cacheSize, - size_t cacheMutexBuckets, std::string logging) { + .def(py::init<>([](size_t numWorkers, bool cacheEnabled, size_t cacheSize, size_t cacheMutexBuckets, + std::string logging) { Service::Config config; config.numWorkers = numWorkers; config.cacheEnabled = cacheEnabled; @@ -225,9 +205,8 @@ PYBIND11_MODULE(_bergamot, m) { config.logger.level = logging; return config; }), - py::arg("numWorkers") = 1, py::arg("cacheEnabled") = false, - py::arg("cacheSize") = 20000, py::arg("cacheMutexBuckets") = 1, - py::arg("logLevel") = "off") + py::arg("numWorkers") = 1, py::arg("cacheEnabled") = false, py::arg("cacheSize") = 20000, + py::arg("cacheMutexBuckets") = 1, py::arg("logLevel") = "off") .def_readwrite("numWorkers", &Service::Config::numWorkers) .def_readwrite("cacheEnabled", &Service::Config::cacheEnabled) .def_readwrite("cacheSize", &Service::Config::cacheSize) From 8a078ffed644747529eb51311d7524d4ce6cb072 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 15:14:56 +0000 Subject: 
[PATCH 15/20] python files consistent styling - include doc/conf.py --- .github/workflows/python.yml | 4 ++-- bindings/python/__init__.py | 4 +--- doc/conf.py | 3 ++- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ef18b3ffa..53d23dfe3 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -275,8 +275,8 @@ jobs: python3 -m pip install black isort pytype - name: "Formatting checks: black, isort" run: | - python3 -m black --check bindings/python/ setup.py - python3 -m isort --profile black --diff --check bindings/python setup.py + python3 -m black --check bindings/python/ setup.py doc/conf.py + python3 -m isort --profile black --diff --check bindings/python setup.py doc/conf.py - name: "Static typing checks: pytype" run: |- python3 -m pytype bindings/python diff --git a/bindings/python/__init__.py b/bindings/python/__init__.py index 9f4641150..5855a4faf 100644 --- a/bindings/python/__init__.py +++ b/bindings/python/__init__.py @@ -5,9 +5,7 @@ REPOSITORY = Aggregator( [ - TranslateLocallyLike( - "browsermt", "https://translatelocally.com/models.json" - ), + TranslateLocallyLike("browsermt", "https://translatelocally.com/models.json"), TranslateLocallyLike( "opus", "https://object.pouta.csc.fi/OPUS-MT-models/app/models.json" ), diff --git a/doc/conf.py b/doc/conf.py index 7c4bfd50c..0a3f3ec41 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -6,12 +6,13 @@ # -- Path setup -------------------------------------------------------------- +import datetime + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# import os -import datetime import sys sys.path.insert(0, os.path.abspath(".")) From 55aca3d6d7e25717e80edc01c0c0e4dcaaa40599 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 15:19:34 +0000 Subject: [PATCH 16/20] personal github page testing artefact -> browser.mt/docs --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 0a3f3ec41..9776bd0c4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -78,7 +78,7 @@ html_css_files = ["css/custom.css"] # The base URL which points to the root of the HTML documentation -html_baseurl = "http://jerinphilip.github.io/bergamot-translator" +html_baseurl = "https://browser.mt/docs" # -- Extension configuration ------------------------------------------------- From 78f7d4ba913d1265fede93aade426fa32fde61f8 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 25 Jan 2022 15:30:33 +0000 Subject: [PATCH 17/20] copyright now claimed for 2022 --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 9776bd0c4..a86f4cbea 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- project = "Bergamot Translator" -copyright = "2021, Bergamot Translator Team" +copyright = "2021-2022 Bergamot Translator Team" author = "Bergamot Translator Team" # The full version, including alpha/beta/rc tags From 6445a45b2538adc2fa6e40d77d29498c7a962085 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Wed, 26 Jan 2022 15:59:25 +0000 Subject: [PATCH 18/20] option(COMPILE_PYTHON...) with some string documentation --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index bc698e5f5..f6e6af4f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,6 +131,7 @@ else() add_subdirectory(app) endif(COMPILE_WASM) +option(COMPILE_PYTHON "Compile python bindings. 
Intended to be activated with setup.py" OFF) if(COMPILE_PYTHON) add_subdirectory(bindings/python) endif(COMPILE_PYTHON) From 0ebabf923530288e35e1b7aae47d018df837380e Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Wed, 26 Jan 2022 19:40:53 +0000 Subject: [PATCH 19/20] Prepare setup.py to pick PYTHON_LOCAL_VERSION_IDENTIFIER from environment --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 5b8f76c07..85fa685ff 100644 --- a/setup.py +++ b/setup.py @@ -136,6 +136,9 @@ def build_extension(self, ext): version = None with open(os.path.join(here, "BERGAMOT_VERSION")) as f: version = f.read().strip() + suffix = os.environ.get("PYTHON_LOCAL_VERSION_IDENTIFIER", None) + if suffix is not None: + version = "{}+{}".format(version, suffix) class UploadCommand(Command): From f8e42a8689b6240f4b5c5efaa745675c4470cf09 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Wed, 26 Jan 2022 19:41:20 +0000 Subject: [PATCH 20/20] Set local identifier for non-tagged (semantic) versions --- .github/workflows/python.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 53d23dfe3..2924061df 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -98,6 +98,12 @@ jobs: run: |- ccache -s # Print current cache stats ccache -z # Zero cache entry + + - name: Inject local version identifier for non tag builds + if: ${{ !startsWith(github.ref, 'refs/tags/v') }} + run: |- + echo "PYTHON_LOCAL_VERSION_IDENTIFIER=$(git rev-parse --short HEAD)" >> $GITHUB_ENV + - name: setup.py run: |- python3 -m pip install wheel @@ -186,6 +192,11 @@ jobs: run: |- echo "MACOSX_DEPLOYMENT_TARGET=10.15" >> $GITHUB_ENV + - name: Inject local version identifier for non tag builds + if: ${{ !startsWith(github.ref, 'refs/tags/v') }} + run: |- + echo "PYTHON_LOCAL_VERSION_IDENTIFIER=$(git rev-parse --short HEAD)" >> $GITHUB_ENV + - name: setup.py run: |- python3 -m pip install 
--upgrade packaging wheel