diff --git a/examples/pdm_project/.gitignore b/examples/pdm_project/.gitignore new file mode 100644 index 0000000..4b3ab30 --- /dev/null +++ b/examples/pdm_project/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#!/usr/bin/env python3
"""Example launcher: package a PDM-managed project and run it with lxm3."""
from absl import app
from absl import flags

from lxm3 import xm
from lxm3 import xm_cluster
from lxm3.contrib import ucl

_LAUNCH_ON_CLUSTER = flags.DEFINE_boolean(
    "launch_on_cluster", False, "Launch on cluster"
)


def _select_executor(requirements):
    """Return the executor selected by the ``launch_on_cluster`` flag.

    Args:
      requirements: The ``xm_cluster.JobRequirements`` handed to the executor.

    Returns:
      ``ucl.UclGridEngine`` when the flag is set, ``xm_cluster.Local`` otherwise.
    """
    if not _LAUNCH_ON_CLUSTER.value:
        return xm_cluster.Local(requirements)

    # UCL-specific convenience executor: it takes the generic job requirements
    # and translates them into SGE-specific ones. Users outside UCL should
    # use `xm_cluster.GridEngine` directly instead.
    return ucl.UclGridEngine(requirements, walltime=10 * xm.Min)


def main(_):
    """Create an experiment, package the PDM project and add a single job."""
    with xm_cluster.create_experiment(experiment_title="basic") as experiment:
        requirements = xm_cluster.JobRequirements(ram=8 * xm.GB)
        executor = _select_executor(requirements)

        project = xm_cluster.PDMProject(
            # Path, relative to this launcher, of the project root
            # (i.e. the directory that contains pyproject.toml).
            path=".",
            base_image="python:3.10-slim",
            # The python module to run as the job's entrypoint. The upstream
            # example notes this is translated to `python3 -m <module>`,
            # here `python3 -m pdm_project.main` -- TODO confirm against the
            # archive builder.
            entrypoint=xm_cluster.ModuleName("pdm_project.main"),
        )

        packageable = xm.Packageable(project, executor_spec=executor.Spec())
        (executable,) = experiment.package([packageable])

        job = xm.Job(executable=executable, executor=executor)
        experiment.add(job)


if __name__ == "__main__":
    app.run(main)
0000000..e69de29 diff --git a/examples/python_container/.gitignore b/examples/python_container/.gitignore new file mode 100644 index 0000000..4b3ab30 --- /dev/null +++ b/examples/python_container/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#!/usr/bin/env python3
"""Example launcher: package a requirements.txt-based project and run it with lxm3."""
from absl import app
from absl import flags

from lxm3 import xm
from lxm3 import xm_cluster
from lxm3.contrib import ucl

_LAUNCH_ON_CLUSTER = flags.DEFINE_boolean(
    "launch_on_cluster", False, "Launch on cluster"
)


def _select_executor(requirements):
    """Return the executor selected by the ``launch_on_cluster`` flag.

    Args:
      requirements: The ``xm_cluster.JobRequirements`` handed to the executor.

    Returns:
      ``ucl.UclGridEngine`` when the flag is set, ``xm_cluster.Local`` otherwise.
    """
    if not _LAUNCH_ON_CLUSTER.value:
        return xm_cluster.Local(requirements)

    # UCL-specific convenience executor: it takes the generic job requirements
    # and translates them into SGE-specific ones. Users outside UCL should
    # use `xm_cluster.GridEngine` directly instead.
    return ucl.UclGridEngine(requirements, walltime=10 * xm.Min)


def main(_):
    """Create an experiment, package the python container and add a single job."""
    with xm_cluster.create_experiment(experiment_title="basic") as experiment:
        requirements = xm_cluster.JobRequirements(ram=8 * xm.GB)
        executor = _select_executor(requirements)

        container = xm_cluster.PythonContainer(
            # Path, relative to this launcher, of the python project root.
            path=".",
            base_image="python:3.10-slim",
            # The python module to run as the job's entrypoint.
            entrypoint=xm_cluster.ModuleName("py_package.main"),
        )

        packageable = xm.Packageable(container, executor_spec=executor.Spec())
        (executable,) = experiment.package([packageable])

        job = xm.Job(executable=executable, executor=executor)
        experiment.add(job)


if __name__ == "__main__":
    app.run(main)
# ---------------------------------------------------------------------------
# lxm3/xm_cluster/executable_specs.py
# ---------------------------------------------------------------------------


@attr.s(auto_attribs=True)
class PDMProject(job_blocks.ExecutableSpec):
    """A Python project whose dependencies are pinned by a PDM lock file.

    The packaging code for this spec builds a docker image from `base_image`
    using the lock file, `pyproject.toml` and `README.md` found under `path`,
    and ships the project source as a python archive.

    NOTE(review): `resources`, `extra_packages` and `pip_args` are declared
    here but are not read by the PDM packaging function in this change.
    """

    # Command list or module name used as the job entrypoint.
    entrypoint: Union[CommandList, ModuleName]
    # Docker base image used for both the builder and the final stage.
    base_image: str
    # Path to the PDM lock file; passed through
    # `utils.resolve_path_relative_to_launcher`.
    lock_file: str = attr.ib(
        converter=utils.resolve_path_relative_to_launcher, default="pdm.lock"
    )
    # Project root (the directory holding pyproject.toml); passed through
    # `utils.resolve_path_relative_to_launcher`.
    path: str = attr.ib(converter=utils.resolve_path_relative_to_launcher, default=".")
    resources: List[Fileset] = attr.ib(converter=list, default=attr.Factory(list))
    extra_packages: List[str] = attr.ib(converter=list, default=attr.Factory(list))
    pip_args: List[str] = attr.ib(
        converter=list, default=attr.Factory(lambda: ["--no-deps", "--no-compile"])
    )

    @property
    def name(self) -> str:
        """Name derived from `path` via `name_from_path`."""
        return name_from_path(self.path)


@attr.s(auto_attribs=True)
class PythonContainer(job_blocks.ExecutableSpec):
    """A Python project whose dependencies are listed in a requirements file.

    The packaging code for this spec builds a docker image from `base_image`
    with the requirements file pip-installed, and ships the project source
    found under `path` as a python archive.

    NOTE(review): `resources`, `extra_packages` and `pip_args` are declared
    here but are not read by the container packaging function in this change.
    """

    # Command list or module name used as the job entrypoint.
    entrypoint: Union[CommandList, ModuleName]
    # Docker base image the requirements are installed into.
    base_image: str
    # Path to the pip requirements file; passed through
    # `utils.resolve_path_relative_to_launcher`.
    requirements: str = attr.ib(
        converter=utils.resolve_path_relative_to_launcher, default="requirements.txt"
    )
    # Project root; passed through `utils.resolve_path_relative_to_launcher`.
    path: str = attr.ib(converter=utils.resolve_path_relative_to_launcher, default=".")
    resources: List[Fileset] = attr.ib(converter=list, default=attr.Factory(list))
    extra_packages: List[str] = attr.ib(converter=list, default=attr.Factory(list))
    pip_args: List[str] = attr.ib(
        converter=list, default=attr.Factory(lambda: ["--no-deps", "--no-compile"])
    )

    @property
    def name(self) -> str:
        """Name derived from `path` via `name_from_path`."""
        return name_from_path(self.path)


# ---------------------------------------------------------------------------
# lxm3/xm_cluster/packaging/__init__.py
# ---------------------------------------------------------------------------

# Two-stage build: the builder stage uses pdm to export the locked
# dependencies to a requirements file, the final stage pip-installs them.
_PDM_DOCKERFILE_TEMPLATE = """\
FROM {base_image} as builder
RUN pip install pdm
ADD pdm.lock /app/pdm.lock
ADD pyproject.toml /app/pyproject.toml
ADD README.md /app/README.md

WORKDIR /app
RUN pdm install && pdm export > /requirements.txt

FROM {base_image}
COPY --from=builder /requirements.txt /requirements.txt
RUN pip install --no-cache-dir -r /requirements.txt
"""


_PYTHON_CONTAINER_DOCKER_TEMPLATE = """\
FROM {base_image}
COPY requirements.txt /requirements.txt
RUN pip install --no-cache-dir -r /requirements.txt
"""


def _build_docker_image(tag, dockerfile, context_files):
    """Build a docker image from a throwaway build context.

    Args:
      tag: Name passed to `docker buildx build -t`.
      dockerfile: Full contents of the Dockerfile to write into the context.
      context_files: Mapping from the file name inside the build context to
        the path of the source file on disk.

    Raises:
      subprocess.CalledProcessError: If the docker build exits with a
        non-zero status.
      OSError: If one of the source files cannot be copied.
    """
    with tempfile.TemporaryDirectory() as context_dir:
        for name, source in context_files.items():
            shutil.copy(source, os.path.join(context_dir, name))
        with open(os.path.join(context_dir, "Dockerfile"), "w") as f:
            f.write(dockerfile)
        # check=True: a failed build must abort packaging instead of letting
        # the singularity conversion below run against a missing/stale image.
        subprocess.run(
            ["docker", "buildx", "build", "-t", tag, context_dir], check=True
        )


def _package_python_with_docker_image(py_package, packageable, artifact_store):
    """Deploy a python archive plus the singularity image built from docker.

    Expects a docker image tagged `<py_package.name>:latest` to exist in the
    local docker daemon.

    Args:
      py_package: `PythonPackage` spec describing the source to archive.
      packageable: The `xm.Packageable` whose args/env_vars are forwarded.
      artifact_store: Store used to deploy the archive and the container.

    Returns:
      A `cluster_executables.Command` whose `singularity_image` points at the
      deployed container.
    """
    singularity_image = "docker-daemon://{}:latest".format(py_package.name)

    # Convert the docker image once and reuse the resulting path for both the
    # deploy destination name and the actual deployment (the original comment
    # indicates this builder goes through a cache).
    local_image_path = singularity_builder.build_singularity_image_from_docker_daemon(
        singularity_image
    )

    # NOTE(review): this staging directory is not removed here.
    archive_dir = tempfile.mkdtemp(dir=_staging_directory())
    archive_name = archive_builder.create_python_archive(archive_dir, py_package)
    deployed_archive_path = artifact_store.deploy_resource_archive(
        os.path.join(archive_dir, archive_name)
    )

    deploy_container_path = artifact_store.singularity_image_path(
        os.path.basename(local_image_path)
    )
    artifact_store.deploy_singularity_container(local_image_path)

    executable = cluster_executables.Command(
        py_package.name,
        entrypoint_command=archive_builder.ENTRYPOINT_SCRIPT,
        resource_uri=deployed_archive_path,
        args=packageable.args,
        env_vars=packageable.env_vars,
    )
    executable.singularity_image = deploy_container_path
    return executable


# NOTE(review): the two functions below are registered in _PACKAGING_ROUTER
# (pattern_matching.match); presumably dispatch relies on the parameter
# annotations, so the signatures are kept exactly as they were.
def _package_pdm_project(
    pdm_project: cluster_executable_specs.PDMProject,
    packageable: xm.Packageable,
    artifact_store: artifacts.ArtifactStore,
):
    """Package a `PDMProject` into a singularity-backed command.

    Builds a docker image with the dependencies exported from the PDM lock
    file, then archives and deploys the project source together with the
    singularity image converted from that docker image.

    Raises:
      subprocess.CalledProcessError: If the docker build fails.
    """
    py_package = cluster_executable_specs.PythonPackage(
        pdm_project.entrypoint,
        path=pdm_project.path,
    )
    _build_docker_image(
        py_package.name,
        _PDM_DOCKERFILE_TEMPLATE.format(base_image=pdm_project.base_image),
        {
            "pdm.lock": pdm_project.lock_file,
            "pyproject.toml": os.path.join(pdm_project.path, "pyproject.toml"),
            "README.md": os.path.join(pdm_project.path, "README.md"),
        },
    )
    return _package_python_with_docker_image(py_package, packageable, artifact_store)


def _package_python_container(
    python_container: cluster_executable_specs.PythonContainer,
    packageable: xm.Packageable,
    artifact_store: artifacts.ArtifactStore,
):
    """Package a `PythonContainer` into a singularity-backed command.

    Builds a docker image with the requirements file pip-installed, then
    archives and deploys the project source together with the singularity
    image converted from that docker image.

    Raises:
      subprocess.CalledProcessError: If the docker build fails.
    """
    py_package = cluster_executable_specs.PythonPackage(
        python_container.entrypoint,
        path=python_container.path,
    )
    _build_docker_image(
        py_package.name,
        _PYTHON_CONTAINER_DOCKER_TEMPLATE.format(
            base_image=python_container.base_image
        ),
        {"requirements.txt": python_container.requirements},
    )
    return _package_python_with_docker_image(py_package, packageable, artifact_store)