diff --git a/.github/workflows/pyslurm.yml b/.github/workflows/pyslurm.yml index 86bcb9d8..d39e0e92 100644 --- a/.github/workflows/pyslurm.yml +++ b/.github/workflows/pyslurm.yml @@ -13,9 +13,9 @@ jobs: fail-fast: false steps: - name: Checkout repository code - uses: actions/checkout@v2 + uses: actions/checkout@main - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@main with: python-version: ${{ matrix.python-version }} - name: Install Dependencies diff --git a/.gitignore b/.gitignore index 6bea6e8b..ef44eef6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Ignore Build Directory # build/ +site # Ignore Docs Directory # #doc/ @@ -15,6 +16,9 @@ pyslurm/*.pxi~ pyslurm/*.pxd~ pyslurm/*.so pyslurm/*.c +pyslurm/**/*.c +pyslurm/**/*.so +pyslurm/**/__pycache__ # Ignore vim swap files *.swp @@ -24,6 +28,7 @@ tests/*.pyc # Ignore pycache (Python 3) */__pycache__ +*/**/__pycache__ # Ignore job output files *.out diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..7b83eb65 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,100 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased on the [23.2.x](https://github.com/PySlurm/pyslurm/tree/23.2.x) branch + +- New Classes to interact with Database Associations (WIP) + - `pyslurm.db.Association` + - `pyslurm.db.Associations` +- New Classes to interact with Database QoS (WIP) + - `pyslurm.db.QualityOfService` + - `pyslurm.db.QualitiesOfService` +- Add `truncate_time` option to `pyslurm.db.JobFilter`, which is the same as -T / + --truncate from sacct. +- Add new Attributes to `pyslurm.db.Jobs` that make gathering statistics for a + collection of Jobs more convenient. 
+- Fix `allocated_gres` attribute in the `pyslurm.Node` Class returning nothing. +- Add new `idle_memory` and `allocated_tres` attributes to `pyslurm.Node` class +- Fix Node State being displayed as `ALLOCATED` when it should actually be + `MIXED`. + +## [23.2.2](https://github.com/PySlurm/pyslurm/releases/tag/v23.2.2) - 2023-07-18 + +### Added + +- Ability to modify Database Jobs +- New classes to interact with the Partition API + - [pyslurm.Partition][] + - [pyslurm.Partitions][] +- New attributes for a Database Job: + - `extra` + - `failed_node` +- Added a new Base Class [MultiClusterMap][pyslurm.xcollections.MultiClusterMap] that some Collections inherit from. +- Added `to_json` function to all Collections + +### Fixed + +- Fixes a problem that prevented loading specific Jobs from the Database if + the following two conditions were met: + - no start/end time was specified + - the Job was older than a day + +### Changed + +- Improved Docs +- Renamed `JobSearchFilter` to [pyslurm.db.JobFilter][] +- Renamed `as_dict` function of some classes to `to_dict` + +## [23.2.1](https://github.com/PySlurm/pyslurm/releases/tag/v23.2.1) - 2023-05-18 + +### Added + +- Classes to interact with the Job and Submission API + - [pyslurm.Job](https://pyslurm.github.io/23.2/reference/job/#pyslurm.Job) + - [pyslurm.Jobs](https://pyslurm.github.io/23.2/reference/job/#pyslurm.Jobs) + - [pyslurm.JobStep](https://pyslurm.github.io/23.2/reference/jobstep/#pyslurm.JobStep) + - [pyslurm.JobSteps](https://pyslurm.github.io/23.2/reference/jobstep/#pyslurm.JobSteps) + - [pyslurm.JobSubmitDescription](https://pyslurm.github.io/23.2/reference/jobsubmitdescription/#pyslurm.JobSubmitDescription) +- Classes to interact with the Database Job API + - [pyslurm.db.Job](https://pyslurm.github.io/23.2/reference/db/job/#pyslurm.db.Job) + - [pyslurm.db.Jobs](https://pyslurm.github.io/23.2/reference/db/job/#pyslurm.db.Jobs) + - 
[pyslurm.db.JobStep](https://pyslurm.github.io/23.2/reference/db/jobstep/#pyslurm.db.JobStep) + - [pyslurm.db.JobFilter](https://pyslurm.github.io/23.2/reference/db/jobsearchfilter/#pyslurm.db.JobFilter) +- Classes to interact with the Node API + - [pyslurm.Node](https://pyslurm.github.io/23.2/reference/node/#pyslurm.Node) + - [pyslurm.Nodes](https://pyslurm.github.io/23.2/reference/node/#pyslurm.Nodes) +- Exceptions added: + - [pyslurm.PyslurmError](https://pyslurm.github.io/23.2/reference/exceptions/#pyslurm.PyslurmError) + - [pyslurm.RPCError](https://pyslurm.github.io/23.2/reference/exceptions/#pyslurm.RPCError) +- [Utility Functions](https://pyslurm.github.io/23.2/reference/utilities/#pyslurm.utils) + +### Changed + +- Completely overhaul the documentation, switch to mkdocs +- Rework the tests: Split them into unit and integration tests + +### Deprecated + +- Following classes are superseded by new ones: + - [pyslurm.job](https://pyslurm.github.io/23.2/reference/old/job/#pyslurm.job) + - [pyslurm.node](https://pyslurm.github.io/23.2/reference/old/node/#pyslurm.node) + - [pyslurm.jobstep](https://pyslurm.github.io/23.2/reference/old/jobstep/#pyslurm.jobstep) + - [pyslurm.slurmdb_jobs](https://pyslurm.github.io/23.2/reference/old/db/job/#pyslurm.slurmdb_jobs) + +## [23.2.0](https://github.com/PySlurm/pyslurm/releases/tag/v23.2.0) - 2023-04-07 + +### Added + +- Support for Slurm 23.02.x ([f506d63](https://github.com/PySlurm/pyslurm/commit/f506d63634a9b20bfe475534589300beff4a8843)) + +### Removed + +- `Elasticsearch` debug flag from `get_debug_flags` +- `launch_type`, `launch_params` and `slurmctld_plugstack` keys from the + `config.get()` output +- Some constants (mostly `ESLURM_*` constants that do not exist + anymore) diff --git a/MANIFEST.in b/MANIFEST.in index d33c1276..a8e09b2b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,8 @@ -include README.rst +include README.md include COPYING.txt graft examples graft tests -graft doc +graft docs graft 
pyslurm/slurm graft pyslurm/pydefines -include pyslurm/alps_cray.h +recursive-include pyslurm *.pyx *.px[di] *.h diff --git a/README.md b/README.md index 1bebb43e..1db47f45 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,23 @@ -# PySlurm +# PySlurm Logo -[![PySlurm](https://github.com/PySlurm/pyslurm/actions/workflows/pyslurm.yml/badge.svg?branch=main)](https://github.com/PySlurm/pyslurm/actions/workflows/pyslurm.yml) +pyslurm is the Python client library for the [Slurm Workload Manager](https://slurm.schedmd.com) -## Overview - -PySlurm is the Python client library for the [Slurm](https://slurm.schedmd.com) HPC Scheduler. - -## Prerequisites +## Requirements * [Slurm](https://slurm.schedmd.com) - Slurm shared library and header files * [Python](https://www.python.org) - >= 3.6 * [Cython](https://cython.org) - >= 0.29.30 but < 3.0 -This PySlurm branch is for the Slurm Major-Release 22.05 +This Version is for Slurm 23.02.x + +## Versioning + +In pyslurm, the versioning scheme follows the official Slurm versioning. The +first two numbers (`MAJOR.MINOR`) always correspond to Slurm's Major-Release, +for example `23.02`. +The last number (`MICRO`) is however not tied in any way to Slurm's `MICRO` +version, but is instead PySlurm's internal Patch-Level. For example, any +pyslurm 23.02.X version should work with any Slurm 23.02.X release. ## Installation @@ -21,24 +26,18 @@ By default, it is searched inside `/usr/include` for the Header files and in For Slurm installations in different locations, you will need to provide the corresponding paths to the necessary files. 
-You can specify these Paths with environment variables, for example: - -```shell -export SLURM_INCLUDE_DIR=/opt/slurm/22.05/include -export SLURM_LIB_DIR=/opt/slurm/22.05/lib -``` - -Then you can proceed to install PySlurm, for example: +You can specify those with environment variables (recommended), for example: ```shell -pip install pyslurm==22.05.0 +export SLURM_INCLUDE_DIR=/opt/slurm/23.02/include +export SLURM_LIB_DIR=/opt/slurm/23.02/lib ``` -Or by cloning the repository: +Then you can proceed to install pyslurm, for example by cloning the Repository: ```shell git clone https://github.com/PySlurm/pyslurm.git && cd pyslurm -python setup.py install +scripts/build.sh # Or simply with pip pip install . @@ -46,105 +45,14 @@ pip install . Also see `python setup.py --help` -## Release Versioning - -PySlurm's versioning scheme follows the official Slurm versioning. The first -two numbers (MAJOR.MINOR) always correspond to Slurms Major-Release, for example -`22.05`. The last number (MICRO) is however not tied in any way to Slurms -MICRO version. For example, any PySlurm 22.05.X version should work with any -Slurm 22.05.X release. - -## Documentation - -The API documentation is hosted at . - -To build the docs locally, use [Sphinx](http://www.sphinx-doc.org) to generate -the documentation from the reStructuredText based docstrings found in the -pyslurm module once it is built: - -```shell -cd doc -make clean -make html -``` - -## Testing - -PySlurm requires an installation of Slurm. - -### Using a Test Container - -To run tests locally without an existing Slurm cluster, `docker` and -`docker-compose` is required. - -Clone the project: - -```shell -git clone https://github.com/PySlurm/pyslurm.git -cd pyslurm -``` - -Start the Slurm container in the background: - -```shell -docker-compose up -d -``` - -The cluster takes a few seconds to start all the required Slurm services. 
Tail -the logs: - -```shell -docker-compose logs -f -``` - -When the cluster is ready, you will see the following log message: - -```text -Cluster is now available -``` - -Press CTRL+C to stop tailing the logs. Slurm is now running in a container in -detached mode. `docker-compose` also bind mounds the git directory inside the -container at `/pyslurm` so that the container has access to the test cases. - -Install test dependencies: - -```shell -pipenv sync --dev -``` - -Execute the tests inside the container: - -```shell -pipenv run pytest -sv scripts/run_tests_in_container.py -``` - -When testing is complete, stop the running Slurm container: - -```shell -docker-compose down -``` - -### Testing on an Existing Slurm Cluster - -You may also choose to clone the project and run tests on a node where Slurm is -already compiled and installed: - -```shell -git clone https://github.com/PySlurm/pyslurm.git -cd pyslurm -pip install . -./scripts/configure.sh -pipenv sync --dev -pipenv run pytest -sv -``` - ## Contributors -PySlurm is made by [contributors like +pyslurm is made by [contributors like you](https://github.com/PySlurm/pyslurm/graphs/contributors). -## Help +## Support + +Feel free to ask questions in the [GitHub +Discussions](https://github.com/orgs/PySlurm/discussions) -Ask questions on the [PySlurm Google -Group](https://groups.google.com/forum/#!forum/pyslurm) +Found a bug or you are missing a feature? Feel free to [open an Issue!](https://github.com/PySlurm/pyslurm/issues/new) diff --git a/UPGRADE_C_API.rst b/UPGRADE_C_API.rst index 6e7c84ae..3a47b2ed 100644 --- a/UPGRADE_C_API.rst +++ b/UPGRADE_C_API.rst @@ -51,13 +51,8 @@ Then, simply generate the header definitions like in this example: scripts/pyslurm_bindgen.py -D /directoy/with/slurm/headers > pyslurm/slurm/header.pxi The script outputs everything to `stdout`. Simply redirect the output to the file: :code:`pyslurm/slurm/header.pxi`. +The headers should now be fully translated. 
-Now, 99% of the work is done for generating the headers. For the 1% left, you now need to open the generated file, search for the two follwowing statements and comment them out: - -- `slurm_addr_t control_addr` -- `phtread_mutex_t lock` - -The compiler will otherwise complain that these are incomplete type definitions. Compiling, Updating, Testing ---------------------------- diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index eb9f6570..00000000 --- a/doc/Makefile +++ /dev/null @@ -1,130 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = build - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - -rm -rf 
$(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PySLURM.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PySLURM.qhc" - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/PySLURM" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PySLURM" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
- @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - make -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." diff --git a/doc/doctrees/environment.pickle b/doc/doctrees/environment.pickle deleted file mode 100644 index 3b329421..00000000 Binary files a/doc/doctrees/environment.pickle and /dev/null differ diff --git a/doc/doctrees/index.doctree b/doc/doctrees/index.doctree deleted file mode 100644 index 063112f0..00000000 Binary files a/doc/doctrees/index.doctree and /dev/null differ diff --git a/doc/source/conf.py b/doc/source/conf.py deleted file mode 100644 index 96f93a6e..00000000 --- a/doc/source/conf.py +++ /dev/null @@ -1,233 +0,0 @@ -# -*- coding: utf-:8 -*- -# -# PySlurm documentation build configuration file, created by -# sphinx-quickstart on Thu Sep 8 18:50:27 2011. -# -# This file is execfile()d with the current directory set to its containing dir. 
-# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os -from datetime import datetime - -from pyslurm import __version__ - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) -arch = os.uname()[-1] -pyver = str(sys.version_info.major) + "." + str(sys.version_info.minor) -sys.path.insert(0, os.path.abspath('../../build/lib.linux-' + arch + '-' + pyver)) - -# -- General configuration ----------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = '1.1' - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.todo', - 'sphinx.ext.intersphinx' -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'PySlurm' -copyright = '%s, PySlurm Developers' % datetime.now().year - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = __version__ -# The full version, including alpha/beta/rc tags. 
-release = version - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'default' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. 
-#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -html_logo = "_static/pyslurm-docs.png" - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). 
-#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'PySlurmDoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -# The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'PySlurm.tex', 'PySlurm Documentation', - 'Mark Roberts, Giovanni Torres', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Additional stuff for the LaTeX preamble. -#latex_preamble = '' - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output -------------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'pyslurm', 'PySlurm Documentation', - ['Mark Roberts, Giovanni Torres'], 1) -] - - -# Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'http://docs.python.org/': None} diff --git a/doc/source/index.rst b/doc/source/index.rst deleted file mode 100644 index b76e46c6..00000000 --- a/doc/source/index.rst +++ /dev/null @@ -1,118 +0,0 @@ -.. PySlurm documentation master file, created by - sphinx-quickstart on Thu Sep 8 18:50:27 2011. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -PySlurm: Slurm Interface to python -=================================== - -:Authors: Mark Roberts, Giovanni Torres -:Date: |today| -:Version: |version| - -This module provides a low-level Python wrapper around the Slurm C-API using Cython. - -Contents -======== - -.. toctree:: - :maxdepth: 2 - :numbered: - -Config Class -************ - -.. autoclass:: pyslurm.config - :members: - -FrontEnd Class -************** - -.. autoclass:: pyslurm.front_end - :members: - -HostList Class -************** - -.. autoclass:: pyslurm.hostlist - :members: - -Job Class -********* - -.. autoclass:: pyslurm.job - :members: - -JobStep Class -************* - -.. autoclass:: pyslurm.jobstep - :members: - -Node Class -********** - -.. autoclass:: pyslurm.node - :members: - -Partition Class -*************** - -.. autoclass:: pyslurm.partition - :members: - -Reservation Class -***************** - -.. autoclass:: pyslurm.reservation - :members: - -Slurmdb Events Class -******************** - -.. autoclass:: pyslurm.slurmdb_events - :members: - -Slurmdb Reservations Class -************************** - -.. autoclass:: pyslurm.slurmdb_reservations - :members: - -Slurmdb Clusters Class -********************** - -.. autoclass:: pyslurm.slurmdb_clusters - :members: - -Slurmdb Jobs Class -****************** - -.. autoclass:: pyslurm.slurmdb_jobs - :members: - -Statistics Class -**************** - -.. autoclass:: pyslurm.statistics - :members: - -Topology Class -************** - -.. autoclass:: pyslurm.topology - :members: - -Trigger Class -************* - -.. 
autoclass:: pyslurm.trigger - :members: - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - diff --git a/doc_requirements.txt b/doc_requirements.txt new file mode 100644 index 00000000..d7e92631 --- /dev/null +++ b/doc_requirements.txt @@ -0,0 +1,7 @@ +cython>=3.0.0b1 +wheel +setuptools +mkdocstrings[python] +mike +mkdocs-material +mkdocs-awesome-pages-plugin diff --git a/docker-compose-github.yml b/docker-compose-github.yml index 9087ab4b..8d460a18 100644 --- a/docker-compose-github.yml +++ b/docker-compose-github.yml @@ -2,7 +2,7 @@ version: "3.8" services: slurm: - image: giovtorres/docker-centos7-slurm:21.08.6 + image: ghcr.io/itkovian/rocky8-slurm:main hostname: slurmctl container_name: slurmctl stdin_open: true diff --git a/docker-compose.yml b/docker-compose.yml index 4061801c..2e1763ae 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: "3.8" services: slurm: - image: giovtorres/docker-centos7-slurm:21.08.0 + image: ghcr.io/itkovian/rocky8-slurm:main hostname: slurmctl container_name: slurmctl stdin_open: true diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 00000000..786b75d5 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1 @@ +--8<-- "CHANGELOG.md" diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..612c7a5e --- /dev/null +++ b/docs/index.md @@ -0,0 +1 @@ +--8<-- "README.md" diff --git a/docs/logo.png b/docs/logo.png new file mode 120000 index 00000000..a9c1a7c8 --- /dev/null +++ b/docs/logo.png @@ -0,0 +1 @@ +../logo.png \ No newline at end of file diff --git a/docs/reference/.pages b/docs/reference/.pages new file mode 100644 index 00000000..4c3f8599 --- /dev/null +++ b/docs/reference/.pages @@ -0,0 +1,3 @@ +title: API Reference +nav: + - ... 
diff --git a/docs/reference/config.md b/docs/reference/config.md new file mode 100644 index 00000000..a461aba5 --- /dev/null +++ b/docs/reference/config.md @@ -0,0 +1,9 @@ +--- +title: Config +--- + +!!! warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.config diff --git a/docs/reference/constants.md b/docs/reference/constants.md new file mode 100644 index 00000000..65301afb --- /dev/null +++ b/docs/reference/constants.md @@ -0,0 +1,5 @@ +--- +title: constants +--- + +::: pyslurm.constants diff --git a/docs/reference/db/.pages b/docs/reference/db/.pages new file mode 100644 index 00000000..b7263357 --- /dev/null +++ b/docs/reference/db/.pages @@ -0,0 +1,3 @@ +title: Database +nav: + - ... diff --git a/docs/reference/db/cluster.md b/docs/reference/db/cluster.md new file mode 100644 index 00000000..219988d5 --- /dev/null +++ b/docs/reference/db/cluster.md @@ -0,0 +1,9 @@ +--- +title: Cluster +--- + +!!! warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.slurmdb_clusters diff --git a/docs/reference/db/connection.md b/docs/reference/db/connection.md new file mode 100644 index 00000000..7d77639e --- /dev/null +++ b/docs/reference/db/connection.md @@ -0,0 +1,5 @@ +--- +title: Connection +--- + +::: pyslurm.db.Connection diff --git a/docs/reference/db/event.md b/docs/reference/db/event.md new file mode 100644 index 00000000..2816aaae --- /dev/null +++ b/docs/reference/db/event.md @@ -0,0 +1,9 @@ +--- +title: Event +--- + +!!! 
warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.slurmdb_events diff --git a/docs/reference/db/index.md b/docs/reference/db/index.md new file mode 100644 index 00000000..98f3b38e --- /dev/null +++ b/docs/reference/db/index.md @@ -0,0 +1,4 @@ +# pyslurm.db + +The `pyslurm.db` package contains all functionality to interact with the Slurm +Database Daemon (slurmdbd) diff --git a/docs/reference/db/job.md b/docs/reference/db/job.md new file mode 100644 index 00000000..e806cc1f --- /dev/null +++ b/docs/reference/db/job.md @@ -0,0 +1,10 @@ +--- +title: Job +--- + +!!! note + This supersedes the [pyslurm.slurmdb_jobs](../old/db/job.md) class, which + will be removed in a future release + +::: pyslurm.db.Job +::: pyslurm.db.Jobs diff --git a/docs/reference/db/jobfilter.md b/docs/reference/db/jobfilter.md new file mode 100644 index 00000000..523d7c9c --- /dev/null +++ b/docs/reference/db/jobfilter.md @@ -0,0 +1,5 @@ +--- +title: JobFilter +--- + +::: pyslurm.db.JobFilter diff --git a/docs/reference/db/jobstats.md b/docs/reference/db/jobstats.md new file mode 100644 index 00000000..1bc17d20 --- /dev/null +++ b/docs/reference/db/jobstats.md @@ -0,0 +1,5 @@ +--- +title: JobStatistics +--- + +::: pyslurm.db.JobStatistics diff --git a/docs/reference/db/jobstep.md b/docs/reference/db/jobstep.md new file mode 100644 index 00000000..a7bdc720 --- /dev/null +++ b/docs/reference/db/jobstep.md @@ -0,0 +1,6 @@ +--- +title: JobStep +--- + +::: pyslurm.db.JobStep +::: pyslurm.db.JobSteps diff --git a/docs/reference/db/reservation.md b/docs/reference/db/reservation.md new file mode 100644 index 00000000..c1f110a3 --- /dev/null +++ b/docs/reference/db/reservation.md @@ -0,0 +1,9 @@ +--- +title: Reservation +--- + +!!! 
warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.slurmdb_reservations diff --git a/docs/reference/exceptions.md b/docs/reference/exceptions.md new file mode 100644 index 00000000..4abc0047 --- /dev/null +++ b/docs/reference/exceptions.md @@ -0,0 +1,6 @@ +--- +title: Exceptions +--- + +::: pyslurm.PyslurmError +::: pyslurm.RPCError diff --git a/docs/reference/frontend.md b/docs/reference/frontend.md new file mode 100644 index 00000000..f56a7ecd --- /dev/null +++ b/docs/reference/frontend.md @@ -0,0 +1,9 @@ +--- +title: Frontend +--- + +!!! warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.front_end diff --git a/docs/reference/hostlist.md b/docs/reference/hostlist.md new file mode 100644 index 00000000..33f8485d --- /dev/null +++ b/docs/reference/hostlist.md @@ -0,0 +1,9 @@ +--- +title: Hostlist +--- + +!!! warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.hostlist diff --git a/docs/reference/index.md b/docs/reference/index.md new file mode 100644 index 00000000..af0ef05e --- /dev/null +++ b/docs/reference/index.md @@ -0,0 +1,51 @@ +# pyslurm + +The `pyslurm` package is a wrapper around the Slurm C-API + + +!!! warning + Please note that the `pyslurm` API is currently being completely reworked. + Reworked classes and functions that replace functionality of the old API + will be marked as such, with a link to the documentation of its old + counterpart. + + Old API functionality that is already replaced is marked as deprecated, + and will be removed at some point in the future. 
+ + The new reworked classes will be tested thoroughly before making them + available here, although it is of course still possible that some bugs may + appear here and there, which we will try to identify as best as possible! + + In addition, since these classes are pretty new, their interface + (precisely: attribute names, return types) should not yet be considered + 100% stable, and changes may be made in rare cases if it makes sense to do + so. + + If you are using the new-style API, we would like to know your feedback on + it! + + +## Reworked Classes + +* Job API + * [pyslurm.Job][] + * [pyslurm.JobStep][] + * [pyslurm.JobSteps][] + * [pyslurm.Jobs][] + * [pyslurm.JobSubmitDescription][] +* Database Job API + * [pyslurm.db.Job][] + * [pyslurm.db.JobStep][] + * [pyslurm.db.Jobs][] + * [pyslurm.db.JobFilter][] +* Node API + * [pyslurm.Node][] + * [pyslurm.Nodes][] +* Partition API + * [pyslurm.Partition][] + * [pyslurm.Partitions][] +* New Exceptions + * [pyslurm.RPCError][] + * [pyslurm.PyslurmError][] +* New utility functions + * [pyslurm.utils][] diff --git a/docs/reference/job.md b/docs/reference/job.md new file mode 100644 index 00000000..cb1c19eb --- /dev/null +++ b/docs/reference/job.md @@ -0,0 +1,10 @@ +--- +title: Job +--- + +!!! note + This supersedes the [pyslurm.job](old/job.md) class, which will be + removed in a future release + +::: pyslurm.Job +::: pyslurm.Jobs diff --git a/docs/reference/jobstep.md b/docs/reference/jobstep.md new file mode 100644 index 00000000..b7b3e2b9 --- /dev/null +++ b/docs/reference/jobstep.md @@ -0,0 +1,10 @@ +--- +title: JobStep +--- + +!!! 
note + This supersedes the [pyslurm.jobstep](old/jobstep.md) class, which + will be removed in a future release + +::: pyslurm.JobStep +::: pyslurm.JobSteps diff --git a/docs/reference/jobsubmitdescription.md b/docs/reference/jobsubmitdescription.md new file mode 100644 index 00000000..bf7eb6bd --- /dev/null +++ b/docs/reference/jobsubmitdescription.md @@ -0,0 +1,5 @@ +--- +title: JobSubmitDescription +--- + +::: pyslurm.JobSubmitDescription diff --git a/docs/reference/node.md b/docs/reference/node.md new file mode 100644 index 00000000..e8e8d619 --- /dev/null +++ b/docs/reference/node.md @@ -0,0 +1,10 @@ +--- +title: Node +--- + +!!! note + This supersedes the [pyslurm.node](old/node.md) class, which will be + removed in a future release + +::: pyslurm.Node +::: pyslurm.Nodes diff --git a/docs/reference/old/.pages b/docs/reference/old/.pages new file mode 100644 index 00000000..ae2a9b18 --- /dev/null +++ b/docs/reference/old/.pages @@ -0,0 +1,3 @@ +hide: true +nav: + - ... diff --git a/docs/reference/old/db/.pages b/docs/reference/old/db/.pages new file mode 100644 index 00000000..ae2a9b18 --- /dev/null +++ b/docs/reference/old/db/.pages @@ -0,0 +1,3 @@ +hide: true +nav: + - ... diff --git a/docs/reference/old/db/job.md b/docs/reference/old/db/job.md new file mode 100644 index 00000000..4046026c --- /dev/null +++ b/docs/reference/old/db/job.md @@ -0,0 +1,10 @@ +--- +title: Job +--- + +!!! warning + This is superseded by [pyslurm.db.Job](../../db/job.md) class and will + be removed in a future release + +::: pyslurm.slurmdb_jobs + handler: python diff --git a/docs/reference/old/job.md b/docs/reference/old/job.md new file mode 100644 index 00000000..fb8f694a --- /dev/null +++ b/docs/reference/old/job.md @@ -0,0 +1,10 @@ +--- +title: Job +--- + +!!! warning + This class is superseded by [pyslurm.Job](../job.md) and will be removed + in a future release. 
+ +::: pyslurm.job + handler: python diff --git a/docs/reference/old/jobstep.md b/docs/reference/old/jobstep.md new file mode 100644 index 00000000..2147e53b --- /dev/null +++ b/docs/reference/old/jobstep.md @@ -0,0 +1,10 @@ +--- +title: JobStep +--- + +!!! warning + This class is superseded by [pyslurm.JobStep](../jobstep.md) and will be + removed in a future release. + +::: pyslurm.jobstep + handler: python diff --git a/docs/reference/old/node.md b/docs/reference/old/node.md new file mode 100644 index 00000000..ec80324a --- /dev/null +++ b/docs/reference/old/node.md @@ -0,0 +1,10 @@ +--- +title: Node +--- + +!!! warning + This class is superseded by [pyslurm.Node](../node.md) and will be + removed in a future release. + +::: pyslurm.node + handler: python diff --git a/docs/reference/old/partition.md b/docs/reference/old/partition.md new file mode 100644 index 00000000..0e69bbfb --- /dev/null +++ b/docs/reference/old/partition.md @@ -0,0 +1,10 @@ +--- +title: Partition +--- + +!!! warning + This class is superseded by [pyslurm.Partition](../partition.md) and will + be removed in a future release. + +::: pyslurm.partition + handler: python diff --git a/docs/reference/partition.md b/docs/reference/partition.md new file mode 100644 index 00000000..9181e10f --- /dev/null +++ b/docs/reference/partition.md @@ -0,0 +1,10 @@ +--- +title: Partition +--- + +!!! note + This supersedes the [pyslurm.partition](old/partition.md) class, which + will be removed in a future release + +::: pyslurm.Partition +::: pyslurm.Partitions diff --git a/docs/reference/reservation.md b/docs/reference/reservation.md new file mode 100644 index 00000000..c5a3d891 --- /dev/null +++ b/docs/reference/reservation.md @@ -0,0 +1,9 @@ +--- +title: Reservation +--- + +!!! 
warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.reservation diff --git a/docs/reference/statistics.md b/docs/reference/statistics.md new file mode 100644 index 00000000..043461f8 --- /dev/null +++ b/docs/reference/statistics.md @@ -0,0 +1,9 @@ +--- +title: Statistics +--- + +!!! warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.statistics diff --git a/docs/reference/topology.md b/docs/reference/topology.md new file mode 100644 index 00000000..c6b8f9cc --- /dev/null +++ b/docs/reference/topology.md @@ -0,0 +1,9 @@ +--- +title: Topology +--- + +!!! warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.topology diff --git a/docs/reference/trigger.md b/docs/reference/trigger.md new file mode 100644 index 00000000..e6ea1e98 --- /dev/null +++ b/docs/reference/trigger.md @@ -0,0 +1,9 @@ +--- +title: Trigger +--- + +!!! 
warning + This API is currently being completely reworked, and is subject to be + removed in the future when a replacement is introduced + +::: pyslurm.trigger diff --git a/docs/reference/utilities.md b/docs/reference/utilities.md new file mode 100644 index 00000000..dbf4a09e --- /dev/null +++ b/docs/reference/utilities.md @@ -0,0 +1,19 @@ +--- +title: utils +--- + +::: pyslurm.utils + options: + members: [] + +::: pyslurm.utils.timestr_to_secs +::: pyslurm.utils.timestr_to_mins +::: pyslurm.utils.secs_to_timestr +::: pyslurm.utils.mins_to_timestr +::: pyslurm.utils.date_to_timestamp +::: pyslurm.utils.timestamp_to_date +::: pyslurm.utils.expand_range_str +::: pyslurm.utils.humanize +::: pyslurm.utils.dehumanize +::: pyslurm.utils.nodelist_from_range_str +::: pyslurm.utils.nodelist_to_range_str diff --git a/docs/reference/xcollections.md b/docs/reference/xcollections.md new file mode 100644 index 00000000..fd57ec09 --- /dev/null +++ b/docs/reference/xcollections.md @@ -0,0 +1,16 @@ +--- +title: xcollections +--- + +::: pyslurm.xcollections + handler: python + options: + members: + - MultiClusterMap + - BaseView + - KeysView + - MCKeysView + - ItemsView + - MCItemsView + - ValuesView + - ClustersView diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 00000000..eab891415 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,10 @@ +/* Maximum space for text block */ +.md-grid { + max-width: 75%; +} + +/* Indentation. 
*/ +div.doc-contents:not(.first) { + padding-left: 25px; + border-left: .05rem solid var(--md-typeset-table-color); +} diff --git a/examples/reservation_list.py b/examples/reservation_list.py index 57e8398f..87e3a337 100755 --- a/examples/reservation_list.py +++ b/examples/reservation_list.py @@ -36,7 +36,7 @@ def display(res_dict): if res_value["start_time"] <= now <= res_value["end_time"]: resv_state = "ACTIVE" - print(f"\t{'state':-20s} : {resv_state}\n") + print(f"\t{'state':<20s} : {resv_state}\n") if __name__ == "__main__": diff --git a/doc/source/_static/pyslurm-docs.png b/logo.png similarity index 100% rename from doc/source/_static/pyslurm-docs.png rename to logo.png diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..9d81f66b --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,78 @@ +site_dir: "site" +site_name: "pyslurm" +site_url: "https://pyslurm.github.io" +repo_url: "https://github.com/PySlurm/pyslurm" +repo_name: "PySlurm/pyslurm" +copyright: Copyright © 2023 PySlurm Developers + +nav: + - Home: + - Home: index.md + - Changelog: changelog.md + - ... 
+ +theme: + name: "material" + logo: logo.png + features: + - navigation.sections + - navigation.indexes + - navigation.tabs + # - navigation.tabs.sticky + - navigation.top + - content.code.copy + - toc.follow + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + accent: purple + primary: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + accent: lime + primary: black + toggle: + icon: material/brightness-4 + name: Switch to light mode + font: + text: Roboto + code: Roboto Mono + +plugins: + - search + - awesome-pages + - autorefs + - mike + - mkdocstrings: + handlers: + python: + import: + - https://docs.python.org/3/objects.inv + options: + filters: ["!^_"] + docstring_style: google + show_signature: true + show_root_heading: true + show_symbol_type_toc: true + show_symbol_type_heading: true + +markdown_extensions: + - admonition + - pymdownx.snippets: + check_paths: true + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.superfences + - pymdownx.details + +extra: + version: + provider: mike +extra_css: + - stylesheets/extra.css diff --git a/pyproject.toml b/pyproject.toml index f6dd995d..1cb07d7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,6 @@ requires = [ "setuptools==59.2.0", "wheel==0.37.0", - "Cython>=0.29.30,<3.0", + "Cython>=0.29.30", ] diff --git a/pyslurm.spec b/pyslurm.spec index faccae97..0dadbbd0 100644 --- a/pyslurm.spec +++ b/pyslurm.spec @@ -1,6 +1,6 @@ # SPEC file taken from https://centos.pkgs.org/7/puias-computational-x86_64/python-pyslurm-17.02-1.gitab899c6.sdl7.x86_64.rpm.html Name: pyslurm -Version: 22.05.1 +Version: 23.2.2 %global rel 1 Release: %{rel}%{gittag}%{?dist}.ug Summary: PySlurm: Slurm Interface for Python @@ -24,7 +24,11 @@ Source: %{pyslurm_source_dir}.tar.gz BuildRequires: python3-Cython, python36-devel %global usepython 
python3 %global usepython_sitearch %{python3_sitearch} -%else +%elif 0%{?rhel} == 9 +BuildRequires: python3-Cython, python3-devel +%global usepython python3 +%global usepython_sitearch %{python3_sitearch} +%else BuildRequires: Cython, python-devel %global usepython python %global usepython_sitearch %{python_sitearch} diff --git a/pyslurm/__init__.py b/pyslurm/__init__.py index 177bf7cb..4d3a5101 100644 --- a/pyslurm/__init__.py +++ b/pyslurm/__init__.py @@ -1,10 +1,6 @@ -""" -PySlurm: Python bindings for the Slurm C API -============================================ - -PySlurm is a Cython wrapper around Slurm C API functions. +"""pyslurm package -More information about Slurm can be found at https://slurm.schedmd.com. +pyslurm is a wrapper around the Slurm C-API. """ from __future__ import absolute_import @@ -13,9 +9,33 @@ sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL) +# Initialize slurm api +from pyslurm.api import slurm_init, slurm_fini +slurm_init() + from .pyslurm import * from .__version__ import __version__ +from pyslurm import db +from pyslurm import utils +from pyslurm import constants + +from pyslurm.core.job import ( + Job, + Jobs, + JobStep, + JobSteps, + JobSubmitDescription, +) +from pyslurm.core.node import Node, Nodes +from pyslurm.core.partition import Partition, Partitions +from pyslurm.core import error +from pyslurm.core.error import ( + PyslurmError, + RPCError, +) +from pyslurm.core import slurmctld + def version(): return __version__ diff --git a/pyslurm/__version__.py b/pyslurm/__version__.py index 81a9fc37..96711e72 100644 --- a/pyslurm/__version__.py +++ b/pyslurm/__version__.py @@ -1 +1 @@ -__version__ = "22.5.0" +__version__ = "23.2.2" diff --git a/pyslurm/api.pxd b/pyslurm/api.pxd new file mode 100644 index 00000000..b780fdba --- /dev/null +++ b/pyslurm/api.pxd @@ -0,0 +1,26 @@ +######################################################################### +# api.pxd - pyslurm core API
+######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.utils cimport cstr diff --git a/pyslurm/api.pyx b/pyslurm/api.pyx new file mode 100644 index 00000000..0f34fedb --- /dev/null +++ b/pyslurm/api.pyx @@ -0,0 +1,43 @@ +######################################################################### +# api.pyx - pyslurm core API +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + + +def slurm_init(config_path=None): + """Initialize the Slurm API. + + This function must be called first before certain RPC functions can be + executed. slurm_init is automatically called when the pyslurm module is + loaded. + + Args: + config_path (str, optional): + An absolute path to the slurm config file to use. The default is + None, so libslurm will automatically detect its config. + """ + slurm.slurm_init(cstr.from_unicode(config_path)) + + +def slurm_fini(): + """Clean up data structures previously allocated through slurm_init.""" + slurm.slurm_fini() diff --git a/pyslurm/constants.py b/pyslurm/constants.py new file mode 100644 index 00000000..0b3c11b0 --- /dev/null +++ b/pyslurm/constants.py @@ -0,0 +1,32 @@ +######################################################################### +# constants.py - pyslurm constants used throughout the project +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# Copyright (C) 2023 PySlurm Developers +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +"""pyslurm common Constants""" + + +UNLIMITED = "UNLIMITED" +""" +Represents an infinite/unlimited value. This is sometimes returned for +specific attributes as a value to indicate that there is no restriction for it. +""" diff --git a/pyslurm/core/__init__.pxd b/pyslurm/core/__init__.pxd new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/__init__.py b/pyslurm/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/error.pyx b/pyslurm/core/error.pyx new file mode 100644 index 00000000..a5924d08 --- /dev/null +++ b/pyslurm/core/error.pyx @@ -0,0 +1,111 @@ +######################################################################### +# error.pyx - pyslurm error utilities +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.utils cimport cstr +from pyslurm cimport slurm +from pyslurm.slurm cimport slurm_get_errno + + +def slurm_strerror(errno): + """Convert a slurm errno to a string. + + Args: + errno (int): + The error number for which the string representation should be + returned. + + Returns: + (str): String representation of errno. + """ + return cstr.to_unicode(slurm.slurm_strerror(errno)) + + +def slurm_errno(): + """Get the current slurm errno. + + Returns: + (int): Current slurm errno + """ + return slurm_get_errno() + + +def get_last_slurm_error(): + """Get the last slurm error that occurred as a tuple of errno and string. + + Returns: + errno (int): The error number + errno_str (str): The errno converted to a String + """ + errno = slurm_errno() + + if errno == slurm.SLURM_SUCCESS: + return (errno, 'Success') + else: + return (errno, slurm_strerror(errno)) + + +class PyslurmError(Exception): + """The base Exception for all Pyslurm errors.""" + + +class RPCError(PyslurmError): + """Exception for handling Slurm RPC errors. + + Args: + errno (int): + A slurm error number returned by RPC functions. Default is + SLURM_ERROR, which will get the last slurm error automatically. + msg (str): + An optional, custom error description. If this is set, the errno + will not be translated to its string representation. + + Examples: + >>> import pyslurm + ... try: + ... myjob = pyslurm.Job.load(9999) + ... except pyslurm.RPCError as e: + ... 
print("Loading the Job failed") + """ + def __init__(self, errno=slurm.SLURM_ERROR, msg=None): + self.msg = msg + self.errno = errno + + if not msg: + if errno == slurm.SLURM_ERROR: + self.errno, self.msg = get_last_slurm_error() + else: + self.msg = slurm_strerror(errno) + + super().__init__(self.msg) + + +def verify_rpc(errno): + """Verify a Slurm RPC + + Args: + errno (int): + A Slurm error value + """ + if errno != slurm.SLURM_SUCCESS: + raise RPCError(errno) diff --git a/pyslurm/core/job/__init__.pxd b/pyslurm/core/job/__init__.pxd new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/job/__init__.py b/pyslurm/core/job/__init__.py new file mode 100644 index 00000000..ccc396e2 --- /dev/null +++ b/pyslurm/core/job/__init__.py @@ -0,0 +1,3 @@ +from .job import Job, Jobs +from .step import JobStep, JobSteps +from .submission import JobSubmitDescription diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd new file mode 100644 index 00000000..616db4c9 --- /dev/null +++ b/pyslurm/core/job/job.pxd @@ -0,0 +1,382 @@ +######################################################################### +# job.pyx - interface to retrieve slurm job informations +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.utils cimport cstr, ctime +from pyslurm.utils.uint cimport * +from pyslurm.utils.ctime cimport time_t +from libc.string cimport memcpy, memset +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, int64_t +from libc.stdlib cimport free +from pyslurm.core.job.submission cimport JobSubmitDescription +from pyslurm.core.job.step cimport JobSteps, JobStep +from pyslurm.xcollections cimport MultiClusterMap +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + working_cluster_rec, + slurm_msg_t, + job_id_msg_t, + slurm_msg_t_init, + return_code_msg_t, + slurm_send_recv_controller_msg, + slurm_free_return_code_msg, + slurm_free_job_info_msg, + slurm_free_job_info, + slurm_load_job, + slurm_load_jobs, + job_info_msg_t, + slurm_job_info_t, + slurm_job_state_string, + slurm_job_reason_string, + slurm_job_share_string, + slurm_job_batch_script, + slurm_get_job_stdin, + slurm_get_job_stdout, + slurm_get_job_stderr, + slurm_signal_job, + slurm_kill_job, + slurm_resume, + slurm_suspend, + slurm_update_job, + slurm_notify_job, + slurm_requeue, + xfree, + try_xmalloc, +) + + +cdef class Jobs(MultiClusterMap): + """A [`Multi Cluster`][pyslurm.xcollections.MultiClusterMap] collection of [pyslurm.Job][] objects. + + Args: + jobs (Union[list[int], dict[int, pyslurm.Job], str], optional=None): + Jobs to initialize this collection with. + frozen (bool, optional=False): + Control whether this collection is `frozen` when reloading Job + information. + + Attributes: + memory (int): + Total amount of memory for all Jobs in this collection, in + Mebibytes + cpus (int): + Total amount of cpus for all Jobs in this collection. 
+ ntasks (int): + Total amount of tasks for all Jobs in this collection. + cpu_time (int): + Total amount of CPU-Time used by all the Jobs in the collection. + This is the result of multiplying the run_time with the amount of + cpus for each job. + frozen (bool): + If this is set to True and the `reload()` method is called, then + *ONLY* Jobs that already exist in this collection will be + reloaded. New Jobs that are discovered will not be added to this + collection, but old Jobs which have already been purged from the + Slurm controller's memory will not be removed either. + The default is False, so old jobs will be removed, and new Jobs + will be added - basically the same behaviour as doing Jobs.load(). + """ + cdef: + job_info_msg_t *info + slurm_job_info_t tmp_info + + cdef public: + frozen + + +cdef class Job: + """A Slurm Job. + + All attributes in this class are read-only. + + Args: + job_id (int): + An Integer representing a Job-ID. + + Attributes: + steps (JobSteps): + Steps this Job has. + Before you can access the Steps data for a Job, you have to call + the `reload()` method of a Job instance or the `load_steps()` + method of a Jobs collection. + name (str): + Name of the Job + id (int): + Unique ID of the Job. + association_id (int): + ID of the Association this Job runs with. + account (str): + Name of the Account this Job runs with. + user_id (int): + UID of the User who submitted the Job. + user_name (str): + Name of the User who submitted the Job. + group_id (int): + GID of the Group this Job runs under. + group_name (str): + Name of the Group this Job runs under. + priority (int): + Priority of the Job. + nice (int): + Nice Value of the Job. + qos (str): + QOS Name of the Job. + min_cpus_per_node (int): + Minimum Amount of CPUs per Node the Job requested. + state (str): + State this Job is currently in. + state_reason (str): + A Reason explaining why the Job is in its current state. 
+ is_requeueable (bool): + Whether the Job is requeuable or not. + requeue_count (int): + Amount of times the Job has been requeued. + is_batch_job (bool): + Whether the Job is a batch job or not. + node_reboot_required (bool): + Whether the Job requires the Nodes to be rebooted first. + dependencies (dict): + Dependencies the Job has to other Jobs. + time_limit (int): + Time-Limit, in minutes, for this Job. + time_limit_min (int): + Minimum Time-Limit in minutes for this Job. + submit_time (int): + Time the Job was submitted, as unix timestamp. + eligible_time (int): + Time the Job is eligible to start, as unix timestamp. + accrue_time (int): + Job accrue time, as unix timestamp + start_time (int): + Time this Job has started execution, as unix timestamp. + resize_time (int): + Time the job was resized, as unix timestamp. + deadline (int): + Time when a pending Job will be cancelled, as unix timestamp. + preempt_eligible_time (int): + Time the Job is eligible for preemption, as unix timestamp. + preempt_time (int): + Time the Job was signaled for preemption, as unix timestamp. + suspend_time (int): + Last Time the Job was suspended, as unix timestamp. + last_sched_evaluation_time (int): + Last time evaluated for Scheduling, as unix timestamp. + pre_suspension_time (int): + Amount of seconds the Job ran prior to suspension, as unix + timestamp + mcs_label (str): + MCS Label for the Job + partition (str): + Name of the Partition the Job runs in. + submit_host (str): + Name of the Host this Job was submitted from. + batch_host (str): + Name of the Host where the Batch-Script is executed. + num_nodes (int): + Amount of Nodes the Job has requested or allocated. + max_nodes (int): + Maximum amount of Nodes the Job has requested. + allocated_nodes (str): + Nodes the Job is currently using. + This is only valid when the Job is running. If the Job is pending, + it will always return None. + required_nodes (str): + Nodes the Job is explicitly requiring to run on. 
+ excluded_nodes (str): + Nodes that are explicitly excluded for execution. + scheduled_nodes (str): + Nodes the Job is scheduled on by the slurm controller. + derived_exit_code (int): + The derived exit code for the Job. + derived_exit_code_signal (int): + Signal for the derived exit code. + exit_code (int): + Code with which the Job has exited. + exit_code_signal (int): + The signal which has led to the exit code of the Job. + batch_constraints (list): + Features that node(s) should have for the batch script. + Controls where it is possible to execute the batch-script of the + job. Also see 'constraints' + federation_origin (str): + Federation Origin + federation_siblings_active (int): + Federation siblings active + federation_siblings_viable (int): + Federation siblings viable + cpus (int): + Total amount of CPUs the Job is using. + If the Job is still pending, this will be the amount of requested + CPUs. + cpus_per_task (int): + Number of CPUs per Task used. + cpus_per_gpu (int): + Number of CPUs per GPU used. + boards_per_node (int): + Number of boards per Node. + sockets_per_board (int): + Number of sockets per board. + sockets_per_node (int): + Number of sockets per node. + cores_per_socket (int): + Number of cores per socket. + threads_per_core (int): + Number of threads per core. + ntasks (int): + Number of parallel processes. + ntasks_per_node (int): + Number of parallel processes per node. + ntasks_per_board (int): + Number of parallel processes per board. + ntasks_per_socket (int): + Number of parallel processes per socket. + ntasks_per_core (int): + Number of parallel processes per core. + ntasks_per_gpu (int): + Number of parallel processes per GPU. + delay_boot_time (int): + https://slurm.schedmd.com/sbatch.html#OPT_delay-boot, in minutes + constraints (list): + A list of features the Job requires nodes to have. + In contrast, the 'batch_constraints' option only focuses on the + initial batch-script placement. 
This option however means features + to restrict the list of nodes a job is able to execute on in + general beyond the initial batch-script. + cluster (str): + Name of the cluster the job is executing on. + cluster_constraints (list): + A List of features that a cluster should have. + reservation (str): + Name of the reservation this Job uses. + resource_sharing (str): + Mode controlling how a job shares resources with others. + requires_contiguous_nodes (bool): + Whether the Job has allocated a set of contiguous nodes. + licenses (list): + List of licenses the Job needs. + network (str): + Network specification for the Job. + command (str): + The command that is executed for the Job. + working_directory (str): + Path to the working directory for this Job. + admin_comment (str): + An arbitrary comment set by an administrator for the Job. + system_comment (str): + An arbitrary comment set by the slurmctld for the Job. + container (str): + The container this Job uses. + comment (str): + An arbitrary comment set for the Job. + standard_input (str): + The path to the file for the standard input stream. + standard_output (str): + The path to the log file for the standard output stream. + standard_error (str): + The path to the log file for the standard error stream. + required_switches (int): + Number of switches required. + max_wait_time_switches (int): + Amount of seconds to wait for the switches. + burst_buffer (str): + Burst buffer specification + burst_buffer_state (str): + Burst buffer state + cpu_frequency_min (Union[str, int]): + Minimum CPU-Frequency requested. + cpu_frequency_max (Union[str, int]): + Maximum CPU-Frequency requested. + cpu_frequency_governor (Union[str, int]): + CPU-Frequency Governor requested. + wckey (str): + Name of the WCKey this Job uses. + mail_user (list): + Users that should receive Mails for this Job. + mail_types (list): + Mail Flags specified by the User. + heterogeneous_id (int): + Heterogeneous job id. 
+ heterogeneous_offset (int): + Heterogeneous job offset. + temporary_disk_per_node (int): + Temporary disk space in Mebibytes available per Node. + array_id (int): + The master Array-Job ID. + array_tasks_parallel (int): + Max number of array tasks allowed to run simultaneously. + array_task_id (int): + Array Task ID of this Job if it is an Array-Job. + array_tasks_waiting (str): + Array Tasks that are still waiting. + end_time (int): + Time at which this Job will end, as unix timestamp. + run_time (int): + Amount of seconds the Job has been running. + cores_reserved_for_system (int): + Amount of cores reserved for System use only. + threads_reserved_for_system (int): + Amount of Threads reserved for System use only. + memory (int): + Total Amount of Memory this Job has, in Mebibytes + memory_per_cpu (int): + Amount of Memory per CPU this Job has, in Mebibytes + memory_per_node (int): + Amount of Memory per Node this Job has, in Mebibytes + memory_per_gpu (int): + Amount of Memory per GPU this Job has, in Mebibytes + gres_per_node (dict): + Generic Resources (e.g. GPU) this Job is using per Node. + profile_types (list): + Types for which detailed accounting data is collected. + gres_binding (str): + Binding Enforcement of a Generic Resource (e.g. GPU). + kill_on_invalid_dependency (bool): + Whether the Job should be killed on an invalid dependency. + spreads_over_nodes (bool): + Whether the Job should be spread over as many nodes as possible. + power_options (list): + Options set for Power Management. + is_cronjob (bool): + Whether this Job is a cronjob. + cronjob_time (str): + The time specification for the Cronjob. + cpu_time (int): + Amount of CPU-Time used by the Job so far. + This is the result of multiplying the run_time with the amount of + cpus. 
+ """ + cdef: + slurm_job_info_t *ptr + dict passwd + dict groups + + cdef public JobSteps steps + + cdef _calc_run_time(self) + + @staticmethod + cdef _swap_data(Job dst, Job src) + + @staticmethod + cdef Job from_ptr(slurm_job_info_t *in_ptr) + diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx new file mode 100644 index 00000000..e8e65d58 --- /dev/null +++ b/pyslurm/core/job/job.pyx @@ -0,0 +1,1308 @@ +######################################################################### +# job.pyx - interface to retrieve slurm job informations +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# Note: Some functions in this File are annotated with additional Copyright +# notices. These functions are: +# +# - get_batch_script +# - get_resource_layout_per_node +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS +import re +from typing import Union +from pyslurm.utils import cstr, ctime +from pyslurm.utils.uint import * +from pyslurm.core.job.util import * +from pyslurm.settings import LOCAL_CLUSTER +from pyslurm import xcollections +from pyslurm.core.error import ( + RPCError, + verify_rpc, + slurm_errno, +) +from pyslurm.utils.ctime import _raw_time +from pyslurm.utils.helpers import ( + uid_to_name, + gid_to_name, + signal_to_num, + _getgrall_to_dict, + _getpwall_to_dict, + instance_to_dict, + _get_exit_code, +) + + +cdef class Jobs(MultiClusterMap): + + def __cinit__(self): + self.info = NULL + + def __dealloc__(self): + slurm_free_job_info_msg(self.info) + + def __init__(self, jobs=None, frozen=False): + self.frozen = frozen + super().__init__(data=jobs, + typ="Jobs", + val_type=Job, + id_attr=Job.id, + key_type=int) + + @staticmethod + def load(preload_passwd_info=False, frozen=False): + """Retrieve all Jobs from the Slurm controller + + Args: + preload_passwd_info (bool, optional): + Decides whether to query passwd and groups information from + the system. + Could potentially speed up access to attributes of the Job + where a UID/GID is translated to a name. If True, the + information will fetched and stored in each of the Job + instances. + frozen (bool, optional): + Decide whether this collection of Jobs should be frozen. + + Returns: + (pyslurm.Jobs): A collection of Job objects. + + Raises: + RPCError: When getting all the Jobs from the slurmctld failed. 
+ + Examples: + >>> import pyslurm + >>> jobs = pyslurm.Jobs.load() + >>> print(jobs) + pyslurm.Jobs({1: pyslurm.Job(1), 2: pyslurm.Job(2)}) + >>> print(jobs[1]) + pyslurm.Job(1) + """ + cdef: + dict passwd = {} + dict groups = {} + Jobs jobs = Jobs(frozen=frozen) + int flags = slurm.SHOW_ALL | slurm.SHOW_DETAIL + Job job + + verify_rpc(slurm_load_jobs(0, &jobs.info, flags)) + + # If requested, preload the passwd and groups database to potentially + # speedup lookups for an attribute in a Job, e.g. user_name or + # group_name. + if preload_passwd_info: + passwd = _getpwall_to_dict() + groups = _getgrall_to_dict() + + # zero-out a dummy job_step_info_t + memset(&jobs.tmp_info, 0, sizeof(slurm_job_info_t)) + + # Put each job pointer into its own "Job" instance. + for cnt in range(jobs.info.record_count): + job = Job.from_ptr(&jobs.info.job_array[cnt]) + + # Prevent double free if xmalloc fails mid-loop and a MemoryError + # is raised by replacing it with a zeroed-out slurm_job_info_t. + jobs.info.job_array[cnt] = jobs.tmp_info + + if preload_passwd_info: + job.passwd = passwd + job.groups = groups + + cluster = job.cluster + if cluster not in jobs.data: + jobs.data[cluster] = {} + jobs[cluster][job.id] = job + + # We have extracted all pointers + jobs.info.record_count = 0 + jobs.frozen = frozen + return jobs + + def reload(self): + """Reload the information for jobs in a collection. + + Returns: + (pyslurm.Partitions): Returns self + + Raises: + RPCError: When getting the Jobs from the slurmctld failed. + """ + return xcollections.multi_reload(self, frozen=self.frozen) + + def load_steps(self): + """Load all Job steps for this collection of Jobs. + + This function fills in the `steps` attribute for all Jobs in the + collection. + + !!! note + + Pending Jobs will be ignored, since they don't have any Steps yet. + + Raises: + RPCError: When retrieving the Job information for all the Steps + failed. 
+ """ + cdef dict steps = JobSteps.load_all() + for job in self.values(): + jid = job.id + if jid in steps: + job.steps = steps[jid] + + @property + def memory(self): + return xcollections.sum_property(self, Job.memory) + + @property + def cpus(self): + return xcollections.sum_property(self, Job.cpus) + + @property + def ntasks(self): + return xcollections.sum_property(self, Job.ntasks) + + @property + def cpu_time(self): + return xcollections.sum_property(self, Job.cpu_time) + + +cdef class Job: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, job_id): + self._alloc_impl() + self.ptr.job_id = job_id + self.passwd = {} + self.groups = {} + cstr.fmalloc(&self.ptr.cluster, LOCAL_CLUSTER) + self.steps = JobSteps() + + def _alloc_impl(self): + if not self.ptr: + self.ptr = try_xmalloc(sizeof(slurm_job_info_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for job_info_t") + + def _dealloc_impl(self): + slurm_free_job_info(self.ptr) + self.ptr = NULL + + def __dealloc__(self): + self._dealloc_impl() + + def __repr__(self): + return f'pyslurm.{self.__class__.__name__}({self.id})' + + @staticmethod + def load(job_id): + """Load information for a specific Job. + + Implements the slurm_load_job RPC. + + !!! note + + If the Job is not pending, the related Job steps will also be + loaded. + + Args: + job_id (int): + An Integer representing a Job-ID. + + Returns: + (pyslurm.Job): Returns a new Job instance + + Raises: + RPCError: If requesting the Job information from the slurmctld was + not successful. + + Examples: + >>> import pyslurm + >>> job = pyslurm.Job.load(9999) + """ + cdef: + job_info_msg_t *info = NULL + Job wrap = None + + try: + verify_rpc(slurm_load_job(&info, job_id, slurm.SHOW_DETAIL)) + + if info and info.record_count: + wrap = Job.from_ptr(&info.job_array[0]) + info.record_count = 0 + + if not slurm.IS_JOB_PENDING(wrap.ptr): + # Just ignore if the steps couldn't be loaded here. 
+ try: + wrap.steps = JobSteps._load_single(wrap) + except RPCError: + pass + else: + raise RPCError(msg=f"RPC was successful but got no job data, " + "this should never happen") + except Exception as e: + raise e + finally: + slurm_free_job_info_msg(info) + + return wrap + + @staticmethod + cdef Job from_ptr(slurm_job_info_t *in_ptr): + cdef Job wrap = Job.__new__(Job) + wrap._alloc_impl() + wrap.passwd = {} + wrap.groups = {} + wrap.steps = JobSteps.__new__(JobSteps) + memcpy(wrap.ptr, in_ptr, sizeof(slurm_job_info_t)) + return wrap + + cdef _swap_data(Job dst, Job src): + cdef slurm_job_info_t *tmp = NULL + if dst.ptr and src.ptr: + tmp = dst.ptr + dst.ptr = src.ptr + src.ptr = tmp + + def as_dict(self): + return self.to_dict() + + def to_dict(self): + """Job information formatted as a dictionary. + + Returns: + (dict): Job information as dict + """ + cdef dict out = instance_to_dict(self) + out["steps"] = self.steps.to_dict() + return out + + def send_signal(self, signal, steps="children", hurry=False): + """Send a signal to a running Job. + + Implements the slurm_signal_job RPC. + + Args: + signal (Union[str, int]): + Any valid signal which will be sent to the Job. Can be either + a str like `SIGUSR1`, or simply an [int][]. + steps (str): + Selects which steps should be signaled. Valid values for this + are: `all`, `batch` and `children`. The default value is + `children`, where all steps except the batch-step will be + signaled. + The value `batch` in contrast means, that only the batch-step + will be signaled. With `all` every step is signaled. + hurry (bool): + If True, no burst buffer data will be staged out. The default + value is False. + + Raises: + RPCError: When sending the signal was not successful. 
+ + Examples: + Specifying the signal as a string: + + >>> from pyslurm import Job + >>> Job(9999).send_signal("SIGUSR1") + + or passing in a numeric signal: + + >>> Job(9999).send_signal(9) + """ + cdef uint16_t flags = 0 + + if steps.casefold() == "all": + flags |= slurm.KILL_FULL_JOB + elif steps.casefold() == "batch": + flags |= slurm.KILL_JOB_BATCH + + if hurry: + flags |= slurm.KILL_HURRY + + sig = signal_to_num(signal) + slurm_kill_job(self.id, sig, flags) + + # Ignore errors when the Job is already done or when SIGKILL was + # specified and the job id is already purged from slurmctlds memory. + errno = slurm_errno() + if (errno == slurm.ESLURM_ALREADY_DONE + or errno == slurm.ESLURM_INVALID_JOB_ID and sig == 9): + pass + else: + verify_rpc(errno) + + def cancel(self): + """Cancel a Job. + + Implements the slurm_kill_job RPC. + + Raises: + RPCError: When cancelling the Job was not successful. + + Examples: + >>> import pyslurm + >>> pyslurm.Job(9999).cancel() + """ + self.send_signal(9) + + def suspend(self): + """Suspend a running Job. + + Implements the slurm_suspend RPC. + + Raises: + RPCError: When suspending the Job was not successful. + + Examples: + >>> import pyslurm + >>> pyslurm.Job(9999).suspend() + """ + # TODO: Report as a misbehaviour to schedmd that slurm_suspend is not + # correctly returning error code when it cannot find the job in + # _slurm_rpc_suspend it should return ESLURM_INVALID_JOB_ID, but + # returns -1 + # https://github.com/SchedMD/slurm/blob/master/src/slurmctld/proc_req.c#L4693 + verify_rpc(slurm_suspend(self.id)) + + def unsuspend(self): + """Unsuspend a currently suspended Job. + + Implements the slurm_resume RPC. + + Raises: + RPCError: When unsuspending the Job was not successful. + + Examples: + >>> import pyslurm + >>> pyslurm.Job(9999).unsuspend() + """ + # Same problem as described in suspend() + verify_rpc(slurm_resume(self.id)) + + def modify(self, JobSubmitDescription changes): + """Modify a Job. 
+ + Implements the slurm_update_job RPC. + + Args: + changes (pyslurm.JobSubmitDescription): + A JobSubmitDescription object which contains all the + modifications that should be done on the Job. + + Raises: + RPCError: When updating the Job was not successful. + + Examples: + >>> import pyslurm + >>> + >>> # Setting the new time-limit to 20 days + >>> changes = pyslurm.JobSubmitDescription(time_limit="20-00:00:00") + >>> pyslurm.Job(9999).modify(changes) + """ + changes._create_job_submit_desc(is_update=True) + changes.ptr.job_id = self.id + verify_rpc(slurm_update_job(changes.ptr)) + + def hold(self, mode=None): + """Hold a currently pending Job, preventing it from being scheduled. + + Args: + mode (str): + Determines in which mode the Job should be held. Possible + values are `user` or `admin`. By default, the Job is held in + `admin` mode, meaning only an Administrator will be able to + release the Job again. If you specify the mode as `user`, the + User will also be able to release the job. + + Raises: + RPCError: When holding the Job was not successful. + + Examples: + >>> import pyslurm + >>> + >>> # Holding a Job (in "admin" mode by default) + >>> pyslurm.Job(9999).hold() + >>> + >>> # Holding a Job in "user" mode + >>> pyslurm.Job(9999).hold(mode="user") + """ + cdef JobSubmitDescription job_sub = JobSubmitDescription(priority=0) + + if mode and mode.casefold() == "user": + job_sub.ptr.alloc_sid = slurm.ALLOC_SID_USER_HOLD + + self.modify(job_sub) + + def release(self): + """Release a currently held Job, allowing it to be scheduled again. + + Raises: + RPCError: When releasing a held Job was not successful. + + Examples: + >>> import pyslurm + >>> pyslurm.Job(9999).release() + """ + self.modify(JobSubmitDescription(priority=slurm.INFINITE)) + + def requeue(self, hold=False): + """Requeue a currently running Job. + + Implements the slurm_requeue RPC. + + Args: + hold (bool, optional): + Controls whether the Job should be put in a held state or not. 
+ Default for this is `False`, so it will not be held. + + Raises: + RPCError: When requeing the Job was not successful. + + Examples: + >>> import pyslurm + >>> + >>> # Requeing a Job while allowing it to be + >>> # scheduled again immediately + >>> pyslurm.Job(9999).requeue() + >>> + >>> # Requeing a Job while putting it in a held state + >>> pyslurm.Job(9999).requeue(hold=True) + """ + cdef uint32_t flags = 0 + + if hold: + flags |= slurm.JOB_REQUEUE_HOLD + + verify_rpc(slurm_requeue(self.id, flags)) + + def notify(self, msg): + """Sends a message to the Jobs stdout. + + Implements the slurm_notify_job RPC. + + Args: + msg (str): + The message that should be sent. + + Raises: + RPCError: When sending the message to the Job was not successful. + + Examples: + >>> import pyslurm + >>> pyslurm.Job(9999).notify("Hello Friends!") + """ + verify_rpc(slurm_notify_job(self.id, msg)) + + def get_batch_script(self): + """Return the content of the script for a Batch-Job. + + Returns: + (str): The content of the batch script. + + Raises: + RPCError: When retrieving the Batch-Script for the Job was not + successful. + + Examples: + >>> import pyslurm + >>> script = pyslurm.Job(9999).get_batch_script() + """ + # The code for this function was taken from here: + # https://github.com/SchedMD/slurm/blob/7162f15af8deaf02c3bbf940d59e818cdeb5c69d/src/api/job_info.c#L1319 + # and therefore reimplements the slurm_job_batch_script API call, with + # slight modifications (e.g. Cython syntax). Otherwise we would have + # to parse the FILE* ptr we get from it back into a char* which + # would be a bit silly. + # + # The copyright notices for the file this function was taken from is + # included below: + # + # Portions Copyright (C) 2010-2017 SchedMD LLC . + # Copyright (C) 2002-2007 The Regents of the University of California. + # Copyright (C) 2008-2010 Lawrence Livermore National Security. + # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
+ # Written by Morris Jette et. al. + # CODE-OCEC-09-009. All rights reserved. + # + # Slurm is licensed under the GNU General Public License. For the full + # text of Slurm's License, please see here: + # pyslurm/slurm/SLURM_LICENSE + # + # Please, as mentioned above, also have a look at Slurm's DISCLAIMER + # under pyslurm/slurm/SLURM_DISCLAIMER + cdef: + job_id_msg_t msg + slurm_msg_t req + slurm_msg_t resp + int rc = slurm.SLURM_SUCCESS + str script = None + + slurm_msg_t_init(&req) + slurm_msg_t_init(&resp) + + memset(&msg, 0, sizeof(msg)) + msg.job_id = self.id + req.msg_type = slurm.REQUEST_BATCH_SCRIPT + req.data = &msg + + rc = slurm_send_recv_controller_msg(&req, &resp, working_cluster_rec) + verify_rpc(rc) + + if resp.msg_type == slurm.RESPONSE_BATCH_SCRIPT: + script = cstr.to_unicode(resp.data) + xfree(resp.data) + elif resp.msg_type == slurm.RESPONSE_SLURM_RC: + rc = ( resp.data).return_code + slurm_free_return_code_msg(resp.data) + verify_rpc(rc) + else: + verify_rpc(slurm.SLURM_ERROR) + + return script + + @property + def name(self): + return cstr.to_unicode(self.ptr.name) + + @property + def id(self): + return self.ptr.job_id + + @property + def association_id(self): + return u32_parse(self.ptr.assoc_id) + + @property + def account(self): + return cstr.to_unicode(self.ptr.account) + + @property + def user_id(self): + return u32_parse(self.ptr.user_id, zero_is_noval=False) + + @property + def user_name(self): + return uid_to_name(self.ptr.user_id, lookup=self.passwd) + + @property + def group_id(self): + return u32_parse(self.ptr.group_id, zero_is_noval=False) + + @property + def group_name(self): + return gid_to_name(self.ptr.group_id, lookup=self.groups) + + @property + def priority(self): + return u32_parse(self.ptr.priority, zero_is_noval=False) + + @property + def nice(self): + if self.ptr.nice == slurm.NO_VAL: + return None + + return self.ptr.nice - slurm.NICE_OFFSET + + @property + def qos(self): + return cstr.to_unicode(self.ptr.qos) + + 
@property + def min_cpus_per_node(self): + return u32_parse(self.ptr.pn_min_cpus) + + # I don't think this is used anymore - there is no way in sbatch to ask + # for a "maximum cpu" count, so it will always be empty. + # @property + # def max_cpus(self): + # """Maximum Amount of CPUs the Job requested.""" + # return u32_parse(self.ptr.max_cpus) + + @property + def state(self): + return cstr.to_unicode(slurm_job_state_string(self.ptr.job_state)) + + @property + def state_reason(self): + if self.ptr.state_desc: + return cstr.to_unicode(self.ptr.state_desc) + + return cstr.to_unicode(slurm_job_reason_string(self.ptr.state_reason)) + + @property + def is_requeueable(self): + return u16_parse_bool(self.ptr.requeue) + + @property + def requeue_count(self): + return u16_parse(self.ptr.restart_cnt, on_noval=0) + + @property + def is_batch_job(self): + return u16_parse_bool(self.ptr.batch_flag) + + @property + def requires_node_reboot(self): + return u8_parse_bool(self.ptr.reboot) + + @property + def dependencies(self): + return dependency_str_to_dict(cstr.to_unicode(self.ptr.dependency)) + + @property + def time_limit(self): + return _raw_time(self.ptr.time_limit) + + @property + def time_limit_min(self): + return _raw_time(self.ptr.time_min) + + @property + def submit_time(self): + return _raw_time(self.ptr.submit_time) + + @property + def eligible_time(self): + return _raw_time(self.ptr.eligible_time) + + @property + def accrue_time(self): + return _raw_time(self.ptr.accrue_time) + + @property + def start_time(self): + return _raw_time(self.ptr.start_time) + + @property + def resize_time(self): + return _raw_time(self.ptr.resize_time) + + @property + def deadline(self): + return _raw_time(self.ptr.deadline) + + @property + def preempt_eligible_time(self): + return _raw_time(self.ptr.preemptable_time) + + @property + def preempt_time(self): + return _raw_time(self.ptr.preempt_time) + + @property + def suspend_time(self): + return _raw_time(self.ptr.suspend_time) + + 
@property + def last_sched_evaluation_time(self): + return _raw_time(self.ptr.last_sched_eval) + + @property + def pre_suspension_time(self): + return _raw_time(self.ptr.pre_sus_time) + + @property + def mcs_label(self): + return cstr.to_unicode(self.ptr.mcs_label) + + @property + def partition(self): + return cstr.to_unicode(self.ptr.partition) + + @property + def submit_host(self): + return cstr.to_unicode(self.ptr.alloc_node) + + @property + def batch_host(self): + return cstr.to_unicode(self.ptr.batch_host) + + @property + def num_nodes(self): + return u32_parse(self.ptr.num_nodes) + + @property + def max_nodes(self): + return u32_parse(self.ptr.max_nodes) + + @property + def allocated_nodes(self): + return cstr.to_unicode(self.ptr.nodes) + + @property + def required_nodes(self): + return cstr.to_unicode(self.ptr.req_nodes) + + @property + def excluded_nodes(self): + return cstr.to_unicode(self.ptr.exc_nodes) + + @property + def scheduled_nodes(self): + return cstr.to_unicode(self.ptr.sched_nodes) + + @property + def derived_exit_code(self): + ec, _ = _get_exit_code(self.ptr.derived_ec) + return ec + + @property + def derived_exit_code_signal(self): + _, sig = _get_exit_code(self.ptr.derived_ec) + return sig + + @property + def exit_code(self): + ec, _ = _get_exit_code(self.ptr.exit_code) + return ec + + @property + def exit_code_signal(self): + _, sig = _get_exit_code(self.ptr.exit_code) + return sig + + @property + def batch_constraints(self): + return cstr.to_list(self.ptr.batch_features) + + @property + def federation_origin(self): + return cstr.to_unicode(self.ptr.fed_origin_str) + + @property + def federation_siblings_active(self): + return u64_parse(self.ptr.fed_siblings_active) + + @property + def federation_siblings_viable(self): + return u64_parse(self.ptr.fed_siblings_viable) + + @property + def cpus(self): + return u32_parse(self.ptr.num_cpus, on_noval=1) + + @property + def cpus_per_task(self): + if self.ptr.cpus_per_tres: + return None + + return 
u16_parse(self.ptr.cpus_per_task, on_noval=1)

    @property
    def cpus_per_gpu(self):
        # Only meaningful when a per-TRES cpu request exists and no explicit
        # cpus_per_task value was set.
        if (not self.ptr.cpus_per_tres
                or self.ptr.cpus_per_task != slurm.NO_VAL16):
            return None

        # TODO: Make a function that, given a GRES type, safely extracts its
        # value from the string.
        val = cstr.to_unicode(self.ptr.cpus_per_tres).split(":")[2]

        # The extracted field is text; convert it to an integer before handing
        # it to the uint parser, like memory_per_gpu does for mem_per_tres.
        return u16_parse(int(val))

    @property
    def boards_per_node(self):
        return u16_parse(self.ptr.boards_per_node)

    @property
    def sockets_per_board(self):
        return u16_parse(self.ptr.sockets_per_board)

    @property
    def sockets_per_node(self):
        return u16_parse(self.ptr.sockets_per_node)

    @property
    def cores_per_socket(self):
        return u16_parse(self.ptr.cores_per_socket)

    @property
    def threads_per_core(self):
        return u16_parse(self.ptr.threads_per_core)

    @property
    def ntasks(self):
        return u32_parse(self.ptr.num_tasks, on_noval=1)

    @property
    def ntasks_per_node(self):
        return u16_parse(self.ptr.ntasks_per_node)

    @property
    def ntasks_per_board(self):
        return u16_parse(self.ptr.ntasks_per_board)

    @property
    def ntasks_per_socket(self):
        return u16_parse(self.ptr.ntasks_per_socket)

    @property
    def ntasks_per_core(self):
        return u16_parse(self.ptr.ntasks_per_core)

    @property
    def ntasks_per_gpu(self):
        return u16_parse(self.ptr.ntasks_per_tres)

    @property
    def delay_boot_time(self):
        return _raw_time(self.ptr.delay_boot)

    @property
    def constraints(self):
        return cstr.to_list(self.ptr.features)

    @property
    def cluster(self):
        return cstr.to_unicode(self.ptr.cluster)

    @property
    def cluster_constraints(self):
        return cstr.to_list(self.ptr.cluster_features)

    @property
    def reservation(self):
        return cstr.to_unicode(self.ptr.resv_name)

    @property
    def resource_sharing(self):
        return cstr.to_unicode(slurm_job_share_string(self.ptr.shared))

    @property
    def requires_contiguous_nodes(self):
        return u16_parse_bool(self.ptr.contiguous)

    @property
    def
licenses(self): + return cstr.to_list(self.ptr.licenses) + + @property + def network(self): + return cstr.to_unicode(self.ptr.network) + + @property + def command(self): + return cstr.to_unicode(self.ptr.command) + + @property + def working_directory(self): + return cstr.to_unicode(self.ptr.work_dir) + + @property + def admin_comment(self): + return cstr.to_unicode(self.ptr.admin_comment) + + @property + def system_comment(self): + return cstr.to_unicode(self.ptr.system_comment) + + @property + def container(self): + return cstr.to_unicode(self.ptr.container) + + @property + def comment(self): + return cstr.to_unicode(self.ptr.comment) + + @property + def standard_input(self): + cdef char tmp[1024] + slurm_get_job_stdin(tmp, sizeof(tmp), self.ptr) + return cstr.to_unicode(tmp) + + @property + def standard_output(self): + cdef char tmp[1024] + slurm_get_job_stdout(tmp, sizeof(tmp), self.ptr) + return cstr.to_unicode(tmp) + + @property + def standard_error(self): + cdef char tmp[1024] + slurm_get_job_stderr(tmp, sizeof(tmp), self.ptr) + return cstr.to_unicode(tmp) + + @property + def required_switches(self): + return u32_parse(self.ptr.req_switch) + + @property + def max_wait_time_switches(self): + return _raw_time(self.ptr.wait4switch) + + @property + def burst_buffer(self): + return cstr.to_unicode(self.ptr.burst_buffer) + + @property + def burst_buffer_state(self): + return cstr.to_unicode(self.ptr.burst_buffer_state) + + @property + def cpu_frequency_min(self): + return cpu_freq_int_to_str(self.ptr.cpu_freq_min) + + @property + def cpu_frequency_max(self): + return cpu_freq_int_to_str(self.ptr.cpu_freq_max) + + @property + def cpu_frequency_governor(self): + return cpu_freq_int_to_str(self.ptr.cpu_freq_gov) + + # @property + # def tres_bindings(self): + # """str: ?""" + # # TODO: Find out how it works + # return cstr.to_unicode(self.ptr.tres_bind) + + # @property + # def tres_frequency(self): + # """?""" + # # TODO: Find out how it works + # return 
cstr.to_unicode(self.ptr.tres_freq) + + @property + def wckey(self): + return cstr.to_unicode(self.ptr.wckey) + + @property + def mail_user(self): + return cstr.to_list(self.ptr.mail_user) + + @property + def mail_types(self): + return mail_type_int_to_list(self.ptr.mail_type) + + @property + def heterogeneous_id(self): + return u32_parse(self.ptr.het_job_id, noval=0) + + @property + def heterogeneous_offset(self): + return u32_parse(self.ptr.het_job_offset, noval=0) + + # @property + # def hetjob_component_ids(self): + # """str: ?""" + # # TODO: Find out how to parse it in a more proper way? + # return cstr.to_unicode(self.ptr.het_job_id_set) + + @property + def temporary_disk_per_node(self): + return u32_parse(self.ptr.pn_min_tmp_disk) + + @property + def array_id(self): + return u32_parse(self.ptr.array_job_id) + + @property + def array_tasks_parallel(self): + return u32_parse(self.ptr.array_max_tasks) + + @property + def array_task_id(self): + return u32_parse(self.ptr.array_task_id) + + @property + def array_tasks_waiting(self): + task_str = cstr.to_unicode(self.ptr.array_task_str) + if not task_str: + return None + + if "%" in task_str: + # We don't want this % character and everything after it + # in here, so remove it. 
+ task_str = task_str[:task_str.rindex("%")] + + return task_str + + @property + def end_time(self): + return _raw_time(self.ptr.end_time) + + # https://github.com/SchedMD/slurm/blob/d525b6872a106d32916b33a8738f12510ec7cf04/src/api/job_info.c#L480 + cdef _calc_run_time(self): + cdef time_t rtime + cdef time_t etime + + if slurm.IS_JOB_PENDING(self.ptr) or not self.ptr.start_time: + return 0 + elif slurm.IS_JOB_SUSPENDED(self.ptr): + return self.pre_suspension_time + else: + if slurm.IS_JOB_RUNNING(self.ptr) or self.ptr.end_time == 0: + etime = ctime.time(NULL) + else: + etime = self.ptr.end_time + + if self.ptr.suspend_time: + rtime = ctime.difftime(etime, self.ptr.suspend_time) + rtime += self.ptr.pre_sus_time + else: + rtime = ctime.difftime(etime, self.ptr.start_time) + + return u64_parse(rtime, on_noval=0) + + @property + def run_time(self): + return self._calc_run_time() + + @property + def cores_reserved_for_system(self): + if self.ptr.core_spec != slurm.NO_VAL16: + if not self.ptr.core_spec & slurm.CORE_SPEC_THREAD: + return self.ptr.core_spec + + @property + def threads_reserved_for_system(self): + if self.ptr.core_spec != slurm.NO_VAL16: + if self.ptr.core_spec & slurm.CORE_SPEC_THREAD: + return self.ptr.core_spec & (~slurm.CORE_SPEC_THREAD) + + @property + def memory(self): + mem_cpu = self.memory_per_cpu + if mem_cpu is not None: + total_cpus = self.cpus + if total_cpus is not None: + mem_cpu *= total_cpus + return mem_cpu + + mem_node = self.memory_per_node + if mem_node is not None: + num_nodes = self.num_nodes + if num_nodes is not None: + mem_node *= num_nodes + return mem_node + + + # TODO + # mem_gpu = self.memory_per_gpu + # if mem_gpu is not None: + # num_nodes = self.min_nodes + # if num_nodes is not None: + # mem_node *= num_nodes + # return mem_cpu + + return None + + @property + def memory_per_cpu(self): + if self.ptr.pn_min_memory != slurm.NO_VAL64: + if self.ptr.pn_min_memory & slurm.MEM_PER_CPU: + mem = self.ptr.pn_min_memory & 
(~slurm.MEM_PER_CPU) + return u64_parse(mem) + else: + return None + + @property + def memory_per_node(self): + if self.ptr.pn_min_memory != slurm.NO_VAL64: + if not self.ptr.pn_min_memory & slurm.MEM_PER_CPU: + return u64_parse(self.ptr.pn_min_memory) + else: + return None + + @property + def memory_per_gpu(self): + if self.ptr.mem_per_tres and self.ptr.pn_min_memory == slurm.NO_VAL64: + # TODO: Make a function that, given a GRES type, safely extracts + # its value from the string. + mem = int(cstr.to_unicode(self.ptr.mem_per_tres).split(":")[2]) + return u64_parse(mem) + else: + return None + + @property + def gres_per_node(self): + return cstr.to_gres_dict(self.ptr.tres_per_node) + + @property + def profile_types(self): + return acctg_profile_int_to_list(self.ptr.profile) + + @property + def gres_binding(self): + if self.ptr.bitflags & slurm.GRES_ENFORCE_BIND: + return "enforce-binding" + elif self.ptr.bitflags & slurm.GRES_DISABLE_BIND: + return "disable-binding" + else: + return None + + @property + def kill_on_invalid_dependency(self): + return u64_parse_bool_flag(self.ptr.bitflags, slurm.KILL_INV_DEP) + + @property + def spreads_over_nodes(self): + return u64_parse_bool_flag(self.ptr.bitflags, slurm.SPREAD_JOB) + + @property + def power_options(self): + return power_type_int_to_list(self.ptr.power_flags) + + @property + def is_cronjob(self): + return u64_parse_bool_flag(self.ptr.bitflags, slurm.CRON_JOB) + + @property + def cronjob_time(self): + return cstr.to_unicode(self.ptr.cronspec) + + @property + def cpu_time(self): + return self.cpus * self.run_time + + @property + def pending_time(self): + # TODO + return None + + @property + def run_time_left(self): + # TODO + return None + + def get_resource_layout_per_node(self): + """Retrieve the resource layout of this Job on each node. + + !!! 
warning + + Return type may still be subject to change in the future + + Returns: + (dict): Resource layout, where the key is the name of the node and + the value another dict with the keys `cpu_ids`, `memory` and + `gres`. + """ + # The code for this function is a modified reimplementation from here: + # https://github.com/SchedMD/slurm/blob/d525b6872a106d32916b33a8738f12510ec7cf04/src/api/job_info.c#L739 + # + # The copyright notices for the file that contains the original code + # is below: + # + # Portions Copyright (C) 2010-2017 SchedMD LLC . + # Copyright (C) 2002-2007 The Regents of the University of California. + # Copyright (C) 2008-2010 Lawrence Livermore National Security. + # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + # Written by Morris Jette et. al. + # CODE-OCEC-09-009. All rights reserved. + # + # Slurm is licensed under the GNU General Public License. For the full + # text of Slurm's License, please see here: + # pyslurm/slurm/SLURM_LICENSE + # + # Please, as mentioned above, also have a look at Slurm's DISCLAIMER + # under pyslurm/slurm/SLURM_DISCLAIMER + # + # TODO: Explain the structure of the return value a bit more. + cdef: + slurm.job_resources *resources = self.ptr.job_resrcs + slurm.hostlist_t hl + uint32_t rel_node_inx + int bit_inx = 0 + int bit_reps = 0 + int sock_inx = 0 + uint32_t sock_reps = 0 + int i = 0, j + uint32_t k = 0 + char *host + char *gres = NULL + slurm.bitstr_t *cpu_bitmap + char cpu_bitmap_str[128] + uint32_t threads + dict output = {} + + if not resources or not resources.core_bitmap: + return output + + hl = slurm.slurm_hostlist_create(resources.nodes) + if not hl: + raise ValueError("Unable to create hostlist.") + + for rel_node_inx in range(resources.nhosts): + # Check how many consecutive nodes have the same cpu allocation + # layout. 
+ if sock_reps >= resources.sock_core_rep_count[sock_inx]: + sock_inx += 1 + sock_reps = 0 + sock_reps += 1 + + # Get the next node from the list of nodenames + host = slurm.slurm_hostlist_shift(hl) + + # How many rounds we have to do in order to calculate the complete + # cpu bitmap. + bit_reps = (resources.sockets_per_node[sock_inx] + * resources.cores_per_socket[sock_inx]) + + # Calculate the amount of threads per core this job has on the + # specific host. + threads = _threads_per_core(host) + + # Allocate a new, big enough cpu bitmap + cpu_bitmap = slurm.slurm_bit_alloc(bit_reps * threads) + + # Calculate the cpu bitmap for this host. + for j in range(bit_reps): + if slurm.slurm_bit_test(resources.core_bitmap, bit_inx): + for k in range(threads): + slurm.slurm_bit_set(cpu_bitmap, (j*threads)+k) + bit_inx += 1 + + # Extract the cpu bitmap into a char *cpu_bitmap_str + slurm.slurm_bit_fmt(cpu_bitmap_str, + sizeof(cpu_bitmap_str), cpu_bitmap) + slurm.slurm_bit_free(&cpu_bitmap) + + nodename = cstr.to_unicode(host) + cpu_ids = cstr.to_unicode(cpu_bitmap_str) + mem = None + + if rel_node_inx < self.ptr.gres_detail_cnt: + gres = self.ptr.gres_detail_str[rel_node_inx] + + if resources.memory_allocated: + mem = u64_parse(resources.memory_allocated[rel_node_inx]) + + if nodename: + output[nodename] = { + "cpu_ids": cpu_ids, + "gres": cstr.to_gres_dict(gres), + "memory": mem, + } + + free(host) + + slurm.slurm_hostlist_destroy(hl) + return output + + +# https://github.com/SchedMD/slurm/blob/d525b6872a106d32916b33a8738f12510ec7cf04/src/api/job_info.c#L99 +cdef _threads_per_core(char *host): + # TODO + return 1 diff --git a/pyslurm/core/job/sbatch_opts.pyx b/pyslurm/core/job/sbatch_opts.pyx new file mode 100644 index 00000000..c6e0b400 --- /dev/null +++ b/pyslurm/core/job/sbatch_opts.pyx @@ -0,0 +1,204 @@ +######################################################################### +# sbatch_opt.pyx - utilities to parse #SBATCH options 
#########################################################################
# Copyright (C) 2023 Toni Harzendorf <toni.harzendorf@gmail.com>
#
# This file is part of PySlurm
#
# PySlurm is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# PySlurm is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with PySlurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# cython: c_string_type=unicode, c_string_encoding=default
# cython: language_level=3

import re
from pathlib import Path

# Prefix that marks a line of a batch script as carrying sbatch options.
SBATCH_MAGIC = "#SBATCH"

# Matches "--long-name<rest>" (group 1) or "-s<rest>" (group 2); group 3 is
# whatever follows the option name (possibly "=value", " value" or "value").
_OPT_PATTERN = re.compile(r"(?:--([A-Za-z][A-Za-z-]*)|-([A-Za-z]))(.*)")


class _SbatchOpt():
    """Describes how one sbatch option maps onto a submit-description attribute.

    Args:
        short_opt (str): One-letter option name without the dash, or None.
        long_opt (str): Long option name without the leading dashes.
        our_attr_name (str): Name of the attribute that receives the value,
            or None when the option is known but not supported.
        attr_param: Value assigned to the attribute when the option is given
            without a value (i.e. it is used as a flag).
        is_boolean (bool): Stored as-is; not read anywhere in this module.
        has_optional_args (bool): Stored as-is; not read in this module.
    """
    def __init__(self, short_opt, long_opt,
                 our_attr_name, attr_param=None, is_boolean=False,
                 has_optional_args=False):
        self.short_opt = short_opt
        self.long_opt = long_opt
        self.our_attr_name = our_attr_name
        self.attr_param = attr_param
        self.is_boolean = is_boolean
        self.has_optional_args = has_optional_args


# Sorted by occurrence in the sbatch manpage - keep in order.
SBATCH_OPTIONS = [
    _SbatchOpt("A", "account", "account"),
    _SbatchOpt(None, "acctg-freq", "accounting_gather_frequency"),
    _SbatchOpt("a", "array", "array"),
    _SbatchOpt(None, "batch", "batch_constraints"),
    _SbatchOpt(None, "bb", "burst_buffer"),
    _SbatchOpt(None, "bbf", "burst_buffer_file"),
    _SbatchOpt("b", "begin", "begin_time"),
    _SbatchOpt("D", "chdir", "working_directory"),
    _SbatchOpt(None, "cluster-constraint", "cluster_constraints"),
    _SbatchOpt("M", "clusters", "clusters"),
    _SbatchOpt(None, "comment", "comment"),
    _SbatchOpt("C", "constraint", "constraints"),
    _SbatchOpt(None, "container", "container"),
    _SbatchOpt(None, "contiguous", "requires_contiguous_nodes"),
    _SbatchOpt("S", "core-spec", "cores_reserved_for_system"),
    _SbatchOpt(None, "cores-per-socket", "cores_per_socket"),
    _SbatchOpt(None, "cpu-freq", "cpu_frequency"),
    _SbatchOpt(None, "cpus-per-gpu", "cpus_per_gpu"),
    _SbatchOpt("c", "cpus-per-task", "cpus_per_task"),
    _SbatchOpt(None, "deadline", "deadline"),
    _SbatchOpt(None, "delay-boot", "delay_boot_time"),
    _SbatchOpt("d", "dependency", "dependencies"),
    _SbatchOpt("m", "distribution", "distribution"),
    _SbatchOpt("e", "error", "standard_error"),
    _SbatchOpt("x", "exclude", "excluded_nodes"),
    _SbatchOpt(None, "exclusive", "resource_sharing", "no"),
    _SbatchOpt(None, "export", "environment"),
    _SbatchOpt(None, "export-file", None),
    _SbatchOpt("B", "extra-node-info", None),
    _SbatchOpt(None, "get-user-env", "get_user_environment"),
    _SbatchOpt(None, "gid", "group_id"),
    _SbatchOpt(None, "gpu-bind", "gpu_binding"),
    _SbatchOpt(None, "gpu-freq", None),
    _SbatchOpt("G", "gpus", "gpus"),
    _SbatchOpt(None, "gpus-per-node", "gpus_per_node"),
    _SbatchOpt(None, "gpus-per-socket", "gpus_per_socket"),
    # Was registered a second time as "gpus-per-socket", which made
    # --gpus-per-task unreachable.
    _SbatchOpt(None, "gpus-per-task", "gpus_per_task"),
    _SbatchOpt(None, "gres", "gres_per_node"),
    _SbatchOpt(None, "gres-flags", "gres_binding"),
    _SbatchOpt(None, "hint", None),
    _SbatchOpt("H", "hold", "priority", 0),
    _SbatchOpt(None, "ignore-pbs", None),
    _SbatchOpt("i", "input", "standard_in"),
    _SbatchOpt("J", "job-name", "name"),
    _SbatchOpt(None, "kill-on-invalid-dep", "kill_on_invalid_dependency"),
    _SbatchOpt("L", "licenses", "licenses"),
    _SbatchOpt(None, "mail-type", "mail_types"),
    _SbatchOpt(None, "mail-user", "mail_user"),
    _SbatchOpt(None, "mcs-label", "mcs_label"),
    _SbatchOpt(None, "mem", "memory_per_node"),
    _SbatchOpt(None, "mem-bind", None),
    _SbatchOpt(None, "mem-per-cpu", "memory_per_cpu"),
    _SbatchOpt(None, "mem-per-gpu", "memory_per_gpu"),
    _SbatchOpt(None, "mincpus", "min_cpus_per_node"),
    _SbatchOpt(None, "network", "network"),
    _SbatchOpt(None, "nice", "nice"),
    _SbatchOpt("k", "no-kill", "kill_on_node_fail", False),
    _SbatchOpt(None, "no-requeue", "is_requeueable", False),
    _SbatchOpt("F", "nodefile", None),
    _SbatchOpt("w", "nodelist", "required_nodes"),
    _SbatchOpt("N", "nodes", "nodes"),
    _SbatchOpt("n", "ntasks", "ntasks"),
    _SbatchOpt(None, "ntasks-per-core", "ntasks_per_core"),
    _SbatchOpt(None, "ntasks-per-gpu", "ntasks_per_gpu"),
    _SbatchOpt(None, "ntasks-per-node", "ntasks_per_node"),
    _SbatchOpt(None, "ntasks-per-socket", "ntasks_per_socket"),
    _SbatchOpt(None, "open-mode", "log_files_open_mode"),
    _SbatchOpt("o", "output", "standard_output"),
    _SbatchOpt("O", "overcommit", "overcommit", True),
    _SbatchOpt("s", "oversubscribe", "resource_sharing", "yes"),
    # NOTE(review): the JobSubmitDescription docstring calls this attribute
    # "partitions" - confirm the attribute name used here.
    _SbatchOpt("p", "partition", "partition"),
    _SbatchOpt(None, "power", "power_options"),
    _SbatchOpt(None, "prefer", None),
    _SbatchOpt(None, "priority", "priority"),
    _SbatchOpt(None, "profile", "profile_types"),
    _SbatchOpt(None, "propagate", None),
    _SbatchOpt("q", "qos", "qos"),
    _SbatchOpt(None, "reboot", "requires_node_reboot", True),
    _SbatchOpt(None, "requeue", "is_requeueable", True),
    _SbatchOpt(None, "reservation", "reservations"),
    _SbatchOpt(None, "signal", "signal"),
    _SbatchOpt(None, "sockets-per-node", "sockets_per_node"),
    _SbatchOpt(None, "spread-job", "spreads_over_nodes", True),
    _SbatchOpt(None, "switches", "switches"),
    _SbatchOpt(None, "thread-spec", "threads_reserved_for_system"),
    _SbatchOpt(None, "threads-per-core", "threads_per_core"),
    _SbatchOpt("t", "time", "time_limit"),
    _SbatchOpt(None, "time-min", "time_limit_min"),
    _SbatchOpt(None, "tmp", "temporary_disk_per_node"),
    _SbatchOpt(None, "uid", "user_id"),
    _SbatchOpt(None, "use-min-nodes", "use_min_nodes", True),
    _SbatchOpt(None, "wait-all-nodes", "wait_all_nodes", True),
    _SbatchOpt(None, "wckey", "wckey"),
]


def _parse_line(line):
    """Split one "#SBATCH ..." line into an option name and its value.

    Accepted forms are "-t20", "-t 20", "-t=21", "--time=20", "--time 20",
    "--time20" and value-less flags such as "--exclusive" or "-O". Anything
    after a "#" following the options is treated as a comment.

    Args:
        line (str): A line that starts with SBATCH_MAGIC.

    Returns:
        (tuple): "(opt, val)" where "opt" is the option name without leading
            dashes and "val" is the value as str, or None for a flag. A line
            that carries no option at all yields '("", None)'.

    Raises:
        ValueError: If more than one whitespace-separated token follows the
            option name (several options on one line, or an unquoted value
            containing spaces). Only one option per line is supported.
    """
    # Remove the #SBATCH from the start
    opts = line[len(SBATCH_MAGIC):]

    # Ignore possible comments after the options
    opts = opts.split("#")[0].strip()

    match = _OPT_PATTERN.match(opts)
    if match is None:
        # Bare "#SBATCH" or text that is not an option; nothing to look up.
        return "", None

    long_opt, short_opt, rest = match.groups()
    opt = long_opt or short_opt

    # Only the first "=" separates name and value, so values which contain
    # "=" themselves (e.g. --export=ALL,FOO=bar) are kept intact.
    val = rest.strip()
    if val.startswith("="):
        val = val[1:].strip()

    if len(val.split()) > 1:
        raise ValueError(
            f"Unable to parse {SBATCH_MAGIC} line, expected a single option "
            f"with at most one value: {opts!r}"
        )

    return opt, val or None


def _find_opt(opt):
    """Look up the _SbatchOpt whose short or long name equals "opt".

    Returns:
        (_SbatchOpt): The matching entry of SBATCH_OPTIONS, or None.
    """
    for sbopt in SBATCH_OPTIONS:
        # Check if we can find the option in our predefined mapping.
        if opt == sbopt.short_opt or opt == sbopt.long_opt:
            return sbopt

    return None


def _parse_opts_from_batch_script(desc, script, overwrite):
    """Apply the #SBATCH options found in a batch script to "desc".

    Args:
        desc: Object whose attributes named by the "our_attr_name" of each
            option are read with getattr and written with setattr.
        script (str): Path to the batch script.
        overwrite (bool): If False, only attributes that are currently None
            are set; if True, values from the script always win.

    Raises:
        ValueError: If "script" is not an existing file, or a line cannot be
            parsed (see _parse_line).
    """
    script_path = Path(script)
    if not script_path.is_file():
        raise ValueError("The script path you provided is not valid.")

    for line in script_path.read_text().splitlines():
        line = line.lstrip()
        if not line.startswith(SBATCH_MAGIC):
            continue

        flag, val = _parse_line(line)
        opt = _find_opt(flag)

        if not opt or opt.our_attr_name is None:
            # Not supported
            continue

        if getattr(desc, opt.our_attr_name) is None or overwrite:
            # Flags carry no value in the script; use the predefined one.
            val = opt.attr_param if val is None else val
            setattr(desc, opt.our_attr_name, val)
#
# cython: c_string_type=unicode, c_string_encoding=default
# cython: language_level=3

from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
from .job cimport Job
from libc.string cimport memcpy, memset
from pyslurm cimport slurm
from pyslurm.slurm cimport (
    job_step_info_t,
    slurm_get_job_steps,
    job_step_info_response_msg_t,
    step_update_request_msg_t,
    slurm_free_job_step_info_response_msg,
    slurm_init_update_step_msg,
    slurm_free_update_step_msg,
    slurm_free_job_step_info_response_msg,
    slurm_free_job_step_info_members,
    slurm_update_step,
    slurm_signal_job_step,
    slurm_kill_job_step,
    slurm_job_state_string,
    xfree,
    try_xmalloc,
)
from pyslurm.utils cimport cstr, ctime
from pyslurm.utils.uint cimport *
from pyslurm.utils.ctime cimport time_t
from pyslurm.core.job.task_dist cimport TaskDistribution


cdef class JobSteps(dict):
    """A [dict][] of [pyslurm.JobStep][] objects for a given Job.

    Raises:
        RPCError: When getting the Job steps from the slurmctld failed.
    """

    cdef:
        # Response message filled by slurm_get_job_steps in _load_data and
        # released in __dealloc__.
        job_step_info_response_msg_t *info
        # Zeroed dummy entry that _load_data writes over every step of
        # "info" once that step has been copied into a JobStep.
        job_step_info_t tmp_info
        # Python object; assigned by JobSteps.load with the id of the Job.
        _job_id

    @staticmethod
    cdef JobSteps _load_single(Job job)
    cdef dict _load_data(self, uint32_t job_id, int flags)


cdef class JobStep:
    """A Slurm Jobstep

    Args:
        job_id (Union[Job, int], optional=0):
            The Job this Step belongs to.
        step_id (Union[int, str], optional=0):
            Step-ID for this JobStep object.

    Other Parameters:
        time_limit (int):
            Time limit in Minutes for this step.

    Attributes:
        id (Union[str, int]):
            The id for this step.
        job_id (int):
            The id for the Job this step belongs to.
        name (str):
            Name of the step.
        user_id (int):
            User ID who owns this step.
        user_name (str):
            Name of the User who owns this step.
        time_limit (int):
            Time limit in Minutes for this step.
        network (str):
            Network specification for the step.
        cpu_frequency_min (Union[str, int]):
            Minimum CPU-Frequency requested.
        cpu_frequency_max (Union[str, int]):
            Maximum CPU-Frequency requested.
        cpu_frequency_governor (Union[str, int]):
            CPU-Frequency Governor requested.
        reserved_ports (str):
            Reserved ports for the step.
        cluster (str):
            Name of the cluster this step runs on.
        srun_host (str):
            Name of the host srun was executed on.
        srun_process_id (int):
            Process ID of the srun command.
        container (str):
            Path to the container OCI.
        allocated_nodes (str):
            Nodes the Job is using.
        start_time (int):
            Time this step started, as unix timestamp.
        run_time (int):
            Seconds this step has been running for.
        partition (str):
            Name of the partition this step runs in.
        state (str):
            State the step is in.
        alloc_cpus (int):
            Number of CPUs this step uses in total.
        ntasks (int):
            Number of tasks this step uses.
        distribution (dict):
            Task distribution specification for the step.
        command (str):
            Command that was specified with srun.
        slurm_protocol_version (int):
            Slurm protocol version in use.
    """

    cdef:
        # Step information; the getters of all properties read from here.
        job_step_info_t *ptr
        # Update request sent by modify(); allocated on first attribute set.
        step_update_request_msg_t *umsg

    @staticmethod
    cdef JobStep from_ptr(job_step_info_t *in_ptr)
# PySlurm is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with PySlurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# cython: c_string_type=unicode, c_string_encoding=default
# cython: language_level=3

from typing import Union
from pyslurm.utils import cstr, ctime
from pyslurm.utils.uint import *
from pyslurm.core.error import RPCError, verify_rpc
from pyslurm.settings import LOCAL_CLUSTER
from pyslurm import xcollections
from pyslurm.utils.helpers import (
    signal_to_num,
    instance_to_dict,
    uid_to_name,
    humanize_step_id,
    dehumanize_step_id,
)
from pyslurm.core.job.util import cpu_freq_int_to_str
from pyslurm.utils.ctime import (
    secs_to_timestr,
    mins_to_timestr,
    timestr_to_mins,
    timestamp_to_date,
    _raw_time,
)


cdef class JobSteps(dict):

    def __dealloc__(self):
        slurm_free_job_step_info_response_msg(self.info)

    def __cinit__(self):
        self.info = NULL

    def __init__(self, steps=None):
        if isinstance(steps, dict):
            self.update(steps)
        elif steps is not None:
            # The f-prefix was missing, so the placeholder was never filled.
            raise TypeError(f"Invalid Type: {type(steps)}")

    def __repr__(self):
        data = super().__repr__()
        return f'pyslurm.{self.__class__.__name__}({data})'

    @staticmethod
    def load(job):
        """Load the Job Steps from the system.

        Args:
            job (Union[Job, int]):
                The Job for which the Steps should be loaded.

        Returns:
            (pyslurm.JobSteps): JobSteps of the Job

        Examples:
            >>> import pyslurm
            >>> steps = pyslurm.JobSteps.load(1)
            >>> print(steps)
            pyslurm.JobSteps({'batch': pyslurm.JobStep('batch')})
            >>> print(steps[1])
            pyslurm.JobStep('batch')
        """
        cdef:
            Job _job
            JobSteps steps

        _job = Job.load(job.id if isinstance(job, Job) else job)
        steps = JobSteps._load_single(_job)
        steps._job_id = _job.id
        return steps

    @staticmethod
    cdef JobSteps _load_single(Job job):
        """Load the steps of one already loaded Job.

        Raises:
            RPCError: If no step data came back and the Job is not pending.
        """
        cdef JobSteps steps = JobSteps()

        data = steps._load_data(job.id, slurm.SHOW_ALL)
        if not data and not slurm.IS_JOB_PENDING(job.ptr):
            msg = f"Failed to load step info for Job {job.id}."
            raise RPCError(msg=msg)

        # A pending Job passes the check above with empty "data"; indexing
        # it directly would raise a KeyError instead of returning no steps.
        steps.update(data.get(job.id, {}))
        return steps

    cdef dict _load_data(self, uint32_t job_id, int flags):
        """Fetch steps via slurm_get_job_steps and group them by Job-ID.

        Returns:
            (dict): Job-ID mapped to a JobSteps instance, which in turn maps
                each step id to its JobStep.
        """
        cdef:
            JobStep step
            uint32_t cnt = 0
            dict steps = {}

        rc = slurm_get_job_steps(<time_t>0, job_id, slurm.NO_VAL, &self.info,
                                 flags)
        verify_rpc(rc)

        # zero-out a dummy job_step_info_t
        memset(&self.tmp_info, 0, sizeof(job_step_info_t))

        # Put each job-step pointer into its own "JobStep" instance.
        for cnt in range(self.info.job_step_count):
            step = JobStep.from_ptr(&self.info.job_steps[cnt])
            # Prevent double free if xmalloc fails mid-loop and a MemoryError
            # is raised by replacing it with a zeroed-out job_step_info_t.
            self.info.job_steps[cnt] = self.tmp_info

            job_id = step.job_id
            if job_id not in steps:
                steps[job_id] = JobSteps()
            steps[job_id][step.id] = step

        # We have extracted all pointers
        self.info.job_step_count = 0
        return steps

    @staticmethod
    def load_all():
        """Loads all the steps in the system.

        Returns:
            (dict): A dict where every JobID (key) is mapped with an instance
                of its JobSteps (value).
        """
        cdef JobSteps steps = JobSteps()
        return steps._load_data(slurm.NO_VAL, slurm.SHOW_ALL)

    def to_dict(self):
        return xcollections.dict_recursive(self)


cdef class JobStep:

    def __cinit__(self):
        self.ptr = NULL
        self.umsg = NULL

    def __init__(self, job_id=0, step_id=0, **kwargs):
        self._alloc_impl()
        self.job_id = job_id.id if isinstance(job_id, Job) else job_id
        self.id = step_id
        cstr.fmalloc(&self.ptr.cluster, LOCAL_CLUSTER)

        # Initialize attributes, if any were provided
        for k, v in kwargs.items():
            setattr(self, k, v)

    def _alloc_info(self):
        # Allocate the job_step_info_t only once.
        if not self.ptr:
            self.ptr = <job_step_info_t*>try_xmalloc(
                    sizeof(job_step_info_t))
            if not self.ptr:
                raise MemoryError("xmalloc failed for job_step_info_t")

    def _alloc_umsg(self):
        # Allocate and initialize the update message only once.
        if not self.umsg:
            self.umsg = <step_update_request_msg_t*>try_xmalloc(
                    sizeof(step_update_request_msg_t))
            # This has to test the pointer that was just allocated. It used
            # to test self.ptr, which let a failed allocation slip through
            # to slurm_init_update_step_msg.
            if not self.umsg:
                raise MemoryError("xmalloc failed for "
                                  "step_update_request_msg_t")
            slurm_init_update_step_msg(self.umsg)

    def _alloc_impl(self):
        self._alloc_info()
        self._alloc_umsg()

    def __dealloc__(self):
        self._dealloc_impl()

    def _dealloc_impl(self):
        slurm_free_job_step_info_members(self.ptr)
        xfree(self.ptr)
        slurm_free_update_step_msg(self.umsg)
        self.umsg = NULL

    def __setattr__(self, name, val):
        # When a user wants to set attributes on a instance that was created
        # by calling JobSteps.load(), the "umsg" pointer is not yet allocated.
        # We only allocate memory for it by the time the user actually wants
        # to modify something.
        self._alloc_umsg()
        # Call descriptors __set__ directly
        JobStep.__dict__[name].__set__(self, val)

    def __repr__(self):
        return f'pyslurm.{self.__class__.__name__}({self.id})'

    @staticmethod
    def load(job_id, step_id):
        """Load information for a specific job step.

        Implements the slurm_get_job_steps RPC.

        Args:
            job_id (Union[pyslurm.Job, int]):
                ID of the Job the Step belongs to.
            step_id (Union[int, str]):
                Step-ID for the Step to be loaded.

        Returns:
            (pyslurm.JobStep): Returns a new JobStep instance

        Raises:
            RPCError: When retrieving Step information from the slurmctld was
                not successful.

        Examples:
            >>> import pyslurm
            >>> jobstep = pyslurm.JobStep.load(9999, 1)
        """
        cdef:
            job_step_info_response_msg_t *info = NULL
            JobStep wrap = None

        job_id = job_id.id if isinstance(job_id, Job) else job_id
        rc = slurm_get_job_steps(<time_t>0, job_id, dehumanize_step_id(step_id),
                                 &info, slurm.SHOW_ALL)
        verify_rpc(rc)

        if info and info.job_step_count == 1:
            wrap = JobStep.from_ptr(&info.job_steps[0])
            # The members now belong to "wrap"; a count of 0 keeps the free
            # below from releasing them a second time.
            info.job_step_count = 0
            slurm_free_job_step_info_response_msg(info)
        else:
            slurm_free_job_step_info_response_msg(info)
            msg = f"Step {step_id} of Job {job_id} not found."
            raise RPCError(msg=msg)

        return wrap

    @staticmethod
    cdef JobStep from_ptr(job_step_info_t *in_ptr):
        # Bypasses __init__; only "ptr" is allocated, "umsg" stays NULL until
        # an attribute is set.
        cdef JobStep wrap = JobStep.__new__(JobStep)
        wrap._alloc_info()
        memcpy(wrap.ptr, in_ptr, sizeof(job_step_info_t))
        return wrap

    def send_signal(self, signal):
        """Send a signal to a running Job step.

        Implements the slurm_signal_job_step RPC.

        Args:
            signal (Union[str, int]):
                Any valid signal which will be sent to the Job. Can be either
                a str like `SIGUSR1`, or simply an [int][].

        Raises:
            RPCError: When sending the signal was not successful.

        Examples:
            Specifying the signal as a string:

            >>> import pyslurm
            >>> pyslurm.JobStep(9999, 1).send_signal("SIGUSR1")

            or passing in a numeric signal:

            >>> pyslurm.JobStep(9999, 1).send_signal(9)
        """
        step_id = self.ptr.step_id.step_id
        sig = signal_to_num(signal)
        verify_rpc(slurm_signal_job_step(self.job_id, step_id, sig))

    def cancel(self):
        """Cancel a Job step.

        Implements the slurm_kill_job_step RPC.

        Raises:
            RPCError: When cancelling the Job was not successful.

        Examples:
            >>> import pyslurm
            >>> pyslurm.JobStep(9999, 1).cancel()
        """
        step_id = self.ptr.step_id.step_id
        verify_rpc(slurm_kill_job_step(self.job_id, step_id, 9))

    def modify(self, JobStep changes):
        """Modify a job step.

        Implements the slurm_update_step RPC.

        Args:
            changes (pyslurm.JobStep):
                Another JobStep object that contains all the changes to apply.
                Check the `Other Parameters` of the JobStep class to see which
                properties can be modified.

        Raises:
            RPCError: When updating the JobStep was not successful.

        Examples:
            >>> import pyslurm
            >>>
            >>> # Setting the new time-limit to 20 days
            >>> changes = pyslurm.JobStep(time_limit="20-00:00:00")
            >>> pyslurm.JobStep(9999, 1).modify(changes)
        """
        cdef JobStep js = changes
        js._alloc_umsg()
        # The request is built on "changes", but addressed to this step.
        js.umsg.step_id = self.ptr.step_id.step_id
        js.umsg.job_id = self.ptr.step_id.job_id
        verify_rpc(slurm_update_step(js.umsg))

    def as_dict(self):
        return self.to_dict()

    def to_dict(self):
        """JobStep information formatted as a dictionary.

        Returns:
            (dict): JobStep information as dict
        """
        cdef dict out = instance_to_dict(self)

        dist = self.distribution
        if dist:
            out["distribution"] = dist.to_dict()

        return out

    @property
    def id(self):
        return humanize_step_id(self.ptr.step_id.step_id)

    @id.setter
    def id(self, val):
        self.ptr.step_id.step_id = dehumanize_step_id(val)

    @property
    def job_id(self):
        return self.ptr.step_id.job_id

    @job_id.setter
    def job_id(self, val):
        self.ptr.step_id.job_id = int(val)

    @property
    def name(self):
        return cstr.to_unicode(self.ptr.name)

    @property
    def user_id(self):
        return u32_parse(self.ptr.user_id, zero_is_noval=False)

    @property
    def user_name(self):
        return uid_to_name(self.ptr.user_id)

    @property
    def time_limit(self):
        return _raw_time(self.ptr.time_limit)

    @time_limit.setter
    def time_limit(self, val):
        # Keep the local info and the pending update request in sync.
        self.umsg.time_limit = self.ptr.time_limit = timestr_to_mins(val)

    @property
    def network(self):
        return cstr.to_unicode(self.ptr.network)

    @property
    def cpu_frequency_min(self):
        return cpu_freq_int_to_str(self.ptr.cpu_freq_min)

    @property
    def cpu_frequency_max(self):
        return cpu_freq_int_to_str(self.ptr.cpu_freq_max)

    @property
    def cpu_frequency_governor(self):
        return cpu_freq_int_to_str(self.ptr.cpu_freq_gov)

    @property
    def reserved_ports(self):
        return cstr.to_unicode(self.ptr.resv_ports)

    @property
    def cluster(self):
        return cstr.to_unicode(self.ptr.cluster)

    @property
    def srun_host(self):
        return cstr.to_unicode(self.ptr.srun_host)

    @property
    def srun_process_id(self):
        return u32_parse(self.ptr.srun_pid)

    @property
    def container(self):
        return cstr.to_unicode(self.ptr.container)

    @property
    def allocated_nodes(self):
        return cstr.to_list(self.ptr.nodes)

    @property
    def start_time(self):
        return _raw_time(self.ptr.start_time)

    @property
    def run_time(self):
        return _raw_time(self.ptr.run_time)

    @property
    def partition(self):
        return cstr.to_unicode(self.ptr.partition)

    @property
    def state(self):
        return cstr.to_unicode(slurm_job_state_string(self.ptr.state))

    @property
    def alloc_cpus(self):
        return u32_parse(self.ptr.num_cpus)

    @property
    def ntasks(self):
        return u32_parse(self.ptr.num_tasks)

    @property
    def distribution(self):
        return TaskDistribution.from_int(self.ptr.task_dist)

    @property
    def command(self):
        return cstr.to_unicode(self.ptr.submit_line)

    @property
    def slurm_protocol_version(self):
        return u32_parse(self.ptr.start_protocol_ver)
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + job_desc_msg_t, + slurm_init_job_desc_msg, + slurm_free_job_desc_msg, + submit_response_msg_t, + slurm_submit_batch_job, + slurm_free_submit_response_response_msg, + slurm_env_array_free, + slurm_env_array_create, + slurm_env_array_merge, + slurm_env_array_overwrite, + slurm_job_share_string, + xfree, + try_xmalloc, +) +from pyslurm.utils cimport cstr, ctime +from pyslurm.utils.uint cimport * +from pyslurm.utils.ctime cimport time_t +from pyslurm.core.job.task_dist cimport TaskDistribution + + +cdef class JobSubmitDescription: + """Submit Description for a Slurm Job. + + Args: + **kwargs (Any, optional=None): + Any valid Attribute this object has + + Attributes: + name (str): + Name of the Job, same as -J/--job-name from sbatch. + account (str): + Account of the job, same as -A/--account from sbatch. + user_id (Union[str, int]): + Run the job as a different User, same as --uid from sbatch. + This requires root privileges. + You can both specify the name or numeric uid of the User. + group_id (Union[str, int]): + Run the job as a different Group, same as --gid from sbatch. + This requires root privileges. + You can both specify the name or numeric gid of the User. + priority (int): + Specific priority the Job will receive. + Same as --priority from sbatch. + You can achieve the behaviour of sbatch's --hold option by + specifying a priority of 0. + site_factor (int): + Site Factor of the Job. Only used when updating an existing Job. + wckey (str): + WCKey to use with the Job, same as --wckey from sbatch. + array (str): + Job Array specification, same as -a/--array from sbatch. + batch_constraints (str): + Batch Features of a Job, same as --batch from sbatch. + begin_time (str): + Defer allocation until the specified time, same as --begin from + sbatch. 
+ clusters (Union[list, str]): + Clusters the job may run on, same as -M/--clusters from sbatch. + cluster_constraints (str): + Comma-separated str with cluster constraints for the job. + This is the same as --cluster-constraint from sbatch. + comment (str): + Arbitrary job comment, same as --comment from sbatch. + admin_comment (str): + Arbitrary job admin comment. + Only used when updating an existing job. + requires_contiguous_nodes (bool): + Whether allocated Nodes are required to form a contiguous set. + Same as --contiguous from sbatch. + cores_reserved_for_system (int): + Count of cores reserved for system not usable by the Job. + Same as -S/--core-spec from sbatch. + Mutually exclusive with `threads_reserved_for_system`. + threads_reserved_for_system (int): + Count of threads reserved for system not usable by the Job. + Same as --thread-spec from sbatch. + Mutually exclusive with `cores_reserved_for_system`. + working_directory (str): + Work directory for the Job. Default is current work-dir from where + the job was submitted. + Same as -D/--chdir from sbatch. + cpu_frequency (Union[dict, str]): + CPU Frequency for the Job, same as --cpu-freq from sbatch. + + For example, specifying it as a dict: + + cpu_frequency = { + "min": "Low", + "max": "High", + "governor": "UserSpace" + } + + or like in sbatch with a string. For more info on that, check + out the sbatch documentation for --cpu-freq. + + If you only want to set a Governor without any min or max, you + can simply specify it as a standalone string: + + cpu_frequency = "Performance" + or + cpu_frequency = {"governor": "Performance"} + + If you want to set a specific, fixed frequency, you can do: + + cpu_frequency = FREQ + or either + cpu_frequency = {"max": FREQ} or cpu_frequency = {"min": FREQ} + + where FREQ is the desired frequency in kilohertz. + nodes (Union[dict, str, int]): + Amount of nodes needed for the job. + This is the same as -N/--nodes from sbatch. 
+ + For example, providing min/max nodes as a dict: + + nodes = { + "min": 3, + "max": 6 + } + + When no range is needed, you can also simply specify it as int: + + nodes = 3 + + Other than that, a range can also be specified in a str like with + sbatch: + + nodes = "1-5" + deadline (str): + Deadline specification for the Job, same as --deadline from + sbatch. + delay_boot_time (Union[str, int]): + Delay boot specification for the Job, same as --delay-boot from + sbatch. + dependencies (Union[dict, str]): + Dependencies for the Job, same as -d/--dependency from sbatch. + excluded_nodes (Union[list, str]): + Exclude specific nodes for this Job. + This is the same as -x/--exclude from sbatch. + required_nodes (Union[list, str]): + Specific list of nodes required for the Job. + This is the same as -w/--nodelist from sbatch. + constraints (str): + Required node features for the Job. + This is the same as -C/--constraint from sbatch. + kill_on_node_fail (bool): + Should the job get killed if one of the Nodes fails? + This is the same as -k/--no-kill from sbatch. + licenses (Union[list, str]): + A list of licenses for the Job. + This is the same as -L/--licenses from sbatch. + mail_user (Union[list, str]): + List of email addresses for notifications. + This is the same as --mail-user from sbatch. + mail_types (Union[list, str]): + List of mail flags. + This is the same as --mail-type from sbatch. + mcs_label (str): + An MCS Label for the Job. + This is the same as --mcs-label from sbatch. + memory_per_cpu (Union[str, int]): + Memory required per allocated CPU. + + The default unit is in Mebibytes. You are also able to specify + unit suffixes like K|M|G|T. + This is the same as --mem-per-cpu from sbatch. This is mutually + exclusive with memory_per_node and memory_per_gpu. + + + Examples: + + # 1 MiB + memory_per_cpu = 1024 + + # 3 GiB + memory_per_cpu = "3G" + memory_per_node (Union[str, int]): + Memory required per whole node. + + The default unit is in Mebibytes. 
You are also able to specify + unit suffixes like K|M|G|T. + This is the same as --mem from sbatch. This is mutually exclusive + with memory_per_cpu and memory_per_gpu. + + + Examples: + + # 1 MiB + memory_per_node = 1024 + + # 3 GiB + memory_per_node = "3G" + memory_per_gpu (Union[str, int]): + Memory required per GPU. + + The default unit is in Mebibytes. You are also able to specify + unit suffixes like K|M|G|T. + This is the same as --mem-per-gpu from sbatch. This is mutually + exclusive with memory_per_node and memory_per_cpu. + + + Examples: + + # 1 MiB + memory_per_gpu = 1024 + + # 3 GiB + memory_per_gpu = "3G" + network (str): + Network types for the Job. + This is the same as --network from sbatch. + nice (int): + Adjusted scheduling priority for the Job. + This is the same as --nice from sbatch. + log_files_open_mode (str): + Mode in which standard_output and standard_error log files should be opened. + This is the same as --open-mode from sbatch. + + + Valid options are: + + * `append` + * `truncate` + overcommit (bool): + If the resources should be overcommitted. + This is the same as -O/--overcommit from sbatch. + partitions (Union[list, str]): + A list of partitions the Job may use. + This is the same as -p/--partition from sbatch. + power_options (list): + A list of power management plugin options for the Job. + This is the same as --power from sbatch. + accounting_gather_frequency (Union[dict, str]): + Interval for accounting info to be gathered. + This is the same as --acctg-freq from sbatch. + + + For example, specifying it as a dict: + + accounting_gather_frequency = { + "energy": 60, + "network": 20, + } + + or as a single string: + + accounting_gather_frequency = "energy=60,network=20" + qos (str): + Quality of Service for the Job. + This is the same as -q/--qos from sbatch. + requires_node_reboot (bool): + Force the allocated nodes to reboot before the job starts. + This is the same as --reboot from sbatch. 
+ is_requeueable (bool): + If the Job is eligible for requeuing. + This is the same as --requeue from sbatch. + reservations (Union[list, str]): + A list of possible reservations the Job can use. + This is the same as --reservation from sbatch. + script (str): + Absolute Path or content of the batch script. + + You can specify either a path to a script which will be loaded, or + you can pass the script as a string. + If the script is passed as a string, providing arguments to it + (see `script_args`) is not supported. + script_args (str): + Arguments passed to the batch script. + You can only set arguments if a file path was specified for + `script`. + environment (Union[dict, str]): + Environment variables to be set for the Job. + This is the same as --export from sbatch. + resource_sharing (str): + Controls the resource sharing with other Jobs. + This property combines functionality of --oversubscribe and + --exclusive from sbatch. + + + Allowed values are: + + * `oversubscribe` or `yes`: + + The Job allows resources to be shared with other running Jobs. + + * `user`: + + Only sharing resources with other Jobs that have the "user" + option set is allowed. + + * `mcs`: + + Only sharing resources with other Jobs that have the "mcs" + option set is allowed. + + * `no` or `exclusive`: + + No sharing of resources is allowed. (--exclusive from sbatch) + distribution (str): + Task distribution for the Job, same as --distribution from sbatch. + time_limit (str): + The time limit for the job. + This is the same as -t/--time from sbatch. + time_limit_min (str): + A minimum time limit for the Job. + This is the same as --time-min from sbatch. + container (str): + Path to an OCI container bundle. + This is the same as --container from sbatch. + cpus_per_task (int): + The amount of cpus required for each task. + + This is the same as -c/--cpus-per-task from sbatch. + This is mutually exclusive with `cpus_per_gpu`.
+ cpus_per_gpu (int): + The amount of cpus required for each allocated GPU. + + This is the same as --cpus-per-gpu from sbatch. + This is mutually exclusive with `cpus_per_task`. + sockets_per_node (int): + Restrict Job to nodes with at least this many sockets. + This is the same as --sockets-per-node from sbatch. + cores_per_socket (int): + Restrict Job to nodes with at least this many cores per socket. + This is the same as --cores-per-socket from sbatch. + threads_per_core (int): + Restrict Job to nodes with at least this many threads per core. + This is the same as --threads-per-core from sbatch. + gpus (Union[dict, str, int]): + GPUs for the Job to be allocated in total. + This is the same as -G/--gpus from sbatch. + Specifying the type of the GPU is optional. + + + For example, specifying the GPU counts as a dict: + + gpus = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus = 6 + gpus_per_socket (Union[dict, str, int]): + GPUs for the Job to be allocated per socket. + + This is the same as --gpus-per-socket from sbatch. + + Specifying the type of the GPU is optional. Note that setting + `gpus_per_socket` also requires `sockets_per_node` to be set. + + + For example, specifying it as a dict: + + gpus_per_socket = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus_per_socket = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus_per_socket = 6 + gpus_per_task (Union[dict, str, int]): + GPUs for the Job to be allocated per task. + + This is the same as --gpus-per-task from sbatch. + + Specifying the type of the GPU is optional. Note that setting + `gpus_per_task` also requires one of `ntasks` or `gpus` to be + set.
+ + For example, specifying it as a dict: + + gpus_per_task = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus_per_task = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus_per_task = 6 + gres_per_node (Union[dict, str]): + Generic resources to be allocated per node. + + This is the same as --gres from sbatch. You should also use this + option if you want to specify GPUs per node (--gpus-per-node). + Specifying the type (by separating GRES name and type with a + colon) is optional. + + For example, specifying it as a dict: + + gres_per_node = { + "gpu:tesla": 1, + "gpu:volta": 5, + } + + Or, for example, in string format: + + gres_per_node = "gpu:tesla:1,gpu:volta:5" + + GPU Gres without a specific type: + + gres_per_node = "gpu:6" + gpu_binding (str): + Specify GPU binding for the Job. + This is the same as --gpu-bind from sbatch. + ntasks (int): + Maximum amount of tasks for the Job. + This is the same as -n/--ntasks from sbatch. + ntasks_per_node (int): + Amount of tasks to be invoked on each node. + This is the same as --ntasks-per-node from sbatch. + ntasks_per_socket (int): + Maximum amount of tasks to be invoked on each socket. + This is the same as --ntasks-per-socket from sbatch. + ntasks_per_core (int): + Maximum amount of tasks to be invoked on each core. + This is the same as --ntasks-per-core from sbatch. + ntasks_per_gpu (int): + Amount of tasks to be invoked per GPU. + This is the same as --ntasks-per-gpu from sbatch. + switches (Union[dict, str, int]): + Maximum amount of leaf switches and wait time desired. + + This can also optionally include a maximum waiting time for these + switches. + This is the same as --switches from sbatch. + + + For example, specifying it as a dict: + + switches = { "count": 5, "max_wait_time": "00:10:00" } + + Or as a single string (sbatch-style): + + switches = "5@00:10:00" + signal (Union[dict, str]): + Warn signal to be sent to the Job.
+ + This is the same as --signal from sbatch. + The signal can be specified either by its name, e.g. "SIGKILL", or + as a number, e.g. 9 + + + For example, specifying it as a dict: + + signal = { + "signal": "SIGKILL", + "time": 120 + } + + The above will send a "SIGKILL" signal 120 seconds before the + Job's time limit is reached. + + Or, specifying it as a string (sbatch-style): + + signal = "SIGKILL@120" + standard_in (str): + Path to a File acting as standard_in for the batch-script. + This is the same as -i/--input from sbatch. + standard_error (str): + Path to a File acting as standard_error for the batch-script. + This is the same as -e/--error from sbatch. + standard_output (str): + Path to a File to write the Job's standard_output. + This is the same as -o/--output from sbatch. + kill_on_invalid_dependency (bool): + Kill the job if it has an invalid dependency. + This is the same as --kill-on-invalid-dep from sbatch. + spreads_over_nodes (bool): + Spread the Job over as many nodes as possible. + This is the same as --spread-job from sbatch. + use_min_nodes (bool): + Prefer the minimum amount of nodes specified. + This is the same as --use-min-nodes from sbatch. + gres_binding (str): + Generic resource task binding options. + This is the --gres-flags option from sbatch. + + + Possible values are: + + * `enforce-binding` + * `disable-binding` + temporary_disk_per_node (Union[str, int]): + Amount of temporary disk space needed per node. + + This is the same as --tmp from sbatch. You can specify units like + K|M|G|T (multiples of 1024). + If no unit is specified, the value will be assumed as Mebibytes. + + + Examples: + + # 2048 MiB + temporary_disk_per_node = "2G" + + # 1024 MiB + temporary_disk_per_node = 1024 + get_user_environment (Union[str, bool, int]): + TODO + min_cpus_per_node (str): + Set the minimum amount of CPUs required per Node. + This is the same as --mincpus from sbatch. + wait_all_nodes (bool): + Controls when the execution of the command begins.
+ + A value of True means that the Job should begin execution only + after all nodes in the allocation are ready. Setting it to False, + the default, means that it is not waited for the nodes to be + ready. (i.e booted) + """ + cdef: + slurm.job_desc_msg_t *ptr + is_update + + cdef public: + name + account + user_id + group_id + priority + site_factor + wckey + array + batch_constraints + begin_time + clusters + cluster_constraints + comment + admin_comment + requires_contiguous_nodes + cores_reserved_for_system + threads_reserved_for_system + working_directory + cpu_frequency + nodes + deadline + delay_boot_time + dependencies + excluded_nodes + required_nodes + constraints + kill_on_node_fail + licenses + mail_user + mail_types + mcs_label + memory_per_cpu + memory_per_node + memory_per_gpu + network + nice + log_files_open_mode + overcommit + partitions + power_options + profile_types + accounting_gather_frequency + qos + requires_node_reboot + is_requeueable + reservations + script + script_args + environment + resource_sharing + distribution + time_limit + time_limit_min + container + cpus_per_task + cpus_per_gpu + sockets_per_node + cores_per_socket + threads_per_core + gpus + gpus_per_socket + gpus_per_task + gres_per_node + gpu_binding + ntasks + ntasks_per_node + ntasks_per_socket + ntasks_per_core + ntasks_per_gpu + switches + signal + standard_in + standard_output + standard_error + kill_on_invalid_dependency + spreads_over_nodes + use_min_nodes + gres_binding + temporary_disk_per_node + get_user_environment + min_cpus_per_node + wait_all_nodes diff --git a/pyslurm/core/job/submission.pyx b/pyslurm/core/job/submission.pyx new file mode 100644 index 00000000..0c9e699c --- /dev/null +++ b/pyslurm/core/job/submission.pyx @@ -0,0 +1,720 @@ +######################################################################### +# submission.pyx - interface for submitting slurm jobs +######################################################################### +# Copyright (C) 
2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from os import getcwd +from os import environ as pyenviron +import re +from typing import Union, Any +import shlex +from pathlib import Path +from pyslurm.utils import cstr +from pyslurm.utils.uint import * +from pyslurm.core.job.util import * +from pyslurm.core.error import RPCError, verify_rpc +from pyslurm.core.job.sbatch_opts import _parse_opts_from_batch_script +from pyslurm.utils.ctime import ( + secs_to_timestr, + timestr_to_secs, + mins_to_timestr, + timestr_to_mins, + timestamp_to_date, + date_to_timestamp, +) +from pyslurm.utils.helpers import ( + humanize, + dehumanize, + signal_to_num, + user_to_uid, + group_to_gid, + uid_to_name, + gid_to_name, +) + + +cdef class JobSubmitDescription: + def __cinit__(self): + self.ptr = NULL + + def __init__(self, **kwargs): + # Initialize explicitly provided attributes, if any. 
+ for k, v in kwargs.items(): + setattr(self, k, v) + + def __dealloc__(self): + slurm_free_job_desc_msg(self.ptr) + + def _alloc_and_init(self): + slurm_free_job_desc_msg(self.ptr) + + self.ptr = try_xmalloc(sizeof(job_desc_msg_t)) + if not self.ptr: + raise MemoryError("xmalloc for job_desc_msg_t failed.") + + slurm_init_job_desc_msg(self.ptr) + + def __repr__(self): + return f'pyslurm.{self.__class__.__name__}' + + def submit(self): + """Submit a batch job description. + + Returns: + (int): The ID of the submitted Job. + + Raises: + RPCError: When the job submission was not successful. + + Examples: + >>> import pyslurm + >>> desc = pyslurm.JobSubmitDescription( + ... name="test-job", + ... cpus_per_task=1, + ... time_limit="10-00:00:00", + ... script="/path/to/your/submit_script.sh") + >>> + >>> job_id = desc.submit() + >>> print(job_id) + 99 + """ + cdef submit_response_msg_t *resp = NULL + + self._create_job_submit_desc() + verify_rpc(slurm_submit_batch_job(self.ptr, &resp)) + + job_id = resp.job_id + slurm_free_submit_response_response_msg(resp) + + return job_id + + def load_environment(self, overwrite=False): + """Load values of attributes provided through the environment. + + !!! note + + Instead of `SBATCH_`, pyslurm uses `PYSLURM_JOBDESC_` as a prefix + to identify environment variables which should be used to set + attributes. + + Args: + overwrite (bool): + If set to `True`, the value from an option found in the + environment will override the current value of the attribute + in this instance. Default is `False` + + Examples: + Lets consider you want to set the name of the Job, its Account + name and that the Job cannot be requeued. 
+ Therefore, you will need to have set these environment variables: + + ```bash + # Format is: PYSLURM_JOBDESC_{ATTRIBUTE_NAME} + export PYSLURM_JOBDESC_ACCOUNT="myaccount" + export PYSLURM_JOBDESC_NAME="myjobname" + export PYSLURM_JOBDESC_IS_REQUEUEABLE="False" + ``` + + As you can see above, boolean values should be the literal strings + "False" or "True". + In python, you can do this now: + + >>> import pyslurm + >>> desc = pyslurm.JobSubmitDescription(...other args...) + >>> desc.load_environment() + >>> print(desc.name, desc.account, desc.is_requeueable) + myjobname myaccount False + """ + self._parse_env(overwrite) + + def load_sbatch_options(self, overwrite=False): + """Load values from `#SBATCH` options in the batch script. + + Args: + overwrite (bool): + If set to `True`, the value from an option found in the + batch script will override the current value of the attribute + in this instance. Default is `False` + + Raises: + ValueError: When the `script` attribute has not been set. + """ + if not self.script: + raise ValueError("You need to set the 'script' attribute first.") + _parse_opts_from_batch_script(self, self.script, overwrite) + + def _parse_env(self, overwrite=False): + # Set attributes from PYSLURM_JOBDESC_{ATTRIBUTE_NAME} environment + # variables. An attribute is only set when it is currently None, + # unless overwrite is True. + for attr in dir(self): + # callable() has to be given the attribute value: the name itself + # is a str and is never callable. + if attr.startswith("_") or callable(getattr(self, attr)): + # Ignore everything starting with "_" and all functions. + # Arguments directly specified upon object creation will + # always have precedence. + continue + + spec = attr.upper() + val = pyenviron.get(f"PYSLURM_JOBDESC_{spec}") + if (val is not None + and (getattr(self, attr) is None or overwrite)): + + # Just convert literal true/false strings to bool.
+ tmp = val.casefold() + if tmp == "true": + val = True + elif tmp == "false": + val = False + + setattr(self, attr, val) + + def _create_job_submit_desc(self, is_update=False): + self.is_update = is_update + self._alloc_and_init() + cdef slurm.job_desc_msg_t *ptr = self.ptr + + if not self.is_update: + self._validate_options() + self._set_defaults() + + if self.nice: + ptr.nice = slurm.NICE_OFFSET + int(self.nice) + + if self.site_factor: + ptr.site_factor = slurm.NICE_OFFSET + int(self.site_factor) + + if self.user_id is not None: + ptr.user_id = user_to_uid(self.user_id) + if self.group_id is not None: + ptr.group_id = group_to_gid(self.group_id) + + cstr.fmalloc(&ptr.name, self.name) + cstr.fmalloc(&ptr.account, self.account) + cstr.fmalloc(&ptr.wckey, self.wckey) + cstr.fmalloc(&ptr.array_inx, self.array) + cstr.fmalloc(&ptr.batch_features, self.batch_constraints) + cstr.fmalloc(&ptr.cluster_features, self.cluster_constraints) + cstr.fmalloc(&ptr.comment, self.comment) + cstr.fmalloc(&ptr.work_dir, self.working_directory) + cstr.fmalloc(&ptr.features, self.constraints) + cstr.fmalloc(&ptr.mail_user, self.mail_user) + cstr.fmalloc(&ptr.mcs_label, self.mcs_label) + cstr.fmalloc(&ptr.network, self.network) + cstr.fmalloc(&ptr.qos, self.qos) + cstr.fmalloc(&ptr.container, self.container) + cstr.fmalloc(&ptr.std_in, self.standard_in) + cstr.fmalloc(&ptr.std_out, self.standard_output) + cstr.fmalloc(&ptr.std_err, self.standard_error) + cstr.fmalloc(&ptr.tres_per_job, cstr.from_gres_dict(self.gpus, "gpu")) + cstr.fmalloc(&ptr.tres_per_socket, + cstr.from_gres_dict(self.gpus_per_socket, "gpu")) + cstr.fmalloc(&ptr.tres_per_task, + cstr.from_gres_dict(self.gpus_per_task, "gpu")) + cstr.fmalloc(&ptr.tres_per_node, + cstr.from_gres_dict(self.gres_per_node)) + cstr.fmalloc(&ptr.cpus_per_tres, + cstr.from_gres_dict(self.cpus_per_gpu, "gpu")) + cstr.fmalloc(&ptr.admin_comment, self.admin_comment) + cstr.fmalloc(&self.ptr.dependency, + _parse_dependencies(self.dependencies)) 
+ cstr.from_list(&ptr.clusters, self.clusters) + cstr.from_list(&ptr.exc_nodes, self.excluded_nodes) + cstr.from_list(&ptr.req_nodes, self.required_nodes) + cstr.from_list(&ptr.licenses, self.licenses) + cstr.from_list(&ptr.partition, self.partitions) + cstr.from_list(&ptr.reservation, self.reservations) + cstr.from_dict(&ptr.acctg_freq, self.accounting_gather_frequency) + ptr.deadline = date_to_timestamp(self.deadline) + ptr.begin_time = date_to_timestamp(self.begin_time) + ptr.delay_boot = timestr_to_secs(self.delay_boot_time) + ptr.time_limit = timestr_to_mins(self.time_limit) + ptr.time_min = timestr_to_mins(self.time_limit_min) + ptr.priority = u32(self.priority, zero_is_noval=False) + ptr.num_tasks = u32(self.ntasks) + ptr.pn_min_tmp_disk = u32(dehumanize(self.temporary_disk_per_node)) + ptr.cpus_per_task = u16(self.cpus_per_task) + ptr.sockets_per_node = u16(self.sockets_per_node) + ptr.cores_per_socket = u16(self.cores_per_socket) + ptr.ntasks_per_socket = u16(self.ntasks_per_socket) + ptr.ntasks_per_tres = u16(self.ntasks_per_gpu) + ptr.ntasks_per_node = u16(self.ntasks_per_node) + ptr.threads_per_core = u16(self.threads_per_core) + ptr.ntasks_per_core = u16(self.ntasks_per_core) + u64_set_bool_flag(&ptr.bitflags, self.spreads_over_nodes, + slurm.SPREAD_JOB) + u64_set_bool_flag(&ptr.bitflags, self.kill_on_invalid_dependency, + slurm.KILL_INV_DEP) + u64_set_bool_flag(&ptr.bitflags, self.use_min_nodes, + slurm.USE_MIN_NODES) + ptr.contiguous = u16_bool(self.requires_contiguous_nodes) + ptr.kill_on_node_fail = u16_bool(self.kill_on_node_fail) + ptr.overcommit = u8_bool(self.overcommit) + ptr.reboot = u16_bool(self.requires_node_reboot) + ptr.requeue = u16_bool(self.is_requeueable) + ptr.wait_all_nodes = u16_bool(self.wait_all_nodes) + ptr.mail_type = mail_type_list_to_int(self.mail_types) + ptr.power_flags = power_type_list_to_int(self.power_options) + ptr.profile = acctg_profile_list_to_int(self.profile_types) + ptr.shared = 
shared_type_str_to_int(self.resource_sharing) + + if not self.is_update: + self.ptr.min_nodes, self.ptr.max_nodes = _parse_nodes(self.nodes) + cstr.fmalloc(&self.ptr.script, + _validate_batch_script(self.script, self.script_args)) + self._set_script_args() + self._set_environment() + self._set_distribution() + + self._set_memory() + self._set_open_mode() + self._set_cpu_frequency() + self._set_gpu_binding() + self._set_gres_binding() + self._set_min_cpus() + # Apply the reserved cores/threads and the warn signal. Both setters + # return early or leave the descriptor untouched when the attribute + # is unset. + self._set_core_spec() + self._set_signal() + + # TODO + # burst_buffer + # mem_bind, mem_bind_type? + # gpu_freq + # --hint + # spank_env + # --propagate for rlimits + + def _set_defaults(self): + if not self.ntasks: + self.ntasks = 1 + if not self.cpus_per_task: + self.cpus_per_task = 1 + if not self.working_directory: + self.working_directory = str(getcwd()) + if not self.environment: + # By default, sbatch also exports everything in the users env. + self.environment = "ALL" + + def _validate_options(self): + if not self.script: + raise ValueError("You need to provide a batch script.") + + if (self.memory_per_node and self.memory_per_cpu + or self.memory_per_gpu and self.memory_per_cpu + or self.memory_per_node and self.memory_per_gpu): + raise ValueError("Only one of memory_per_cpu, memory_per_node or " + "memory_per_gpu can be set.") + + if (self.ntasks_per_gpu and + (self.ptr.min_nodes != u32(None) or self.nodes + or self.gpus_per_task or self.gpus_per_socket + or self.ntasks_per_node)): + raise ValueError("ntasks_per_gpu is mutually exclusive with " + "nodes, gpus_per_task, gpus_per_socket and " + "ntasks_per_node.") + + if self.cpus_per_gpu and self.cpus_per_task: + raise ValueError("cpus_per_task and cpus_per_gpu " + "are mutually exclusive.") + + if (self.cores_reserved_for_system + and self.threads_reserved_for_system): + raise ValueError("cores_reserved_for_system is mutually " + " exclusive with threads_reserved_for_system.") + + def _set_core_spec(self): + if self.cores_reserved_for_system: + self.ptr.core_spec =
u16(self.cores_reserved_for_system) + elif self.threads_reserved_for_system: + self.ptr.core_spec = u16(self.threads_reserved_for_system) + self.ptr.core_spec |= slurm.CORE_SPEC_THREAD + + def _set_cpu_frequency(self): + freq = self.cpu_frequency + if not freq: + return None + + # Alternatively support sbatch-like --cpu-freq setting. + if not isinstance(freq, dict): + freq = _parse_cpu_freq_str_to_dict(freq) + + freq_min, freq_max, freq_gov = _validate_cpu_freq(freq) + self.ptr.cpu_freq_min = freq_min + self.ptr.cpu_freq_max = freq_max + self.ptr.cpu_freq_gov = freq_gov + + def _set_memory(self): + # Only one of the three memory attributes is applied; they are + # checked in the order per-cpu, per-node, per-gpu. + if self.memory_per_cpu: + self.ptr.pn_min_memory = u64(dehumanize(self.memory_per_cpu)) + self.ptr.pn_min_memory |= slurm.MEM_PER_CPU + elif self.memory_per_node: + self.ptr.pn_min_memory = u64(dehumanize(self.memory_per_node)) + elif self.memory_per_gpu: + # Per-GPU memory is written to mem_per_tres as a string instead + # of pn_min_memory. + mem_gpu = u64(dehumanize(self.memory_per_gpu)) + cstr.fmalloc(&self.ptr.mem_per_tres, f"gres:gpu:{mem_gpu}") + + def _set_open_mode(self): + val = self.log_files_open_mode + if val == "append": + self.ptr.open_mode = slurm.OPEN_MODE_APPEND + elif val == "truncate": + self.ptr.open_mode = slurm.OPEN_MODE_TRUNCATE + + def _set_script_args(self): + args = self.script_args + if not args: + return None + + if isinstance(args, str): + sargs = shlex.split(args) + else: + sargs = list(args) + + # Script should always be first in argv. + if sargs[0] != self.script: + sargs.insert(0, self.script) + + self.ptr.argc = len(sargs) + self.ptr.argv = try_xmalloc(self.ptr.argc * sizeof(char*)) + if not self.ptr.argv: + raise MemoryError("xmalloc failed for script_args") + + for idx, opt in enumerate(sargs): + cstr.fmalloc(&self.ptr.argv[idx], opt) + + def _set_environment(self): + vals = self.environment + get_user_env = self.get_user_environment + + # Clear any previous environment set for the Job. + slurm_env_array_free(self.ptr.environment) + self.ptr.env_size = 0 + + # Allocate a new environment.
+ self.ptr.environment = slurm_env_array_create() + + if isinstance(vals, str) or vals is None: + if vals is None or vals.casefold() == "all": + # This is the default. Export all current environment + # variables into the Job. + slurm_env_array_merge(&self.ptr.environment, + slurm.environ) + elif vals.casefold() == "none": + # Only env variables starting with "SLURM_" will be exported. + for var, val in pyenviron.items(): + if var.startswith("SLURM_"): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + get_user_env = True + else: + # Assume Env-vars were provided sbatch style like a string. + # Setup all 'SLURM' env vars found first. + for var, val in pyenviron.items(): + if var.startswith("SLURM_"): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + + # Merge the provided environment variables from the string in. + for idx, item in enumerate(vals.split(",")): + if idx == 0 and item.casefold() == "all": + slurm_env_array_merge(&self.ptr.environment, + slurm.environ) + continue + + if not "=" in item: + continue + + var, val = item.split("=", 1) + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + get_user_env = True + else: + # Here, the user provided an actual dictionary as Input. + # Setup all 'SLURM' env vars first. + for var, val in pyenviron.items(): + if var.startswith("SLURM_"): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + + # Setup all User selected env vars. 
+ for var, val in vals.items(): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + + if get_user_env: + slurm_env_array_overwrite(&self.ptr.environment, + "SLURM_GET_USER_ENV", "1") + + # Calculate Environment size + while self.ptr.environment and self.ptr.environment[self.ptr.env_size]: + self.ptr.env_size+=1 + + def _set_distribution(self): + dist=plane = None + + if not self.distribution: + self.ptr.task_dist = slurm.SLURM_DIST_UNKNOWN + return None + + if isinstance(self.distribution, int): + # Assume the user meant to specify the plane size only. + plane = u16(self.distribution) + elif isinstance(self.distribution, str): + # Support sbatch style string input + dist = TaskDistribution.from_str(self.distribution) + plane = dist.plane if isinstance(dist.plane, int) else 0 + + if plane: + self.ptr.plane_size = plane + self.ptr.task_dist = slurm.SLURM_DIST_PLANE + elif dist is not None: + self.ptr.task_dist = dist.as_int() + + def _set_gpu_binding(self): + binding = self.gpu_binding + + if not binding: + if self.ptr.ntasks_per_tres != u16(None): + # Set gpu bind implicit to single:ntasks_per_gpu + binding = f"single:{self.ntasks_per_gpu}" + else: + binding = self.gpu_binding.replace("verbose,", "") \ + .replace("gpu:", "") + if "verbose" in self.gpu_binding: + binding = f"verbose,gpu:{binding}" + + cstr.fmalloc(&self.ptr.tres_bind, binding) + + def _set_min_cpus(self): + if self.min_cpus_per_node: + self.ptr.min_cpus = u16(self.min_cpus_per_node) + elif not self.is_update: + if self.overcommit: + # With overcommit, only one CPU per node is requested. + self.ptr.min_cpus = max(self.ptr.min_nodes, 1) + else: + # Without the else, this assignment would always replace the + # overcommit value set above. + self.ptr.min_cpus = self.ptr.cpus_per_task * self.ptr.num_tasks + + def _set_switches(self): + vals = self.switches + if not vals: + return None + + # Alternatively support sbatch-like --switches input. + if not isinstance(vals, dict): + vals = _parse_switches_str_to_dict(vals) + + # Read from the (possibly just parsed) dict held in vals. + self.ptr.req_switch = u32(vals.get("count")) + self.ptr.wait4switch = timestr_to_secs(vals.get("max_wait_time")) + + def _set_signal(self): + vals = self.signal + if not vals:
+ return None + + if not isinstance(vals, dict): + vals = _parse_signal_str_to_dict(vals) + + self.ptr.warn_signal = u16(signal_to_num(vals.get("signal"))) + self.ptr.warn_time = u16(vals.get("time"), on_noval=60) + u16_set_bool_flag(&self.ptr.warn_flags, + bool(vals.get("batch_only")), slurm.KILL_JOB_BATCH) + u16_set_bool_flag( + &self.ptr.warn_flags, + bool(vals.get("allow_reservation_overlap")), + slurm.KILL_JOB_RESV) + + def _set_gres_binding(self): + if not self.gres_binding: + return None + elif self.gres_binding.casefold() == "enforce-binding": + self.ptr.bitflags |= slurm.GRES_ENFORCE_BIND + elif self.gres_binding.casefold() == "disable-binding": + self.ptr.bitflags |= slurm.GRES_DISABLE_BIND + + +def _parse_dependencies(val): + final = None + + if isinstance(val, str): + # TODO: Even though everything is checked in the slurmctld, maybe + # still do some sanity checks here on the input when a string + # is provided. + final = val + elif val is not None: + satisfy = val.pop("satisfy", "all").casefold() + + if satisfy == "any": + delim = "?" 
+ else: + delim = "," + + final = [] + for condition, vals in val.items(): + if condition == "singleton" and bool(vals): + final.append("singleton") + continue + + if not isinstance(vals, list): + vals = str(vals).split(",") + + vals = [str(s) for s in vals] + final.append(f"{condition}:{':'.join(vals)}") + + final = delim.join(final) + + return final + + +def _parse_nodes(vals): + nmin=nmax = 1 + + # Support input like --nodes from sbatch (min-[max]) + if isinstance(vals, dict): + nmin = u32(vals.get("min", 1), on_noval=1) + # No default for "max": a missing value must fall back to nmin via + # on_noval, otherwise {"min": 3} is rejected as max (1) < min (3). + nmax = u32(vals.get("max"), on_noval=nmin) + elif vals is not None: + v = str(vals).split("-", 1) + nmin = int(v[0]) + if nmin == 0: + nmin = 1 + if "-" in str(vals): + nmax = int(v[1]) + else: + nmax = nmin + + if not nmax: + nmax = nmin + if nmax < nmin: + raise ValueError("Max Nodecount cannot be less than minimum" + " nodecount.") + + return nmin, nmax + + +def _parse_signal_str_to_dict(vals): + info = {} + # This supports input like the --signal option from sbatch + val_list = re.split("[:@]+", str(vals)) + + if len(val_list): + if ":" in str(vals): + flags = val_list.pop(0).casefold() + + if "r" in flags: + info["allow_reservation_overlap"] = True + + if "b" in flags: + info["batch_only"] = True + + if "@" in str(vals): + info["time"] = val_list[1] + + info["signal"] = val_list[0] + + return info + + +def _parse_switches_str_to_dict(switches_str): + out = {} + # Convert to str first, then split on "@" (count@max_wait_time). This + # also accepts a plain int count. + vals = str(switches_str).split("@") + if len(vals) > 1: + out["max_wait_time"] = timestr_to_secs(vals[1]) + + out["count"] = u32(vals[0]) + + return out + + +def _parse_cpu_freq_str_to_dict(freq_str): + freq_splitted = re.split("[-:]+", str(freq_str)) + freq_len = len(freq_splitted) + freq = {} + + # Transform cpu-freq string to the individual components. + if freq_splitted[0].isdigit(): + freq["max"] = freq_splitted[0] + else: + if freq_len > 1: + raise ValueError( + f"Invalid cpu_frequency format: {freq_str}. "
+ "Governor must be provided as single element or " + "as last element in the form of min-max:governor. " + ) + freq["governor"] = freq_splitted[0] + + if freq_len >= 2: + freq["min"] = freq["max"] + freq["max"] = freq_splitted[1] + + if freq_len == 3: + freq["governor"] = freq_splitted[2] + + return freq + + +def _validate_cpu_freq(freq): + have_no_range = False + freq_min = cpu_freq_str_to_int(freq.get("min")) + freq_max = cpu_freq_str_to_int(freq.get("max")) + freq_gov = cpu_gov_str_to_int(freq.get("governor")) + + if freq_min != u32(None): + if freq_max == u32(None): + freq_max = freq_min + freq_min = u32(None) + have_no_range = True + elif freq_max < freq_min: + raise ValueError( + f"min cpu-freq ({freq_min}) must be smaller " + f"than max cpu-freq ({freq_max})" + ) + elif freq_max != u32(None) and freq_min == u32(None): + have_no_range = True + + if have_no_range and freq_gov != u32(None): + raise ValueError( + "Setting Governor when specifying only either one " + "of min or max is not allowed." + ) + + return freq_min, freq_max, freq_gov + + +def _validate_batch_script(script, args=None): + if Path(script).is_file(): + # First assume the caller is passing a path to a script and we try + # to load it. + script = Path(script).read_text() + else: + if args: + raise ValueError("Passing arguments to a script is only allowed " + "if it was loaded from a file.") + + # Validate the script + if not script or not len(script): + raise ValueError("Batch script is empty or none was provided.") + elif script.isspace(): + raise ValueError("Batch script contains only whitespace.") + elif not script.startswith("#!"): + msg = "Not a valid Batch script. " + msg += "First line must start with '#!'," + msg += "followed by the path to an interpreter" + raise ValueError(msg) + elif "\0" in script: + msg = "The Slurm Controller does not allow scripts that " + msg += "contain a NULL character: '\\0'." 
+ raise ValueError(msg) + elif "\r\n" in script: + msg = "Batch script contains DOS line breaks (\\r\\n) " + msg += "instead of expected UNIX line breaks (\\n)." + raise ValueError(msg) + + return script diff --git a/pyslurm/core/job/task_dist.pxd b/pyslurm/core/job/task_dist.pxd new file mode 100644 index 00000000..4f8a073d --- /dev/null +++ b/pyslurm/core/job/task_dist.pxd @@ -0,0 +1,41 @@ +######################################################################### +# task_dist.pxd - job task distribution +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
cdef class TaskDistribution:
    # Declaration of the extension type implemented in task_dist.pyx.

    cdef public:
        # Distribution method name per level, e.g. "block" or "cyclic".
        str nodes
        str sockets
        str cores
        # Filled from the `plane_size` constructor argument; a truthy value
        # selects the "plane" method.
        plane
        # True for Pack, False for NoPack, None when unspecified.
        pack

    # Encoded distribution value, returned by as_int().
    cdef task_dist_states_t state
# Single source of truth for the "nodes[:sockets[:cores]]" spellings and
# their Slurm constants. "arbitrary" must stay ahead of its alias "hostfile"
# so the reverse table maps SLURM_DIST_ARBITRARY back to "arbitrary".
_TASK_DIST_BY_NAME = {
    "cyclic": slurm.SLURM_DIST_CYCLIC,
    "block": slurm.SLURM_DIST_BLOCK,
    "arbitrary": slurm.SLURM_DIST_ARBITRARY,
    "hostfile": slurm.SLURM_DIST_ARBITRARY,
    "cyclic:cyclic": slurm.SLURM_DIST_CYCLIC_CYCLIC,
    "cyclic:block": slurm.SLURM_DIST_CYCLIC_BLOCK,
    "block:block": slurm.SLURM_DIST_BLOCK_BLOCK,
    "block:cyclic": slurm.SLURM_DIST_BLOCK_CYCLIC,
    "block:fcyclic": slurm.SLURM_DIST_BLOCK_CFULL,
    "cyclic:fcyclic": slurm.SLURM_DIST_CYCLIC_CFULL,
    "cyclic:cyclic:cyclic": slurm.SLURM_DIST_CYCLIC_CYCLIC_CYCLIC,
    "cyclic:cyclic:block": slurm.SLURM_DIST_CYCLIC_CYCLIC_BLOCK,
    "cyclic:cyclic:fcyclic": slurm.SLURM_DIST_CYCLIC_CYCLIC_CFULL,
    "cyclic:block:cyclic": slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC,
    "cyclic:block:block": slurm.SLURM_DIST_CYCLIC_BLOCK_BLOCK,
    "cyclic:block:fcyclic": slurm.SLURM_DIST_CYCLIC_BLOCK_CFULL,
    "cyclic:fcyclic:cyclic": slurm.SLURM_DIST_CYCLIC_CFULL_CYCLIC,
    "cyclic:fcyclic:block": slurm.SLURM_DIST_CYCLIC_CFULL_BLOCK,
    "cyclic:fcyclic:fcyclic": slurm.SLURM_DIST_CYCLIC_CFULL_CFULL,
    "block:cyclic:cyclic": slurm.SLURM_DIST_BLOCK_CYCLIC_CYCLIC,
    "block:cyclic:block": slurm.SLURM_DIST_BLOCK_CYCLIC_BLOCK,
    "block:cyclic:fcyclic": slurm.SLURM_DIST_BLOCK_CYCLIC_CFULL,
    "block:block:cyclic": slurm.SLURM_DIST_BLOCK_BLOCK_CYCLIC,
    "block:block:block": slurm.SLURM_DIST_BLOCK_BLOCK_BLOCK,
    "block:block:fcyclic": slurm.SLURM_DIST_BLOCK_BLOCK_CFULL,
    "block:fcyclic:cyclic": slurm.SLURM_DIST_BLOCK_CFULL_CYCLIC,
    "block:fcyclic:block": slurm.SLURM_DIST_BLOCK_CFULL_BLOCK,
    "block:fcyclic:fcyclic": slurm.SLURM_DIST_BLOCK_CFULL_CFULL,
}

# Reverse table (constant -> canonical spelling); the first spelling wins.
_TASK_DIST_BY_STATE = {}
for _name, _state in _TASK_DIST_BY_NAME.items():
    _TASK_DIST_BY_STATE.setdefault(_state, _name)


cdef class TaskDistribution:
    """Task distribution of a job across nodes, sockets and cores.

    Args:
        nodes (str): Distribution method across nodes. Default "block".
        sockets (str): Distribution method across sockets. Default "cyclic".
        cores (str): Distribution method across cores. Falls back to the
            value of `sockets` when not given.
        pack (bool): True for Pack, False for NoPack, None to leave unset.
        plane_size: When truthy, the "plane" method is selected and the
            other methods are ignored.

    Raises:
        ValueError: If the combination of methods is not a known one.
    """

    def __init__(self, nodes="block", sockets="cyclic",
                 cores=None, pack=None, plane_size=None):
        self.nodes = nodes
        self.sockets = sockets
        self.cores = cores if cores else self.sockets
        self.pack = pack
        self.plane = plane_size
        self.state = self._get_task_dist_state()

    def __eq__(self, other):
        if not isinstance(other, TaskDistribution):
            return NotImplemented
        return self.as_int() == other.as_int()

    @staticmethod
    def from_int(dist):
        """Build an instance from the numeric value, or None if unknown."""
        cdef TaskDistribution tdist = None

        if int(dist) <= 0 or dist == slurm.SLURM_DIST_UNKNOWN:
            return None

        if (dist & slurm.SLURM_DIST_STATE_BASE) != slurm.SLURM_DIST_UNKNOWN:
            tdist = _parse_task_dist_from_int(dist)

        dist_flag = dist & slurm.SLURM_DIST_STATE_FLAGS
        tdist = _parse_task_dist_flags_from_int(tdist, dist_flag)

        if tdist:
            tdist.state = dist

        return tdist

    def _to_str_no_flags(self):
        """Return "plane" or the full "nodes:sockets:cores" spelling."""
        if self.plane:
            return "plane"

        nodes = self.nodes
        if nodes is None or nodes == "*":
            nodes = "block"

        sockets = self.sockets
        if sockets is None or sockets == "*":
            sockets = "cyclic"

        # Cores inherit the *resolved* socket method; using the raw
        # attribute here used to yield strings like "block:cyclic:None".
        cores = self.cores
        if cores is None or cores == "*":
            cores = sockets

        return f"{nodes}:{sockets}:{cores}"

    def to_str(self):
        """Return the distribution string including a Pack/NoPack suffix."""
        dist_str = self._to_str_no_flags()

        if self.pack is not None:
            dist_str = f"{dist_str},{'Pack' if self.pack else 'NoPack'}"

        return dist_str

    def to_dict(self):
        return {
            "nodes": self.nodes,
            "sockets": self.sockets,
            "cores": self.cores,
            "plane": self.plane,
            "pack": self.pack,
        }

    def as_int(self):
        return self.state

    def _get_task_dist_state(self):
        """Compute the numeric state from the current attributes."""
        cdef task_dist_states_t dist_state

        dist_str = self._to_str_no_flags()
        if dist_str == "plane":
            return slurm.SLURM_DIST_PLANE

        dist_state = _parse_str_to_task_dist_int(dist_str)
        if dist_state == slurm.SLURM_DIST_UNKNOWN:
            raise ValueError(f"Invalid distribution specification: {dist_str}")

        # Check for Pack/NoPack
        # Don't do anything if it is None
        if self.pack:
            dist_state = (dist_state | slurm.SLURM_DIST_PACK_NODES)
        elif self.pack is not None and not self.pack:
            dist_state = (dist_state | slurm.SLURM_DIST_NO_PACK_NODES)

        return dist_state

    @staticmethod
    def from_str(dist_str):
        """Parse a "nodes[:sockets[:cores]][,Pack|NoPack]" or "plane[=N]" str.

        Raises:
            ValueError: If the resulting specification is not a known one.
        """
        cdef TaskDistribution tdist = TaskDistribution.__new__(TaskDistribution)

        # Plane method - return early because nothing else can be
        # specified when this is set.
        if "plane" in dist_str:
            if "plane=" in dist_str:
                plane_size = u16(dist_str.split("=", 1)[1])
                return TaskDistribution(plane_size=plane_size)
            else:
                return TaskDistribution(plane_size=True)

        # [0] = distribution method for nodes:sockets:cores
        # [1] = pack/nopack specification (true or false)
        dist_items = dist_str.split(",", 1)

        # Parse the different methods
        dist_methods = dist_items[0].split(":")
        if len(dist_methods) and dist_methods[0] != "*":
            tdist.nodes = dist_methods[0]

        # ">= 2": the socket method is the second field, so it must also be
        # honoured for two-element specifications such as "block:block".
        if len(dist_methods) >= 2 and dist_methods[1] != "*":
            tdist.sockets = dist_methods[1]

        if len(dist_methods) >= 3:
            if dist_methods[2] == "*":
                tdist.cores = tdist.sockets
            else:
                tdist.cores = dist_methods[2]

        if len(dist_items) > 1:
            if dist_items[1].casefold() == "pack":
                tdist.pack = True
            elif dist_items[1].casefold() == "nopack":
                tdist.pack = False

        tdist.state = tdist._get_task_dist_state()
        return tdist


# https://github.com/SchedMD/slurm/blob/510ba4f17dfa559b579aa054cb8a415dcc224abc/src/common/proc_args.c#L319
def _parse_task_dist_from_int(dist):
    """Translate the base part of `dist` into a TaskDistribution.

    Returns None when the base state is not a known distribution.
    """
    cdef TaskDistribution out = TaskDistribution.__new__(TaskDistribution)

    state = dist & slurm.SLURM_DIST_STATE_BASE
    if state == slurm.SLURM_DIST_PLANE:
        out.plane = state
        return out

    name = _TASK_DIST_BY_STATE.get(state)
    if name is None:
        return None

    # Only the levels present in the spelling are filled in.
    methods = name.split(":")
    out.nodes = methods[0]
    if len(methods) > 1:
        out.sockets = methods[1]
    if len(methods) > 2:
        out.cores = methods[2]

    return out


def _parse_task_dist_flags_from_int(TaskDistribution dst, dist_flag):
    """Apply the Pack/NoPack flag to `dst`, creating it when it is None."""
    if not dist_flag:
        return dst

    cdef TaskDistribution _dst = dst
    if not _dst:
        _dst = TaskDistribution.__new__(TaskDistribution)

    if dist_flag == slurm.SLURM_DIST_PACK_NODES:
        _dst.pack = True
    elif dist_flag == slurm.SLURM_DIST_NO_PACK_NODES:
        _dst.pack = False

    return _dst


def _parse_str_to_task_dist_int(dist_str):
    """Return the Slurm constant for `dist_str`, or SLURM_DIST_UNKNOWN."""
    return _TASK_DIST_BY_NAME.get(dist_str, slurm.SLURM_DIST_UNKNOWN)
def mail_type_list_to_int(types):
    """Convert a str or list of mail types to a uint16_t.

    Raises:
        ValueError: If an unknown mail type is given.
    """
    cdef uint16_t flags = 0

    if not types or "None" == types:
        return slurm.NO_VAL16

    if isinstance(types, str):
        types = types.split(",")

    for typ in types:
        typ = typ.casefold()

        if "array_tasks" == typ:
            flags |= slurm.MAIL_ARRAY_TASKS
        elif "begin" == typ:
            flags |= slurm.MAIL_JOB_BEGIN
        elif "end" == typ:
            flags |= slurm.MAIL_JOB_END
        elif "fail" == typ:
            flags |= slurm.MAIL_JOB_FAIL
        # elif "invalid_depend" == typ:
        #     flags |= slurm.MAIL_INVALID_DEPEND
        elif "requeue" == typ:
            flags |= slurm.MAIL_JOB_REQUEUE
        elif "stage_out" == typ:
            flags |= slurm.MAIL_JOB_STAGE_OUT
        elif "time_limit" == typ:
            flags |= slurm.MAIL_JOB_TIME100
        elif "time_limit_90" == typ:
            flags |= slurm.MAIL_JOB_TIME90
        elif "time_limit_80" == typ:
            flags |= slurm.MAIL_JOB_TIME80
        elif "time_limit_50" == typ:
            flags |= slurm.MAIL_JOB_TIME50
        elif "all" == typ:
            flags |= (slurm.MAIL_JOB_BEGIN
                      | slurm.MAIL_JOB_END
                      | slurm.MAIL_JOB_FAIL
                      | slurm.MAIL_JOB_REQUEUE
                      | slurm.MAIL_JOB_STAGE_OUT)
        else:
            # f-string so the offending value actually shows up.
            raise ValueError(f"Invalid Mail type: {typ}.")

    return flags


def mail_type_int_to_list(uint16_t typ):
    """Convert uint16_t to a list of mail types."""
    types = []

    if typ == 0:
        return types

    if typ & slurm.MAIL_ARRAY_TASKS:
        types.append("ARRAY_TASKS")

#    if typ & slurm.MAIL_INVALID_DEPEND:
#        types.append("invalid_depend")

    if typ & slurm.MAIL_JOB_BEGIN:
        types.append("BEGIN")

    if typ & slurm.MAIL_JOB_END:
        types.append("END")

    if typ & slurm.MAIL_JOB_FAIL:
        types.append("FAIL")

    if typ & slurm.MAIL_JOB_REQUEUE:
        types.append("REQUEUE")

    if typ & slurm.MAIL_JOB_STAGE_OUT:
        types.append("STAGE_OUT")

    if typ & slurm.MAIL_JOB_TIME50:
        types.append("TIME_LIMIT_50")

    if typ & slurm.MAIL_JOB_TIME80:
        types.append("TIME_LIMIT_80")

    if typ & slurm.MAIL_JOB_TIME90:
        types.append("TIME_LIMIT_90")

    if typ & slurm.MAIL_JOB_TIME100:
        types.append("TIME_LIMIT_100")

    return types


def acctg_profile_list_to_int(types):
    """Convert a str or list of accounting gather profiles to uint32_t.

    "none" and "all" short-circuit and are returned immediately.

    Raises:
        ValueError: If an unknown profile type is given.
    """
    cdef uint32_t profile = 0

    if not types:
        return slurm.NO_VAL

    if isinstance(types, str):
        types = types.split(",")

    for typ in types:
        typ = typ.casefold()

        if "energy" == typ:
            profile |= slurm.ACCT_GATHER_PROFILE_ENERGY
        elif "task" == typ:
            profile |= slurm.ACCT_GATHER_PROFILE_TASK
        elif "lustre" == typ:
            profile |= slurm.ACCT_GATHER_PROFILE_LUSTRE
        elif "network" == typ:
            profile |= slurm.ACCT_GATHER_PROFILE_NETWORK
        elif "none" == typ:
            return slurm.ACCT_GATHER_PROFILE_NONE
        elif "all" == typ:
            return slurm.ACCT_GATHER_PROFILE_ALL
        else:
            raise ValueError(f"Invalid profile type: {typ}.")

    return profile


def acctg_profile_int_to_list(flags):
    """Convert uint32_t accounting gather profiles to a list of strings."""
    profiles = []

    if flags == 0 or flags == slurm.NO_VAL:
        return []

    if flags == slurm.ACCT_GATHER_PROFILE_ALL:
        return ["ALL"]
    elif flags == slurm.ACCT_GATHER_PROFILE_NONE:
        return []

    if flags & slurm.ACCT_GATHER_PROFILE_ENERGY:
        profiles.append("ENERGY")

    if flags & slurm.ACCT_GATHER_PROFILE_TASK:
        profiles.append("TASK")

    if flags & slurm.ACCT_GATHER_PROFILE_LUSTRE:
        profiles.append("LUSTRE")

    if flags & slurm.ACCT_GATHER_PROFILE_NETWORK:
        profiles.append("NETWORK")

    return profiles


def power_type_list_to_int(types):
    """Convert a str or list of str with power types to uint8_t.

    Raises:
        ValueError: If an unknown power type is given.
    """
    cdef uint8_t flags = 0

    if not types:
        return slurm.NO_VAL8

    if isinstance(types, str):
        types = types.split(",")

    for typ in types:
        typ = typ.casefold()

        if "level" == typ:
            flags |= slurm.SLURM_POWER_FLAGS_LEVEL
        else:
            raise ValueError(f"Invalid power type: {typ}.")

    return flags


def power_type_int_to_list(flags):
    """Convert uint8_t power type flags to a list of strings."""
    types = []

    if flags & slurm.SLURM_POWER_FLAGS_LEVEL:
        types.append("LEVEL")

    return types


def shared_type_str_to_int(typ):
    """Convert a job-sharing type str to its numerical representation."""
    if not typ:
        return slurm.NO_VAL16

    typ = typ.casefold()
    if typ == "oversubscribe" or typ == "yes":
        return slurm.JOB_SHARED_OK
    elif typ == "user":
        return slurm.JOB_SHARED_USER
    elif typ == "mcs":
        return slurm.JOB_SHARED_MCS
    elif typ == "no" or typ == "exclusive":
        return slurm.JOB_SHARED_NONE
    else:
        raise ValueError(f"Invalid resource_sharing type: {typ}.")


def cpu_gov_str_to_int(gov):
    """Convert a cpu governor str to its numerical representation."""
    if not gov:
        return u32(None)

    gov = gov.casefold()
    rc = 0

    if gov == "conservative":
        rc = slurm.CPU_FREQ_CONSERVATIVE
    elif gov == "ondemand":
        rc = slurm.CPU_FREQ_ONDEMAND
    elif gov == "performance":
        rc = slurm.CPU_FREQ_PERFORMANCE
    elif gov == "powersave":
        rc = slurm.CPU_FREQ_POWERSAVE
    elif gov == "userspace":
        rc = slurm.CPU_FREQ_USERSPACE
    elif gov == "schedutil":
        rc = slurm.CPU_FREQ_SCHEDUTIL
    else:
        raise ValueError("Invalid cpu gov type: {}".format(gov))

    return rc | slurm.CPU_FREQ_RANGE_FLAG


def cpu_freq_str_to_int(freq):
    """Convert a cpu-frequency str to its numerical representation.

    Accepts the symbolic names "low", "medium", "highm1" and "high" as well
    as a plain number.

    Raises:
        ValueError: If the value is neither a known name nor a valid number.
    """
    if not freq:
        return slurm.NO_VAL

    if isinstance(freq, str) and not freq.isdigit():
        freq = freq.casefold()

        if freq == "low":
            return slurm.CPU_FREQ_LOW
        elif freq == "highm1":
            return slurm.CPU_FREQ_HIGHM1
        elif freq == "high":
            return slurm.CPU_FREQ_HIGH
        elif freq == "medium":
            return slurm.CPU_FREQ_MEDIUM
    else:
        fr = u32(int(freq))
        if fr != slurm.NO_VAL:
            return fr

    raise ValueError(f"Invalid cpu freq value: {freq}.")


def cpu_freq_int_to_str(freq):
    """Convert a numerical cpufreq value to its string representation."""
    if freq == slurm.CPU_FREQ_LOW:
        return "LOW"
    elif freq == slurm.CPU_FREQ_MEDIUM:
        return "MEDIUM"
    elif freq == slurm.CPU_FREQ_HIGHM1:
        return "HIGHM1"
    elif freq == slurm.CPU_FREQ_HIGH:
        return "HIGH"
    elif freq == slurm.CPU_FREQ_CONSERVATIVE:
        return "CONSERVATIVE"
    elif freq == slurm.CPU_FREQ_PERFORMANCE:
        return "PERFORMANCE"
    elif freq == slurm.CPU_FREQ_POWERSAVE:
        return "POWERSAVE"
    elif freq == slurm.CPU_FREQ_USERSPACE:
        return "USERSPACE"
    elif freq == slurm.CPU_FREQ_ONDEMAND:
        return "ONDEMAND"
    elif freq == slurm.CPU_FREQ_SCHEDUTIL:
        return "SCHEDUTIL"
    elif freq & slurm.CPU_FREQ_RANGE_FLAG:
        return None
    elif freq == slurm.NO_VAL or freq == 0:
        return None
    else:
        # This is in kHz
        return freq


def dependency_str_to_dict(dep):
    """Parse a job dependency string into a dict.

    Items are separated by "," (all must be satisfied) or "?" (any one is
    enough). Each item is either "singleton" or "<type>:<job>[:<job>...]".

    Returns:
        (dict): One list of jobs per dependency type, plus the "singleton"
            flag and the "satisfy" mode. None if `dep` is empty.
    """
    if not dep:
        return None

    out = {
        "after": [],
        "afterany": [],
        "afterburstbuffer": [],
        "aftercorr": [],
        "afternotok": [],
        "afterok": [],
        "singleton": False,
        "satisfy": "all",
    }

    delim = ","
    if "?" in dep:
        delim = "?"
        out["satisfy"] = "any"

    for item in dep.split(delim):
        if item == "singleton":
            out["singleton"] = True

        dep_and_job = item.split(":", 1)
        if len(dep_and_job) != 2:
            continue

        dep_name, jobs = dep_and_job[0], dep_and_job[1].split(":")
        target = out.get(dep_name)
        if not isinstance(target, list):
            # Unknown type, or "singleton"/"satisfy" written with a ':';
            # those entries are not lists and must not be appended to.
            continue

        for job in jobs:
            target.append(int(job) if job.isdigit() else job)

    return out
cdef class Nodes(MultiClusterMap):
    """A [`Multi Cluster`][pyslurm.xcollections.MultiClusterMap] collection of [pyslurm.Node][] objects.

    Args:
        nodes (Union[list[str], dict[str, Node], str], optional=None):
            Nodes to initialize this collection with.

    Attributes:
        free_memory (int):
            Amount of free memory in this node collection. (in Mebibytes)
            Note that this means actual free memory as returned by the `free`
            command
        real_memory (int):
            Amount of real memory in this node collection. (in Mebibytes)
        idle_memory (int):
            Amount of idle memory in this node collection. (in Mebibytes)
        allocated_memory (int):
            Amount of allocated memory in this node collection. (in Mebibytes)
        total_cpus (int):
            Total amount of CPUs in this node collection.
        idle_cpus (int):
            Total amount of idle CPUs in this node collection.
        allocated_cpus (int):
            Total amount of allocated CPUs in this node collection.
        effective_cpus (int):
            Total amount of effective CPUs in this node collection.
        current_watts (int):
            Total amount of Watts consumed in this node collection.
        avg_watts (int):
            Amount of average watts consumed in this node collection.
    """
    cdef:
        # Node and partition messages as returned by the Slurm load calls.
        node_info_msg_t *info
        partition_info_msg_t *part_info
        # Zeroed placeholder record.
        node_info_t tmp_info


cdef class Node:
    """A Slurm node.

    Args:
        name (str, optional=None):
            Name of a node

    Other Parameters:
        configured_gres (dict):
            Configured GRES for the node
        address (str):
            Address of the node
        hostname (str):
            Hostname of the node
        extra (str):
            Arbitrary extra string
        comment (str):
            Comment for the node
        weight (int):
            Weight associated to the node
        available_features (list):
            Available features for the node
        active_features (list):
            Active features for the node
        cpu_binding (str):
            Default CPU-Binding for the node
        state (str):
            State of the node
        reason (str):
            Reason for the Node, typically used along with updating the node
            state

    Attributes:
        name (str):
            Name of the node.
        architecture (str):
            Architecture of the node (e.g. x86_64)
        configured_gres (dict):
            Generic Resources this Node is configured with.
        owner (str):
            User that owns the Node.
        address (str):
            Address of the node.
        hostname (str):
            Hostname of the node.
        extra (str):
            Arbitrary string attached to the Node.
        reason (str):
            Reason why this node is in its current state.
        reason_user (str):
            Name of the User who set the reason.
        comment (str):
            Arbitrary node comment.
        bcast_address (str):
            Address of the node for sbcast.
        slurm_version (str):
            Version of slurm this node is running on.
        operating_system (str):
            Name of the operating system installed.
        allocated_gres (dict):
            Generic Resources currently in use on the node.
        mcs_label (str):
            MCS label for the node.
        allocated_memory (int):
            Memory in Mebibytes allocated on the node.
        real_memory (int):
            Real Memory in Mebibytes configured for this node.
        free_memory (int):
            Free Memory in Mebibytes on the node.
            Note that this means actual free memory as returned by the `free`
            command
        idle_memory (int):
            Idle Memory in Mebibytes on the node.
        memory_reserved_for_system (int):
            Memory in Mebibytes reserved for the System not usable by Jobs.
        temporary_disk (int):
            Amount of temporary disk space this node has, in Mebibytes.
        weight (int):
            Weight of the node in scheduling.
        effective_cpus (int):
            Number of effective CPUs the node has.
        total_cpus (int):
            Total amount of CPUs the node has.
        sockets (int):
            Number of sockets the node has.
        cores_reserved_for_system (int):
            Number of cores reserved for the System not usable by Jobs.
        boards (int):
            Number of boards the node has.
        cores_per_socket (int):
            Number of cores per socket configured for the node.
        threads_per_core (int):
            Number of threads per core configured for the node.
        available_features (list):
            List of features available on the node.
        active_features (list):
            List of features on the node.
        partitions (list):
            List of partitions this Node is part of.
        boot_time (int):
            Time the node has booted, as unix timestamp.
        slurmd_start_time (int):
            Time the slurmd has started on the Node, as unix timestamp.
        last_busy_time (int):
            Time this node was last busy, as unix timestamp.
        reason_time (int):
            Time the reason was set for the node, as unix timestamp.
        allocated_tres (dict):
            Currently allocated Trackable Resources
        allocated_cpus (int):
            Number of allocated CPUs on the node.
        idle_cpus (int):
            Number of idle CPUs.
        cpu_binding (str):
            Default CPU-Binding on the node.
        cap_watts (int):
            Node cap watts.
        current_watts (int):
            Current amount of watts consumed on the node.
        avg_watts (int):
            Average amount of watts consumed on the node.
        external_sensors (dict):
            External Sensor info for the Node.
            The dict returned contains the following information:

            * `joules_total` (int)
            * `current_watts` (int)
            * `temperature` (int)
        state (str):
            State the node is currently in.
        next_state (str):
            Next state the node will be in.
        cpu_load (float):
            CPU Load on the Node.
        slurmd_port (int):
            Port the slurmd is listening on the node.
    """
    cdef:
        # Node record and pending update message.
        node_info_t *info
        update_node_msg_t *umsg
        # NOTE(review): presumably caches for UID/GID -> name lookups;
        # confirm against the property implementations.
        dict passwd
        dict groups

    cdef readonly cluster

    @staticmethod
    cdef _swap_data(Node dst, Node src)

    @staticmethod
    cdef Node from_ptr(node_info_t *in_ptr)
cdef class Nodes(MultiClusterMap):

    def __dealloc__(self):
        slurm_free_node_info_msg(self.info)
        slurm_free_partition_info_msg(self.part_info)

    def __cinit__(self):
        self.info = NULL
        self.part_info = NULL

    def __init__(self, nodes=None):
        super().__init__(data=nodes,
                         typ="Nodes",
                         val_type=Node,
                         id_attr=Node.name,
                         key_type=str)

    @staticmethod
    def load(preload_passwd_info=False):
        """Load all nodes in the system.

        Args:
            preload_passwd_info (bool):
                Decides whether to query passwd and groups information from
                the system.
                Could potentially speed up access to attributes of the Node
                where a UID/GID is translated to a name.
                If True, the information will be fetched and stored in each
                of the Node instances. The default is False.

        Returns:
            (pyslurm.Nodes): Collection of node objects.

        Raises:
            RPCError: When getting all the Nodes from the slurmctld failed.
        """
        cdef:
            dict passwd = {}
            dict groups = {}
            Nodes nodes = Nodes()
            int flags = slurm.SHOW_ALL | slurm.SHOW_DETAIL
            Node node

        verify_rpc(slurm_load_node(0, &nodes.info, flags))
        verify_rpc(slurm_load_partitions(0, &nodes.part_info, flags))
        slurm_populate_node_partitions(nodes.info, nodes.part_info)

        # If requested, preload the passwd and groups database to potentially
        # speedup lookups for an attribute in a node, e.g "owner".
        if preload_passwd_info:
            passwd = _getpwall_to_dict()
            groups = _getgrall_to_dict()

        # zero-out a dummy node_info_t
        memset(&nodes.tmp_info, 0, sizeof(node_info_t))

        # Put each node pointer into its own "Node" instance.
        for cnt in range(nodes.info.record_count):
            # from_ptr copies the record into memory owned by the new Node.
            node = Node.from_ptr(&nodes.info.node_array[cnt])

            # Prevent double free if xmalloc fails mid-loop and a MemoryError
            # is raised by replacing it with a zeroed-out node_info_t.
            nodes.info.node_array[cnt] = nodes.tmp_info

            name = node.name
            if not name:
                # Could be possible if there are nodes configured in
                # slurm.conf that cannot be reached anymore.
                continue

            if preload_passwd_info:
                node.passwd = passwd
                node.groups = groups

            # Nodes are stored per cluster: data[cluster][name] = node
            cluster = node.cluster
            if cluster not in nodes.data:
                nodes.data[cluster] = {}
            nodes.data[cluster][name] = node

        # We have extracted all pointers
        nodes.info.record_count = 0
        return nodes

    def reload(self):
        """Reload the information for Nodes in a collection.

        !!! note

            Only information for nodes which are already in the collection at
            the time of calling this method will be reloaded.

        Returns:
            (pyslurm.Nodes): Returns self

        Raises:
            RPCError: When getting the Nodes from the slurmctld failed.
        """
        return xcollections.multi_reload(self)

    def modify(self, Node changes):
        """Modify all Nodes in a collection.

        Args:
            changes (pyslurm.Node):
                Another Node object that contains all the changes to apply.
                Check the `Other Parameters` of the Node class to see which
                properties can be modified.

        Raises:
            RPCError: When updating the Node was not successful.

        Examples:
            >>> import pyslurm
            >>>
            >>> nodes = pyslurm.Nodes.load()
            >>> # Prepare the changes
            >>> changes = pyslurm.Node(state="DRAIN", reason="DRAIN Reason")
            >>> # Apply the changes to all the nodes
            >>> nodes.modify(changes)
        """
        cdef Node n = changes
        # All node names of this collection are sent as one range string,
        # so a single update call covers every node.
        node_str = nodelist_to_range_str(list(self.keys()))
        n._alloc_umsg()
        cstr.fmalloc(&n.umsg.node_names, node_str)
        verify_rpc(slurm_update_node(n.umsg))

    # The properties below sum the per-node attribute over the collection.

    @property
    def free_memory(self):
        return xcollections.sum_property(self, Node.free_memory)

    @property
    def real_memory(self):
        return xcollections.sum_property(self, Node.real_memory)

    @property
    def idle_memory(self):
        return xcollections.sum_property(self, Node.idle_memory)

    @property
    def allocated_memory(self):
        return xcollections.sum_property(self, Node.allocated_memory)

    @property
    def total_cpus(self):
        return xcollections.sum_property(self, Node.total_cpus)

    @property
    def idle_cpus(self):
        return xcollections.sum_property(self, Node.idle_cpus)

    @property
    def allocated_cpus(self):
        return xcollections.sum_property(self, Node.allocated_cpus)

    @property
    def effective_cpus(self):
        return xcollections.sum_property(self, Node.effective_cpus)

    @property
    def current_watts(self):
        return xcollections.sum_property(self, Node.current_watts)

    @property
    def avg_watts(self):
        return xcollections.sum_property(self, Node.avg_watts)
self._alloc_umsg() + + def _alloc_info(self): + if not self.info: + self.info = try_xmalloc(sizeof(node_info_t)) + if not self.info: + raise MemoryError("xmalloc failed for node_info_t") + + def _alloc_umsg(self): + if not self.umsg: + self.umsg = try_xmalloc(sizeof(update_node_msg_t)) + if not self.umsg: + raise MemoryError("xmalloc failed for update_node_msg_t") + slurm_init_update_node_msg(self.umsg) + + def _dealloc_impl(self): + slurm_free_update_node_msg(self.umsg) + self.umsg = NULL + slurm_free_node_info_members(self.info) + xfree(self.info) + + def __dealloc__(self): + self._dealloc_impl() + + def __setattr__(self, name, val): + # When a user wants to set attributes on a Node instance that was + # created by calling Nodes(), the "umsg" pointer is not yet allocated. + # We only allocate memory for it by the time the user actually wants + # to modify something. + self._alloc_umsg() + # Call descriptors __set__ directly + Node.__dict__[name].__set__(self, val) + + def __repr__(self): + return f'pyslurm.{self.__class__.__name__}({self.name})' + + @staticmethod + cdef Node from_ptr(node_info_t *in_ptr): + cdef Node wrap = Node.__new__(Node) + wrap._alloc_info() + wrap.passwd = {} + wrap.groups = {} + wrap.cluster = LOCAL_CLUSTER + memcpy(wrap.info, in_ptr, sizeof(node_info_t)) + return wrap + + cdef _swap_data(Node dst, Node src): + cdef node_info_t *tmp = NULL + if dst.info and src.info: + tmp = dst.info + dst.info = src.info + src.info = tmp + + @staticmethod + def load(name): + """Load information for a specific node. + + Implements the slurm_load_node_single RPC. + + Args: + name (str): + The name of the Node to load. + + Returns: + (pyslurm.Node): Returns a new Node instance. + + Raises: + RPCError: If requesting the Node information from the slurmctld + was not successful. 
+ + Examples: + >>> import pyslurm + >>> node = pyslurm.Node.load("localhost") + """ + cdef: + node_info_msg_t *node_info = NULL + partition_info_msg_t *part_info = NULL + Node wrap = None + + try: + verify_rpc(slurm_load_node_single(&node_info, + name, slurm.SHOW_ALL)) + verify_rpc(slurm_load_partitions(0, &part_info, slurm.SHOW_ALL)) + slurm_populate_node_partitions(node_info, part_info) + + if node_info and node_info.record_count: + wrap = Node.from_ptr(&node_info.node_array[0]) + node_info.record_count = 0 + else: + raise RPCError(msg=f"Node '{name}' does not exist") + except Exception as e: + raise e + finally: + slurm_free_node_info_msg(node_info) + slurm_free_partition_info_msg(part_info) + + return wrap + + def create(self, state="future"): + """Create a node. + + Implements the slurm_create_node RPC. + + Args: + state (str, optional): + An optional state the created Node should have. Allowed values + are `future` and `cloud`. `future` is the default. + + Returns: + (pyslurm.Node): This function returns the current Node-instance + object itself. + + Raises: + RPCError: If creating the Node was not successful. + + Examples: + >>> import pyslurm + >>> node = pyslurm.Node("testnode").create() + """ + if not self.name: + raise ValueError("You need to set a node name first.") + + self._alloc_umsg() + cstr.fmalloc(&self.umsg.extra, + f"NodeName={self.name} State={state}") + verify_rpc(slurm_create_node(self.umsg)) + + return self + + def modify(self, Node changes): + """Modify a node. + + Implements the slurm_update_node RPC. + + Args: + changes (pyslurm.Node): + Another Node object that contains all the changes to apply. + Check the `Other Parameters` of the Node class to see which + properties can be modified. + + Raises: + RPCError: When updating the Node was not successful. 
+ + Examples: + >>> import pyslurm + >>> + >>> mynode = pyslurm.Node.load("localhost") + >>> # Prepare the changes + >>> changes = pyslurm.Node(state="DRAIN", reason="DRAIN Reason") + >>> # Modify it + >>> mynode.modify(changes) + """ + cdef Node n = changes + n._alloc_umsg() + cstr.fmalloc(&n.umsg.node_names, self.name) + verify_rpc(slurm_update_node(n.umsg)) + + def delete(self): + """Delete a node. + + Implements the slurm_delete_node RPC. + + Raises: + RPCError: If deleting the Node was not successful. + + Examples: + >>> import pyslurm + >>> pyslurm.Node("localhost").delete() + """ + self._alloc_umsg() + verify_rpc(slurm_delete_node(self.umsg)) + + def as_dict(self): + return self.to_dict() + + def to_dict(self): + """Node information formatted as a dictionary. + + Returns: + (dict): Node information as dict + + Examples: + >>> import pyslurm + >>> mynode = pyslurm.Node.load("mynode") + >>> mynode_dict = mynode.to_dict() + """ + return instance_to_dict(self) + + @property + def name(self): + return cstr.to_unicode(self.info.name) + + @name.setter + def name(self, val): + cstr.fmalloc2(&self.info.name, &self.umsg.node_names, val) + + @property + def architecture(self): + return cstr.to_unicode(self.info.arch) + + @property + def configured_gres(self): + return cstr.to_gres_dict(self.info.gres) + + @configured_gres.setter + def configured_gres(self, val): + cstr.fmalloc2(&self.info.gres, &self.umsg.gres, + cstr.from_gres_dict(val)) + + @property + def owner(self): + return uid_to_name(self.info.owner, lookup=self.passwd) + + @property + def address(self): + return cstr.to_unicode(self.info.node_addr) + + @address.setter + def address(self, val): + cstr.fmalloc2(&self.info.node_addr, &self.umsg.node_addr, val) + + @property + def hostname(self): + return cstr.to_unicode(self.info.node_hostname) + + @hostname.setter + def hostname(self, val): + cstr.fmalloc2(&self.info.node_hostname, &self.umsg.node_hostname, val) + + @property + def extra(self): + return 
cstr.to_unicode(self.info.extra) + + @extra.setter + def extra(self, val): + cstr.fmalloc2(&self.info.extra, &self.umsg.extra, val) + + @property + def reason(self): + return cstr.to_unicode(self.info.reason) + + @reason.setter + def reason(self, val): + cstr.fmalloc2(&self.info.reason, &self.umsg.reason, val) + + @property + def reason_user(self): + return uid_to_name(self.info.reason_uid, lookup=self.passwd) + + @property + def comment(self): + return cstr.to_unicode(self.info.comment) + + @comment.setter + def comment(self, val): + cstr.fmalloc2(&self.info.comment, &self.umsg.comment, val) + + @property + def bcast_address(self): + return cstr.to_unicode(self.info.bcast_address) + + @property + def slurm_version(self): + return cstr.to_unicode(self.info.version) + + @property + def operating_system(self): + return cstr.to_unicode(self.info.os) + + @property + def allocated_gres(self): + return gres_from_tres_dict(self.allocated_tres) + + @property + def mcs_label(self): + return cstr.to_unicode(self.info.mcs_label) + + @property + def allocated_memory(self): + cdef uint64_t alloc_memory = 0 + if self.info.select_nodeinfo: + slurm_get_select_nodeinfo( + self.info.select_nodeinfo, + slurm.SELECT_NODEDATA_MEM_ALLOC, + slurm.NODE_STATE_ALLOCATED, + &alloc_memory) + return alloc_memory + + @property + def real_memory(self): + return u64_parse(self.info.real_memory) + + @property + def free_memory(self): + return u64_parse(self.info.free_mem) + + @property + def idle_memory(self): + real = self.real_memory + return 0 if not real else real - self.allocated_memory + + @property + def memory_reserved_for_system(self): + return u64_parse(self.info.mem_spec_limit) + + @property + def temporary_disk(self): + return u32_parse(self.info.tmp_disk) + + @property + def weight(self): + return u32_parse(self.info.weight) + + @weight.setter + def weight(self, val): + self.info.weight=self.umsg.weight = u32(val) + + @property + def effective_cpus(self): + return 
u16_parse(self.info.cpus_efctv) + + @property + def total_cpus(self): + return u16_parse(self.info.cpus, on_noval=0) + + @property + def sockets(self): + return u16_parse(self.info.sockets, on_noval=0) + + @property + def cores_reserved_for_system(self): + return u16_parse(self.info.core_spec_cnt) + + @property + def boards(self): + return u16_parse(self.info.boards) + + @property + def cores_per_socket(self): + return u16_parse(self.info.cores) + + @property + def threads_per_core(self): + return u16_parse(self.info.threads) + + @property + def available_features(self): + return cstr.to_list(self.info.features) + + @available_features.setter + def available_features(self, val): + cstr.from_list2(&self.info.features, &self.umsg.features, val) + + @property + def active_features(self): + return cstr.to_list(self.info.features_act) + + @active_features.setter + def active_features(self, val): + cstr.from_list2(&self.info.features_act, &self.umsg.features_act, val) + + @property + def partitions(self): + return cstr.to_list(self.info.partitions) + + @property + def boot_time(self): + return _raw_time(self.info.boot_time) + + @property + def slurmd_start_time(self): + return _raw_time(self.info.slurmd_start_time) + + @property + def last_busy_time(self): + return _raw_time(self.info.last_busy) + + @property + def reason_time(self): + return _raw_time(self.info.reason_time) + +# @property +# def tres_configured(self): +# """dict: TRES that are configured on the node.""" +# return cstr.to_dict(self.info.tres_fmt_str) + + @property + def allocated_tres(self): + cdef char *alloc_tres = NULL + if self.info.select_nodeinfo: + slurm_get_select_nodeinfo( + self.info.select_nodeinfo, + slurm.SELECT_NODEDATA_TRES_ALLOC_FMT_STR, + slurm.NODE_STATE_ALLOCATED, + &alloc_tres + ) + return cstr.to_dict(alloc_tres) + + @property + def allocated_cpus(self): + cdef uint16_t alloc_cpus = 0 + if self.info.select_nodeinfo: + slurm_get_select_nodeinfo( + self.info.select_nodeinfo, + 
slurm.SELECT_NODEDATA_SUBCNT, + slurm.NODE_STATE_ALLOCATED, + &alloc_cpus + ) + return alloc_cpus + + @property + def idle_cpus(self): + efctv = self.effective_cpus + if not efctv: + return None + + return efctv - self.allocated_cpus + + @property + def cpu_binding(self): + cdef char cpu_bind[128] + slurm_sprint_cpu_bind_type(cpu_bind, + self.info.cpu_bind) + if cpu_bind == "(null type)": + return None + + return cstr.to_unicode(cpu_bind) + + @cpu_binding.setter + def cpu_binding(self, val): + self.info.cpu_bind=self.umsg.cpu_bind = cpubind_to_num(val) + + @property + def cap_watts(self): + if not self.info.power: + return 0 + return u32_parse(self.info.power.cap_watts, on_noval=0) + + @property + def current_watts(self): + if not self.info.energy: + return 0 + return u32_parse(self.info.energy.current_watts, on_noval=0) + + @property + def avg_watts(self): + if not self.info.energy: + return 0 + return u32_parse(self.info.energy.ave_watts, on_noval=0) + + @property + def external_sensors(self): + if not self.info.ext_sensors: + return {} + + return { + "joules_total": u64_parse(self.info.ext_sensors.consumed_energy), + "current_watts": u32_parse(self.info.ext_sensors.current_watts), + "temperature": u32_parse(self.info.ext_sensors.temperature) + } + + @property + def _node_state(self): + idle_cpus = self.idle_cpus + state = self.info.node_state + + if idle_cpus and idle_cpus != self.effective_cpus: + # If we aren't idle but also not allocated, then set state to + # MIXED. 
+ state &= slurm.NODE_STATE_FLAGS + state |= slurm.NODE_STATE_MIXED + + return state + + @property + def state(self): + cdef char* state = slurm_node_state_string_complete(self._node_state) + state_str = cstr.to_unicode(state) + xfree(state) + return state_str + + @state.setter + def state(self, val): + self.umsg.node_state=self.info.node_state = _node_state_from_str(val) + + @property + def next_state(self): + state = self._node_state + if ((self.info.next_state != slurm.NO_VAL) + and (state & slurm.NODE_STATE_REBOOT_REQUESTED + or state & slurm.NODE_STATE_REBOOT_ISSUED)): + return cstr.to_unicode( + slurm_node_state_string(self.info.next_state)) + else: + return None + + @property + def cpu_load(self): + load = u32_parse(self.info.cpu_load) + return load / 100.0 if load is not None else 0.0 + + @property + def slurmd_port(self): + return u16_parse(self.info.port) + + +def _node_state_from_str(state, err_on_invalid=True): + if not state: + return slurm.NO_VAL + ustate = state.upper() + + # Following states are explicitly possible as per documentation + # https://slurm.schedmd.com/scontrol.html#OPT_State_1 + if ustate == "CANCEL_REBOOT": + return slurm.NODE_STATE_CANCEL_REBOOT + elif ustate == "DOWN": + return slurm.NODE_STATE_DOWN + elif ustate == "DRAIN": + return slurm.NODE_STATE_DRAIN + elif ustate == "FAIL": + return slurm.NODE_STATE_FAIL + elif ustate == "FUTURE": + return slurm.NODE_STATE_FUTURE + elif ustate == "NORESP" or ustate == "NO_RESP": + return slurm.NODE_STATE_NO_RESPOND + elif ustate == "POWER_DOWN": + return slurm.NODE_STATE_POWER_DOWN + elif ustate == "POWER_DOWN_ASAP": + # Drain and mark for power down + return slurm.NODE_STATE_POWER_DOWN | slurm.NODE_STATE_POWER_DRAIN + elif ustate == "POWER_DOWN_FORCE": + # Kill all Jobs and power down + return slurm.NODE_STATE_POWER_DOWN | slurm.NODE_STATE_POWERED_DOWN + elif ustate == "POWER_UP": + return slurm.NODE_STATE_POWER_UP + elif ustate == "RESUME": + return slurm.NODE_RESUME + elif ustate == 
"UNDRAIN": + return slurm.NODE_STATE_UNDRAIN + + if err_on_invalid: + raise ValueError(f"Invalid Node state: {state}") + else: + return slurm.NO_VAL diff --git a/pyslurm/core/partition.pxd b/pyslurm/core/partition.pxd new file mode 100644 index 00000000..a5a638df --- /dev/null +++ b/pyslurm/core/partition.pxd @@ -0,0 +1,223 @@ +######################################################################### +# partition.pxd - interface to work with partitions in slurm +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# Copyright (C) 2023 PySlurm Developers +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
#
# cython: c_string_type=unicode, c_string_encoding=default
# cython: language_level=3

from libc.string cimport memcpy, memset
from pyslurm cimport slurm
from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
from pyslurm.slurm cimport (
    partition_info_msg_t,
    job_defaults_t,
    delete_part_msg_t,
    partition_info_t,
    update_part_msg_t,
    slurm_free_partition_info_members,
    slurm_free_partition_info_msg,
    slurm_free_update_part_msg,
    slurm_init_part_desc_msg,
    slurm_load_partitions,
    slurm_sprint_cpu_bind_type,
    cpu_bind_type_t,
    slurm_preempt_mode_string,
    slurm_preempt_mode_num,
    slurm_create_partition,
    slurm_update_partition,
    slurm_delete_partition,
    xfree,
    try_xmalloc,
)
from pyslurm.db.util cimport (
    SlurmList,
    SlurmListItem,
)
from pyslurm.utils cimport cstr
from pyslurm.utils cimport ctime
from pyslurm.utils.ctime cimport time_t
from pyslurm.utils.uint cimport *
from pyslurm.core cimport slurmctld
from pyslurm.xcollections cimport MultiClusterMap


cdef class Partitions(MultiClusterMap):
    """A [`Multi Cluster`][pyslurm.xcollections.MultiClusterMap] collection of [pyslurm.Partition][] objects.

    Args:
        partitions (Union[list[str], dict[str, Partition], str], optional=None):
            Partitions to initialize this collection with.

    Attributes:
        total_cpus (int):
            Total amount of CPUs the Partitions in a Collection have
        total_nodes (int):
            Total amount of Nodes the Partitions in a Collection have
    """
    cdef:
        # Raw message filled by slurm_load_partitions in Partitions.load();
        # freed when the collection is deallocated.
        partition_info_msg_t *info
        # Zeroed-out dummy record. Partitions.load() writes it over every
        # array entry that has been copied into its own Partition instance.
        partition_info_t tmp_info


cdef class Partition:
    """A Slurm partition.

    ??? info "Setting Memory related attributes"

        Unless otherwise noted, all attributes in this class representing a
        memory value, like `default_memory_per_cpu`, may also be set with a
        string that contains suffixes like "K", "M", "G" or "T".

        For example:

            default_memory_per_cpu = "10G"

        This will internally be converted to 10240 (how the Slurm API expects
        it)

    Args:
        name (str, optional=None):
            Name of a Partition
        **kwargs (Any, optional=None):
            Every attribute of a Partition can be set, except for:

            * total_cpus
            * total_nodes
            * select_type_parameters

    Attributes:
        name (str):
            Name of the Partition.
        allowed_submit_nodes (list[str]):
            List of Nodes from which Jobs can be submitted to the partition.
        allowed_accounts (list[str]):
            List of Accounts which are allowed to execute Jobs
        allowed_groups (list[str]):
            List of Groups which are allowed to execute Jobs
        allowed_qos (list[str]):
            List of QoS which are allowed to execute Jobs
        alternate (str):
            Name of the alternate Partition in case a Partition is down.
        select_type_parameters (list[str]):
            List of Select type parameters for the select plugin.
        cpu_binding (str):
            Default CPU-binding for Jobs that execute in a Partition.
        default_memory_per_cpu (int):
            Default Memory per CPU for Jobs in this Partition, in Mebibytes.
            Mutually exclusive with `default_memory_per_node`.

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        default_memory_per_node (int):
            Default Memory per Node for Jobs in this Partition, in Mebibytes.
            Mutually exclusive with `default_memory_per_cpu`.

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        max_memory_per_cpu (int):
            Max Memory per CPU allowed for Jobs in this Partition, in
            Mebibytes. Mutually exclusive with `max_memory_per_node`.

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        max_memory_per_node (int):
            Max Memory per Node allowed for Jobs in this Partition, in
            Mebibytes. Mutually exclusive with `max_memory_per_cpu`

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        default_time (int):
            Default run time-limit in minutes for Jobs that don't specify one.

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        denied_qos (list[str]):
            List of QoS that cannot be used in a Partition
        denied_accounts (list[str]):
            List of Accounts that cannot use a Partition
        preemption_grace_time (int):
            Grace Time in seconds when a Job is selected for Preemption.
        default_cpus_per_gpu (int):
            Default CPUs per GPU for Jobs in this Partition
        default_memory_per_gpu (int):
            Default Memory per GPU, in Mebibytes, for Jobs in this Partition
        max_cpus_per_node (int):
            Max CPUs per Node allowed for Jobs in this Partition

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        max_cpus_per_socket (int):
            Max CPUs per Socket allowed for Jobs in this Partition

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        max_nodes (int):
            Max number of Nodes allowed for Jobs

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        min_nodes (int):
            Minimum number of Nodes that must be requested by Jobs
        max_time (int):
            Max Time-Limit in minutes that Jobs can request

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        oversubscribe (str):
            The oversubscribe mode for this Partition
        nodes (str):
            Nodes that are in a Partition
        nodesets (list[str]):
            List of Nodesets that a Partition has configured
        over_time_limit (int):
            Limit in minutes that Jobs can exceed their time-limit

            This can also return [UNLIMITED][pyslurm.constants.UNLIMITED]
        preempt_mode (str):
            Preemption Mode in a Partition
        priority_job_factor (int):
            The Priority Job Factor for a partition
        priority_tier (int):
            The priority tier for a Partition
        qos (str):
            A QoS associated with a Partition, used to extend possible limits
        total_cpus (int):
            Total number of CPUs available in a Partition
        total_nodes (int):
            Total number of nodes available in a Partition
        state (str):
            State the Partition is in
        is_default (bool):
            Whether this Partition is the default partition or not
        allow_root_jobs (bool):
            Whether Jobs by the root user are allowed
        is_user_exclusive (bool):
            Whether nodes will be exclusively allocated to users
        is_hidden (bool):
            Whether the partition is hidden or not
        least_loaded_nodes_scheduling (bool):
            Whether Least-Loaded-Nodes scheduling algorithm is used on a
            Partition
        is_root_only (bool):
            Whether only root is able to use a Partition
        requires_reservation (bool):
            Whether a reservation is required to use a Partition
    """
    cdef:
        # The partition record backing all properties of an instance.
        partition_info_t *ptr
        # Set by Partitions.load(): 1 if both a suspend and a resume program
        # are configured in the loaded slurmctld config, otherwise 0.
        int power_save_enabled
        # The slurmctld configuration loaded in Partitions.load().
        slurmctld.Config slurm_conf

    # Name of the cluster this Partition belongs to (read-only from Python).
    cdef readonly cluster

    # Wraps a raw partition_info_t into a new Partition instance.
    @staticmethod
    cdef Partition from_ptr(partition_info_t *in_ptr)
diff --git a/pyslurm/core/partition.pyx b/pyslurm/core/partition.pyx
new file mode 100644
index 00000000..ba0bf559
--- /dev/null
+++ b/pyslurm/core/partition.pyx
@@ -0,0 +1,848 @@
#########################################################################
# partition.pyx - interface to work with partitions in slurm
#########################################################################
# Copyright (C) 2023 Toni Harzendorf
# Copyright (C) 2023 PySlurm Developers
#
# This file is part of PySlurm
#
# PySlurm is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# PySlurm is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with PySlurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# cython: c_string_type=unicode, c_string_encoding=default
# cython: language_level=3

from typing import Union, Any
from pyslurm.utils import cstr
from pyslurm.utils import ctime
from pyslurm.utils.uint import *
from pyslurm.core.error import RPCError, verify_rpc
from pyslurm.utils.ctime import timestamp_to_date, _raw_time
from pyslurm.constants import UNLIMITED
from pyslurm.settings import LOCAL_CLUSTER
from pyslurm import xcollections
from pyslurm.utils.helpers import (
    uid_to_name,
    gid_to_name,
    _getgrall_to_dict,
    _getpwall_to_dict,
    cpubind_to_num,
    instance_to_dict,
    dehumanize,
)
from pyslurm.utils.ctime import (
    timestr_to_mins,
    timestr_to_secs,
)


cdef class Partitions(MultiClusterMap):

    def __dealloc__(self):
        # After a successful load() the message has record_count == 0,
        # because every record has been handed over to a Partition instance.
        slurm_free_partition_info_msg(self.info)

    def __cinit__(self):
        self.info = NULL

    def __init__(self, partitions=None):
        super().__init__(data=partitions,
                         typ="Partitions",
                         val_type=Partition,
                         id_attr=Partition.name,
                         key_type=str)

    @staticmethod
    def load():
        """Load all Partitions in the system.

        Returns:
            (pyslurm.Partitions): Collection of Partition objects.

        Raises:
            RPCError: When getting all the Partitions from the slurmctld
                failed.
            MemoryError: If allocating the memory for a Partition instance
                failed.
        """
        cdef:
            Partitions partitions = Partitions()
            int flags = slurm.SHOW_ALL
            Partition partition
            slurmctld.Config slurm_conf
            int power_save_enabled = 0

        verify_rpc(slurm_load_partitions(0, &partitions.info, flags))
        slurm_conf = slurmctld.Config.load()

        # zero-out a dummy partition_info_t
        memset(&partitions.tmp_info, 0, sizeof(partition_info_t))

        # Power saving counts as enabled when both programs are configured.
        if slurm_conf.suspend_program and slurm_conf.resume_program:
            power_save_enabled = 1

        # Put each pointer into its own instance.
        for cnt in range(partitions.info.record_count):
            partition = Partition.from_ptr(&partitions.info.partition_array[cnt])

            # Prevent double free if xmalloc fails mid-loop and a MemoryError
            # is raised by replacing it with a zeroed-out partition_info_t.
            partitions.info.partition_array[cnt] = partitions.tmp_info

            cluster = partition.cluster
            if cluster not in partitions.data:
                partitions.data[cluster] = {}

            partition.power_save_enabled = power_save_enabled
            partition.slurm_conf = slurm_conf
            partitions.data[cluster][partition.name] = partition

        # We have extracted all pointers
        partitions.info.record_count = 0
        return partitions

    def reload(self):
        """Reload the information for Partitions in a collection.

        !!! note

            Only information for Partitions which are already in the
            collection at the time of calling this method will be reloaded.

        Returns:
            (pyslurm.Partitions): Returns self

        Raises:
            RPCError: When getting the Partitions from the slurmctld failed.
        """
        return xcollections.multi_reload(self)

    def modify(self, changes):
        """Modify all Partitions in a Collection.

        Args:
            changes (pyslurm.Partition):
                Another Partition object that contains all the changes to
                apply. Check the `Other Parameters` of the Partition class to
                see which properties can be modified.

        Raises:
            RPCError: When updating at least one Partition failed.

        Examples:
            >>> import pyslurm
            >>>
            >>> parts = pyslurm.Partitions.load()
            >>> # Prepare the changes
            >>> changes = pyslurm.Partition(state="DRAIN")
            >>> # Apply the changes to all the partitions
            >>> parts.modify(changes)
        """
        # One update RPC per Partition; the first failure raises and leaves
        # the remaining Partitions untouched.
        for part in self.values():
            part.modify(changes)

    @property
    def total_cpus(self):
        return xcollections.sum_property(self, Partition.total_cpus)

    @property
    def total_nodes(self):
        return xcollections.sum_property(self, Partition.total_nodes)


cdef class Partition:

    def __cinit__(self):
        self.ptr = NULL

    def __init__(self, name=None, **kwargs):
        self._alloc_impl()
        self.name = name
        self.cluster = LOCAL_CLUSTER
        # Every keyword argument is applied through the matching property
        # setter.
        for k, v in kwargs.items():
            setattr(self, k, v)

    def _alloc_impl(self):
        """Allocate and initialize `self.ptr` if it is not allocated yet.

        Raises:
            MemoryError: If the allocation failed.
        """
        if not self.ptr:
            # Explicit cast: try_xmalloc is declared outside this file and
            # presumably returns an untyped pointer.
            self.ptr = <partition_info_t*>try_xmalloc(sizeof(partition_info_t))
            if not self.ptr:
                raise MemoryError("xmalloc failed for partition_info_t")

            slurm_init_part_desc_msg(self.ptr)

    def _dealloc_impl(self):
        """Free the partition record together with its members."""
        slurm_free_partition_info_members(self.ptr)
        xfree(self.ptr)

    def __dealloc__(self):
        self._dealloc_impl()

    def __repr__(self):
        return f'pyslurm.{self.__class__.__name__}({self.name})'

    @staticmethod
    cdef Partition from_ptr(partition_info_t *in_ptr):
        """Create a Partition from a raw partition_info_t.

        The structure is copied bytewise (shallow copy), so the new instance
        shares all member pointers with `in_ptr`. Partitions.load() therefore
        zeroes out the source record afterwards.
        """
        cdef Partition wrap = Partition.__new__(Partition)
        wrap._alloc_impl()
        wrap.cluster = LOCAL_CLUSTER
        memcpy(wrap.ptr, in_ptr, sizeof(partition_info_t))
        return wrap

    def _error_or_name(self):
        """Return the name of this Partition.

        Raises:
            ValueError: If no name is set.
        """
        if not self.name:
            raise ValueError("You need to set a Partition name for this "
                             "instance.")
        return self.name

    def as_dict(self):
        """Same as to_dict()."""
        return self.to_dict()

    def to_dict(self):
        """Partition information formatted as a dictionary.

        Returns:
            (dict): Partition information as dict

        Examples:
            >>> import pyslurm
            >>> mypart = pyslurm.Partition.load("mypart")
            >>> mypart_dict = mypart.to_dict()
        """
        return instance_to_dict(self)

    @staticmethod
    def load(name):
        """Load information for a specific Partition.

        Args:
            name (str):
                The name of the Partition to load.

        Returns:
            (pyslurm.Partition): Returns a new Partition instance.

        Raises:
            RPCError: If requesting the Partition information from the
                slurmctld was not successful.

        Examples:
            >>> import pyslurm
            >>> part = pyslurm.Partition.load("normal")
        """
        # All Partitions are loaded, the requested one is picked by name.
        part = Partitions.load().get(name)
        if not part:
            raise RPCError(msg=f"Partition '{name}' doesn't exist")

        return part

    def create(self):
        """Create a Partition.

        Implements the slurm_create_partition RPC.

        Returns:
            (pyslurm.Partition): This function returns the current Partition
                instance object itself.

        Raises:
            RPCError: If creating the Partition was not successful.
            ValueError: If no name is set on this Partition.

        Examples:
            >>> import pyslurm
            >>> part = pyslurm.Partition("debug").create()
        """
        self._error_or_name()
        verify_rpc(slurm_create_partition(self.ptr))
        return self

    def modify(self, Partition changes):
        """Modify a Partition.

        Implements the slurm_update_partition RPC.

        !!! note

            The name of `changes` is overwritten with the name of this
            Partition.

        Args:
            changes (pyslurm.Partition):
                Another Partition object that contains all the changes to
                apply. Check the `Other Parameters` of the Partition class to
                see which properties can be modified.

        Raises:
            RPCError: When updating the Partition was not successful.
            ValueError: If no name is set on this Partition.

        Examples:
            >>> import pyslurm
            >>>
            >>> part = pyslurm.Partition.load("normal")
            >>> # Prepare the changes
            >>> changes = pyslurm.Partition(state="DRAIN")
            >>> # Apply the changes to the "normal" Partition
            >>> part.modify(changes)
        """
        cdef Partition part = changes
        # The record of `changes` is sent, addressed to this Partition.
        part.name = self._error_or_name()
        verify_rpc(slurm_update_partition(part.ptr))

    def delete(self):
        """Delete a Partition.

        Implements the slurm_delete_partition RPC.

        Raises:
            RPCError: When deleting the Partition was not successful.
            ValueError: If no name is set on this Partition.

        Examples:
            >>> import pyslurm
            >>> pyslurm.Partition("normal").delete()
        """
        cdef delete_part_msg_t del_part_msg
        # Keep the Python string referenced for the whole RPC.
        # NOTE(review): cstr.from_unicode presumably returns a pointer into
        # the string object without copying - confirm in pyslurm.utils.cstr.
        name = self._error_or_name()
        memset(&del_part_msg, 0, sizeof(del_part_msg))
        del_part_msg.name = cstr.from_unicode(name)
        verify_rpc(slurm_delete_partition(&del_part_msg))

    # If using property getter/setter style internally becomes too messy at
    # some point, we can easily switch to normal "cdef public" attributes and
    # just extract the getter/setter logic into two functions, where one
    # creates a pointer from the instance attributes, and the other parses
    # pointer values into instance attributes.
    #
    # From a user perspective nothing would change.

    @property
    def name(self):
        return cstr.to_unicode(self.ptr.name)

    @property
    def _id(self):
        return self.name

    @name.setter
    def name(self, val):
        cstr.fmalloc(&self.ptr.name, val)

    @property
    def allowed_submit_nodes(self):
        return cstr.to_list(self.ptr.allow_alloc_nodes, ["ALL"])

    @allowed_submit_nodes.setter
    def allowed_submit_nodes(self, val):
        cstr.from_list(&self.ptr.allow_alloc_nodes, val)

    @property
    def allowed_accounts(self):
        return cstr.to_list(self.ptr.allow_accounts, ["ALL"])

    @allowed_accounts.setter
    def allowed_accounts(self, val):
        cstr.from_list(&self.ptr.allow_accounts, val)

    @property
    def allowed_groups(self):
        return cstr.to_list(self.ptr.allow_groups, ["ALL"])

    @allowed_groups.setter
    def allowed_groups(self, val):
        cstr.from_list(&self.ptr.allow_groups, val)

    @property
    def allowed_qos(self):
        return cstr.to_list(self.ptr.allow_qos, ["ALL"])

    @allowed_qos.setter
    def allowed_qos(self, val):
        cstr.from_list(&self.ptr.allow_qos, val)

    @property
    def alternate(self):
        return cstr.to_unicode(self.ptr.alternate)

    @alternate.setter
    def alternate(self, val):
        cstr.fmalloc(&self.ptr.alternate, val)

    @property
    def select_type_parameters(self):
        return _select_type_int_to_list(self.ptr.cr_type)

    @property
    def cpu_binding(self):
        cdef char cpu_bind[128]
        slurm_sprint_cpu_bind_type(cpu_bind,
                                   <cpu_bind_type_t>self.ptr.cpu_bind)
        if cpu_bind == "(null type)":
            return None

        return cstr.to_unicode(cpu_bind)

    @cpu_binding.setter
    def cpu_binding(self, val):
        self.ptr.cpu_bind = cpubind_to_num(val)

    # Per-CPU and per-Node memory limits share one field each
    # (def_mem_per_cpu / max_mem_per_cpu). The per-CPU setters additionally
    # set the slurm.MEM_PER_CPU flag on the stored value.

    @property
    def default_memory_per_cpu(self):
        return _get_memory(self.ptr.def_mem_per_cpu, per_cpu=True)

    @default_memory_per_cpu.setter
    def default_memory_per_cpu(self, val):
        self.ptr.def_mem_per_cpu = u64(dehumanize(val))
        self.ptr.def_mem_per_cpu |= slurm.MEM_PER_CPU

    @property
    def default_memory_per_node(self):
        return _get_memory(self.ptr.def_mem_per_cpu, per_cpu=False)

    @default_memory_per_node.setter
    def default_memory_per_node(self, val):
        self.ptr.def_mem_per_cpu = u64(dehumanize(val))

    @property
    def max_memory_per_cpu(self):
        return _get_memory(self.ptr.max_mem_per_cpu, per_cpu=True)

    @max_memory_per_cpu.setter
    def max_memory_per_cpu(self, val):
        self.ptr.max_mem_per_cpu = u64(dehumanize(val))
        self.ptr.max_mem_per_cpu |= slurm.MEM_PER_CPU

    @property
    def max_memory_per_node(self):
        return _get_memory(self.ptr.max_mem_per_cpu, per_cpu=False)

    @max_memory_per_node.setter
    def max_memory_per_node(self, val):
        self.ptr.max_mem_per_cpu = u64(dehumanize(val))

    @property
    def default_time(self):
        return _raw_time(self.ptr.default_time, on_inf=UNLIMITED)

    @default_time.setter
    def default_time(self, val):
        self.ptr.default_time = timestr_to_mins(val)

    @property
    def denied_qos(self):
        return cstr.to_list(self.ptr.deny_qos, ["ALL"])

    @denied_qos.setter
    def denied_qos(self, val):
        cstr.from_list(&self.ptr.deny_qos, val)

    @property
    def denied_accounts(self):
        return cstr.to_list(self.ptr.deny_accounts, ["ALL"])

    @denied_accounts.setter
    def denied_accounts(self, val):
        cstr.from_list(&self.ptr.deny_accounts, val)

    @property
    def preemption_grace_time(self):
        return _raw_time(self.ptr.grace_time)
+ + @preemption_grace_time.setter + def preemption_grace_time(self, val): + self.ptr.grace_time = timestr_to_secs(val) + + @property + def default_cpus_per_gpu(self): + def_dict = cstr.to_dict(self.ptr.job_defaults_str) + if def_dict and "DefCpuPerGpu" in def_dict: + return int(def_dict["DefCpuPerGpu"]) + + return _extract_job_default_item(slurm.JOB_DEF_CPU_PER_GPU, + self.ptr.job_defaults_list) + + @default_cpus_per_gpu.setter + def default_cpus_per_gpu(self, val): + _concat_job_default_str("DefCpuPerGpu", val, + &self.ptr.job_defaults_str) + + @property + def default_memory_per_gpu(self): + def_dict = cstr.to_dict(self.ptr.job_defaults_str) + if def_dict and "DefMemPerGpu" in def_dict: + return int(def_dict["DefMemPerGpu"]) + + return _extract_job_default_item(slurm.JOB_DEF_MEM_PER_GPU, + self.ptr.job_defaults_list) + + @default_memory_per_gpu.setter + def default_memory_per_gpu(self, val): + _concat_job_default_str("DefMemPerGpu", val, + &self.ptr.job_defaults_str) + + @property + def max_cpus_per_node(self): + return u32_parse(self.ptr.max_cpus_per_node) + + @max_cpus_per_node.setter + def max_cpus_per_node(self, val): + self.ptr.max_cpus_per_node = u32(val) + + @property + def max_cpus_per_socket(self): + return u32_parse(self.ptr.max_cpus_per_socket) + + @max_cpus_per_socket.setter + def max_cpus_per_socket(self, val): + self.ptr.max_cpus_per_socket = u32(val) + + @property + def max_nodes(self): + return u32_parse(self.ptr.max_nodes) + + @max_nodes.setter + def max_nodes(self, val): + self.ptr.max_nodes = u32(val) + + @property + def min_nodes(self): + return u32_parse(self.ptr.min_nodes, zero_is_noval=False) + + @min_nodes.setter + def min_nodes(self, val): + self.ptr.min_nodes = u32(val, zero_is_noval=False) + + @property + def max_time(self): + return _raw_time(self.ptr.max_time, on_inf=UNLIMITED) + + @max_time.setter + def max_time(self, val): + self.ptr.max_time = timestr_to_mins(val) + + @property + def oversubscribe(self): + return 
_oversubscribe_int_to_str(self.ptr.max_share)
+
+    @oversubscribe.setter
+    def oversubscribe(self, val):
+        self.ptr.max_share = _oversubscribe_str_to_int(val)
+
+    @property
+    def nodes(self):
+        return cstr.to_unicode(self.ptr.nodes)
+
+    @nodes.setter
+    def nodes(self, val):
+        cstr.from_list(&self.ptr.nodes, val)
+
+    @property
+    def nodesets(self):
+        return cstr.to_list(self.ptr.nodesets)
+
+    @nodesets.setter
+    def nodesets(self, val):
+        cstr.from_list(&self.ptr.nodesets, val)
+
+    @property
+    def over_time_limit(self):
+        return u16_parse(self.ptr.over_time_limit)
+
+    @over_time_limit.setter
+    def over_time_limit(self, val):
+        # Store the value supplied by the caller. The previous code converted
+        # the field's own current value, so assignments had no effect.
+        self.ptr.over_time_limit = u16(val)
+
+    @property
+    def preempt_mode(self):
+        return _preempt_mode_int_to_str(self.ptr.preempt_mode, self.slurm_conf)
+
+    @preempt_mode.setter
+    def preempt_mode(self, val):
+        self.ptr.preempt_mode = _preempt_mode_str_to_int(val)
+
+    @property
+    def priority_job_factor(self):
+        return u16_parse(self.ptr.priority_job_factor)
+
+    @priority_job_factor.setter
+    def priority_job_factor(self, val):
+        self.ptr.priority_job_factor = u16(val)
+
+    @property
+    def priority_tier(self):
+        return u16_parse(self.ptr.priority_tier)
+
+    @priority_tier.setter
+    def priority_tier(self, val):
+        self.ptr.priority_tier = u16(val)
+
+    @property
+    def qos(self):
+        return cstr.to_unicode(self.ptr.qos_char)
+
+    @qos.setter
+    def qos(self, val):
+        cstr.fmalloc(&self.ptr.qos_char, val)
+
+    # total_cpus and total_nodes are read-only: no setter is defined for them.
+    @property
+    def total_cpus(self):
+        return u32_parse(self.ptr.total_cpus, on_noval=0)
+
+    @property
+    def total_nodes(self):
+        return u32_parse(self.ptr.total_nodes, on_noval=0)
+
+    @property
+    def state(self):
+        return _partition_state_int_to_str(self.ptr.state_up)
+
+    @state.setter
+    def state(self, val):
+        self.ptr.state_up = _partition_state_str_to_int(val)
+
+    @property
+    def is_default(self):
+        return u16_parse_bool_flag(self.ptr.flags, slurm.PART_FLAG_DEFAULT)
+
+    @is_default.setter
+    def is_default(self, val):
+
u16_set_bool_flag(&self.ptr.flags, val, + slurm.PART_FLAG_DEFAULT, slurm.PART_FLAG_DEFAULT_CLR) + + @property + def allow_root_jobs(self): + return u16_parse_bool_flag(self.ptr.flags, slurm.PART_FLAG_NO_ROOT) + + @allow_root_jobs.setter + def allow_root_jobs(self, val): + u16_set_bool_flag(&self.ptr.flags, val, slurm.PART_FLAG_NO_ROOT, + slurm.PART_FLAG_NO_ROOT_CLR) + + @property + def is_user_exclusive(self): + return u16_parse_bool_flag(self.ptr.flags, + slurm.PART_FLAG_EXCLUSIVE_USER) + + @is_user_exclusive.setter + def is_user_exclusive(self, val): + u16_set_bool_flag(&self.ptr.flags, val, slurm.PART_FLAG_EXCLUSIVE_USER, + slurm.PART_FLAG_EXC_USER_CLR) + + @property + def is_hidden(self): + return u16_parse_bool_flag(self.ptr.flags, slurm.PART_FLAG_HIDDEN) + + @is_hidden.setter + def is_hidden(self, val): + u16_set_bool_flag(&self.ptr.flags, val, + slurm.PART_FLAG_HIDDEN, slurm.PART_FLAG_HIDDEN_CLR) + + @property + def least_loaded_nodes_scheduling(self): + return u16_parse_bool_flag(self.ptr.flags, slurm.PART_FLAG_LLN) + + @least_loaded_nodes_scheduling.setter + def least_loaded_nodes_scheduling(self, val): + u16_set_bool_flag(&self.ptr.flags, val, slurm.PART_FLAG_LLN, + slurm.PART_FLAG_LLN_CLR) + + @property + def is_root_only(self): + return u16_parse_bool_flag(self.ptr.flags, slurm.PART_FLAG_ROOT_ONLY) + + @is_root_only.setter + def is_root_only(self, val): + u16_set_bool_flag(&self.ptr.flags, val, slurm.PART_FLAG_ROOT_ONLY, + slurm.PART_FLAG_ROOT_ONLY_CLR) + + @property + def requires_reservation(self): + return u16_parse_bool_flag(self.ptr.flags, slurm.PART_FLAG_REQ_RESV) + + @requires_reservation.setter + def requires_reservation(self, val): + u16_set_bool_flag(&self.ptr.flags, val, slurm.PART_FLAG_REQ_RESV, + slurm.PART_FLAG_REQ_RESV_CLR) + + # TODO: tres_fmt_str + + +def _partition_state_int_to_str(state): + if state == slurm.PARTITION_UP: + return "UP" + elif state == slurm.PARTITION_DOWN: + return "DOWN" + elif state == slurm.PARTITION_INACTIVE: + 
return "INACTIVE"
+    elif state == slurm.PARTITION_DRAIN:
+        return "DRAIN"
+    else:
+        return "UNKNOWN"
+
+
+def _partition_state_str_to_int(state):
+    """Translate a partition state name (case-insensitive) to its constant.
+
+    Raises:
+        ValueError: If the name is not one of UP, DOWN, INACTIVE or DRAIN.
+    """
+    state = state.upper()
+
+    if state == "UP":
+        return slurm.PARTITION_UP
+    elif state == "DOWN":
+        return slurm.PARTITION_DOWN
+    elif state == "INACTIVE":
+        return slurm.PARTITION_INACTIVE
+    elif state == "DRAIN":
+        return slurm.PARTITION_DRAIN
+    else:
+        choices = "UP, DOWN, INACTIVE, DRAIN"
+        raise ValueError(f"Invalid partition state: {state}, valid choices "
+                         f"are {choices}")
+
+
+def _oversubscribe_int_to_str(shared):
+    """Render a max_share value as "EXCLUSIVE", "NO", "YES:n" or "FORCE:n".
+
+    Returns None when the value is NO_VAL16.
+    """
+    if shared == slurm.NO_VAL16:
+        return None
+
+    # The SHARED_FORCE bit is a flag; the remaining bits hold the job count.
+    is_forced = shared & slurm.SHARED_FORCE
+    max_jobs = shared & (~slurm.SHARED_FORCE)
+
+    if not max_jobs:
+        return "EXCLUSIVE"
+    elif is_forced:
+        return f"FORCE:{max_jobs}"
+    elif max_jobs == 1:
+        return "NO"
+    else:
+        return f"YES:{max_jobs}"
+
+
+def _oversubscribe_str_to_int(typ):
+    """Inverse of _oversubscribe_int_to_str; unknown input maps to NO_VAL16."""
+    typ = typ.upper()
+
+    if typ == "NO":
+        return 1
+    elif typ == "EXCLUSIVE":
+        return 0
+    elif "YES" in typ:
+        return _split_oversubscribe_str(typ)
+    elif "FORCE" in typ:
+        return _split_oversubscribe_str(typ) | slurm.SHARED_FORCE
+    else:
+        return slurm.NO_VAL16
+
+
+def _split_oversubscribe_str(val):
+    """Return the count after the first ":" in val, or 4 if there is none."""
+    max_jobs = val.split(":", 1)
+    if len(max_jobs) == 2:
+        return int(max_jobs[1])
+    else:
+        return 4
+
+
+def _select_type_int_to_list(stype):
+    """Convert a select-type bitmask into a list of parameter names.
+
+    The first entry (if any) is the consumable-resource name reported by
+    _select_type_int_to_cons_res(); the others are the extra CR_* flags.
+    """
+    # _select_type_int_to_cons_res() hands back a plain string for a known
+    # resource type and an empty list otherwise. Always build a real list
+    # here, so the append() calls below cannot fail on a str.
+    cons_res = _select_type_int_to_cons_res(stype)
+    out = [cons_res] if isinstance(cons_res, str) else list(cons_res)
+
+    # The rest of the CR_* stuff are just some extra parameters to the select
+    # plugin
+    if stype & slurm.CR_OTHER_CONS_RES:
+        out.append("OTHER_CONS_RES")
+
+    if stype & slurm.CR_ONE_TASK_PER_CORE:
+        out.append("ONE_TASK_PER_CORE")
+
+    if stype & slurm.CR_PACK_NODES:
+        out.append("PACK_NODES")
+
+    if stype & slurm.CR_OTHER_CONS_TRES:
+        out.append("OTHER_CONS_TRES")
+
+    if stype & slurm.CR_CORE_DEFAULT_DIST_BLOCK:
+        out.append("CORE_DEFAULT_DIST_BLOCK")
+
+    if stype & slurm.CR_LLN:
+        out.append("LLN")
+
+    return out
+
+
+def _select_type_int_to_cons_res(stype):
+    # https://github.com/SchedMD/slurm/blob/257ca5e4756a493dc4c793ded3ac3c1a769b3c83/slurm/slurm.h#L996
+    # The 3 main select types are mutually exclusive, and may be combined with
+    # CR_MEMORY
+    # CR_BOARD exists but doesn't show up in the documentation, so ignore it.
+    #
+    # Returns the resource name as a str, or an empty list when none of the
+    # bits checked below is set.
+    if stype & slurm.CR_CPU and stype & slurm.CR_MEMORY:
+        return "CPU_MEMORY"
+    elif stype & slurm.CR_CORE and stype & slurm.CR_MEMORY:
+        return "CORE_MEMORY"
+    elif stype & slurm.CR_SOCKET and stype & slurm.CR_MEMORY:
+        return "SOCKET_MEMORY"
+    elif stype & slurm.CR_CPU:
+        return "CPU"
+    elif stype & slurm.CR_CORE:
+        return "CORE"
+    elif stype & slurm.CR_SOCKET:
+        return "SOCKET"
+    elif stype & slurm.CR_MEMORY:
+        return "MEMORY"
+    else:
+        return []
+
+
+def _preempt_mode_str_to_int(mode):
+    """Translate a preempt mode name to its number.
+
+    An empty/None mode maps to NO_VAL16.
+
+    Raises:
+        ValueError: If slurm_preempt_mode_num() reports NO_VAL16 for the name.
+    """
+    if not mode:
+        return slurm.NO_VAL16
+
+    pmode = slurm_preempt_mode_num(str(mode))
+    if pmode == slurm.NO_VAL16:
+        raise ValueError(f"Invalid Preempt mode: {mode}")
+
+    return pmode
+
+
+def _preempt_mode_int_to_str(mode, slurmctld.Config slurm_conf):
+    # When the mode is NO_VAL16, fall back to the mode stored in slurm_conf
+    # (or None if no config object was given).
+    if mode == slurm.NO_VAL16:
+        return slurm_conf.preempt_mode if slurm_conf else None
+    else:
+        return cstr.to_unicode(slurm_preempt_mode_string(mode))
+
+
+cdef _extract_job_default_item(typ, slurm.List job_defaults_list):
+    # Returns the value of the first list entry whose type equals `typ`,
+    # or None if no entry matches.
+    cdef:
+        job_defaults_t *default_item
+        SlurmList job_def_list
+        SlurmListItem job_def_item
+
+    job_def_list = SlurmList.wrap(job_defaults_list, owned=False)
+    for job_def_item in job_def_list:
+        default_item = job_def_item.data
+        if default_item.type == typ:
+            return default_item.value
+
+    return None
+
+
+cdef _concat_job_default_str(typ, val, char **job_defaults_str):
+    # Sets (or, for NO_VAL64, removes) the `typ` key in the key/value string
+    # stored at job_defaults_str and writes the result back in place.
+    cdef uint64_t _val = u64(dehumanize(val))
+
+    current = cstr.to_dict(job_defaults_str[0])
+    if _val == slurm.NO_VAL64:
+        current.pop(typ, None)
+    else:
+        current.update({typ : _val})
+
+    cstr.from_dict(job_defaults_str, current)
+
+
+def _get_memory(value, per_cpu):
+    if value != slurm.NO_VAL64:
+        if value & slurm.MEM_PER_CPU and per_cpu:
+            if value == slurm.MEM_PER_CPU:
+                return
UNLIMITED + return u64_parse(value & (~slurm.MEM_PER_CPU)) + + # For these values, Slurm interprets 0 as being equal to + # INFINITE/UNLIMITED + elif value == 0 and not per_cpu: + return UNLIMITED + + elif not value & slurm.MEM_PER_CPU and not per_cpu: + return u64_parse(value) + + return None diff --git a/pyslurm/core/slurmctld.pxd b/pyslurm/core/slurmctld.pxd new file mode 100644 index 00000000..8bafb01f --- /dev/null +++ b/pyslurm/core/slurmctld.pxd @@ -0,0 +1,39 @@ +######################################################################### +# slurmctld.pxd - pyslurm slurmctld api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurm_conf_t, + slurm_load_ctl_conf, + slurm_free_ctl_conf, + slurm_preempt_mode_string, + try_xmalloc, +) +from pyslurm.utils cimport cstr +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, int64_t +from pyslurm.utils.uint cimport * + + +cdef class Config: + cdef slurm_conf_t *ptr diff --git a/pyslurm/core/slurmctld.pyx b/pyslurm/core/slurmctld.pyx new file mode 100644 index 00000000..7f06966e --- /dev/null +++ b/pyslurm/core/slurmctld.pyx @@ -0,0 +1,62 @@ +######################################################################### +# slurmctld.pyx - pyslurm slurmctld api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.core.error import verify_rpc, RPCError + + +cdef class Config: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, job_id): + raise RuntimeError("Cannot instantiate class directly") + + def __dealloc__(self): + slurm_free_ctl_conf(self.ptr) + self.ptr = NULL + + @staticmethod + def load(): + cdef Config conf = Config.__new__(Config) + verify_rpc(slurm_load_ctl_conf(0, &conf.ptr)) + return conf + + @property + def cluster(self): + return cstr.to_unicode(self.ptr.cluster_name) + + @property + def preempt_mode(self): + cdef char *tmp = slurm_preempt_mode_string(self.ptr.preempt_mode) + return cstr.to_unicode(tmp) + + @property + def suspend_program(self): + return cstr.to_unicode(self.ptr.suspend_program) + + @property + def resume_program(self): + return cstr.to_unicode(self.ptr.resume_program) + diff --git a/pyslurm/db/__init__.pxd b/pyslurm/db/__init__.pxd new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/db/__init__.py b/pyslurm/db/__init__.py new file mode 100644 index 00000000..acd36a40 --- /dev/null +++ b/pyslurm/db/__init__.py @@ -0,0 +1,44 @@ +######################################################################### +# db/__init__.py - pyslurm database api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from .connection import Connection +from .step import JobStep, JobSteps +from .stats import JobStatistics +from .job import ( + Job, + Jobs, + JobFilter, + JobSearchFilter, +) +from .tres import ( + TrackableResource, + TrackableResources, +) +from .qos import ( + QualitiesOfService, + QualityOfService, + QualityOfServiceFilter, +) +from .assoc import ( + Associations, + Association, + AssociationFilter, +) diff --git a/pyslurm/db/assoc.pxd b/pyslurm/db/assoc.pxd new file mode 100644 index 00000000..384dbb0a --- /dev/null +++ b/pyslurm/db/assoc.pxd @@ -0,0 +1,88 @@ +######################################################################### +# assoc.pxd - pyslurm slurmdbd association api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurmdb_assoc_rec_t, + slurmdb_assoc_cond_t, + slurmdb_associations_get, + slurmdb_destroy_assoc_rec, + slurmdb_destroy_assoc_cond, + slurmdb_init_assoc_rec, + slurmdb_associations_modify, + try_xmalloc, +) +from pyslurm.db.util cimport ( + SlurmList, + SlurmListItem, + make_char_list, + slurm_list_to_pylist, + qos_list_to_pylist, +) +from pyslurm.db.tres cimport ( + _set_tres_limits, + TrackableResources, + TrackableResourceLimits, +) +from pyslurm.db.connection cimport Connection +from pyslurm.utils cimport cstr +from pyslurm.utils.uint cimport * +from pyslurm.db.qos cimport QualitiesOfService, _set_qos_list +from pyslurm.xcollections cimport MultiClusterMap + +cdef _parse_assoc_ptr(Association ass) +cdef _create_assoc_ptr(Association ass, conn=*) + + +cdef class Associations(MultiClusterMap): + pass + + +cdef class AssociationFilter: + cdef slurmdb_assoc_cond_t *ptr + + cdef public: + users + ids + + +cdef class Association: + cdef: + slurmdb_assoc_rec_t *ptr + QualitiesOfService qos_data + TrackableResources tres_data + + cdef public: + group_tres + group_tres_mins + group_tres_run_mins + max_tres_mins_per_job + max_tres_run_mins_per_user + max_tres_per_job + max_tres_per_node + qos + + @staticmethod + cdef Association from_ptr(slurmdb_assoc_rec_t *in_ptr) + diff --git a/pyslurm/db/assoc.pyx b/pyslurm/db/assoc.pyx new file mode 100644 index 00000000..93617669 --- /dev/null +++ b/pyslurm/db/assoc.pyx @@ -0,0 +1,437 @@ +######################################################################### +# assoc.pyx - pyslurm slurmdbd association api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as 
published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.core.error import RPCError +from pyslurm.utils.helpers import ( + instance_to_dict, + user_to_uid, +) +from pyslurm.utils.uint import * +from pyslurm.db.connection import _open_conn_or_error +from pyslurm.settings import LOCAL_CLUSTER +from pyslurm import xcollections + + +cdef class Associations(MultiClusterMap): + + def __init__(self, assocs=None): + super().__init__(data=assocs, + typ="Associations", + val_type=Association, + id_attr=Association.id, + key_type=int) + + @staticmethod + def load(AssociationFilter db_filter=None, Connection db_connection=None): + cdef: + Associations out = Associations() + Association assoc + AssociationFilter cond = db_filter + SlurmList assoc_data + SlurmListItem assoc_ptr + Connection conn + QualitiesOfService qos_data + TrackableResources tres_data + + # Prepare SQL Filter + if not db_filter: + cond = AssociationFilter() + cond._create() + + # Setup DB Conn + conn = _open_conn_or_error(db_connection) + + # Fetch Assoc Data + assoc_data = SlurmList.wrap(slurmdb_associations_get( + conn.ptr, cond.ptr)) + + if assoc_data.is_null: + raise RPCError(msg="Failed to get Association data from slurmdbd") + + # Fetch other necessary dependencies needed for translating some + # attributes (i.e QoS IDs to its name) + qos_data = QualitiesOfService.load(db_connection=conn, + 
name_is_key=False) + tres_data = TrackableResources.load(db_connection=conn, + name_is_key=False) + + # Setup Association objects + for assoc_ptr in SlurmList.iter_and_pop(assoc_data): + assoc = Association.from_ptr(assoc_ptr.data) + assoc.qos_data = qos_data + assoc.tres_data = tres_data + _parse_assoc_ptr(assoc) + + cluster = assoc.cluster + if cluster not in out.data: + out.data[cluster] = {} + out.data[cluster][assoc.id] = assoc + + return out + + @staticmethod + def modify(db_filter, Association changes, Connection db_connection=None): + cdef: + AssociationFilter afilter + Connection conn + SlurmList response + SlurmListItem response_ptr + list out = [] + + # Prepare SQL Filter + if isinstance(db_filter, Associations): + assoc_ids = [ass.id for ass in db_filter] + afilter = AssociationFilter(ids=assoc_ids) + else: + afilter = db_filter + afilter._create() + + # Setup DB conn + conn = _open_conn_or_error(db_connection) + + # Any data that isn't parsed yet or needs validation is done in this + # function. + _create_assoc_ptr(changes, conn) + + # Modify associations, get the result + # This returns a List of char* with the associations that were + # modified + response = SlurmList.wrap(slurmdb_associations_modify( + conn.ptr, afilter.ptr, changes.ptr)) + + if not response.is_null and response.cnt: + for response_ptr in response: + response_str = cstr.to_unicode(response_ptr.data) + if not response_str: + continue + + # TODO: Better format + out.append(response_str) + + elif not response.is_null: + # There was no real error, but simply nothing has been modified + raise RPCError(msg="Nothing was modified") + else: + # Autodetects the last slurm error + raise RPCError() + + if not db_connection: + # Autocommit if no connection was explicitly specified. 
+            conn.commit()
+
+        return out
+
+
+cdef class AssociationFilter:
+
+    def __cinit__(self):
+        self.ptr = NULL
+
+    def __init__(self, **kwargs):
+        # Every keyword argument is stored as an attribute of the same name.
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+    def __dealloc__(self):
+        self._dealloc()
+
+    def _dealloc(self):
+        slurmdb_destroy_assoc_cond(self.ptr)
+        self.ptr = NULL
+
+    def _alloc(self):
+        # Drop any previously created condition before allocating a new one.
+        self._dealloc()
+        self.ptr = try_xmalloc(sizeof(slurmdb_assoc_cond_t))
+        if not self.ptr:
+            raise MemoryError("xmalloc failed for slurmdb_assoc_cond_t")
+
+    def _parse_users(self):
+        if not self.users:
+            return None
+        return list({user_to_uid(user) for user in self.users})
+
+    def _create(self):
+        """(Re)build the C condition struct from the Python attributes."""
+        self._alloc()
+        cdef slurmdb_assoc_cond_t *ptr = self.ptr
+
+        make_char_list(&ptr.user_list, self.users)
+        # Also forward `ids`. Associations.modify() builds its filter from
+        # association ids only; without this the condition would stay empty
+        # and would not restrict the request to those associations.
+        # NOTE(review): assumes slurmdb_assoc_cond_t exposes `id_list` as a
+        # list of strings (as in slurmdb.h) - confirm against the bindings.
+        make_char_list(&ptr.id_list,
+                       [str(i) for i in self.ids] if self.ids else None)
+
+
+cdef class Association:
+
+    def __cinit__(self):
+        self.ptr = NULL
+
+    def __init__(self, **kwargs):
+        self._alloc_impl()
+        self.id = 0
+        self.cluster = LOCAL_CLUSTER
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+    def __dealloc__(self):
+        self._dealloc_impl()
+
+    def _dealloc_impl(self):
+        slurmdb_destroy_assoc_rec(self.ptr)
+        self.ptr = NULL
+
+    def _alloc_impl(self):
+        # Allocate and initialise the record only once per instance.
+        if not self.ptr:
+            self.ptr = try_xmalloc(
+                    sizeof(slurmdb_assoc_rec_t))
+            if not self.ptr:
+                raise MemoryError("xmalloc failed for slurmdb_assoc_rec_t")
+
+            slurmdb_init_assoc_rec(self.ptr, 0)
+
+    def __repr__(self):
+        return f'pyslurm.db.{self.__class__.__name__}({self.id})'
+
+    @staticmethod
+    cdef Association from_ptr(slurmdb_assoc_rec_t *in_ptr):
+        # Wraps an existing record without running __init__, so nothing new
+        # is allocated; the wrapper simply stores in_ptr.
+        cdef Association wrap = Association.__new__(Association)
+        wrap.ptr = in_ptr
+        return wrap
+
+    def to_dict(self):
+        """Database Association information formatted as a dictionary.
+ + Returns: + (dict): Database Association information as dict + """ + return instance_to_dict(self) + + def __eq__(self, other): + if isinstance(other, Association): + return self.id == other.id and self.cluster == other.cluster + return NotImplemented + + @property + def account(self): + return cstr.to_unicode(self.ptr.acct) + + @account.setter + def account(self, val): + cstr.fmalloc(&self.ptr.acct, val) + + @property + def cluster(self): + return cstr.to_unicode(self.ptr.cluster) + + @cluster.setter + def cluster(self, val): + cstr.fmalloc(&self.ptr.cluster, val) + + @property + def comment(self): + return cstr.to_unicode(self.ptr.comment) + + @comment.setter + def comment(self, val): + cstr.fmalloc(&self.ptr.comment, val) + + # uint32_t def_qos_id + + # uint16_t flags (ASSOC_FLAG_*) + + @property + def group_jobs(self): + return u32_parse(self.ptr.grp_jobs, zero_is_noval=False) + + @group_jobs.setter + def group_jobs(self, val): + self.ptr.grp_jobs = u32(val, zero_is_noval=False) + + @property + def group_jobs_accrue(self): + return u32_parse(self.ptr.grp_jobs_accrue, zero_is_noval=False) + + @group_jobs_accrue.setter + def group_jobs_accrue(self, val): + self.ptr.grp_jobs_accrue = u32(val, zero_is_noval=False) + + @property + def group_submit_jobs(self): + return u32_parse(self.ptr.grp_submit_jobs, zero_is_noval=False) + + @group_submit_jobs.setter + def group_submit_jobs(self, val): + self.ptr.grp_submit_jobs = u32(val, zero_is_noval=False) + + @property + def group_wall_time(self): + return u32_parse(self.ptr.grp_wall, zero_is_noval=False) + + @group_wall_time.setter + def group_wall_time(self, val): + self.ptr.grp_wall = u32(val, zero_is_noval=False) + + @property + def id(self): + return u32_parse(self.ptr.id) + + @id.setter + def id(self, val): + self.ptr.id = val + + @property + def is_default(self): + return u16_parse_bool(self.ptr.is_def) + + @property + def lft(self): + return u32_parse(self.ptr.lft) + + @property + def max_jobs(self): + return 
u32_parse(self.ptr.max_jobs, zero_is_noval=False) + + @max_jobs.setter + def max_jobs(self, val): + self.ptr.max_jobs = u32(val, zero_is_noval=False) + + @property + def max_jobs_accrue(self): + return u32_parse(self.ptr.max_jobs_accrue, zero_is_noval=False) + + @max_jobs_accrue.setter + def max_jobs_accrue(self, val): + self.ptr.max_jobs_accrue = u32(val, zero_is_noval=False) + + @property + def max_submit_jobs(self): + return u32_parse(self.ptr.max_submit_jobs, zero_is_noval=False) + + @max_submit_jobs.setter + def max_submit_jobs(self, val): + self.ptr.max_submit_jobs = u32(val, zero_is_noval=False) + + @property + def max_wall_time_per_job(self): + return u32_parse(self.ptr.max_wall_pj, zero_is_noval=False) + + @max_wall_time_per_job.setter + def max_wall_time_per_job(self, val): + self.ptr.max_wall_pj = u32(val, zero_is_noval=False) + + @property + def min_priority_threshold(self): + return u32_parse(self.ptr.min_prio_thresh, zero_is_noval=False) + + @min_priority_threshold.setter + def min_priority_threshold(self, val): + self.ptr.min_prio_thresh = u32(val, zero_is_noval=False) + + @property + def parent_account(self): + return cstr.to_unicode(self.ptr.parent_acct) + + @property + def parent_account_id(self): + return u32_parse(self.ptr.parent_id, zero_is_noval=False) + + @property + def partition(self): + return cstr.to_unicode(self.ptr.partition) + + @partition.setter + def partition(self, val): + cstr.fmalloc(&self.ptr.partition, val) + + @property + def priority(self): + return u32_parse(self.ptr.priority, zero_is_noval=False) + + @priority.setter + def priority(self, val): + self.ptr.priority = u32(val) + + @property + def rgt(self): + return u32_parse(self.ptr.rgt) + + @property + def shares(self): + return u32_parse(self.ptr.shares_raw, zero_is_noval=False) + + @shares.setter + def shares(self, val): + self.ptr.shares_raw = u32(val) + + @property + def user(self): + return cstr.to_unicode(self.ptr.user) + + @user.setter + def user(self, val): + 
cstr.fmalloc(&self.ptr.user, val) + + +cdef _parse_assoc_ptr(Association ass): + cdef: + TrackableResources tres = ass.tres_data + QualitiesOfService qos = ass.qos_data + + ass.group_tres = TrackableResourceLimits.from_ids( + ass.ptr.grp_tres, tres) + ass.group_tres_mins = TrackableResourceLimits.from_ids( + ass.ptr.grp_tres_mins, tres) + ass.group_tres_run_mins = TrackableResourceLimits.from_ids( + ass.ptr.grp_tres_mins, tres) + ass.max_tres_mins_per_job = TrackableResourceLimits.from_ids( + ass.ptr.max_tres_mins_pj, tres) + ass.max_tres_run_mins_per_user = TrackableResourceLimits.from_ids( + ass.ptr.max_tres_run_mins, tres) + ass.max_tres_per_job = TrackableResourceLimits.from_ids( + ass.ptr.max_tres_pj, tres) + ass.max_tres_per_node = TrackableResourceLimits.from_ids( + ass.ptr.max_tres_pn, tres) + ass.qos = qos_list_to_pylist(ass.ptr.qos_list, qos) + + +cdef _create_assoc_ptr(Association ass, conn=None): + # _set_tres_limits will also check if specified TRES are valid and + # translate them to its ID which is why we need to load the current TRES + # available in the system. + ass.tres_data = TrackableResources.load(db_connection=conn) + _set_tres_limits(&ass.ptr.grp_tres, ass.group_tres, ass.tres_data) + _set_tres_limits(&ass.ptr.grp_tres_mins, ass.group_tres_mins, + ass.tres_data) + _set_tres_limits(&ass.ptr.grp_tres_run_mins, ass.group_tres_run_mins, + ass.tres_data) + _set_tres_limits(&ass.ptr.max_tres_mins_pj, ass.max_tres_mins_per_job, + ass.tres_data) + _set_tres_limits(&ass.ptr.max_tres_run_mins, ass.max_tres_run_mins_per_user, + ass.tres_data) + _set_tres_limits(&ass.ptr.max_tres_pj, ass.max_tres_per_job, + ass.tres_data) + _set_tres_limits(&ass.ptr.max_tres_pn, ass.max_tres_per_node, + ass.tres_data) + + # _set_qos_list will also check if specified QoS are valid and translate + # them to its ID, which is why we need to load the current QOS available + # in the system. 
cdef _create_assoc_ptr(Association ass, conn=None):
    """Write the Python-side TRES limits and QoS list into the C record.

    Counterpart of `_parse_assoc_ptr`: takes the limit attributes set on
    `ass` and stores them in `ass.ptr`. As a side effect, `ass.tres_data` and
    `ass.qos_data` are (re)loaded from the database.

    Args:
        ass (Association):
            The Association whose `ptr` should be filled.
        conn (optional):
            Database connection that is forwarded as `db_connection` to the
            TRES and QoS `load()` calls.
    """
    # _set_tres_limits will also check if specified TRES are valid and
    # translate them to its ID which is why we need to load the current TRES
    # available in the system.
    ass.tres_data = TrackableResources.load(db_connection=conn)
    _set_tres_limits(&ass.ptr.grp_tres, ass.group_tres, ass.tres_data)
    _set_tres_limits(&ass.ptr.grp_tres_mins, ass.group_tres_mins,
                     ass.tres_data)
    _set_tres_limits(&ass.ptr.grp_tres_run_mins, ass.group_tres_run_mins,
                     ass.tres_data)
    _set_tres_limits(&ass.ptr.max_tres_mins_pj, ass.max_tres_mins_per_job,
                     ass.tres_data)
    _set_tres_limits(&ass.ptr.max_tres_run_mins, ass.max_tres_run_mins_per_user,
                     ass.tres_data)
    _set_tres_limits(&ass.ptr.max_tres_pj, ass.max_tres_per_job,
                     ass.tres_data)
    _set_tres_limits(&ass.ptr.max_tres_pn, ass.max_tres_per_node,
                     ass.tres_data)

    # _set_qos_list will also check if specified QoS are valid and translate
    # them to its ID, which is why we need to load the current QOS available
    # in the system.
    ass.qos_data = QualitiesOfService.load(db_connection=conn)
    # This is a module-level function, so there is no `self`; the QoS list
    # lives on the Association that was passed in.
    _set_qos_list(&ass.ptr.qos_list, ass.qos, ass.qos_data)
cdef class Connection:
    """A connection to the slurmdbd.

    Instances are created with `Connection.open()`; calling the constructor
    directly raises a RuntimeError.

    Attributes:
        is_open (bool):
            Whether the connection is open or closed.
    """
    cdef:
        # Handle returned by slurmdb_connection_get(); NULL while the
        # connection is closed.
        void *ptr
        # Passed by address to slurmdb_connection_get() when opening.
        uint16_t flags
def _open_conn_or_error(conn):
    """Return a usable database connection.

    Args:
        conn (pyslurm.db.Connection):
            An existing connection, or a falsy value to have a new one
            opened via `Connection.open()`.

    Returns:
        (pyslurm.db.Connection): The given or the newly opened connection.

    Raises:
        ValueError: When the connection is not open.
    """
    if not conn:
        conn = Connection.open()

    if not conn.is_open:
        raise ValueError("Database connection is not open")

    return conn


cdef class Connection:

    def __cinit__(self):
        self.ptr = NULL
        self.flags = 0

    def __init__(self):
        raise RuntimeError("A new connection should be created through "
                           "calling Connection.open()")

    def __dealloc__(self):
        self.close()

    def __repr__(self):
        state = "open" if self.is_open else "closed"
        return f'pyslurm.db.{self.__class__.__name__} is {state}'

    @staticmethod
    def open():
        """Open a new connection to the slurmdbd

        Raises:
            RPCError: When opening the connection fails

        Returns:
            (pyslurm.db.Connection): Connection to slurmdbd

        Examples:
            >>> import pyslurm
            >>> connection = pyslurm.db.Connection.open()
            >>> print(connection.is_open)
            True
        """
        # __new__ is used on purpose: __init__ always raises.
        cdef Connection conn = Connection.__new__(Connection)
        conn.ptr = slurmdb_connection_get(&conn.flags)
        if not conn.ptr:
            raise RPCError(msg="Failed to open connection to slurmdbd")

        return conn

    def close(self):
        """Close the current connection.

        Closing an already closed connection is a no-op.

        Examples:
            >>> import pyslurm
            >>> connection = pyslurm.db.Connection.open()
            >>> ...
            >>> connection.close()
            >>> print(connection.is_open)
            False
        """
        if self.is_open:
            slurmdb_connection_close(&self.ptr)
            self.ptr = NULL

    def commit(self):
        """Commit recent changes.

        Raises:
            RPCError: When committing the changes failed.
        """
        if slurmdb_connection_commit(self.ptr, 1) == slurm.SLURM_ERROR:
            # Pass the text as `msg=` like every other RPCError call site.
            raise RPCError(msg="Failed to commit database changes.")

    def rollback(self):
        """Rollback recent changes.

        Raises:
            RPCError: When rolling back the changes failed.
        """
        if slurmdb_connection_commit(self.ptr, 0) == slurm.SLURM_ERROR:
            raise RPCError(msg="Failed to rollback database changes.")

    @property
    def is_open(self):
        if self.ptr:
            return True
        else:
            return False
cdef class JobFilter:
    """Query-Conditions for Jobs in the Slurm Database.

    Args:
        **kwargs (Any, optional=None):
            Any valid attribute of the object.

    Attributes:
        ids (list[int]):
            A list of Job ids to search for.
        start_time (Union[str, int, datetime.datetime]):
            Search for Jobs which started after this time.
        end_time (Union[str, int, datetime.datetime]):
            Search for Jobs which ended before this time.
        accounts (list[str]):
            Search for Jobs with these account names.
        association_ids (list[int]):
            Search for Jobs with these association ids.
        clusters (list[str]):
            Search for Jobs running in these clusters.
            Note: If unset, only the local cluster is searched. The string
            `"all"` can be given to not restrict the search to any cluster.
        constraints (list[str]):
            Search for Jobs with these constraints.
        cpus (int):
            Search for Jobs with exactly this many CPUs.
            Note: If you also specify `max_cpus`, then this value will act as
            the minimum.
        max_cpus (int):
            Search for Jobs with no more than this amount of CPUs.
            Note: This value has no effect without also setting `cpus`.
        nodes (int):
            Search for Jobs with exactly this many nodes.
            Note: If you also specify `max_nodes`, then this value will act as
            the minimum.
        max_nodes (int):
            Search for Jobs with no more than this amount of nodes.
            Note: This value has no effect without also setting `nodes`.
        qos (list[str]):
            Search for Jobs with these Qualities of Service.
        names (list[str]):
            Search for Jobs with these job names.
        partitions (list[str]):
            Search for Jobs with these partition names.
        groups (list[str]):
            Search for Jobs with these group names. Alternatively, you can
            also specify the GIDs directly.
        timelimit (Union[str, int]):
            Search for Jobs with exactly this timelimit.
            Note: If you also specify `max_timelimit`, then this value will act
            as the minimum.
        max_timelimit (Union[str, int]):
            Search for Jobs which run no longer than this timelimit
            Note: This value has no effect without also setting `timelimit`
        users (list[str]):
            Search for Jobs with these user names. Alternatively, you can also
            specify the UIDs directly.
        wckeys (list[str]):
            Search for Jobs with these WCKeys
        nodelist (list[str]):
            Search for Jobs that ran on any of these Nodes
        with_script (bool):
            Instruct the slurmdbd to also send the job script(s)
            Note: This requires explicitly specifying job ids, and is mutually
            exclusive with `with_env`
        with_env (bool):
            Instruct the slurmdbd to also send the job environment(s)
            Note: This requires explicitly specifying job ids, and is mutually
            exclusive with `with_script`
        truncate_time (bool):
            Truncate start and end time.
            For example, when a Job has actually started before the requested
            `start_time`, the time will be truncated to `start_time`. Same
            logic applies for `end_time`. This is like the `-T` / `--truncate`
            option from `sacct`.
    """
    # C-level job condition; (re)allocated by _alloc() and released by
    # _dealloc() in the implementation file.
    cdef slurmdb_job_cond_t *ptr

    cdef public:
        ids
        start_time
        end_time
        accounts
        association_ids
        clusters
        constraints
        cpus
        max_cpus
        nodes
        max_nodes
        qos
        names
        partitions
        groups
        timelimit
        max_timelimit
        users
        wckeys
        nodelist
        with_script
        with_env
        truncate_time
cdef class Jobs(MultiClusterMap):
    """A [`Multi Cluster`][pyslurm.xcollections.MultiClusterMap] collection of [pyslurm.db.Job][] objects.

    The statistic attributes below are accumulated while loading Jobs with
    `Jobs.load()` and can be recomputed with `calc_stats()`.

    Args:
        jobs (Union[list[int], dict[int, pyslurm.db.Job], str], optional=None):
            Jobs to initialize this collection with.

    Attributes:
        consumed_energy (int):
            Total amount of energy consumed, in joules.
        disk_read (int):
            Total amount of bytes read.
        disk_write (int):
            Total amount of bytes written.
        page_faults (int):
            Total amount of page faults.
        resident_memory (int):
            Total Resident Set Size (RSS) used in bytes.
        virtual_memory (int):
            Total Virtual Memory Size (VSZ) used in bytes.
        elapsed_cpu_time (int):
            Total amount of time used (Elapsed time * cpu count) in seconds.
            This is not the real CPU-Efficiency, but rather the total amount
            of cpu-time the CPUs were occupied for.
        total_cpu_time (int):
            Sum of `user_cpu_time` and `system_cpu_time`, in seconds
        user_cpu_time (int):
            Total amount of Time spent in user space, in seconds
        system_cpu_time (int):
            Total amount of Time spent in kernel space, in seconds
        cpus (int):
            Total amount of cpus.
        nodes (int):
            Total amount of nodes.
        memory (int):
            Total amount of requested memory in Mebibytes.
    """
    cdef public:
        consumed_energy
        disk_read
        disk_write
        page_faults
        resident_memory
        virtual_memory
        elapsed_cpu_time
        total_cpu_time
        user_cpu_time
        system_cpu_time
        cpus
        nodes
        memory
cdef class Job:
    """A Slurm Database Job.

    Args:
        job_id (int, optional=0):
            An Integer representing a Job-ID.
        cluster (str, optional=None):
            Name of the Cluster for this Job. Default is the name of the local
            Cluster.

    Other Parameters:
        admin_comment (str):
            Admin comment for the Job.
        comment (str):
            Comment for the Job
        wckey (str):
            Name of the WCKey for this Job
        derived_exit_code (int):
            Highest exit code of all the Job steps
        extra (str):
            Arbitrary string that can be stored with a Job.

    Attributes:
        steps (pyslurm.db.JobSteps):
            Steps this Job has
        stats (pyslurm.db.JobStatistics):
            Utilization statistics of this Job
        account (str):
            Account of the Job.
        admin_comment (str):
            Admin comment for the Job.
        num_nodes (int):
            Amount of nodes this Job has allocated (if it is running) or
            requested (if it is still pending).
        array_id (int):
            The master Array-Job ID.
        array_tasks_parallel (int):
            Max number of array tasks allowed to run simultaneously.
        array_task_id (int):
            Array Task ID of this Job if it is an Array-Job.
        array_tasks_waiting (str):
            Array Tasks that are still waiting.
        association_id (int):
            ID of the Association this job runs in.
        block_id (str):
            Name of the block used (for BlueGene Systems)
        cluster (str):
            Cluster this Job belongs to
        constraints (str):
            Constraints of the Job
        container (str):
            Path to OCI Container bundle
        db_index (int):
            Unique database index of the Job in the job table
        derived_exit_code (int):
            Highest exit code of all the Job steps
        derived_exit_code_signal (int):
            Signal of the derived exit code
        comment (str):
            Comment for the Job
        elapsed_time (int):
            Amount of seconds elapsed for the Job
        eligible_time (int):
            When the Job became eligible to run, as a unix timestamp
        end_time (int):
            When the Job ended, as a unix timestamp
        environment (dict):
            The environment of this Job.
            Note: Only available if the "with_env" condition was given
        extra (str):
            Arbitrary string that can be stored with a Job.
        exit_code (int):
            Exit code of the job script or salloc.
        exit_code_signal (int):
            Signal of the exit code for this Job.
        failed_node (str):
            Name of the failed node that caused the job to get killed.
        group_id (int):
            ID of the group for this Job
        group_name (str):
            Name of the group for this Job
        id (int):
            ID of the Job
        name (str):
            Name of the Job
        mcs_label (str):
            MCS Label of the Job
        nodelist (str):
            Nodes this Job is using
        partition (str):
            Name of the Partition for this Job
        priority (int):
            Priority for the Job
        qos (str):
            Name of the Quality of Service for the Job
        cpus (int):
            Amount of CPUs the Job has/had allocated, or, if the Job is still
            pending, this will reflect the amount requested.
        memory (int):
            Amount of memory the Job requested in total, in Mebibytes
        reservation (str):
            Name of the Reservation for this Job
        script (str):
            The batch script for this Job.
            Note: Only available if the "with_script" condition was given
        start_time (int):
            Time when the Job started, as a unix timestamp
        state (str):
            State of the Job
        state_reason (str):
            Last reason a Job was blocked from running
        cancelled_by (str):
            Name of the User who cancelled this Job
        submit_time (int):
            Time the Job was submitted, as a unix timestamp
        submit_command (str):
            Full command issued to submit the Job
        suspended_time (int):
            Amount of seconds the Job was suspended
        system_comment (str):
            Arbitrary System comment for the Job
        time_limit (int):
            Time limit of the Job in minutes
        user_id (int):
            UID of the User this Job belongs to
        user_name (str):
            Name of the User this Job belongs to
        wckey (str):
            Name of the WCKey for this Job
        working_directory (str):
            Working directory of the Job
    """
    cdef:
        # Wrapped C job record; released with slurmdb_destroy_job_rec() in
        # the implementation file.
        slurmdb_job_rec_t *ptr
        # QoS collection used by the `qos` property to look up the QoS of
        # this Job by its id.
        QualitiesOfService qos_data

    cdef public:
        JobSteps steps
        JobStatistics stats

    @staticmethod
    cdef Job from_ptr(slurmdb_job_rec_t *in_ptr)
cdef class JobFilter:

    def __cinit__(self):
        self.ptr = NULL

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __dealloc__(self):
        self._dealloc()

    def _dealloc(self):
        """Release the C job condition, if any."""
        slurmdb_destroy_job_cond(self.ptr)
        self.ptr = NULL

    def _alloc(self):
        """(Re)allocate the C job condition and set its default flags.

        Raises:
            MemoryError: When the allocation failed.
        """
        self._dealloc()
        # try_xmalloc hands back an untyped pointer, so it must be cast
        # explicitly to the condition type.
        self.ptr = <slurmdb_job_cond_t*>try_xmalloc(sizeof(slurmdb_job_cond_t))
        if not self.ptr:
            raise MemoryError("xmalloc failed for slurmdb_job_cond_t")

        self.ptr.db_flags = slurm.SLURMDB_JOB_FLAG_NOTSET
        # No truncation by default; _create() clears this flag again when
        # `truncate_time` is set.
        self.ptr.flags |= slurm.JOBCOND_FLAG_NO_TRUNC

    def _parse_qos(self):
        """Translate the user supplied QoS values into a list of QoS ids.

        Each entry of `self.qos` may be a QoS id, a QoS name or a QoS
        object.

        Returns:
            (list[str]): The ids as strings, or None if no QoS was given.

        Raises:
            ValueError: When a requested QoS does not exist.
        """
        if not self.qos:
            return None

        qos_id_list = []
        qos_data = QualitiesOfService.load()
        for user_input in self.qos:
            found = False
            for qos in qos_data.values():
                if (qos.id == user_input
                        or qos.name == user_input
                        or qos == user_input):
                    qos_id_list.append(str(qos.id))
                    found = True
                    break

            if not found:
                raise ValueError(f"QoS '{user_input}' does not exist")

        return qos_id_list

    def _parse_groups(self):
        """Return the unique GIDs for `self.groups`, or None if unset."""
        if not self.groups:
            return None
        return list({group_to_gid(group) for group in self.groups})

    def _parse_users(self):
        """Return the unique UIDs for `self.users`, or None if unset."""
        if not self.users:
            return None
        return list({user_to_uid(user) for user in self.users})

    def _parse_clusters(self):
        """Return the cluster list to filter by.

        Defaults to the local cluster when nothing was specified. The string
        "all" results in None, meaning no cluster list is set.
        """
        if not self.clusters:
            # This is a requirement for some other parameters to function
            # correctly, like self.nodelist
            return [LOCAL_CLUSTER]
        elif self.clusters == "all":
            return None
        else:
            return self.clusters

    def _parse_state(self):
        # TODO: implement
        return None

    def _create(self):
        """Build the C job condition from the attributes of this filter.

        Raises:
            MemoryError: When an allocation failed.
            ValueError: When `with_script` and `with_env` are both set
                together with `ids`.
        """
        self._alloc()
        cdef:
            slurmdb_job_cond_t *ptr = self.ptr
            slurm_selected_step_t *selected_step

        ptr.usage_start = date_to_timestamp(self.start_time)
        ptr.usage_end = date_to_timestamp(self.end_time)
        ptr.cpus_min = u32(self.cpus, on_noval=0)
        ptr.cpus_max = u32(self.max_cpus, on_noval=0)
        ptr.nodes_min = u32(self.nodes, on_noval=0)
        ptr.nodes_max = u32(self.max_nodes, on_noval=0)
        ptr.timelimit_min = u32(timestr_to_mins(self.timelimit), on_noval=0)
        ptr.timelimit_max = u32(timestr_to_mins(self.max_timelimit),
                                on_noval=0)
        make_char_list(&ptr.acct_list, self.accounts)
        make_char_list(&ptr.associd_list, self.association_ids)
        make_char_list(&ptr.cluster_list, self._parse_clusters())
        make_char_list(&ptr.constraint_list, self.constraints)
        make_char_list(&ptr.jobname_list, self.names)
        make_char_list(&ptr.groupid_list, self._parse_groups())
        make_char_list(&ptr.userid_list, self._parse_users())
        make_char_list(&ptr.wckey_list, self.wckeys)
        make_char_list(&ptr.partition_list, self.partitions)
        make_char_list(&ptr.qos_list, self._parse_qos())
        make_char_list(&ptr.state_list, self._parse_state())

        if self.nodelist:
            cstr.fmalloc(&ptr.used_nodes,
                         nodelist_to_range_str(self.nodelist))

        if self.truncate_time:
            ptr.flags &= ~slurm.JOBCOND_FLAG_NO_TRUNC

        if self.ids:
            # These are only allowed by the slurmdbd when specific jobs are
            # requested.
            if self.with_script and self.with_env:
                raise ValueError("with_script and with_env are mutually "
                                 "exclusive")

            if self.with_script:
                ptr.flags |= slurm.JOBCOND_FLAG_SCRIPT
            elif self.with_env:
                ptr.flags |= slurm.JOBCOND_FLAG_ENV

            ptr.step_list = slurm_list_create(slurm_destroy_selected_step)
            # A set keeps the duplicate check constant-time per id.
            already_added = set()
            for i in self.ids:
                job_id = u32(i)
                if job_id in already_added:
                    continue

                selected_step = NULL
                selected_step = <slurm_selected_step_t*>try_xmalloc(
                        sizeof(slurm_selected_step_t))
                if not selected_step:
                    raise MemoryError("xmalloc failed for slurm_selected_step_t")

                selected_step.array_task_id = slurm.NO_VAL
                selected_step.het_job_offset = slurm.NO_VAL
                selected_step.step_id.step_id = slurm.NO_VAL
                selected_step.step_id.job_id = job_id
                slurm_list_append(ptr.step_list, selected_step)
                already_added.add(job_id)

        # This must be at the end because it makes decisions based on some
        # conditions that might be set.
        slurmdb_job_cond_def_start_end(ptr)
# Alias
JobSearchFilter = JobFilter


cdef class Jobs(MultiClusterMap):

    def __init__(self, jobs=None):
        super().__init__(data=jobs,
                         typ="db.Jobs",
                         val_type=Job,
                         id_attr=Job.id,
                         key_type=int)
        self._reset_stats()

    @staticmethod
    def load(JobFilter db_filter=None, Connection db_connection=None):
        """Load Jobs from the Slurm Database

        Implements the slurmdb_jobs_get RPC.

        Args:
            db_filter (pyslurm.db.JobFilter):
                A search filter that the slurmdbd will apply when retrieving
                Jobs from the database.
            db_connection (pyslurm.db.Connection):
                An open database connection. By default if none is specified,
                one will be opened automatically.

        Returns:
            (pyslurm.db.Jobs): A Collection of database Jobs.

        Raises:
            RPCError: When getting the Jobs from the Database was not
                successful

        Examples:
            Without a Filter the default behaviour applies, which is
            simply retrieving all Jobs from the same day:

            >>> import pyslurm
            >>> db_jobs = pyslurm.db.Jobs.load()
            >>> print(db_jobs)
            pyslurm.db.Jobs({1: pyslurm.db.Job(1), 2: pyslurm.db.Job(2)})
            >>> print(db_jobs[1])
            pyslurm.db.Job(1)

            Now with a Job Filter, so only Jobs that have specific Accounts
            are returned:

            >>> import pyslurm
            >>> accounts = ["acc1", "acc2"]
            >>> db_filter = pyslurm.db.JobFilter(accounts=accounts)
            >>> db_jobs = pyslurm.db.Jobs.load(db_filter)
        """
        cdef:
            Jobs out = Jobs()
            Job job
            JobFilter cond = db_filter
            SlurmList job_data
            SlurmListItem job_ptr
            Connection conn
            QualitiesOfService qos_data

        # Prepare SQL Filter
        if not db_filter:
            cond = JobFilter()
        cond._create()

        # Setup DB Conn
        conn = _open_conn_or_error(db_connection)

        # Fetch Job data
        job_data = SlurmList.wrap(slurmdb_jobs_get(conn.ptr, cond.ptr))
        if job_data.is_null:
            raise RPCError(msg="Failed to get Jobs from slurmdbd")

        # Fetch other necessary dependencies needed for translating some
        # attributes (i.e QoS IDs to its name)
        qos_data = QualitiesOfService.load(db_connection=conn,
                                           name_is_key=False)

        # TODO: also get trackable resources with slurmdb_tres_get and store
        # it in each job instance. tres_alloc_str and tres_req_str only
        # contain the numeric tres ids, but it probably makes more sense to
        # convert them to its type name for the user in advance.

        # TODO: How to handle the possibility of duplicate job ids that could
        # appear if IDs on a cluster are reset?
        for job_ptr in SlurmList.iter_and_pop(job_data):
            job = Job.from_ptr(<slurmdb_job_rec_t*>job_ptr.data)
            job.qos_data = qos_data
            job._create_steps()
            job.stats = JobStatistics.from_job_steps(job)

            cluster = job.cluster
            if cluster not in out.data:
                out.data[cluster] = {}
            out[cluster][job.id] = job

            add_stats_to_job_collection(out, job.stats)
            # The Job properties treat a missing TRES count as None, so
            # guard the sums against it.
            out.cpus += job.cpus or 0
            out.nodes += job.num_nodes or 0
            out.memory += job.memory or 0

        return out

    def _reset_stats(self):
        reset_stats_for_job_collection(self)
        self.cpus = 0
        self.nodes = 0
        self.memory = 0

    def calc_stats(self):
        """(Re)Calculate Statistics for the Job Collection."""
        self._reset_stats()
        for job in self.values():
            add_stats_to_job_collection(self, job.stats)
            # See load(): counts may be None for a Job.
            self.cpus += job.cpus or 0
            self.nodes += job.num_nodes or 0
            self.memory += job.memory or 0

    @staticmethod
    def modify(db_filter, Job changes, db_connection=None):
        """Modify Slurm database Jobs.

        Implements the slurmdb_job_modify RPC.

        Args:
            db_filter (Union[pyslurm.db.JobFilter, pyslurm.db.Jobs]):
                A filter to decide which Jobs should be modified.
            changes (pyslurm.db.Job):
                Another [pyslurm.db.Job][] object that contains all the
                changes to apply. Check the `Other Parameters` of the
                [pyslurm.db.Job][] class to see which properties can be
                modified.
            db_connection (pyslurm.db.Connection):
                A Connection to the slurmdbd. By default, if no connection is
                supplied, one will automatically be created internally. This
                means that when the changes were considered successful by the
                slurmdbd, those modifications will be **automatically
                committed**.

                If you however decide to provide your own Connection instance
                (which must be already opened before), and the changes were
                successful, they will basically be in a kind of "staging
                area". By the time this function returns, the changes are not
                actually made.
                You are then responsible to decide whether the changes should
                be committed or rolled back by using the respective methods on
                the connection object. This way, you have a chance to see
                which Jobs were modified before you commit the changes.

        Returns:
            (list[int]): A list of Jobs that were modified

        Raises:
            RPCError: When a failure modifying the Jobs occurred.

        Examples:
            In its simplest form, you can do something like this:

            >>> import pyslurm
            >>>
            >>> db_filter = pyslurm.db.JobFilter(ids=[9999])
            >>> changes = pyslurm.db.Job(comment="A comment for the job")
            >>> modified_jobs = pyslurm.db.Jobs.modify(db_filter, changes)
            >>> print(modified_jobs)
            [9999]

            In the above example, the changes will be automatically committed
            if successful.
            You can however also control this manually by providing your own
            connection object:

            >>> import pyslurm
            >>>
            >>> db_conn = pyslurm.db.Connection.open()
            >>> db_filter = pyslurm.db.JobFilter(ids=[9999])
            >>> changes = pyslurm.db.Job(comment="A comment for the job")
            >>> modified_jobs = pyslurm.db.Jobs.modify(
            ...             db_filter, changes, db_conn)

            Now you can first examine which Jobs have been modified:

            >>> print(modified_jobs)
            [9999]

            And then you can actually commit the changes:

            >>> db_conn.commit()

            You can also explicitly rollback these changes instead of
            committing, so they will not become active:

            >>> db_conn.rollback()
        """
        cdef:
            JobFilter cond
            Connection conn
            SlurmList response
            SlurmListItem response_ptr
            list out = []

        # Prepare SQL Filter
        if isinstance(db_filter, Jobs):
            # This is a staticmethod, so there is no `self`; the collection
            # to take the ids from is the `db_filter` argument.
            job_ids = [job.id for job in db_filter.values()]
            if not job_ids:
                # Without ids the filter would not be restricted to specific
                # Jobs at all, which is never what an empty collection means.
                raise RPCError(msg="Nothing was modified")
            cond = JobFilter(ids=job_ids)
        else:
            cond = db_filter
        cond._create()

        # Setup DB Conn
        conn = _open_conn_or_error(db_connection)

        # Modify Jobs, get the result
        # This returns a List of char* with the Jobs ids that were
        # modified
        response = SlurmList.wrap(
                slurmdb_job_modify(conn.ptr, cond.ptr, changes.ptr))

        if not response.is_null and response.cnt:
            for response_ptr in response:
                response_str = cstr.to_unicode(<char*>response_ptr.data)
                if not response_str:
                    continue

                # The strings in the list returned above have a structure
                # like this:
                #
                # "<job_id> submitted at <time>"
                #
                # We are just interested in the Job-ID, so extract it
                job_id = response_str.split(" ")[0]
                if job_id and job_id.isdigit():
                    out.append(int(job_id))

        elif not response.is_null:
            # There was no real error, but simply nothing has been modified
            raise RPCError(msg="Nothing was modified")
        else:
            # Autodetects the last slurm error
            raise RPCError()

        if not db_connection:
            # Autocommit if no connection was explicitly specified.
            conn.commit()

        return out
cdef class Job:

    def __cinit__(self):
        self.ptr = NULL

    def __init__(self, job_id=0, cluster=None, **kwargs):
        self._alloc_impl()
        self.ptr.jobid = int(job_id)
        cstr.fmalloc(&self.ptr.cluster,
                     LOCAL_CLUSTER if not cluster else cluster)
        self.qos_data = QualitiesOfService()
        self.steps = JobSteps()
        self.stats = JobStatistics()
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __dealloc__(self):
        self._dealloc_impl()

    def _dealloc_impl(self):
        slurmdb_destroy_job_rec(self.ptr)
        self.ptr = NULL

    def _alloc_impl(self):
        # Only allocate once; an existing record is kept.
        if not self.ptr:
            self.ptr = slurmdb_create_job_rec()

    @staticmethod
    cdef Job from_ptr(slurmdb_job_rec_t *in_ptr):
        # Wrap an existing C record without running __init__. Only `ptr` and
        # `steps` are set here; `qos_data` and `stats` are assigned by the
        # caller (see Jobs.load).
        cdef Job wrap = Job.__new__(Job)
        wrap.ptr = in_ptr
        wrap.steps = JobSteps.__new__(JobSteps)
        return wrap

    @staticmethod
    def load(job_id, cluster=None, with_script=False, with_env=False):
        """Load the information for a specific Job from the Database.

        Args:
            job_id (int):
                ID of the Job to be loaded.
            cluster (str):
                Name of the Cluster to search in. Default is the local
                Cluster.
            with_script (bool):
                Whether the Job-Script should also be loaded. Mutually
                exclusive with `with_env`.
            with_env (bool):
                Whether the Job Environment should also be loaded. Mutually
                exclusive with `with_script`.

        Returns:
            (pyslurm.db.Job): Returns a new Database Job instance

        Raises:
            RPCError: If requesting the information for the database Job was
                not successful.

        Examples:
            >>> import pyslurm
            >>> db_job = pyslurm.db.Job.load(10000)

            In the above example, attributes like `script` and `environment`
            are not populated. You must explicitly request one of them to be
            loaded:

            >>> import pyslurm
            >>> db_job = pyslurm.db.Job.load(10000, with_script=True)
            >>> print(db_job.script)
        """
        cluster = LOCAL_CLUSTER if not cluster else cluster
        jfilter = JobFilter(ids=[int(job_id)], clusters=[cluster],
                            with_script=with_script, with_env=with_env)
        job = Jobs.load(jfilter).get((cluster, int(job_id)))
        if not job:
            raise RPCError(msg=f"Job {job_id} does not exist on "
                               f"Cluster {cluster}")

        # TODO: There might be multiple entries when job ids were reset.
        return job

    def _create_steps(self):
        """Move the steps of the C record into `self.steps`, keyed by id."""
        cdef:
            JobStep step
            SlurmList step_list
            SlurmListItem step_ptr

        # The list belongs to the job record, so it is wrapped as not owned.
        step_list = SlurmList.wrap(self.ptr.steps, owned=False)
        for step_ptr in SlurmList.iter_and_pop(step_list):
            step = JobStep.from_ptr(<slurmdb_step_rec_t*>step_ptr.data)
            self.steps[step.id] = step

    def as_dict(self):
        # Kept as an alias of to_dict().
        return self.to_dict()

    def to_dict(self):
        """Convert Database Job information to a dictionary.

        Returns:
            (dict): Database Job information as dict

        Examples:
            >>> import pyslurm
            >>> myjob = pyslurm.db.Job.load(10000)
            >>> myjob_dict = myjob.to_dict()
        """
        cdef dict out = instance_to_dict(self)

        if self.stats:
            out["stats"] = self.stats.to_dict()
        if self.steps:
            out["steps"] = self.steps.to_dict()

        return out

    def __repr__(self):
        return f'pyslurm.db.{self.__class__.__name__}({self.id})'

    def modify(self, changes, db_connection=None):
        """Modify a Slurm database Job.

        Args:
            changes (pyslurm.db.Job):
                Another [pyslurm.db.Job][] object that contains all the
                changes to apply. Check the `Other Parameters` of the
                [pyslurm.db.Job][] class to see which properties can be
                modified.
            db_connection (pyslurm.db.Connection):
                A slurmdbd connection. See
                [pyslurm.db.Jobs.modify][pyslurm.db.job.Jobs.modify] for more
                info on this parameter.

        Raises:
            RPCError: When modifying the Job failed.
        """
        cdef JobFilter jfilter = JobFilter(ids=[self.id])
        Jobs.modify(jfilter, changes, db_connection)
+ """ + cdef JobFilter jfilter = JobFilter(ids=[self.id]) + Jobs.modify(jfilter, changes, db_connection) + + @property + def account(self): + return cstr.to_unicode(self.ptr.account) + + @property + def admin_comment(self): + return cstr.to_unicode(self.ptr.admin_comment) + + @admin_comment.setter + def admin_comment(self, val): + cstr.fmalloc(&self.ptr.admin_comment, val) + + @property + def num_nodes(self): + val = TrackableResources.find_count_in_str(self.ptr.tres_alloc_str, + slurm.TRES_NODE) + if val is not None: + # Job is already running and has nodes allocated + return val + else: + # Job is still pending, so we return the number of requested nodes + # instead. + val = TrackableResources.find_count_in_str(self.ptr.tres_req_str, + slurm.TRES_NODE) + return val + + @property + def array_id(self): + return u32_parse(self.ptr.array_job_id) + + @property + def array_tasks_parallel(self): + return u32_parse(self.ptr.array_max_tasks) + + @property + def array_task_id(self): + return u32_parse(self.ptr.array_task_id) + + @property + def array_tasks_waiting(self): + task_str = cstr.to_unicode(self.ptr.array_task_str) + if not task_str: + return None + + if "%" in task_str: + # We don't want this % character and everything after it + # in here, so remove it. 
+ task_str = task_str[:task_str.rindex("%")] + + return task_str + + @property + def association_id(self): + return u32_parse(self.ptr.associd) + + @property + def block_id(self): + return cstr.to_unicode(self.ptr.blockid) + + @property + def cluster(self): + return cstr.to_unicode(self.ptr.cluster) + + @property + def constraints(self): + return cstr.to_list(self.ptr.constraints) + + @property + def container(self): + return cstr.to_list(self.ptr.container) + + @property + def db_index(self): + return u64_parse(self.ptr.db_index) + + @property + def derived_exit_code(self): + ec, _ = _get_exit_code(self.ptr.derived_ec) + return ec + + @derived_exit_code.setter + def derived_exit_code(self, val): + self.ptr.derived_ec = int(val) + + @property + def derived_exit_code_signal(self): + _, sig = _get_exit_code(self.ptr.derived_ec) + return sig + + @property + def comment(self): + return cstr.to_unicode(self.ptr.derived_es) + + @comment.setter + def comment(self, val): + cstr.fmalloc(&self.ptr.derived_es, val) + + @property + def elapsed_time(self): + return _raw_time(self.ptr.elapsed) + + @property + def eligible_time(self): + return _raw_time(self.ptr.eligible) + + @property + def end_time(self): + return _raw_time(self.ptr.end) + + @property + def extra(self): + return cstr.to_unicode(self.ptr.extra) + + @extra.setter + def extra(self, val): + cstr.fmalloc(&self.ptr.extra, val) + + @property + def exit_code(self): + ec, _ = _get_exit_code(self.ptr.exitcode) + return ec + + @property + def exit_code_signal(self): + _, sig = _get_exit_code(self.ptr.exitcode) + return sig + + # uint32_t flags + + @property + def failed_node(self): + return cstr.to_unicode(self.ptr.failed_node) + + def group_id(self): + return u32_parse(self.ptr.gid, zero_is_noval=False) + + def group_name(self): + return gid_to_name(self.ptr.gid) + + # uint32_t het_job_id + # uint32_t het_job_offset + + @property + def id(self): + return self.ptr.jobid + + @property + def name(self): + return 
cstr.to_unicode(self.ptr.jobname) + + # uint32_t lft + + @property + def mcs_label(self): + return cstr.to_unicode(self.ptr.mcs_label) + + @property + def nodelist(self): + return cstr.to_unicode(self.ptr.nodes) + + @property + def partition(self): + return cstr.to_unicode(self.ptr.partition) + + @property + def priority(self): + return u32_parse(self.ptr.priority, zero_is_noval=False) + + @property + def qos(self): + _qos = self.qos_data.get(self.ptr.qosid, None) + if _qos: + return _qos.name + else: + return None + + @property + def cpus(self): + val = TrackableResources.find_count_in_str(self.ptr.tres_alloc_str, + slurm.TRES_CPU) + if val is not None: + # Job is already running and has cpus allocated + return val + else: + # Job is still pending, so we return the number of requested cpus + # instead. + return u32_parse(self.ptr.req_cpus, on_noval=0, zero_is_noval=False) + + @property + def memory(self): + val = TrackableResources.find_count_in_str(self.ptr.tres_req_str, + slurm.TRES_MEM) + return val + + @property + def reservation(self): + return cstr.to_unicode(self.ptr.resv_name) + +# @property +# def reservation_id(self): +# return u32_parse(self.ptr.resvid) + + @property + def script(self): + return cstr.to_unicode(self.ptr.script) + + @property + def environment(self): + return cstr.to_dict(self.ptr.env, delim1="\n", delim2="=") + + @property + def start_time(self): + return _raw_time(self.ptr.start) + + @property + def state(self): + return cstr.to_unicode(slurm_job_state_string(self.ptr.state)) + + @property + def state_reason(self): + return cstr.to_unicode(slurm_job_reason_string + (self.ptr.state_reason_prev)) + + @property + def cancelled_by(self): + return uid_to_name(self.ptr.requid) + + @property + def submit_time(self): + return _raw_time(self.ptr.submit) + + @property + def submit_command(self): + return cstr.to_unicode(self.ptr.submit_line) + + @property + def suspended_time(self): + return _raw_time(self.ptr.suspended) + + @property + def
system_comment(self): + return cstr.to_unicode(self.ptr.system_comment) + + @system_comment.setter + def system_comment(self, val): + cstr.fmalloc(&self.ptr.system_comment, val) + + @property + def time_limit(self): + # TODO: Perhaps we should just find out what the actual PartitionLimit + # is? + return _raw_time(self.ptr.timelimit, "PartitionLimit") + + @property + def user_id(self): + return u32_parse(self.ptr.uid, zero_is_noval=False) + + @property + def user_name(self): + # Theres also a ptr->user + # https://github.com/SchedMD/slurm/blob/6365a8b7c9480c48678eeedef99864d8d3b6a6b5/src/sacct/print.c#L1946 + return uid_to_name(self.ptr.uid) + + # TODO: used gres + + @property + def wckey(self): + return cstr.to_unicode(self.ptr.wckey) + + @wckey.setter + def wckey(self, val): + cstr.fmalloc(&self.ptr.wckey, val) + +# @property +# def wckey_id(self): +# return u32_parse(self.ptr.wckeyid) + + @property + def working_directory(self): + return cstr.to_unicode(self.ptr.work_dir) + +# @property +# def tres_allocated(self): +# return TrackableResources.from_str(self.ptr.tres_alloc_str) + +# @property +# def tres_requested(self): +# return TrackableResources.from_str(self.ptr.tres_req_str) diff --git a/pyslurm/db/qos.pxd b/pyslurm/db/qos.pxd new file mode 100644 index 00000000..ea0fde2d --- /dev/null +++ b/pyslurm/db/qos.pxd @@ -0,0 +1,66 @@ +######################################################################### +# qos.pxd - pyslurm slurmdbd qos api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurmdb_qos_rec_t, + slurmdb_qos_cond_t, + slurmdb_destroy_qos_rec, + slurmdb_destroy_qos_cond, + slurmdb_qos_get, + slurm_preempt_mode_num, + List, + try_xmalloc, +) +from pyslurm.db.util cimport ( + SlurmList, + SlurmListItem, + make_char_list, +) +from pyslurm.db.connection cimport Connection +from pyslurm.utils cimport cstr + +cdef _set_qos_list(List *in_list, vals, QualitiesOfService data) + + +cdef class QualitiesOfService(dict): + pass + + +cdef class QualityOfServiceFilter: + cdef slurmdb_qos_cond_t *ptr + + cdef public: + names + ids + descriptions + preempt_modes + with_deleted + + +cdef class QualityOfService: + cdef slurmdb_qos_rec_t *ptr + + @staticmethod + cdef QualityOfService from_ptr(slurmdb_qos_rec_t *in_ptr) diff --git a/pyslurm/db/qos.pyx b/pyslurm/db/qos.pyx new file mode 100644 index 00000000..09819611 --- /dev/null +++ b/pyslurm/db/qos.pyx @@ -0,0 +1,228 @@ +######################################################################### +# qos.pyx - pyslurm slurmdbd qos api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any 
later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.core.error import RPCError +from pyslurm.utils.helpers import instance_to_dict +from pyslurm.db.connection import _open_conn_or_error + + +cdef class QualitiesOfService(dict): + + def __init__(self): + pass + + @staticmethod + def load(QualityOfServiceFilter db_filter=None, + Connection db_connection=None, name_is_key=True): + """Load QoS data from the Database + + Args: + name_is_key (bool, optional): + By default, the keys in this dict are the names of each QoS. + If this is set to `False`, then the unique ID of the QoS will + be used as dict keys. 
+ """ + cdef: + QualitiesOfService out = QualitiesOfService() + QualityOfService qos + QualityOfServiceFilter cond = db_filter + SlurmList qos_data + SlurmListItem qos_ptr + Connection conn + + # Prepare SQL Filter + if not db_filter: + cond = QualityOfServiceFilter() + cond._create() + + # Setup DB Conn + conn = _open_conn_or_error(db_connection) + + # Fetch QoS Data + qos_data = SlurmList.wrap(slurmdb_qos_get(conn.ptr, cond.ptr)) + + if qos_data.is_null: + raise RPCError(msg="Failed to get QoS data from slurmdbd") + + # Setup QOS objects + for qos_ptr in SlurmList.iter_and_pop(qos_data): + qos = QualityOfService.from_ptr(qos_ptr.data) + _id = qos.name if name_is_key else qos.id + out[_id] = qos + + return out + + +cdef class QualityOfServiceFilter: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + def __dealloc__(self): + self._dealloc() + + def _dealloc(self): + slurmdb_destroy_qos_cond(self.ptr) + self.ptr = NULL + + def _alloc(self): + self._dealloc() + self.ptr = try_xmalloc(sizeof(slurmdb_qos_cond_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for slurmdb_qos_cond_t") + + def _parse_preempt_modes(self): + if not self.preempt_modes: + return 0 + + if isinstance(self.preempt_modes, int): + return self.preempt_modes + + out = 0 + for mode in self.preempt_modes: + _mode = slurm_preempt_mode_num(mode) + if _mode == slurm.NO_VAL16: + raise ValueError(f"Unknown preempt mode: {mode}") + + if _mode == slurm.PREEMPT_MODE_OFF: + _mode = slurm.PREEMPT_MODE_COND_OFF + + out |= _mode + + return out + + def _create(self): + self._alloc() + cdef slurmdb_qos_cond_t *ptr = self.ptr + + make_char_list(&ptr.name_list, self.names) + make_char_list(&ptr.id_list, self.ids) + make_char_list(&ptr.description_list, self.descriptions) + ptr.preempt_mode = self._parse_preempt_modes() + ptr.with_deleted = 1 if bool(self.with_deleted) else 0 + + +cdef class QualityOfService: + + def 
__cinit__(self): + self.ptr = NULL + + def __init__(self, name=None): + self._alloc_impl() + self.name = name + + def __dealloc__(self): + self._dealloc_impl() + + def _dealloc_impl(self): + slurmdb_destroy_qos_rec(self.ptr) + self.ptr = NULL + + def _alloc_impl(self): + if not self.ptr: + self.ptr = try_xmalloc( + sizeof(slurmdb_qos_rec_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for slurmdb_qos_rec_t") + + @staticmethod + cdef QualityOfService from_ptr(slurmdb_qos_rec_t *in_ptr): + cdef QualityOfService wrap = QualityOfService.__new__(QualityOfService) + wrap.ptr = in_ptr + return wrap + + def __repr__(self): + return f'pyslurm.db.{self.__class__.__name__}({self.name})' + + def to_dict(self): + """Database QualityOfService information formatted as a dictionary. + + Returns: + (dict): Database QualityOfService information as dict + """ + return instance_to_dict(self) + + @staticmethod + def load(name): + """Load the information for a specific Quality of Service. + + Args: + name (str): + Name of the Quality of Service to be loaded. + + Returns: + (QualityOfService): Returns a new QualityOfService + instance. + + Raises: + RPCError: If requesting the information from the database was not + successful.
+ """ + qfilter = QualityOfServiceFilter(names=[name]) + qos = QualitiesOfService.load(qfilter).get(name) + if not qos: + raise RPCError(msg=f"QualityOfService {name} does not exist") + + return qos + + @property + def name(self): + return cstr.to_unicode(self.ptr.name) + + @name.setter + def name(self, val): + cstr.fmalloc(&self.ptr.name, val) + + @property + def description(self): + return cstr.to_unicode(self.ptr.description) + + @property + def id(self): + return self.ptr.id + + +def _qos_names_to_ids(qos_list, QualitiesOfService data): + cdef list out = [] + if not qos_list: + return None + + return [_validate_qos_single(qid, data) for qid in qos_list] + + +def _validate_qos_single(qid, QualitiesOfService data): + for item in data.values(): + if qid == item.id or qid == item.name: + return item.id + + raise ValueError(f"Invalid QOS specified: {qid}") + + +cdef _set_qos_list(List *in_list, vals, QualitiesOfService data): + qos_ids = _qos_names_to_ids(vals, data) + make_char_list(in_list, qos_ids) diff --git a/pyslurm/db/stats.pxd b/pyslurm/db/stats.pxd new file mode 100644 index 00000000..5615b2c3 --- /dev/null +++ b/pyslurm/db/stats.pxd @@ -0,0 +1,147 @@ +######################################################################### +# stats.pxd - pyslurm slurmdbd job stats +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + try_xmalloc, + slurmdb_stats_t, + slurmdb_job_rec_t, +) +from pyslurm.db.tres cimport TrackableResources +from pyslurm.db.step cimport JobStep, JobSteps +from pyslurm.db.job cimport Job +from pyslurm.utils cimport cstr + + +cdef class JobStatistics: + """Statistics for a Slurm Job or Step. + + !!! note + + For more information also see the sacct manpage. + + Attributes: + consumed_energy (int): + Total amount of energy consumed, in joules + elapsed_cpu_time (int): + Total amount of time used (Elapsed time * cpu count) in seconds. + This is not the real CPU-Efficiency, but rather the total amount + of cpu-time the CPUs were occupied for + avg_cpu_time (int): + Average CPU-Time (System + User) in seconds of all tasks + avg_cpu_frequency (int): + Average weighted CPU-Frequency of all tasks, in Kilohertz + avg_disk_read (int): + Average number of bytes read by all tasks + avg_disk_write (int): + Average number of bytes written by all tasks + avg_page_faults (int): + Average number of page faults by all tasks + avg_resident_memory (int): + Average Resident Set Size (RSS) in bytes of all tasks + avg_virtual_memory (int): + Average Virtual Memory Size (VSZ) in bytes of all tasks + max_disk_read (int): + Highest peak number of bytes read by all tasks + max_disk_read_node (str): + Name of the Node where max_disk_read occurred + max_disk_read_task (int): + ID of the Task where max_disk_read occurred + max_disk_write (int): + Highest peak number of bytes written by all tasks + max_disk_write_node (str): + Name of the Node where max_disk_write occurred + max_disk_write_task (int): + ID of the Task where max_disk_write
occurred + max_page_faults (int): + Highest peak number of page faults by all tasks + max_page_faults_node (str): + Name of the Node where max_page_faults occurred + max_page_faults_task (int): + ID of the Task where max_page_faults occurred + max_resident_memory (int): + Highest peak Resident Set Size (RSS) in bytes by all tasks + max_resident_memory_node (str): + Name of the Node where max_resident_memory occurred + max_resident_memory_task (int): + ID of the Task where max_resident_memory occurred + max_virtual_memory (int): + Highest peak Virtual Memory Size (VSZ) in bytes by all tasks + max_virtual_memory_node (str): + Name of the Node where max_virtual_memory occurred + max_virtual_memory_task (int): + ID of the Task where max_virtual_memory occurred + min_cpu_time (int): + Lowest peak CPU-Time (System + User) in seconds of all tasks + min_cpu_time_node (str): + Name of the Node where min_cpu_time occurred + min_cpu_time_task (int): + ID of the Task where min_cpu_time occurred + total_cpu_time (int): + Sum of user_cpu_time and system_cpu_time, in seconds + user_cpu_time (int): + Amount of Time spent in user space, in seconds + system_cpu_time (int): + Amount of Time spent in kernel space, in seconds + """ + cdef slurmdb_job_rec_t *job + + cdef public: + consumed_energy + elapsed_cpu_time + avg_cpu_time + avg_cpu_frequency + avg_disk_read + avg_disk_write + avg_page_faults + avg_resident_memory + avg_virtual_memory + max_disk_read + max_disk_read_node + max_disk_read_task + max_disk_write + max_disk_write_node + max_disk_write_task + max_page_faults + max_page_faults_node + max_page_faults_task + max_resident_memory + max_resident_memory_node + max_resident_memory_task + max_virtual_memory + max_virtual_memory_node + max_virtual_memory_task + min_cpu_time + min_cpu_time_node + min_cpu_time_task + total_cpu_time + user_cpu_time + system_cpu_time + + @staticmethod + cdef JobStatistics from_job_steps(Job job) + + @staticmethod + cdef JobStatistics from_step(JobStep step)
+ diff --git a/pyslurm/db/stats.pyx b/pyslurm/db/stats.pyx new file mode 100644 index 00000000..c2da1145 --- /dev/null +++ b/pyslurm/db/stats.pyx @@ -0,0 +1,236 @@ +######################################################################### +# stats.pyx - pyslurm slurmdbd job stats +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.utils.helpers import ( + nodelist_from_range_str, + instance_to_dict, +) + + +def reset_stats_for_job_collection(jobs): + jobs.consumed_energy = 0 + jobs.disk_read = 0 + jobs.disk_write = 0 + jobs.page_faults = 0 + jobs.resident_memory = 0 + jobs.virtual_memory = 0 + jobs.elapsed_cpu_time = 0 + jobs.total_cpu_time = 0 + jobs.user_cpu_time = 0 + jobs.system_cpu_time = 0 + + +def add_stats_to_job_collection(jobs, JobStatistics js): + jobs.consumed_energy += js.consumed_energy + jobs.disk_read += js.avg_disk_read + jobs.disk_write += js.avg_disk_write + jobs.page_faults += js.avg_page_faults + jobs.resident_memory += js.avg_resident_memory + jobs.virtual_memory += js.avg_virtual_memory + jobs.elapsed_cpu_time += js.elapsed_cpu_time + jobs.total_cpu_time += js.total_cpu_time + jobs.user_cpu_time += js.user_cpu_time + jobs.system_cpu_time += js.system_cpu_time + + +cdef class JobStatistics: + + def __init__(self): + for attr, val in instance_to_dict(self).items(): + setattr(self, attr, 0) + + self.max_disk_read_node = None + self.max_disk_read_task = None + self.max_disk_write_node = None + self.max_disk_write_task = None + self.max_page_faults_node = None + self.max_page_faults_task = None + self.max_resident_memory_node = None + self.max_resident_memory_task = None + self.max_virtual_memory_node = None + self.max_virtual_memory_task = None + self.min_cpu_time_node = None + self.min_cpu_time_task = None + + def to_dict(self): + return instance_to_dict(self) + + @staticmethod + cdef JobStatistics from_job_steps(Job job): + cdef JobStatistics job_stats = JobStatistics() + + for step in job.steps.values(): + job_stats._add_base_stats(step.stats) + + job_stats._sum_cpu_time(job) + + step_count = len(job.steps) + if step_count: + job_stats.avg_cpu_frequency /= step_count + + return job_stats + + @staticmethod + cdef JobStatistics from_step(JobStep step): + cdef 
JobStatistics wrap = JobStatistics() + if not &step.ptr.stats: + return wrap + + cdef: + list nodes = nodelist_from_range_str( + cstr.to_unicode(step.ptr.nodes)) + cpu_time_adj = 1000 + slurmdb_stats_t *ptr = &step.ptr.stats + + if ptr.consumed_energy != slurm.NO_VAL64: + wrap.consumed_energy = ptr.consumed_energy + + wrap.avg_cpu_time = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_CPU) / cpu_time_adj + + elapsed = step.elapsed_time if step.elapsed_time else 0 + cpus = step.cpus if step.cpus else 0 + wrap.elapsed_cpu_time = elapsed * cpus + + ave_freq = int(ptr.act_cpufreq) + if ave_freq != slurm.NO_VAL: + wrap.avg_cpu_frequency = ptr.act_cpufreq + + wrap.avg_disk_read = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_FS_DISK) + wrap.avg_disk_write = TrackableResources.find_count_in_str( + ptr.tres_usage_out_ave, slurm.TRES_FS_DISK) + wrap.avg_page_faults = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_PAGES) + wrap.avg_resident_memory = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_MEM) + wrap.avg_virtual_memory = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_VMEM) + + wrap.max_disk_read = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max, slurm.TRES_FS_DISK) + max_disk_read_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_nodeid, slurm.TRES_FS_DISK) + wrap.max_disk_read_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_taskid, slurm.TRES_FS_DISK) + + wrap.max_disk_write = TrackableResources.find_count_in_str( + ptr.tres_usage_out_max, slurm.TRES_FS_DISK) + max_disk_write_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_out_max_nodeid, slurm.TRES_FS_DISK) + wrap.max_disk_write_task = TrackableResources.find_count_in_str( + ptr.tres_usage_out_max_taskid, slurm.TRES_FS_DISK) + + wrap.max_resident_memory = TrackableResources.find_count_in_str( + 
ptr.tres_usage_in_max, slurm.TRES_MEM) + max_resident_memory_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_nodeid, slurm.TRES_MEM) + wrap.max_resident_memory_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_taskid, slurm.TRES_MEM) + + wrap.max_virtual_memory = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max, slurm.TRES_VMEM) + max_virtual_memory_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_nodeid, slurm.TRES_VMEM) + wrap.max_virtual_memory_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_taskid, slurm.TRES_VMEM) + + wrap.min_cpu_time = TrackableResources.find_count_in_str( + ptr.tres_usage_in_min, slurm.TRES_CPU) / cpu_time_adj + min_cpu_time_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_min_nodeid, slurm.TRES_CPU) + wrap.min_cpu_time_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_min_taskid, slurm.TRES_CPU) + + wrap.total_cpu_time = TrackableResources.find_count_in_str( + ptr.tres_usage_in_tot, slurm.TRES_CPU) + + if nodes: + wrap.max_disk_write_node = nodes[max_disk_write_nodeid] + wrap.max_disk_read_node = nodes[max_disk_read_nodeid] + wrap.max_resident_memory_node = nodes[max_resident_memory_nodeid] + wrap.max_virtual_memory_node = nodes[max_virtual_memory_nodeid] + wrap.min_cpu_time_node = nodes[min_cpu_time_nodeid] + + if step.ptr.user_cpu_sec != slurm.NO_VAL64: + wrap.user_cpu_time = step.ptr.user_cpu_sec + + if step.ptr.sys_cpu_sec != slurm.NO_VAL64: + wrap.system_cpu_time = step.ptr.sys_cpu_sec + + return wrap + + def _add_base_stats(self, JobStatistics src): + self.consumed_energy += src.consumed_energy + self.avg_cpu_time += src.avg_cpu_time + self.avg_cpu_frequency += src.avg_cpu_frequency + self.avg_disk_read += src.avg_disk_read + self.avg_disk_write += src.avg_disk_write + self.avg_page_faults += src.avg_page_faults + + if src.max_disk_read >= self.max_disk_read: + self.max_disk_read = src.max_disk_read + 
self.max_disk_read_node = src.max_disk_read_node + self.max_disk_read_task = src.max_disk_read_task + + if src.max_disk_write >= self.max_disk_write: + self.max_disk_write = src.max_disk_write + self.max_disk_write_node = src.max_disk_write_node + self.max_disk_write_task = src.max_disk_write_task + + if src.max_page_faults >= self.max_page_faults: + self.max_page_faults = src.max_page_faults + self.max_page_faults_node = src.max_page_faults_node + self.max_page_faults_task = src.max_page_faults_task + + if src.max_resident_memory >= self.max_resident_memory: + self.max_resident_memory = src.max_resident_memory + self.max_resident_memory_node = src.max_resident_memory_node + self.max_resident_memory_task = src.max_resident_memory_task + self.avg_resident_memory = self.max_resident_memory + + if src.max_virtual_memory >= self.max_virtual_memory: + self.max_virtual_memory = src.max_virtual_memory + self.max_virtual_memory_node = src.max_virtual_memory_node + self.max_virtual_memory_task = src.max_virtual_memory_task + self.avg_virtual_memory = self.max_virtual_memory + + if src.min_cpu_time >= self.min_cpu_time: + self.min_cpu_time = src.min_cpu_time + self.min_cpu_time_node = src.min_cpu_time_node + self.min_cpu_time_task = src.min_cpu_time_task + + def _sum_cpu_time(self, Job job): + if job.ptr.tot_cpu_sec != slurm.NO_VAL64: + self.total_cpu_time += job.ptr.tot_cpu_sec + + if job.ptr.user_cpu_sec != slurm.NO_VAL64: + self.user_cpu_time += job.ptr.user_cpu_sec + + if job.ptr.sys_cpu_sec != slurm.NO_VAL64: + self.system_cpu_time += job.ptr.sys_cpu_sec + + elapsed = job.elapsed_time if job.elapsed_time else 0 + cpus = job.cpus if job.cpus else 0 + self.elapsed_cpu_time += elapsed * cpus diff --git a/pyslurm/db/step.pxd b/pyslurm/db/step.pxd new file mode 100644 index 00000000..ab0ff70c --- /dev/null +++ b/pyslurm/db/step.pxd @@ -0,0 +1,102 @@ +######################################################################### +# step.pxd - pyslurm slurmdbd step api 
+######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurmdb_job_rec_t, + slurmdb_job_cond_t, + slurmdb_step_rec_t, + slurmdb_jobs_get, + slurmdb_destroy_job_cond, + slurmdb_destroy_job_rec, + slurmdb_destroy_step_rec, + try_xmalloc, + slurmdb_job_cond_def_start_end, + slurm_job_state_string, + slurm_job_reason_string, +) +from pyslurm.db.util cimport SlurmList, SlurmListItem +from pyslurm.db.connection cimport Connection +from pyslurm.utils cimport cstr +from pyslurm.db.stats cimport JobStatistics +from pyslurm.db.tres cimport TrackableResources, TrackableResource + + +cdef class JobSteps(dict): + """A [dict][] of [pyslurm.db.JobStep][] objects""" + pass + + +cdef class JobStep: + """A Slurm Database JobStep. 
+ + Attributes: + stats (pyslurm.db.JobStatistics): + Utilization statistics for this Step + num_nodes (int): + Amount of nodes this Step has allocated + cpus (int): + Amount of CPUs the Step has/had allocated + memory (int): + Amount of memory the Step requested + container (str): + Path to OCI Container bundle + elapsed_time (int): + Amount of seconds elapsed for the Step + end_time (int): + When the Step ended, as a unix timestamp + eligible_time (int): + When the Step became eligible to run, as a unix timestamp + start_time (int): + Time when the Step started, as a unix timestamp + exit_code (int): + Exit code of the step + ntasks (int): + Number of tasks the Step uses + cpu_frequency_min (str): + Minimum CPU-Frequency requested for the Step + cpu_frequency_max (str): + Maximum CPU-Frequency requested for the Step + cpu_frequency_governor (str): + CPU-Frequency Governor requested for the Step + nodelist (str): + Nodes this Step is using + id (Union[str, int]): + ID of the Step + job_id (int): + ID of the Job this Step is a part of + state (str): + State of the Step + cancelled_by (str): + Name of the User who cancelled this Step + submit_command (str): + Full command issued to start the Step + suspended_time (int): + Amount of seconds the Step was suspended + """ + cdef slurmdb_step_rec_t *ptr + cdef public JobStatistics stats + + @staticmethod + cdef JobStep from_ptr(slurmdb_step_rec_t *step) diff --git a/pyslurm/db/step.pyx b/pyslurm/db/step.pyx new file mode 100644 index 00000000..2d71ca73 --- /dev/null +++ b/pyslurm/db/step.pyx @@ -0,0 +1,202 @@ +######################################################################### +# step.pyx - pyslurm slurmdbd step api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free 
Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS
+from pyslurm.core.error import RPCError
+from typing import Union
+from pyslurm.utils.uint import *
+from pyslurm.utils.ctime import _raw_time
+from pyslurm import xcollections
+from pyslurm.utils.helpers import (
+    gid_to_name,
+    uid_to_name,
+    instance_to_dict,
+    _get_exit_code,
+    humanize_step_id,
+)
+from pyslurm.core.job.util import cpu_freq_int_to_str
+
+
+cdef class JobSteps(dict):
+    """A dict-based collection of Database JobStep objects."""
+
+    def __repr__(self):
+        data = super().__repr__()
+        return f'pyslurm.db.{self.__class__.__name__}({data})'
+
+    def to_dict(self):
+        """Convert the collection to a plain dict.
+
+        Returns:
+            (dict): Result of `xcollections.dict_recursive` on this collection
+        """
+        return xcollections.dict_recursive(self)
+
+
+cdef class JobStep:
+    # Wrapper around a slurmdb_step_rec_t pointer. Instances are created
+    # through the from_ptr() factory; __init__ deliberately refuses direct
+    # construction.
+
+    def __cinit__(self):
+        self.ptr = NULL
+
+    def __init__(self):
+        raise RuntimeError("You can not instantiate this class directly "
+                           " at the moment")
+
+    def __dealloc__(self):
+        # The wrapper destroys the record it holds when it is collected.
+        slurmdb_destroy_step_rec(self.ptr)
+        self.ptr = NULL
+
+    @staticmethod
+    cdef JobStep from_ptr(slurmdb_step_rec_t *step):
+        # __new__ is used so that the raising __init__ is bypassed.
+        cdef JobStep wrap = JobStep.__new__(JobStep)
+        wrap.ptr = step
+        wrap.stats = JobStatistics.from_step(wrap)
+        return wrap
+
+    def to_dict(self):
+        """Convert Database JobStep information to a dictionary.
+
+        Returns:
+            (dict): Database JobStep information as dict
+        """
+        cdef dict out = instance_to_dict(self)
+        # Replace the nested statistics object by its dict form as well.
+        out["stats"] = self.stats.to_dict()
+        return out
+
+    def __repr__(self):
+        return f'pyslurm.db.{self.__class__.__name__}({self.id})'
+
+    @property
+    def num_nodes(self):
+        nnodes = u32_parse(self.ptr.nnodes)
+        # Fall back to the node count recorded in the allocated TRES string
+        # when the record itself carries no usable node count.
+        if not nnodes and self.ptr.tres_alloc_str:
+            return TrackableResources.find_count_in_str(
+                    self.ptr.tres_alloc_str, slurm.TRES_NODE)
+        else:
+            return nnodes
+
+    @property
+    def cpus(self):
+        req_cpus = TrackableResources.find_count_in_str(
+                self.ptr.tres_alloc_str, slurm.TRES_CPU)
+
+        if req_cpus == slurm.INFINITE64:
+            return 0
+
+        return req_cpus
+#        if req_cpus == slurm.INFINITE64 and step.job_ptr:
+#            tres_alloc_str = cstr.to_unicode(step.job_ptr.tres_alloc_str)
+#            req_cpus = TrackableResources.find_count_in_str(tres_alloc_str,
+#                                                            slurm.TRES_CPU)
+#            if not req_cpus:
+#                tres_req_str = cstr.to_unicode(step.job_ptr.tres_req_str)
+#                req_cpus = TrackableResources.find_count_in_str(tres_req_str,
+#                                                                slurm.TRES_CPU)
+
+    @property
+    def memory(self):
+        val = TrackableResources.find_count_in_str(self.ptr.tres_alloc_str,
+                                                   slurm.TRES_MEM)
+        return val
+
+    # Only in Parent Job available:
+    # resvcpu?
+
+    @property
+    def container(self):
+        return cstr.to_unicode(self.ptr.container)
+
+    @property
+    def elapsed_time(self):
+        # seconds
+        return _raw_time(self.ptr.elapsed)
+
+    @property
+    def end_time(self):
+        return _raw_time(self.ptr.end)
+
+    @property
+    def eligible_time(self):
+        # NOTE(review): this reads the same field as start_time; presumably
+        # the step record has no separate eligible timestamp - confirm.
+        return _raw_time(self.ptr.start)
+
+    @property
+    def start_time(self):
+        return _raw_time(self.ptr.start)
+
+    @property
+    def exit_code(self):
+        # _get_exit_code returns a pair; the first element is the exit code.
+        ec, _ = _get_exit_code(self.ptr.exitcode)
+        return ec
+
+    @property
+    def exit_code_signal(self):
+        # Second element of the pair returned by _get_exit_code.
+        _, sig = _get_exit_code(self.ptr.exitcode)
+        return sig
+
+    @property
+    def ntasks(self):
+        return u32_parse(self.ptr.ntasks)
+
+    @property
+    def cpu_frequency_min(self):
+        return cpu_freq_int_to_str(self.ptr.req_cpufreq_min)
+
+    @property
+    def cpu_frequency_max(self):
+        return cpu_freq_int_to_str(self.ptr.req_cpufreq_max)
+
+    @property
+    def cpu_frequency_governor(self):
+        return cpu_freq_int_to_str(self.ptr.req_cpufreq_gov)
+
+    @property
+    def nodelist(self):
+        return cstr.to_unicode(self.ptr.nodes)
+
+    @property
+    def id(self):
+        return humanize_step_id(self.ptr.step_id.step_id)
+
+    @property
+    def job_id(self):
+        return self.ptr.step_id.job_id
+
+    @property
+    def name(self):
+        return cstr.to_unicode(self.ptr.stepname)
+
+#    @property
+#    def distribution(self):
+#        # ptr.task_dist
+#        pass
+
+    @property
+    def state(self):
+        return cstr.to_unicode(slurm_job_state_string(self.ptr.state))
+
+    @property
+    def cancelled_by(self):
+        return uid_to_name(self.ptr.requid)
+
+    @property
+    def submit_command(self):
+        return cstr.to_unicode(self.ptr.submit_line)
+
+    @property
+    def suspended_time(self):
+        # Seconds the Step was suspended. This used to read `elapsed`, which
+        # made it a duplicate of elapsed_time.
+        return _raw_time(self.ptr.suspended)
diff --git a/pyslurm/db/tres.pxd b/pyslurm/db/tres.pxd
new file mode 100644
index 00000000..23b44ad2
--- /dev/null
+++ b/pyslurm/db/tres.pxd
@@ -0,0 +1,86 @@
+#########################################################################
+# tres.pxd - pyslurm slurmdbd tres api
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf 
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from pyslurm cimport slurm
+from pyslurm.utils cimport cstr
+from libc.stdint cimport uint64_t
+from pyslurm.slurm cimport (
+    slurmdb_tres_rec_t,
+    slurmdb_tres_cond_t,
+    slurmdb_destroy_tres_cond,
+    slurmdb_init_tres_cond,
+    slurmdb_destroy_tres_rec,
+    slurmdb_find_tres_count_in_string,
+    slurmdb_tres_get,
+    try_xmalloc,
+)
+from pyslurm.db.util cimport (
+    SlurmList,
+    SlurmListItem,
+)
+from pyslurm.db.connection cimport Connection
+
+# Module-level C helpers implemented in tres.pyx. A `=*` default means the
+# actual default value is given by the implementation.
+
+# Look up the count of TRES `typ` in a TRES string; on_noval / on_inf are
+# returned when the lookup yields NO_VAL64 / INFINITE64.
+cdef find_tres_count(char *tres_str, typ, on_noval=*, on_inf=*)
+# Same lookup with on_noval=None and on_inf=UNLIMITED.
+cdef find_tres_limit(char *tres_str, typ)
+# Set (or, for NO_VAL64, remove) the entry `typ` in the string at *tres_str.
+cdef merge_tres_str(char **tres_str, typ, val)
+# Translate a TRES string keyed by numeric IDs into (type, name, count) tuples.
+cdef _tres_ids_to_names(char *tres_str, TrackableResources tres_data)
+# Validate `src` against `tres_data` and write the result to *dest.
+cdef _set_tres_limits(char **dest, TrackableResourceLimits src,
+                      TrackableResources tres_data)
+
+
+cdef class TrackableResourceLimits:
+
+    # Untyped public attributes, one per TRES type. The implementation
+    # initialises fs, gres and license as dicts.
+    cdef public:
+        cpu
+        mem
+        energy
+        node
+        billing
+        fs
+        vmem
+        pages
+        gres
+        license
+
+    # Build limits from a TRES string keyed by numeric IDs.
+    @staticmethod
+    cdef from_ids(char *tres_id_str, TrackableResources tres_data)
+
+
+cdef class TrackableResourceFilter:
+    # Condition struct passed to slurmdb_tres_get when loading TRES.
+    cdef slurmdb_tres_cond_t *ptr
+
+
+cdef class TrackableResources(dict):
+    # Raw TRES string this collection was created from (set by from_str).
+    cdef public raw_str
+
+    @staticmethod
+    cdef TrackableResources from_str(char *tres_str)
+
+    @staticmethod
+    cdef find_count_in_str(char *tres_str, typ, on_noval=*, on_inf=*)
+
+
+cdef class TrackableResource:
+    # Wrapped TRES record; destroyed in the implementation's __dealloc__.
+    cdef slurmdb_tres_rec_t *ptr
+
+    @staticmethod
+    cdef TrackableResource from_ptr(slurmdb_tres_rec_t *in_ptr)
diff --git a/pyslurm/db/tres.pyx b/pyslurm/db/tres.pyx
new file mode 100644
index 00000000..78195654
--- /dev/null
+++ b/pyslurm/db/tres.pyx
@@ -0,0 +1,340 @@
+#########################################################################
+# tres.pyx - pyslurm slurmdbd tres api
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf 
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from pyslurm.utils.uint import *
+from pyslurm.constants import UNLIMITED
+from pyslurm.core.error import RPCError
+from pyslurm.utils.helpers import instance_to_dict
+from pyslurm.utils import cstr
+from pyslurm.db.connection import _open_conn_or_error
+import json
+
+
+# Separator between a TRES type and its name, e.g. "gres/gpu".
+TRES_TYPE_DELIM = "/"
+
+
+cdef class TrackableResourceLimits:
+    # Container for per-TRES limits. Keyword arguments of the form
+    # "type/name" are collected into the dict-valued attributes; any other
+    # keyword is set directly as an attribute.
+
+    def __init__(self, **kwargs):
+        self.fs = {}
+        self.gres = {}
+        self.license = {}
+
+        for k, v in kwargs.items():
+            if TRES_TYPE_DELIM in k:
+                typ, name = self._unflatten_tres(k)
+                cur_val = getattr(self, typ)
+
+                # Only the dict-valued types can carry a name.
+                if not isinstance(cur_val, dict):
+                    raise ValueError(f"TRES Type {typ} cannot have a name "
+                                     f"({name}). Invalid Value: {typ}/{name}")
+
+                cur_val.update({name : int(v)})
+                setattr(self, typ, cur_val)
+            else:
+                setattr(self, k, v)
+
+    @staticmethod
+    cdef from_ids(char *tres_id_str, TrackableResources tres_data):
+        # Build limits from a TRES string keyed by numeric IDs. Returns None
+        # when nothing could be translated.
+        tres_list = _tres_ids_to_names(tres_id_str, tres_data)
+        if not tres_list:
+            return None
+
+        cdef TrackableResourceLimits out = TrackableResourceLimits()
+
+        for tres in tres_list:
+            typ, name, cnt = tres
+            # NO_VAL64 is the sentinel for "no such attribute": unknown TRES
+            # types are silently skipped.
+            cur_val = getattr(out, typ, slurm.NO_VAL64)
+            if cur_val != slurm.NO_VAL64:
+                if isinstance(cur_val, dict):
+                    cur_val.update({name : cnt})
+                    setattr(out, typ, cur_val)
+                else:
+                    setattr(out, typ, cnt)
+
+        return out
+
+    def _validate(self, TrackableResources tres_data):
+        # Map the flattened limits to TRES IDs; raises ValueError (from
+        # _validate_tres_single) for an unknown TRES.
+        id_dict = _tres_names_to_ids(self.to_dict(flatten_limits=True),
+                                     tres_data)
+        return id_dict
+
+    def _unflatten_tres(self, type_and_name):
+        # "type/name" -> ("type", "name"); only the first delimiter splits.
+        typ, name = type_and_name.split(TRES_TYPE_DELIM, 1)
+        return typ, name
+
+    def _flatten_tres(self, typ, vals):
+        # {"name": cnt} -> {"type/name": cnt}
+        cdef dict out = {}
+        for name, cnt in vals.items():
+            out[f"{typ}{TRES_TYPE_DELIM}{name}"] = cnt
+
+        return out
+
+    def to_dict(self, flatten_limits=False):
+        """Convert the limits to a dict.
+
+        Args:
+            flatten_limits (bool, optional):
+                If True, the nested "fs", "license" and "gres" dicts are
+                replaced by top-level "type/name" keys.
+
+        Returns:
+            (dict): The limits as a dict
+        """
+        cdef dict inst_dict = instance_to_dict(self)
+
+        if flatten_limits:
+            vals = inst_dict.pop("fs")
+            inst_dict.update(self._flatten_tres("fs", vals))
+
+            vals = inst_dict.pop("license")
+            inst_dict.update(self._flatten_tres("license", vals))
+
+            vals = inst_dict.pop("gres")
+            inst_dict.update(self._flatten_tres("gres", vals))
+
+        return inst_dict
+
+
+cdef class TrackableResourceFilter:
+    # Owns the slurmdb_tres_cond_t used as query condition for TRES lookups.
+
+    def __cinit__(self):
+        self.ptr = NULL
+
+    def __init__(self, **kwargs):
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+    def __dealloc__(self):
+        self._dealloc()
+
+    def _dealloc(self):
+        slurmdb_destroy_tres_cond(self.ptr)
+        self.ptr = NULL
+
+    def _alloc(self):
+        # Drop any previous condition before allocating a fresh one.
+        self._dealloc()
+        self.ptr = try_xmalloc(sizeof(slurmdb_tres_cond_t))
+        if not self.ptr:
+            raise MemoryError("xmalloc failed for slurmdb_tres_cond_t")
+        slurmdb_init_tres_cond(self.ptr, 0)
+
+    def _create(self):
+        self._alloc()
+
+
+cdef class TrackableResources(dict):
+    # dict-based collection of TrackableResource objects.
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def load(Connection db_connection=None, name_is_key=True):
+        """Load Trackable Resources from the Database.
+
+        Args:
+            db_connection (Connection, optional):
+                Connection passed on to `_open_conn_or_error`.
+            name_is_key (bool, optional):
+                By default, the keys in this dict are the names of each TRES.
+                If this is set to `False`, then the unique ID of the TRES will
+                be used as dict keys.
+
+        Returns:
+            (TrackableResources): The loaded collection
+
+        Raises:
+            RPCError: When no TRES data could be retrieved from slurmdbd
+        """
+        cdef:
+            TrackableResources out = TrackableResources()
+            TrackableResource tres
+            Connection conn
+            SlurmList tres_data
+            SlurmListItem tres_ptr
+            TrackableResourceFilter db_filter = TrackableResourceFilter()
+
+        # Prepare SQL Filter
+        db_filter._create()
+
+        # Setup DB Conn
+        conn = _open_conn_or_error(db_connection)
+
+        # Fetch TRES data
+        tres_data = SlurmList.wrap(slurmdb_tres_get(conn.ptr, db_filter.ptr))
+
+        if tres_data.is_null:
+            raise RPCError(msg="Failed to get TRES data from slurmdbd")
+
+        # Setup TRES objects
+        for tres_ptr in SlurmList.iter_and_pop(tres_data):
+            tres = TrackableResource.from_ptr(
+                    tres_ptr.data)
+            _id = tres.type_and_name if name_is_key else tres.id
+            out[_id] = tres
+
+        return out
+
+    @staticmethod
+    cdef TrackableResources from_str(char *tres_str):
+        # Build a collection from a raw TRES string. Every entry becomes a
+        # TrackableResource stored under its numeric ID. An empty or NULL
+        # string yields an empty collection.
+        cdef:
+            TrackableResources tres_collection
+            TrackableResource tres
+            str raw_str = cstr.to_unicode(tres_str)
+            dict tres_dict
+
+        tres_collection = TrackableResources.__new__(TrackableResources)
+        if not raw_str:
+            return tres_collection
+
+        tres_collection.raw_str = raw_str
+        tres_dict = cstr.to_dict(tres_str)
+        for tres_id, val in tres_dict.items():
+            # Keys and values are converted explicitly, the same way
+            # _tres_ids_to_names treats the output of cstr.to_dict.
+            _id = int(tres_id)
+            tres = TrackableResource(_id)
+            tres.ptr.count = int(val)
+            # Previously the objects were created but never stored.
+            tres_collection[_id] = tres
+
+        # Return the collection as declared, not the last element built.
+        return tres_collection
+
+    @staticmethod
+    cdef find_count_in_str(char *tres_str, typ, on_noval=0, on_inf=0):
+        # Thin wrapper around the module-level find_tres_count.
+        return find_tres_count(tres_str, typ, on_noval, on_inf)
+
+
+cdef class TrackableResource:
+    # Wrapper around a slurmdb_tres_rec_t, destroyed on deallocation.
+
+    def __cinit__(self):
+        self.ptr = NULL
+
+    def __init__(self, tres_id):
+        self._alloc_impl()
+        self.ptr.id = tres_id
+
+    def __dealloc__(self):
+        self._dealloc_impl()
+
+    def _alloc_impl(self):
+        # Allocate only once; an existing record is kept.
+        if not self.ptr:
+            self.ptr = try_xmalloc(
+                    sizeof(slurmdb_tres_rec_t))
+            if not self.ptr:
+                raise MemoryError("xmalloc failed for slurmdb_tres_rec_t")
+
+    def _dealloc_impl(self):
+        slurmdb_destroy_tres_rec(self.ptr)
+        self.ptr = NULL
+
+    @staticmethod
+    cdef TrackableResource from_ptr(slurmdb_tres_rec_t *in_ptr):
+        # __new__ skips __init__, so no second record is allocated.
+        cdef TrackableResource wrap = TrackableResource.__new__(TrackableResource)
+        wrap.ptr = in_ptr
+        return wrap
+
+    def to_dict(self):
+        return instance_to_dict(self)
+
+    @property
+    def id(self):
+        return self.ptr.id
+
+    @property
+    def name(self):
+        return cstr.to_unicode(self.ptr.name)
+
+    @property
+    def type(self):
+        return cstr.to_unicode(self.ptr.type)
+
+    @property
+    def type_and_name(self):
+        # "type" alone, or "type/name" when the TRES has a name.
+        type_and_name = self.type
+        if self.name:
+            type_and_name = f"{type_and_name}{TRES_TYPE_DELIM}{self.name}"
+
+        return type_and_name
+
+    @property
+    def count(self):
+        return u64_parse(self.ptr.count)
+
+    # rec_count
+    # alloc_secs
+
+
+cdef find_tres_count(char *tres_str, typ, on_noval=0, on_inf=0):
+    # Count of TRES `typ` in `tres_str`. on_noval is returned for a NULL
+    # string or a NO_VAL64 result, on_inf for an INFINITE64 result.
+    if not tres_str:
+        return on_noval
+
+    cdef uint64_t tmp
+    tmp = slurmdb_find_tres_count_in_string(tres_str, typ)
+    if tmp == slurm.INFINITE64:
+        return on_inf
+    elif tmp == slurm.NO_VAL64:
+        return on_noval
+    else:
+        return tmp
+
+
+cdef find_tres_limit(char *tres_str, typ):
+    # Limit flavour of find_tres_count: None when unset, UNLIMITED when
+    # infinite.
+    return find_tres_count(tres_str, typ, on_noval=None, on_inf=UNLIMITED)
+
+
+cdef merge_tres_str(char **tres_str, typ, val):
+    # Set entry `typ` of the TRES string at *tres_str to `val`; a value that
+    # parses to NO_VAL64 removes the entry instead.
+    cdef uint64_t _val = u64(dehumanize(val))
+
+    current = cstr.to_dict(tres_str[0])
+    if _val == slurm.NO_VAL64:
+        current.pop(typ, None)
+    else:
+        current.update({typ : _val})
+
+    cstr.from_dict(tres_str, current)
+
+
+cdef _tres_ids_to_names(char *tres_str, TrackableResources tres_data):
+    # Translate "id=count" entries into (type, name, count) tuples using
+    # tres_data (which must be keyed by numeric ID). Returns None when
+    # either input is empty; unknown or non-numeric IDs are skipped.
+    if not tres_str:
+        return None
+
+    cdef:
+        dict tdict = cstr.to_dict(tres_str)
+        list out = []
+
+    if not tres_data:
+        return None
+
+    for tid, cnt in tdict.items():
+        if isinstance(tid, str) and tid.isdigit():
+            _tid = int(tid)
+            if _tid in tres_data:
+                out.append(
+                    (tres_data[_tid].type, tres_data[_tid].name, int(cnt))
+                )
+
+    return out
+
+
+def _tres_names_to_ids(dict tres_dict, TrackableResources tres_data):
+    # Re-key tres_dict by TRES ID; raises ValueError for an unknown TRES.
+    cdef dict out = {}
+    if not tres_dict:
+        return out
+
+    for tid, cnt in tres_dict.items():
+        real_id = _validate_tres_single(tid, tres_data)
+        out[real_id] = cnt
+
+    return out
+
+
+def _validate_tres_single(tid, TrackableResources tres_data):
+    # Accepts either a TRES ID or a "type[/name]" string and returns the ID.
+    for tres in tres_data.values():
+        if tid == tres.id or tid == tres.type_and_name:
+            return tres.id
+
+    raise ValueError(f"Invalid TRES specified: {tid}")
+
+
+cdef _set_tres_limits(char **dest, TrackableResourceLimits src,
+                      TrackableResources tres_data):
+    # Validate `src` against tres_data and serialise the result into *dest.
+    cstr.from_dict(dest, src._validate(tres_data))
diff --git a/pyslurm/db/util.pxd b/pyslurm/db/util.pxd
new file mode 100644
index 00000000..01951de8
--- /dev/null
+++ b/pyslurm/db/util.pxd
@@ -0,0 +1,66 @@
+#########################################################################
+# util.pxd - pyslurm slurmdbd util functions
+#########################################################################
+# Copyright (C) 2022 Toni Harzendorf 
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from pyslurm cimport slurm
+from pyslurm.utils cimport cstr
+from pyslurm.slurm cimport (
+    ListIterator,
+    List,
+    slurm_list_iterator_create,
+    slurm_list_iterator_destroy,
+    slurm_list_iterator_reset,
+    slurm_list_count,
+    slurm_list_next,
+    slurm_list_destroy,
+    slurm_list_create,
+    slurm_list_pop,
+    slurm_list_append,
+    slurm_xfree_ptr,
+)
+
+# Module-level C helpers implemented in util.pyx.
+
+# Convert a slurm List to a Python list without taking ownership of it.
+cdef slurm_list_to_pylist(List in_list)
+# Replace *in_list by a newly created list holding `vals`.
+cdef make_char_list(List *in_list, vals)
+# Names of all entries in qos_data whose key occurs in in_list.
+cdef qos_list_to_pylist(List in_list, qos_data)
+
+
+cdef class SlurmListItem:
+    # Untyped pointer to a single list element.
+    cdef void *data
+
+    @staticmethod
+    cdef SlurmListItem from_ptr(void *item)
+
+
+cdef class SlurmList:
+    cdef:
+        # Wrapped slurm list and the iterator currently open on it, if any.
+        List info
+        ListIterator itr
+
+    cdef readonly:
+        # owned: whether the wrapper destroys `info` on deallocation.
+        owned
+        # itr_cnt: items handed out by the current iteration.
+        int itr_cnt
+        # cnt: number of items tracked for the list.
+        int cnt
+
+    # `=*`: the default for `owned` is given by the implementation.
+    @staticmethod
+    cdef SlurmList wrap(List, owned=*)
+
+    @staticmethod
+    cdef SlurmList create(slurm.ListDelF delf, owned=*)
diff --git a/pyslurm/db/util.pyx b/pyslurm/db/util.pyx
new file mode 100644
index 00000000..672886c2
--- /dev/null
+++ b/pyslurm/db/util.pyx
@@ -0,0 +1,197 @@
+#########################################################################
+# util.pyx - pyslurm slurmdbd util functions
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf 
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+
+cdef make_char_list(List *in_list, vals):
+    # Replace the list at *in_list by a new list built from `vals`.
+    # Falsy `vals` leaves the existing list untouched.
+    if not vals:
+        return None
+
+    # Make a new SlurmList wrapper with the values
+    cdef SlurmList slist = SlurmList(vals)
+
+    # Make sure the previous list is deallocated
+    if in_list[0]:
+        slurm_list_destroy(in_list[0])
+
+    # Assign the pointer from slist to in_list, and give up ownership of slist
+    in_list[0] = slist.info
+    slist.owned = False
+
+
+cdef slurm_list_to_pylist(List in_list):
+    # owned=False: the wrapper must not destroy the caller's list.
+    return SlurmList.wrap(in_list, owned=False).to_pylist()
+
+
+cdef qos_list_to_pylist(List in_list, qos_data):
+    # Return the names of all QoS in qos_data whose key occurs in in_list.
+    if not in_list:
+        return []
+
+    cdef list qos_nums = SlurmList.wrap(in_list, owned=False).to_pylist()
+    return [qos.name for qos_id, qos in qos_data.items()
+            if qos_id in qos_nums]
+
+
+cdef class SlurmListItem:
+    # Thin wrapper around one untyped list element.
+
+    def __cinit__(self):
+        self.data = NULL
+
+    @staticmethod
+    cdef SlurmListItem from_ptr(void *item):
+        cdef SlurmListItem wrap = SlurmListItem.__new__(SlurmListItem)
+        wrap.data = item
+        return wrap
+
+    @property
+    def has_data(self):
+        # True when the wrapped pointer is not NULL.
+        if self.data:
+            return True
+        else:
+            return False
+
+    def to_str(self):
+        # Mostly for debugging purposes. Can only be used "safely" if we have
+        # a char* list
+        # NOTE(review): `data` is declared void* in util.pxd; Cython normally
+        # wants an explicit char* cast here - confirm none was lost.
+        cdef char* entry = self.data
+        return cstr.to_unicode(entry)
+
+
+cdef class SlurmList:
+    """Convenience Wrapper around slurms List type"""
+    def __cinit__(self):
+        self.info = NULL
+        self.itr = NULL
+        self.itr_cnt = 0
+        self.cnt = 0
+        self.owned = True
+
+    def __init__(self, vals=None):
+        # New list whose delete function is slurm_xfree_ptr, optionally
+        # pre-filled with `vals` (see append).
+        self.info = slurm_list_create(slurm_xfree_ptr)
+        self.append(vals)
+
+    def __dealloc__(self):
+        self._dealloc_itr()
+        self._dealloc_list()
+
+    def _dealloc_list(self):
+        # The list is destroyed only when this wrapper owns it; the wrapper
+        # state is reset either way.
+        if self.info is not NULL and self.owned:
+            slurm_list_destroy(self.info)
+            self.cnt = 0
+            self.info = NULL
+
+    def _dealloc_itr(self):
+        if self.itr:
+            slurm_list_iterator_destroy(self.itr)
+            self.itr_cnt = 0
+            self.itr = NULL
+
+    def __iter__(self):
+        # Every new iteration discards a previously open iterator.
+        self._dealloc_itr()
+        if not self.is_null:
+            self.itr = slurm_list_iterator_create(self.info)
+
+        return self
+
+    def __next__(self):
+        if self.is_null or self.is_itr_null:
+            raise StopIteration
+
+        # Hand out at most `cnt` items, then release the iterator.
+        if self.itr_cnt < self.cnt:
+            self.itr_cnt += 1
+            return SlurmListItem.from_ptr(slurm_list_next(self.itr))
+
+        self._dealloc_itr()
+        raise StopIteration
+
+    @staticmethod
+    def iter_and_pop(SlurmList li):
+        # Generator that removes items from the list while yielding them.
+        # NOTE(review): popped items are presumably no longer freed with the
+        # list, so the consumer takes them over - confirm.
+        while li.cnt > 0:
+            yield SlurmListItem.from_ptr(slurm_list_pop(li.info))
+            li.cnt -= 1
+
+    @staticmethod
+    cdef SlurmList create(slurm.ListDelF delfunc, owned=True):
+        # New, empty list using `delfunc` as its delete function.
+        cdef SlurmList wrapper = SlurmList.__new__(SlurmList)
+        wrapper.info = slurm_list_create(delfunc)
+        wrapper.owned = owned
+        return wrapper
+
+    @staticmethod
+    cdef SlurmList wrap(List li, owned=True):
+        # Wrap an existing list. A NULL list yields an empty wrapper, for
+        # which is_null is True.
+        cdef SlurmList wrapper = SlurmList.__new__(SlurmList)
+        if not li:
+            return wrapper
+
+        wrapper.info = li
+        wrapper.cnt = slurm_list_count(li)
+        wrapper.owned = owned
+        return wrapper
+
+    def to_pylist(self):
+        # Convert the list to a Python list, treating every element as a
+        # string. Purely numeric strings become int; empty items are skipped.
+        cdef:
+            SlurmListItem item
+            list out = []
+
+        for item in self:
+            if not item.has_data:
+                continue
+
+            pystr = cstr.to_unicode(item.data)
+            if pystr:
+                out.append(int(pystr) if pystr.isdigit() else pystr)
+
+        return out
+
+    def append(self, vals):
+        # Append one value or a list of values, each converted with str().
+        # Falsy values (including 0 and "") are skipped.
+        cdef char *entry = NULL
+
+        if not vals:
+            return None
+
+        to_add = vals
+        if not isinstance(vals, list):
+            # If it is not a list, then anything that can't be casted to str
+            # will error below anyways
+            to_add = [vals]
+
+        for val in to_add:
+            if val:
+                # Reset so every item gets its own string.
+                entry = NULL
+                cstr.fmalloc(&entry, str(val))
+                slurm_list_append(self.info, entry)
+                self.cnt += 1
+
+    @property
+    def is_itr_null(self):
+        if not self.itr:
+            return True
+        else:
+            return False
+
+    @property
+    def is_null(self):
+        if not self.info:
+            return True
+        else:
+            return False
diff --git a/pyslurm/pydefines/slurm_defines.pxi b/pyslurm/pydefines/slurm_defines.pxi
index a9ac41b6..f700a839 100644
--- a/pyslurm/pydefines/slurm_defines.pxi
+++ b/pyslurm/pydefines/slurm_defines.pxi
@@ -157,7 +157,6 @@ PRIVATE_DATA_USAGE = slurm.PRIVATE_DATA_USAGE
 PRIVATE_DATA_USERS = slurm.PRIVATE_DATA_USERS
 PRIVATE_DATA_ACCOUNTS = slurm.PRIVATE_DATA_ACCOUNTS
 PRIVATE_DATA_RESERVATIONS = slurm.PRIVATE_DATA_RESERVATIONS
-PRIVATE_CLOUD_NODES = slurm.PRIVATE_CLOUD_NODES
 PRIVATE_DATA_EVENTS = slurm.PRIVATE_DATA_EVENTS
 
 PRIORITY_RESET_NONE = slurm.PRIORITY_RESET_NONE
@@ -329,7 +328,6 @@ DEBUG_FLAG_POWER = slurm.DEBUG_FLAG_POWER
 DEBUG_FLAG_TIME_CRAY = slurm.DEBUG_FLAG_TIME_CRAY
 DEBUG_FLAG_DB_ARCHIVE = slurm.DEBUG_FLAG_DB_ARCHIVE
 DEBUG_FLAG_DB_TRES = slurm.DEBUG_FLAG_DB_TRES
-DEBUG_FLAG_ESEARCH = slurm.DEBUG_FLAG_ESEARCH
 DEBUG_FLAG_NODE_FEATURES = slurm.DEBUG_FLAG_NODE_FEATURES
 DEBUG_FLAG_FEDR = slurm.DEBUG_FLAG_FEDR
 DEBUG_FLAG_HETJOB = slurm.DEBUG_FLAG_HETJOB
diff --git a/pyslurm/pydefines/slurm_enums.pxi b/pyslurm/pydefines/slurm_enums.pxi
index 73a93c4a..38aab46c 100644
--- a/pyslurm/pydefines/slurm_enums.pxi
+++ b/pyslurm/pydefines/slurm_enums.pxi
@@ -52,8 +52,6 @@ WAIT_QOS_THRES = slurm.WAIT_QOS_THRES
 WAIT_QOS_JOB_LIMIT = slurm.WAIT_QOS_JOB_LIMIT
 WAIT_QOS_RESOURCE_LIMIT = slurm.WAIT_QOS_RESOURCE_LIMIT
 WAIT_QOS_TIME_LIMIT = slurm.WAIT_QOS_TIME_LIMIT
-WAIT_BLOCK_D_ACTION = slurm.WAIT_BLOCK_D_ACTION WAIT_CLEANING = slurm.WAIT_CLEANING WAIT_PROLOG = slurm.WAIT_PROLOG WAIT_QOS = slurm.WAIT_QOS @@ -260,11 +258,7 @@ SWITCH_PLUGIN_CRAY = slurm.SWITCH_PLUGIN_CRAY # enum select_jobdata_type -SELECT_JOBDATA_PAGG_ID = slurm.SELECT_JOBDATA_PAGG_ID -SELECT_JOBDATA_PTR = slurm.SELECT_JOBDATA_PTR -SELECT_JOBDATA_CLEANING = slurm.SELECT_JOBDATA_CLEANING SELECT_JOBDATA_NETWORK = slurm.SELECT_JOBDATA_NETWORK -SELECT_JOBDATA_RELEASED = slurm.SELECT_JOBDATA_RELEASED # end enum select_jobdata_type diff --git a/pyslurm/pydefines/slurm_errno_enums.pxi b/pyslurm/pydefines/slurm_errno_enums.pxi index 9fa6eea6..4cfdabe5 100644 --- a/pyslurm/pydefines/slurm_errno_enums.pxi +++ b/pyslurm/pydefines/slurm_errno_enums.pxi @@ -56,8 +56,6 @@ ESLURM_NOT_SUPPORTED = slurm.ESLURM_NOT_SUPPORTED ESLURM_DISABLED = slurm.ESLURM_DISABLED ESLURM_DEPENDENCY = slurm.ESLURM_DEPENDENCY ESLURM_BATCH_ONLY = slurm.ESLURM_BATCH_ONLY -ESLURM_TASKDIST_ARBITRARY_UNSUPPORTED = slurm.ESLURM_TASKDIST_ARBITRARY_UNSUPPORTED -ESLURM_TASKDIST_REQUIRES_OVERCOMMIT = slurm.ESLURM_TASKDIST_REQUIRES_OVERCOMMIT ESLURM_JOB_HELD = slurm.ESLURM_JOB_HELD ESLURM_INVALID_CRED_TYPE_CHANGE = slurm.ESLURM_INVALID_CRED_TYPE_CHANGE ESLURM_INVALID_TASK_MEMORY = slurm.ESLURM_INVALID_TASK_MEMORY @@ -79,9 +77,6 @@ ESLURM_PORTS_BUSY = slurm.ESLURM_PORTS_BUSY ESLURM_PORTS_INVALID = slurm.ESLURM_PORTS_INVALID ESLURM_PROLOG_RUNNING = slurm.ESLURM_PROLOG_RUNNING ESLURM_NO_STEPS = slurm.ESLURM_NO_STEPS -ESLURM_INVALID_BLOCK_STATE = slurm.ESLURM_INVALID_BLOCK_STATE -ESLURM_INVALID_BLOCK_LAYOUT = slurm.ESLURM_INVALID_BLOCK_LAYOUT -ESLURM_INVALID_BLOCK_NAME = slurm.ESLURM_INVALID_BLOCK_NAME ESLURM_INVALID_QOS = slurm.ESLURM_INVALID_QOS ESLURM_QOS_PREEMPTION_LOOP = slurm.ESLURM_QOS_PREEMPTION_LOOP ESLURM_NODE_NOT_AVAIL = slurm.ESLURM_NODE_NOT_AVAIL @@ -141,53 +136,31 @@ ESLURM_INVALID_TIME_MIN_LIMIT = slurm.ESLURM_INVALID_TIME_MIN_LIMIT ESLURM_DEFER = slurm.ESLURM_DEFER ESLURM_CONFIGLESS_DISABLED = 
slurm.ESLURM_CONFIGLESS_DISABLED ESLURM_ENVIRONMENT_MISSING = slurm.ESLURM_ENVIRONMENT_MISSING -ESLURMD_PIPE_ERROR_ON_TASK_SPAWN = slurm.ESLURMD_PIPE_ERROR_ON_TASK_SPAWN ESLURMD_KILL_TASK_FAILED = slurm.ESLURMD_KILL_TASK_FAILED ESLURMD_KILL_JOB_ALREADY_COMPLETE = slurm.ESLURMD_KILL_JOB_ALREADY_COMPLETE ESLURMD_INVALID_ACCT_FREQ = slurm.ESLURMD_INVALID_ACCT_FREQ ESLURMD_INVALID_JOB_CREDENTIAL = slurm.ESLURMD_INVALID_JOB_CREDENTIAL -ESLURMD_UID_NOT_FOUND = slurm.ESLURMD_UID_NOT_FOUND -ESLURMD_GID_NOT_FOUND = slurm.ESLURMD_GID_NOT_FOUND ESLURMD_CREDENTIAL_EXPIRED = slurm.ESLURMD_CREDENTIAL_EXPIRED ESLURMD_CREDENTIAL_REVOKED = slurm.ESLURMD_CREDENTIAL_REVOKED ESLURMD_CREDENTIAL_REPLAYED = slurm.ESLURMD_CREDENTIAL_REPLAYED ESLURMD_CREATE_BATCH_DIR_ERROR = slurm.ESLURMD_CREATE_BATCH_DIR_ERROR -ESLURMD_MODIFY_BATCH_DIR_ERROR = slurm.ESLURMD_MODIFY_BATCH_DIR_ERROR -ESLURMD_CREATE_BATCH_SCRIPT_ERROR = slurm.ESLURMD_CREATE_BATCH_SCRIPT_ERROR -ESLURMD_MODIFY_BATCH_SCRIPT_ERROR = slurm.ESLURMD_MODIFY_BATCH_SCRIPT_ERROR ESLURMD_SETUP_ENVIRONMENT_ERROR = slurm.ESLURMD_SETUP_ENVIRONMENT_ERROR -ESLURMD_SHARED_MEMORY_ERROR = slurm.ESLURMD_SHARED_MEMORY_ERROR ESLURMD_SET_UID_OR_GID_ERROR = slurm.ESLURMD_SET_UID_OR_GID_ERROR -ESLURMD_SET_SID_ERROR = slurm.ESLURMD_SET_SID_ERROR -ESLURMD_CANNOT_SPAWN_IO_THREAD = slurm.ESLURMD_CANNOT_SPAWN_IO_THREAD -ESLURMD_FORK_FAILED = slurm.ESLURMD_FORK_FAILED ESLURMD_EXECVE_FAILED = slurm.ESLURMD_EXECVE_FAILED ESLURMD_IO_ERROR = slurm.ESLURMD_IO_ERROR ESLURMD_PROLOG_FAILED = slurm.ESLURMD_PROLOG_FAILED ESLURMD_EPILOG_FAILED = slurm.ESLURMD_EPILOG_FAILED -ESLURMD_SESSION_KILLED = slurm.ESLURMD_SESSION_KILLED ESLURMD_TOOMANYSTEPS = slurm.ESLURMD_TOOMANYSTEPS ESLURMD_STEP_EXISTS = slurm.ESLURMD_STEP_EXISTS ESLURMD_JOB_NOTRUNNING = slurm.ESLURMD_JOB_NOTRUNNING ESLURMD_STEP_SUSPENDED = slurm.ESLURMD_STEP_SUSPENDED ESLURMD_STEP_NOTSUSPENDED = slurm.ESLURMD_STEP_NOTSUSPENDED ESLURMD_INVALID_SOCKET_NAME_LEN = slurm.ESLURMD_INVALID_SOCKET_NAME_LEN 
-ESCRIPT_CHDIR_FAILED = slurm.ESCRIPT_CHDIR_FAILED -ESCRIPT_OPEN_OUTPUT_FAILED = slurm.ESCRIPT_OPEN_OUTPUT_FAILED -ESCRIPT_NON_ZERO_RETURN = slurm.ESCRIPT_NON_ZERO_RETURN -SLURM_PROTOCOL_SOCKET_IMPL_ZERO_RECV_LENGTH = slurm.SLURM_PROTOCOL_SOCKET_IMPL_ZERO_RECV_LENGTH -SLURM_PROTOCOL_SOCKET_IMPL_NEGATIVE_RECV_LENGTH = slurm.SLURM_PROTOCOL_SOCKET_IMPL_NEGATIVE_RECV_LENGTH -SLURM_PROTOCOL_SOCKET_IMPL_NOT_ALL_DATA_SENT = slurm.SLURM_PROTOCOL_SOCKET_IMPL_NOT_ALL_DATA_SENT ESLURM_PROTOCOL_INCOMPLETE_PACKET = slurm.ESLURM_PROTOCOL_INCOMPLETE_PACKET SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT = slurm.SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT = slurm.SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT ESLURM_AUTH_CRED_INVALID = slurm.ESLURM_AUTH_CRED_INVALID -ESLURM_AUTH_FOPEN_ERROR = slurm.ESLURM_AUTH_FOPEN_ERROR -ESLURM_AUTH_NET_ERROR = slurm.ESLURM_AUTH_NET_ERROR -ESLURM_AUTH_UNABLE_TO_SIGN = slurm.ESLURM_AUTH_UNABLE_TO_SIGN ESLURM_AUTH_BADARG = slurm.ESLURM_AUTH_BADARG -ESLURM_AUTH_MEMORY = slurm.ESLURM_AUTH_MEMORY -ESLURM_AUTH_INVALID = slurm.ESLURM_AUTH_INVALID ESLURM_AUTH_UNPACK = slurm.ESLURM_AUTH_UNPACK ESLURM_DB_CONNECTION = slurm.ESLURM_DB_CONNECTION ESLURM_JOBS_RUNNING_ON_ASSOC = slurm.ESLURM_JOBS_RUNNING_ON_ASSOC diff --git a/pyslurm/pydefines/slurmdb_defines.pxi b/pyslurm/pydefines/slurmdb_defines.pxi index a09be533..8cea05f0 100644 --- a/pyslurm/pydefines/slurmdb_defines.pxi +++ b/pyslurm/pydefines/slurmdb_defines.pxi @@ -60,7 +60,6 @@ SLURMDB_FS_USE_PARENT = slurm.SLURMDB_FS_USE_PARENT SLURMDB_CLASSIFIED_FLAG = slurm.SLURMDB_CLASSIFIED_FLAG SLURMDB_CLASS_BASE = slurm.SLURMDB_CLASS_BASE -CLUSTER_FLAG_A1 = slurm.CLUSTER_FLAG_A1 CLUSTER_FLAG_A2 = slurm.CLUSTER_FLAG_A2 CLUSTER_FLAG_A3 = slurm.CLUSTER_FLAG_A3 CLUSTER_FLAG_A4 = slurm.CLUSTER_FLAG_A4 diff --git a/pyslurm/pyslurm.pyx b/pyslurm/pyslurm.pyx index 9e4a0151..0fbf0ab7 100644 --- a/pyslurm/pyslurm.pyx +++ b/pyslurm/pyslurm.pyx @@ -1,4 +1,3 @@ -# cython: embedsignature=True # cython: profile=False 
# cython: language_level=3 # cython: auto_pickle=False @@ -57,6 +56,7 @@ import builtins as __builtin__ from pyslurm cimport slurm from pyslurm.slurm cimport xmalloc +import pyslurm.core.job include "pydefines/slurm_errno_defines.pxi" include "pydefines/slurm_errno_enums.pxi" @@ -276,8 +276,8 @@ ctypedef struct config_key_pair_t: def get_controllers(): """Get information about slurm controllers. - :return: Name of primary controller, Name of backup controllers - :rtype: `tuple` + Returns: + (tuple): Name of primary controller, Name of backup controllers """ cdef: slurm.slurm_conf_t *slurm_ctl_conf_ptr = NULL @@ -307,10 +307,11 @@ def get_controllers(): def is_controller(Host=None): """Return slurm controller status for host. - :param string Host: Name of host to check + Args: + Host (str): Name of host to check - :returns: None, primary or backup - :rtype: `string` + Returns: + (str): None, "primary" or "backup" """ control_machs = get_controllers() if not Host: @@ -328,8 +329,8 @@ def is_controller(Host=None): def slurm_api_version(): """Return the slurm API version number. - :returns: version_major, version_minor, version_micro - :rtype: `tuple` + Returns: + (tuple): A tuple of version_major, version_minor, version_micro """ cdef long version = slurm.SLURM_VERSION_NUMBER @@ -341,8 +342,8 @@ def slurm_api_version(): def slurm_load_slurmd_status(): """Issue RPC to get and load the status of Slurmd daemon. - :returns: Slurmd information - :rtype: `dict` + Returns: + (str): Slurmd information """ cdef: dict Status = {}, Status_dict = {} @@ -373,38 +374,16 @@ def slurm_load_slurmd_status(): return Status -def slurm_init(conf_file=None): - """ - This function MUST be called before any internal API calls to ensure - Slurm's internal configuration structures have been populated. - - :param string conf_file: Absolute path to the configuration file - (optional). If None (default value), libslurm automatically locates its - own configuration. 
- - :returns: None - :rtype: None - """ - if conf_file: - slurm.slurm_init(conf_file.encode('UTF-8')) - else: - slurm.slurm_init(NULL) - -def slurm_fini(): - """Call at process termination to cleanup internal configuration - structures. - - :returns: None - :rtype: None - """ - slurm.slurm_fini() - # # Slurm Config Class # def get_private_data_list(data): - """Return the list of enciphered Private Data configuration.""" + """Retrieve the enciphered Private Data configuration. + + Returns: + (list): Private data + """ result = [] exponent = 7 @@ -420,7 +399,7 @@ def get_private_data_list(data): return result cdef class config: - """Class to access slurm config Information.""" + """Slurm Config Information.""" cdef: slurm.slurm_conf_t *slurm_ctl_conf_ptr @@ -441,25 +420,27 @@ cdef class config: def lastUpdate(self): """Get the time (epoch seconds) the retrieved data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate def ids(self): """Return the config IDs from retrieved data. - :returns: Dictionary of config key IDs - :rtype: `dict` + Returns: + (dict): Dictionary of config key IDs """ return self.__ConfigDict.keys() def find_id(self, char *keyID=''): """Retrieve config ID data. - :param str keyID: Config key string to search - :returns: Dictionary of values for given config key - :rtype: `dict` + Args: + keyID (str): Config key string to search + + Returns: + (dict): Dictionary of values for given config key """ return self.__ConfigDict.get(keyID, {}) @@ -478,8 +459,8 @@ cdef class config: cdef int __load(self) except? -1: """Load the slurm control configuration information. - :returns: slurm error code - :rtype: `integer` + Returns: + int: slurm error code """ cdef: slurm.slurm_conf_t *slurm_ctl_conf_ptr = NULL @@ -497,8 +478,8 @@ cdef class config: def key_pairs(self): """Return a dict of the slurm control data as key pairs. 
- :returns: Dictionary of slurm key-pair values - :rtype: `dict` + Returns: + (dict): Dictionary of slurm key-pair values """ cdef: void *ret_list = NULL @@ -536,8 +517,8 @@ cdef class config: def get(self): """Return the slurm control configuration information. - :returns: Configuration data - :rtype: `dict` + Returns: + (dict): Configuration data """ self.__load() self.__get() @@ -545,11 +526,7 @@ cdef class config: return self.__ConfigDict cpdef dict __get(self): - """Get the slurm control configuration information. - - :returns: Configuration data - :rtype: `dict` - """ + """Get the slurm control configuration information.""" cdef: void *ret_list = NULL slurm.List config_list = NULL @@ -598,7 +575,7 @@ cdef class config: Ctl_dict['cpu_freq_govs'] = self.__Config_ptr.cpu_freq_govs Ctl_dict['cred_type'] = slurm.stringOrNone(self.__Config_ptr.cred_type, '') Ctl_dict['debug_flags'] = self.__Config_ptr.debug_flags - Ctl_dict['def_mem_per_cp'] = self.__Config_ptr.def_mem_per_cpu + Ctl_dict['def_mem_per_cpu'] = self.__Config_ptr.def_mem_per_cpu Ctl_dict['dependency_params'] = slurm.stringOrNone(self.__Config_ptr.dependency_params, '') Ctl_dict['eio_timeout'] = self.__Config_ptr.eio_timeout Ctl_dict['enforce_part_limits'] = bool(self.__Config_ptr.enforce_part_limits) @@ -645,8 +622,6 @@ cdef class config: Ctl_dict['keep_alive_time'] = slurm.int16orNone(self.__Config_ptr.keepalive_time) Ctl_dict['kill_on_bad_exit'] = bool(self.__Config_ptr.kill_on_bad_exit) Ctl_dict['kill_wait'] = self.__Config_ptr.kill_wait - Ctl_dict['launch_params'] = slurm.stringOrNone(self.__Config_ptr.launch_type, '') - Ctl_dict['launch_type'] = slurm.stringOrNone(self.__Config_ptr.launch_type, '') Ctl_dict['licenses'] = __get_licenses(self.__Config_ptr.licenses) Ctl_dict['log_fmt'] = self.__Config_ptr.log_fmt Ctl_dict['mail_domain'] = slurm.stringOrNone(self.__Config_ptr.mail_domain, '') @@ -655,7 +630,7 @@ cdef class config: Ctl_dict['max_dbd_msgs'] = self.__Config_ptr.max_dbd_msgs 
Ctl_dict['max_job_cnt'] = self.__Config_ptr.max_job_cnt Ctl_dict['max_job_id'] = self.__Config_ptr.max_job_id - Ctl_dict['max_mem_per_cp'] = self.__Config_ptr.max_mem_per_cpu + Ctl_dict['max_mem_per_cpu'] = self.__Config_ptr.max_mem_per_cpu Ctl_dict['max_step_cnt'] = self.__Config_ptr.max_step_cnt Ctl_dict['max_tasks_per_node'] = self.__Config_ptr.max_tasks_per_node Ctl_dict['min_job_age'] = self.__Config_ptr.min_job_age @@ -742,7 +717,6 @@ cdef class config: # TODO: slurmctld_host Ctl_dict['slurmctld_logfile'] = slurm.stringOrNone(self.__Config_ptr.slurmctld_logfile, '') Ctl_dict['slurmctld_pidfile'] = slurm.stringOrNone(self.__Config_ptr.slurmctld_pidfile, '') - Ctl_dict['slurmctld_plugstack'] = slurm.stringOrNone(self.__Config_ptr.slurmctld_plugstack, '') Ctl_dict['slurmctld_port'] = self.__Config_ptr.slurmctld_port Ctl_dict['slurmctld_port_count'] = self.__Config_ptr.slurmctld_port_count Ctl_dict['slurmctld_primary_off_prog'] = slurm.stringOrNone(self.__Config_ptr.slurmctld_primary_off_prog, '') @@ -819,7 +793,7 @@ cdef class config: cdef class partition: - """Class to access/modify Slurm Partition Information.""" + """Slurm Partition Information.""" cdef: slurm.partition_info_msg_t *_Partition_ptr @@ -838,16 +812,16 @@ cdef class partition: def lastUpdate(self): """Return time (epoch seconds) the partition data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate def ids(self): """Return the partition IDs from retrieved data. - :returns: Dictionary of partition IDs - :rtype: `dict` + Returns: + (dict): Dictionary of partition IDs """ cdef: int rc @@ -874,19 +848,23 @@ cdef class partition: def find_id(self, partID): """Get partition information for a given partition. 
- :param str partID: Partition key string to search - :returns: Dictionary of values for given partition - :rtype: `dict` + Args: + partID (str): Partition key string to search + + Returns: + (dict): Dictionary of values for given partition """ return self.get().get(partID) def find(self, name='', val=''): """Search for a property and associated value in the retrieved partition data. - :param str name: key string to search - :param str value: value string to match - :returns: List of IDs that match - :rtype: `list` + Args: + name (str): key string to search + val (str): value string to match + + Returns: + (list): List of IDs that match """ cdef: list retList = [] @@ -901,9 +879,10 @@ cdef class partition: return retList def print_info_msg(self, int oneLiner=0): - """Display the partition information from previous load partition method. + """Display partition information from previous load partition method. - :param int oneLiner: Display on one line (default=0) + Args: + oneLiner (int, optional): Display on one line. """ cdef: int rc @@ -926,10 +905,12 @@ cdef class partition: def delete(self, PartID): """Delete a give slurm partition. - :param string PartID: Name of slurm partition + Args: + PartID (str): Name of slurm partition - :returns: 0 for success else set the slurm error code as appropriately. - :rtype: `integer` + Returns: + (int): 0 for success else set the slurm error code as + appropriately. """ cdef: slurm.delete_part_msg_t part_msg @@ -950,8 +931,8 @@ cdef class partition: def get(self): """Get all slurm partition information - :returns: Dictionary of dictionaries whose key is the partition name. - :rtype: `dict` + Returns: + (dict): Dictionary of dictionaries whose key is the partition name. 
""" cdef: int rc @@ -1023,16 +1004,16 @@ cdef class partition: if record.def_mem_per_cpu & slurm.MEM_PER_CPU: if record.def_mem_per_cpu == slurm.MEM_PER_CPU: - Part_dict['def_mem_per_cp'] = "UNLIMITED" + Part_dict['def_mem_per_cpu'] = "UNLIMITED" Part_dict['def_mem_per_node'] = None else: - Part_dict['def_mem_per_cp'] = record.def_mem_per_cpu & (~slurm.MEM_PER_CPU) + Part_dict['def_mem_per_cpu'] = record.def_mem_per_cpu & (~slurm.MEM_PER_CPU) Part_dict['def_mem_per_node'] = None elif record.def_mem_per_cpu == 0: - Part_dict['def_mem_per_cp'] = None + Part_dict['def_mem_per_cpu'] = None Part_dict['def_mem_per_node'] = "UNLIMITED" else: - Part_dict['def_mem_per_cp'] = None + Part_dict['def_mem_per_cpu'] = None Part_dict['def_mem_per_node'] = record.def_mem_per_cpu if record.default_time == slurm.INFINITE: @@ -1058,16 +1039,16 @@ cdef class partition: if record.max_mem_per_cpu & slurm.MEM_PER_CPU: if record.max_mem_per_cpu == slurm.MEM_PER_CPU: - Part_dict['max_mem_per_cp'] = "UNLIMITED" + Part_dict['max_mem_per_cpu'] = "UNLIMITED" Part_dict['max_mem_per_node'] = None else: - Part_dict['max_mem_per_cp'] = record.max_mem_per_cpu & (~slurm.MEM_PER_CPU) + Part_dict['max_mem_per_cpu'] = record.max_mem_per_cpu & (~slurm.MEM_PER_CPU) Part_dict['max_mem_per_node'] = None elif record.max_mem_per_cpu == 0: - Part_dict['max_mem_per_cp'] = None + Part_dict['max_mem_per_cpu'] = None Part_dict['max_mem_per_node'] = "UNLIMITED" else: - Part_dict['max_mem_per_cp'] = None + Part_dict['max_mem_per_cpu'] = None Part_dict['max_mem_per_node'] = record.max_mem_per_cpu if record.max_nodes == slurm.INFINITE: @@ -1128,11 +1109,13 @@ cdef class partition: def update(self, dict Partition_dict): """Update a slurm partition. - :param dict partition_dict: A populated partition dictionary, - an empty one is created by create_partition_dict - :returns: 0 for success, -1 for error, and the slurm error code - is set appropriately. 
- :rtype: `integer` + Args: + Partition_dict (dict): A populated partition dictionary, an empty + one is created by create_partition_dict + + Returns: + (int): 0 for success, -1 for error, and the slurm error code is set + appropriately. """ cdef int errCode = slurm_update_partition(Partition_dict) return errCode @@ -1140,11 +1123,13 @@ cdef class partition: def create(self, dict Partition_dict): """Create a slurm partition. - :param dict partition_dict: A populated partition dictionary, - an empty one can be created by create_partition_dict - :returns: 0 for success or -1 for error, and the slurm error - code is set appropriately. - :rtype: `integer` + Args: + Partition_dict (dict): A populated partition dictionary, an empty + one can be created by create_partition_dict + + Returns: + (int): 0 for success or -1 for error, and the slurm error code is + set appropriately. """ cdef int errCode = slurm_create_partition(Partition_dict) return errCode @@ -1154,8 +1139,8 @@ def create_partition_dict(): """Returns a dictionary that can be populated by the user and used for the update_partition and create_partition calls. - :returns: Empty reservation dictionary - :rtype: `dict` + Returns: + (dict): Empty reservation dictionary """ return { 'Alternate': None, @@ -1179,11 +1164,13 @@ def create_partition_dict(): def slurm_create_partition(dict partition_dict): """Create a slurm partition. - :param dict partition_dict: A populated partition dictionary, - an empty one is created by create_partition_dict - :returns: 0 for success or -1 for error, and the slurm error - code is set appropriately. - :rtype: `integer` + Args: + partition_dict (dict): A populated partition dictionary, an empty one + can be created by create_partition_dict + + Returns: + (int): 0 for success or -1 for error, and the slurm error code is set + appropriately. 
""" cdef: slurm.update_part_msg_t part_msg_ptr @@ -1210,11 +1197,13 @@ def slurm_create_partition(dict partition_dict): def slurm_update_partition(dict partition_dict): """Update a slurm partition. - :param dict partition_dict: A populated partition dictionary, - an empty one is created by create_partition_dict - :returns: 0 for success, -1 for error, and the slurm error - code is set appropriately. - :rtype: `integer` + Args: + partition_dict (dict): A populated partition dictionary, an empty one + is created by create_partition_dict + + Returns: + (int): 0 for success, -1 for error, and the slurm error code is set + appropriately. """ cdef: slurm.update_part_msg_t part_msg_ptr @@ -1275,9 +1264,11 @@ def slurm_update_partition(dict partition_dict): def slurm_delete_partition(PartID): """Delete a slurm partition. - :param string PartID: Name of slurm partition - :returns: 0 for success else set the slurm error code as appropriately. - :rtype: `integer` + Args: + PartID (str): Name of slurm partition + + Returns: + (int): 0 for success else set the slurm error code as appropriately. """ cdef: slurm.delete_part_msg_t part_msg @@ -1303,9 +1294,12 @@ def slurm_delete_partition(PartID): cpdef int slurm_ping(int Controller=0) except? -1: """Issue RPC to check if slurmctld is responsive. - :param int Controller: 0 for primary (Default=0), 1 for backup, 2 for backup2, ... - :returns: 0 for success or slurm error code - :rtype: `integer` + Args: + Controller (int, optional): 0 for primary (Default=0), 1 for backup, 2 + for backup2, ... + + Returns: + 0 for success or slurm error code """ cdef int apiError = 0 cdef int errCode = slurm.slurm_ping(Controller) @@ -1320,8 +1314,8 @@ cpdef int slurm_ping(int Controller=0) except? -1: cpdef int slurm_reconfigure() except? -1: """Issue RPC to have slurmctld reload its configuration file. 
- :returns: 0 for success or a slurm error code - :rtype: `integer` + Returns: + 0 for success or slurm error code """ cdef int apiError = 0 cdef int errCode = slurm.slurm_reconfigure() @@ -1338,12 +1332,14 @@ cpdef int slurm_shutdown(uint16_t Options=0) except? -1: Both the primary and backup controller are shutdown. - :param int Options: - 0 - All slurm daemons (default) - 1 - slurmctld generates a core file - 2 - slurmctld is shutdown (no core file) - :returns: 0 for success or a slurm error code - :rtype: `integer` + Args: + Options (int, optional): + 0 - All slurm daemons (default) + 1 - slurmctld generates a core file + 2 - slurmctld is shutdown (no core file) + + Returns: + int: 0 for success or slurm error code """ cdef int apiError = 0 cdef int errCode = slurm.slurm_shutdown(Options) @@ -1360,8 +1356,8 @@ cpdef int slurm_takeover(int backup_inx) except? -1: The backup controller takes over the primary controller. - :returns: 0 for success or a slurm error code - :rtype: `integer` + Returns: + int: 0 for success or slurm error code """ cdef int apiError = 0 cdef int errCode = slurm.slurm_takeover(backup_inx) @@ -1372,9 +1368,12 @@ cpdef int slurm_takeover(int backup_inx) except? -1: cpdef int slurm_set_debug_level(uint32_t DebugLevel=0) except? -1: """Set the slurm controller debug level. - :param int DebugLevel: 0 (default) to 6 - :returns: 0 for success, -1 for error and set slurm error number - :rtype: `integer` + Args: + DebugLevel (int, optional): The debug level. Possible values are from + 0 to 6. + + Returns: + int: 0 for success, -1 for error and set slurm error number """ cdef int apiError = 0 cdef int errCode = slurm.slurm_set_debug_level(DebugLevel) @@ -1390,10 +1389,12 @@ cpdef int slurm_set_debugflags(uint32_t debug_flags_plus=0, uint32_t debug_flags_minus=0) except? -1: """Set the slurm controller debug flags. 
- :param int debug_flags_plus: debug flags to be added - :param int debug_flags_minus: debug flags to be removed - :returns: 0 for success, -1 for error and set slurm error number - :rtype: `integer` + Args: + debug_flags_plus (int, optional): Debug flags to be added. + debug_flags_minus (int, optional): Debug flags to be removed. + + Returns: + int: 0 for success, -1 for error and set slurm error number """ cdef int apiError = 0 cdef int errCode = slurm.slurm_set_debugflags(debug_flags_plus, @@ -1409,9 +1410,11 @@ cpdef int slurm_set_debugflags(uint32_t debug_flags_plus=0, cpdef int slurm_set_schedlog_level(uint32_t Enable=0) except? -1: """Set the slurm scheduler debug level. - :param int Enable: True = 0, False = 1 - :returns: 0 for success, -1 for error and set the slurm error number - :rtype: `integer` + Args: + Enable (int, optional): True = 0, False = 1 + + Returns: + int: 0 for success, -1 for error and set the slurm error number """ cdef int apiError = 0 cdef int errCode = slurm.slurm_set_schedlog_level(Enable) @@ -1431,9 +1434,11 @@ cpdef int slurm_set_schedlog_level(uint32_t Enable=0) except? -1: cpdef int slurm_suspend(uint32_t JobID=0) except? -1: """Suspend a running slurm job. - :param int JobID: Job identifier - :returns: 0 for success or a slurm error code - :rtype: `integer` + Args: + JobID (int): The job id. + + Returns: + int: 0 for success or slurm error code """ cdef int apiError = 0 cdef int errCode = slurm.slurm_suspend(JobID) @@ -1446,11 +1451,13 @@ cpdef int slurm_suspend(uint32_t JobID=0) except? -1: cpdef int slurm_resume(uint32_t JobID=0) except? -1: - """Resume a running slurm job step. + """ Resume a running slurm job step. + + Args: + JobID (int): The job id. 
- :param int JobID: Job identifier - :returns: 0 for success or a slurm error code - :rtype: `integer` + Returns: + int: 0 for success or slurm error code """ cdef int apiError = 0 cdef int errCode = slurm.slurm_resume(JobID) @@ -1465,9 +1472,11 @@ cpdef int slurm_resume(uint32_t JobID=0) except? -1: cpdef int slurm_requeue(uint32_t JobID=0, uint32_t State=0) except? -1: """Requeue a running slurm job step. - :param int JobID: Job identifier - :returns: 0 for success or a slurm error code - :rtype: `integer` + Args: + JobID (int): The job id. + + Returns: + int: 0 for success or slurm error code """ cdef int apiError = 0 cdef int errCode = slurm.slurm_requeue(JobID, State) @@ -1482,9 +1491,11 @@ cpdef int slurm_requeue(uint32_t JobID=0, uint32_t State=0) except? -1: cpdef long slurm_get_rem_time(uint32_t JobID=0) except? -1: """Get the remaining time in seconds for a slurm job step. - :param int JobID: Job identifier - :returns: Remaining time in seconds or -1 on error - :rtype: `long` + Args: + JobID (int): The job id. + + Returns: + int: Remaining time in seconds or -1 on error """ cdef int apiError = 0 cdef long errCode = slurm.slurm_get_rem_time(JobID) @@ -1499,9 +1510,11 @@ cpdef long slurm_get_rem_time(uint32_t JobID=0) except? -1: cpdef time_t slurm_get_end_time(uint32_t JobID=0) except? -1: """Get the end time in seconds for a slurm job step. - :param int JobID: Job identifier - :returns: Remaining time in seconds or -1 on error - :rtype: `integer` + Args: + JobID (int): The job id. + + Returns: + int: Remaining time in seconds or -1 on error """ cdef time_t EndTime = -1 cdef int apiError = 0 @@ -1517,9 +1530,11 @@ cpdef time_t slurm_get_end_time(uint32_t JobID=0) except? -1: cpdef int slurm_job_node_ready(uint32_t JobID=0) except? -1: """Return if a node could run a slurm job now if dispatched. - :param int JobID: Job identifier - :returns: Node Ready code - :rtype: `integer` + Args: + JobID (int): The job id. + + Returns: + int: Node ready code. 
""" cdef int apiError = 0 cdef int errCode = slurm.slurm_job_node_ready(JobID) @@ -1530,10 +1545,12 @@ cpdef int slurm_job_node_ready(uint32_t JobID=0) except? -1: cpdef int slurm_signal_job(uint32_t JobID=0, uint16_t Signal=0) except? -1: """Send a signal to a slurm job step. - :param int JobID: Job identifier - :param int Signal: Signal to send (default=0) - :returns: 0 for success or -1 for error and the set Slurm errno - :rtype: `integer` + Args: + JobID (int): The job id. + Signal (int, optional): Signal to send. + + Returns: + int: 0 for success or -1 for error and the set Slurm errno """ cdef int apiError = 0 cdef int errCode = slurm.slurm_signal_job(JobID, Signal) @@ -1554,11 +1571,13 @@ cpdef int slurm_signal_job_step(uint32_t JobID=0, uint32_t JobStep=0, uint16_t Signal=0) except? -1: """Send a signal to a slurm job step. - :param int JobID: Job identifier - :param int JobStep: Job step identifier - :param int Signal: Signal to send (default=0) - :returns: Error code - 0 for success or -1 for error and set the slurm errno - :rtype: `integer` + Args: + JobID (int): The job id. + JobStep: The id of the job step. + Signal (int, optional): Signal to send. + + Returns: + int: 0 for success or -1 for error and set the slurm errno. """ cdef int apiError = 0 cdef int errCode = slurm.slurm_signal_job_step(JobID, JobStep, Signal) @@ -1574,11 +1593,13 @@ cpdef int slurm_kill_job(uint32_t JobID=0, uint16_t Signal=0, uint16_t BatchFlag=0) except? -1: """Terminate a running slurm job step. - :param int JobID: Job identifier - :param int Signal: Signal to send - :param int BatchFlag: Job batch flag (default=0) - :returns: 0 for success or -1 for error and set slurm errno - :rtype: `integer` + Args: + JobID (int): The job id. + Signal (int): Signal to send. + BatchFlag (int, optional): Job batch flag. 
+ + Returns: + int: 0 for success or -1 for error and set slurm errno """ cdef int apiError = 0 cdef int errCode = slurm.slurm_kill_job(JobID, Signal, BatchFlag) @@ -1594,11 +1615,13 @@ cpdef int slurm_kill_job_step(uint32_t JobID=0, uint32_t JobStep=0, uint16_t Signal=0) except? -1: """Terminate a running slurm job step. - :param int JobID: Job identifier - :param int JobStep: Job step identifier - :param int Signal: Signal to send (default=0) - :returns: 0 for success or -1 for error, and the slurm error code is set appropriately. - :rtype: `integer` + Args: + JobID (int): The job id. + JobStep (int): The id of the job step. + Signal (int, optional): Signal to send. + + Returns: + int: 0 for success or -1 for error, and slurm errno is set. """ cdef int apiError = 0 cdef int errCode = slurm.slurm_kill_job_step(JobID, JobStep, Signal) @@ -1614,12 +1637,14 @@ cpdef int slurm_kill_job2(const char *JobID='', uint16_t Signal=0, uint16_t BatchFlag=0, char* sibling=NULL) except? -1: """Terminate a running slurm job step. - :param const char * JobID: Job identifier - :param int Signal: Signal to send - :param int BatchFlag: Job batch flag (default=0) - :param string sibling: optional string of sibling cluster to send the message to - :returns: 0 for success or -1 for error and set slurm errno - :rtype: `integer` + Args: + JobID (str): The job id. + Signal (int): Signal to send. + BatchFlag (int, optional): Job batch flag. + sibling (str, optional): Sibling cluster to send the message to. + + Returns: + int: 0 for success or -1 for error, and slurm errno is set. """ cdef int apiError = 0 cdef int errCode = slurm.slurm_kill_job2(JobID, Signal, BatchFlag, sibling) @@ -1634,10 +1659,12 @@ cpdef int slurm_kill_job2(const char *JobID='', uint16_t Signal=0, cpdef int slurm_complete_job(uint32_t JobID=0, uint32_t JobCode=0) except? -1: """Complete a running slurm job step. 
- :param int JobID: Job identifier - :param int JobCode: Return code (default=0) - :returns: 0 for success or -1 for error and set slurm errno - :rtype: `integer` + Args: + JobID (int): The job id. + JobCode (int, optional): Return code for the job. + + Returns: + int: 0 for success or -1 for error and set slurm errno """ cdef int apiError = 0 cdef int errCode = slurm.slurm_complete_job(JobID, JobCode) @@ -1652,11 +1679,12 @@ cpdef int slurm_complete_job(uint32_t JobID=0, uint32_t JobCode=0) except? -1: cpdef int slurm_notify_job(uint32_t JobID=0, char* Msg='') except? -1: """Notify a message to a running slurm job step. - :param string JobID: Job identifier (default=0) - :param string Msg: Message string to send to job - :returns: 0 for success or -1 on error - :rtype: `integer` + Args: + JobID (int): The job id + Msg (str): Message to send to the job. + Returns: + int: 0 for success or -1 on error """ cdef int apiError = 0 cdef int errCode = slurm.slurm_notify_job(JobID, Msg) @@ -1671,11 +1699,12 @@ cpdef int slurm_notify_job(uint32_t JobID=0, char* Msg='') except? -1: cpdef int slurm_terminate_job_step(uint32_t JobID=0, uint32_t JobStep=0) except? -1: """Terminate a running slurm job step. - :param int JobID: Job identifier (default=0) - :param int JobStep: Job step identifier (default=0) - :returns: 0 for success or -1 for error, and the slurm error code - is set appropriately. - :rtype: `integer` + Args: + JobID (int): The job id + JobStep (int): The id of the job step + + Returns: + 0 for success or -1 for error, and the slurm error code is set """ cdef int apiError = 0 cdef int errCode = slurm.slurm_terminate_job_step(JobID, JobStep) @@ -1692,7 +1721,7 @@ cpdef int slurm_terminate_job_step(uint32_t JobID=0, uint32_t JobStep=0) except? 
cdef class job: - """Class to access/modify Slurm Job Information.""" + """Slurm Job Information.""" cdef: slurm.job_info_msg_t *_job_ptr @@ -1714,26 +1743,25 @@ cdef class job: def lastUpdate(self): """Get the time (epoch seconds) the job data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate def lastBackfill(self): """Get the time (epoch seconds) of last backfilling run. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastBackfill - cpdef ids(self): + def ids(self): """Return the job IDs from retrieved data. - :returns: Dictionary of job IDs - :rtype: `dict` + Returns: + (dict): Dictionary of job IDs """ - cdef: int rc int apiError @@ -1756,10 +1784,12 @@ cdef class job: def find(self, name='', val=''): """Search for a property and associated value in the retrieved job data. - :param str name: key string to search - :param str value: value string to match - :returns: List of IDs that match - :rtype: `list` + Args: + name (str): key string to search + val (str): value string to match + + Returns: + (list): List of IDs that match """ cdef: list retList = [] @@ -1774,44 +1804,66 @@ cdef class job: return retList - def find_id(self, jobid): - """Retrieve job ID data. + cdef _load_single_job(self, jobid): + """Uses slurm_load_job to set up the self._job_ptr for a single job given by the jobid. + After calling this, the job pointer can be used in other methods + to operate on the information of the job. - This method accepts both string and integer formats of the jobid. It - calls slurm_xlate_job_id() to convert the jobid appropriately. - This works for single jobs and job arrays. + This method accepts both string and integer formats of the jobid. It + calls slurm_xlate_job_id to convert the jobid appropriately. 
- :param str jobid: Job id key string to search - :returns: List of dictionary of values for given job id - :rtype: `list` + Raises a ValueError if the jobid does not correspond to an existing job. + + Args: + jobid (str): The job id. """ cdef: int apiError int rc + # jobid can be given as int or string if isinstance(jobid, int) or isinstance(jobid, long): jobid = str(jobid).encode("UTF-8") else: jobid = jobid.encode("UTF-8") - + # convert jobid appropriately for slurm jobid_xlate = slurm.slurm_xlate_job_id(jobid) + + # load the job which sets the self._job_ptr pointer rc = slurm.slurm_load_job(&self._job_ptr, jobid_xlate, self._ShowFlags) - if rc == slurm.SLURM_SUCCESS: - return list(self.get_job_ptr().values()) - else: + if rc != slurm.SLURM_SUCCESS: apiError = slurm.slurm_get_errno() raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) + def find_id(self, jobid): + """Retrieve job ID data. + + This method accepts both string and integer formats of the jobid. + This works for single jobs and job arrays. It uses the internal + helper _load_single_job to do slurm_load_job. If the job corresponding + to the jobid does not exist, a ValueError will be raised. + + Args: + jobid (str): Job id key string to search + + Returns: + (list): List of dictionary of values for given job id + """ + self._load_single_job(jobid) + return list(self.get_job_ptr().values()) + def find_user(self, user): """Retrieve a user's job data. - + This method calls slurm_load_job_user to get all job_table records associated with a specific user. 
- :param str user: User string to search - :returns: Dictionary of values for all user's jobs - :rtype: `dict` + Args: + user (str): User string to search + + Returns: + (dict): Dictionary of values for all user's jobs """ cdef: int apiError @@ -1834,14 +1886,15 @@ cdef class job: apiError = slurm.slurm_get_errno() raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) - cpdef get(self): + def get(self): """Get all slurm jobs information. - This method calls slurm_load_jobs to get job_table records for all jobs + This method calls slurm_load_jobs to get job_table records for all + jobs - :returns: Data where key is the job name, each entry contains a - dictionary of job attributes - :rtype: `dict` + Returns: + (dict): Data where key is the job name, each entry contains a + dictionary of job attributes """ cdef: int apiError @@ -1858,8 +1911,8 @@ cdef class job: cdef dict get_job_ptr(self): """Convert all job arrays in buffer to dictionary. - :returns: dictionary of job attributes - :rtype: `dict` + Returns: + dict: dictionary of job attributes """ cdef: char time_str[32] @@ -2044,13 +2097,13 @@ cdef class job: if self._record.pn_min_memory & slurm.MEM_PER_CPU: self._record.pn_min_memory &= (~slurm.MEM_PER_CPU) - Job_dict['mem_per_cp'] = True - Job_dict['min_memory_cp'] = self._record.pn_min_memory + Job_dict['mem_per_cpu'] = True + Job_dict['min_memory_cpu'] = self._record.pn_min_memory Job_dict['mem_per_node'] = False Job_dict['min_memory_node'] = None else: - Job_dict['mem_per_cp'] = False - Job_dict['min_memory_cp'] = None + Job_dict['mem_per_cpu'] = False + Job_dict['min_memory_cpu'] = None Job_dict['mem_per_node'] = True Job_dict['min_memory_node'] = self._record.pn_min_memory @@ -2193,9 +2246,11 @@ cdef class job: cpdef int __cpus_allocated_on_node_id(self, int nodeID=0): """Get the number of cpus allocated to a job on a node by node name. 
- :param int nodeID: Numerical node ID - :returns: Num of CPUs allocated to job on this node or -1 on error - :rtype: `integer` + Args: + nodeID (int): Numerical node ID + + Returns: + int: Num of CPUs allocated to job on this node or -1 on error """ cdef: slurm.job_resources_t *job_resrcs_ptr = self._record.job_resrcs @@ -2206,9 +2261,11 @@ cdef class job: cdef int __cpus_allocated_on_node(self, char* nodeName=''): """Get the number of cpus allocated to a slurm job on a node by node name. - :param string nodeName: Name of node - :returns: Num of CPUs allocated to job on this node or -1 on error - :rtype: `integer` + Args: + nodeName (str): Name of the node + + Returns: + Num of CPUs allocated to job on this node or -1 on error """ cdef: slurm.job_resources_t *job_resrcs_ptr = self._record.job_resrcs @@ -2219,9 +2276,11 @@ cdef class job: cdef list __cpus_allocated_list_on_node(self, char* nodeName=''): """Get a list of cpu ids allocated to current slurm job on a node by node name. - :param string nodeName: Name of node - :returns: list of allocated cpus (empty, if nothing found or error) - :rtype: `list` + Args: + nodeName (str): Name of the node + + Returns: + list of allocated cpus (empty, if nothing found or error) """ cdef: int error = 0 @@ -2244,9 +2303,11 @@ cdef class job: def __unrange(self, bit_str): """converts a string describing a bitmap (from slurm_job_cpus_allocated_str_on_node()) to a list. - :param string bit_str: string describing a bitmap (e.g. "0-30,45,50-60") - :returns: list referring to bitmap (empty if not succesful) - :rtype: `list` + Args: + bit_str (str): string describing a bitmap (e.g. 
"0-30,45,50-60") + + Returns: + (list): List referring to bitmap (empty if not succesful) """ r_list = [] @@ -2264,16 +2325,16 @@ cdef class job: return r_list cpdef __free(self): - """Release the storage generated by the slurm_get_job_steps function.""" + """Release storage generated by the slurm_get_job_steps function.""" if self._job_ptr is not NULL: slurm.slurm_free_job_info_msg(self._job_ptr) - cpdef print_job_info_msg(self, int oneLiner=0): + def print_job_info_msg(self, int oneLiner=0): """Print the data structure describing all job step records. - The job step records are loaded by the slurm_get_job_steps function. - - :param int Flag: Default=0 + Args: + oneLiner (int, optional): Whether to print the data in one line or + not """ cdef: int rc @@ -2291,12 +2352,16 @@ cdef class job: raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) def slurm_job_batch_script(self, jobid): - """ - Retrieve the batch script for a given jobid. + """Return the contents of the batch-script for a Job. + + The string returned also includes all the "\\n" characters (new-line). + + Args: + jobid (Union[str, int]): ID of the Job for which the script should + be retrieved. - :param str jobid: Job id key string to search - :returns: String output of a jobid's batch script - :rtype: `str` + Returns: + (str): The content of the batch script. 
""" if isinstance(jobid, int) or isinstance(jobid, long): jobid = str(jobid).encode("UTF-8") @@ -2304,7 +2369,8 @@ cdef class job: jobid = jobid.encode("UTF-8") jobid_xlate = slurm.slurm_xlate_job_id(jobid) - return slurm.slurm_job_batch_script(slurm.stdout, jobid_xlate) + + return pyslurm.core.job.Job(jobid_xlate).get_batch_script() cdef int fill_job_desc_from_opts(self, dict job_opts, slurm.job_desc_msg_t *desc): """ @@ -2497,8 +2563,8 @@ cdef class job: if job_opts.get("realmem"): desc.pn_min_memory = job_opts.get("realmem") - elif job_opts.get("mem_per_cp"): - desc.pn_min_memory = job_opts.get("mem_per_cp") | slurm.MEM_PER_CPU + elif job_opts.get("mem_per_cpu"): + desc.pn_min_memory = job_opts.get("mem_per_cpu") | slurm.MEM_PER_CPU if job_opts.get("tmpdisk"): desc.pn_min_tmp_disk = job_opts.get("tmpdisk") @@ -2672,7 +2738,9 @@ cdef class job: return 0 cdef int envcount(self, char **env): - """Return the number of elements in the environment `env`.""" + """ + Return the number of elements in the environment `env`. + """ cdef int envc = 0 while (env[envc] != NULL): envc += 1 @@ -2720,7 +2788,15 @@ cdef class job: def submit_batch_job(self, job_opts): """Submit batch job. - * make sure options match sbatch command line opts and not struct member names. + + Make sure options match sbatch command line opts and not struct member + names. + + Args: + job_opts (dict): Job information. + + Returns: + (int): The job id of the submitted job. """ cdef: slurm.job_desc_msg_t desc @@ -2879,15 +2955,50 @@ cdef class job: #return "Submitted batch job %s" % job_id return job_id + def wait_finished(self, jobid): + """Block until the job given by the jobid finishes. + + This works for single jobs, as well as job arrays. + + Args: + jobid (int): The job id of the slurm job. + To reference a job with job array set, use the first/"master" + jobid (the same as given by squeue) + + Returns: + (int): The exit code of the slurm job. 
+ """ + exit_status = -9999 + complete = False + while not complete: + complete = True + p_time.sleep(5) + self._load_single_job(jobid) + for i in range(0, self._job_ptr.record_count): + self._record = &self._job_ptr.job_array[i] + if IS_JOB_COMPLETED(self._job_ptr.job_array[i]): + exit_status_arrayjob = None + if WIFEXITED(self._record.exit_code): + exit_status_arrayjob = WEXITSTATUS(self._record.exit_code) + else: + exit_status_arrayjob = 1 + # set exit code to the highest of all jobs in job array + exit_status = max([exit_status, exit_status_arrayjob]) + else: + # go on with the next iteration, until all jobs in array are completed + complete = False + slurm.slurm_free_job_info_msg(self._job_ptr) + return exit_status + def slurm_pid2jobid(uint32_t JobPID=0): """Get the slurm job id from a process id. - :param int JobPID: Job process id - :returns: 0 for success or a slurm error code - :rtype: `integer` - :returns: Job Identifier - :rtype: `integer` + Args: + JobPID (int): Job process id + + Returns: + int: 0 for success or a slurm error code """ cdef: uint32_t JobID = 0 @@ -2907,9 +3018,11 @@ cdef secs2time_str(uint32_t time): This method converts time in seconds (86400) to Slurm's string format (1-00:00:00). - :param int time: time in seconds - :returns: time string - :rtype: `str` + Args: + time (int): Time in seconds + + Returns: + str: Slurm time string. """ cdef: char *time_str @@ -2938,9 +3051,11 @@ cdef mins2time_str(uint32_t time): This method converts time in minutes (14400) to Slurm's string format (10-00:00:00). - :param int time: time in minutes - :returns: time string - :rtype: `str` + Args: + time (int): Time in minutes + + Returns: + str: Slurm time string. """ cdef: double days, hours, minutes, seconds @@ -2984,8 +3099,8 @@ class SlurmError(Exception): def slurm_get_errno(): """Return the slurm error as set by a slurm API call.
- :returns: slurm error number - :rtype: `integer` + Returns: + (int): Current slurm error number """ cdef int errNum = slurm.slurm_get_errno() @@ -2995,9 +3110,11 @@ def slurm_get_errno(): def slurm_strerror(int Errno=0): """Return slurm error message represented by a given slurm error number. - :param int Errno: slurm error number. - :returns: slurm error string - :rtype: `string` + Args: + Errno (int): slurm error number. + + Returns: + (str): slurm error string """ cdef char* errMsg = slurm.slurm_strerror(Errno) @@ -3007,18 +3124,20 @@ def slurm_strerror(int Errno=0): def slurm_seterrno(int Errno=0): """Set the slurm error number. - :param int Errno: slurm error number + Args: + Errno (int): slurm error number """ slurm.slurm_seterrno(Errno) def slurm_perror(char* Msg=''): """Print to standard error the supplied header. - + Header is followed by a colon, followed by a text description of the last Slurm error code generated. - :param string Msg: slurm program error String + Args: + Msg (str): slurm program error String """ slurm.slurm_perror(Msg) @@ -3030,7 +3149,7 @@ def slurm_perror(char* Msg=''): cdef class node: - """Class to access/modify/update Slurm Node Information.""" + """Access/Modify/Update Slurm Node Information.""" cdef: slurm.node_info_msg_t *_Node_ptr @@ -3051,16 +3170,16 @@ cdef class node: def lastUpdate(self): """Return last time (epoch seconds) the node data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate - cpdef ids(self): + def ids(self): """Return the node IDs from retrieved data. - :returns: Dictionary of node IDs - :rtype: `dict` + Returns: + (dict): Dictionary of node IDs """ cdef: int rc @@ -3084,17 +3203,19 @@ cdef class node: def find_id(self, nodeID): """Get node information for a given node. 
- :param str nodeID: Node key string to search - :returns: Dictionary of values for given node - :rtype: `dict` + Args: + nodeID (str): Node key string to search + + Returns: + (dict): Dictionary of values for given node """ return list(self.get_node(nodeID).values())[0] def get(self): """Get all slurm node information. - :returns: Dictionary of dictionaries whose key is the node name. - :rtype: `dict` + Returns: + (dict): Dictionary of dictionaries whose key is the node name. """ return self.get_node(None) @@ -3105,9 +3226,11 @@ cdef class node: def get_node(self, nodeID): """Get single slurm node information. - :param str nodeID: Node key string to search. Default NULL. - :returns: Dictionary of give node info data. - :rtype: `dict` + Args: + nodeID (str): Node key string to search. Default NULL. + + Returns: + (dict): Dictionary of node info data. """ cdef: int rc @@ -3330,21 +3453,24 @@ cdef class node: return self._NodeDict - cpdef update(self, dict node_dict): + def update(self, dict node_dict): """Update slurm node information. - :param dict node_dict: A populated node dictionary, an empty one is - created by create_node_dict - :returns: 0 for success or -1 for error, and the slurm error code - is set appropriately. - :rtype: `integer` + Args: + node_dict (dict): A populated node dictionary, an empty one is + created by create_node_dict + + Returns: + (int): 0 for success or -1 for error, and the slurm error code is + set appropriately. """ return slurm_update_node(node_dict) - cpdef print_node_info_msg(self, int oneLiner=False): + def print_node_info_msg(self, int oneLiner=False): """Output information about all slurm nodes. - :param int oneLiner: Print on one line - False (Default) or True + Args: + oneLiner (int, optional): Print on one line """ cdef: int rc @@ -3365,11 +3491,13 @@ cdef class node: def slurm_update_node(dict node_dict): """Update slurm node information. 
- :param dict node_dict: A populated node dictionary, an empty one is - created by create_node_dict - :returns: 0 for success or -1 for error, and the slurm error code - is set appropriately. - :rtype: `integer` + Args: + node_dict (dict): A populated node dictionary, an empty one is created + by create_node_dict + + Returns: + (int): 0 for success or -1 for error, and the slurm error code is set + appropriately. """ cdef: slurm.update_node_msg_t node_msg @@ -3416,12 +3544,12 @@ def slurm_update_node(dict node_dict): def create_node_dict(): """Return a an update_node dictionary - + This dictionary can be populated by the user and used for the update_node call. - :returns: Empty node dictionary - :rtype: `dict` + Returns: + (dict): Empty node dictionary """ return { 'node_names': None, @@ -3439,7 +3567,7 @@ def create_node_dict(): cdef class jobstep: - """Class to access/modify Slurm Jobstep Information.""" + """Access/Modify Slurm Jobstep Information.""" cdef: slurm.time_t _lastUpdate @@ -3466,8 +3594,8 @@ cdef class jobstep: def lastUpdate(self): """Get the time (epoch seconds) the jobstep data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate @@ -3495,11 +3623,11 @@ cdef class jobstep: return retDict - cpdef get(self): + def get(self): """Get slurm jobstep information. - :returns: Data whose key is the jobstep ID. - :rtype: `dict` + Returns: + (dict): Data whose key is the jobstep ID. """ self.__get() @@ -3511,12 +3639,6 @@ cdef class jobstep: This method loads details about job steps that satisfy the job_id and/or step_id specifications provided if the data has been updated since the update_time specified. 
- - :param int JobID: Job Identifier - :param int StepID: Jobstep Identifier - :param int ShowFlags: Display flags (Default=0) - :returns: Data whose key is the job and step ID - :rtype: `dict` """ cdef: slurm.job_step_info_response_msg_t *job_step_info_ptr = NULL @@ -3633,13 +3755,15 @@ cdef class jobstep: self._JobStepDict = Steps - cpdef layout(self, uint32_t JobID=0, uint32_t StepID=0): + def layout(self, uint32_t JobID=0, uint32_t StepID=0): """Get the slurm job step layout from a given job and step id. - :param int JobID: slurm job id (Default=0) - :param int StepID: slurm step id (Default=0) - :returns: List of job step layout. - :rtype: `list` + Args: + JobID (int): The job id. + StepID (int): The id of the job step. + + Returns: + (list): List of job step layout. """ cdef: slurm.slurm_step_id_t step_id @@ -3693,7 +3817,7 @@ cdef class jobstep: cdef class hostlist: - """Wrapper class for Slurm hostlist functions.""" + """Wrapper for Slurm hostlist functions.""" cdef slurm.hostlist_t hl @@ -3722,14 +3846,14 @@ cdef class hostlist: def count(self): return slurm.slurm_hostlist_count(self.hl) - cpdef get_list(self): + def get_list(self): """Get the list of hostnames composing the hostlist. - For example with a hostlist created with "tux[1-3]" -> [ 'tux1', tux2', - 'tux3' ]. + For example with a hostlist created with "tux[1-3]" -> [ 'tux1', + tux2', 'tux3' ]. - :returns: the list of hostnames in case of success or None on error. - :rtype: list + Returns: + (list): The list of hostnames in case of success or None on error. """ cdef: slurm.hostlist_t hlist = NULL @@ -3817,9 +3941,12 @@ cdef class trigger: def set(self, dict trigger_dict): """Set or create a slurm trigger. - :param dict trigger_dict: A populated dictionary of trigger information - :returns: 0 for success or -1 for error, and the slurm error code is set appropriately. 
- :rtype: `integer` + Args: + trigger_dict (dict): A populated dictionary of trigger information + + Returns: + (int): 0 for success or -1 for error, and the slurm error code is + set appropriately. """ cdef: slurm.trigger_info_t trigger_set @@ -3893,8 +4020,8 @@ cdef class trigger: def get(self): """Get the information on slurm triggers. - :returns: Where key is the trigger ID - :rtype: `dict` + Returns: + (dict): Dictionary, where keys are the trigger IDs """ cdef: slurm.trigger_info_msg_t *trigger_get = NULL @@ -3924,11 +4051,13 @@ cdef class trigger: def clear(self, TriggerID=0, UserID=slurm.NO_VAL, ID=0): """Clear or remove a slurm trigger. - :param string TriggerID: Trigger Identifier - :param string UserID: User Identifier - :param string ID: Job Identifier - :returns: 0 for success or a slurm error code - :rtype: `integer` + Args: + TriggerID (str): Trigger Identifier + UserID (str): User Identifier + ID (str): Job Identifier + + Returns: + (int): 0 for success or a slurm error code """ cdef: slurm.trigger_info_t trigger_clear @@ -3959,7 +4088,7 @@ cdef class trigger: cdef class reservation: - """Class to access/update/delete slurm reservation Information.""" + """Access/update/delete slurm reservation Information.""" cdef: slurm.reserve_info_msg_t *_Res_ptr @@ -3979,35 +4108,39 @@ cdef class reservation: def lastUpdate(self): """Get the time (epoch seconds) the reservation data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): epoch seconds """ return self._lastUpdate def ids(self): """Return a list of reservation IDs from retrieved data. - :returns: Dictionary of reservation IDs - :rtype: `dict` + Returns: + (dict): Dictionary of reservation IDs """ return self._ResDict.keys() def find_id(self, resID): """Retrieve reservation ID data. 
- :param str resID: Reservation key string to search - :returns: Dictionary of values for given reservation key - :rtype: `dict` + Args: + resID (str): Reservation key string to search + + Returns: + (dict): Dictionary of values for given reservation key """ return self._ResDict.get(resID, {}) def find(self, name='', val=''): """Search for property and associated value in reservation data. - :param str name: key string to search - :param str value: value string to match - :returns: List of IDs that match - :rtype: `list` + Args: + name (str): key string to search + val (str): value string to match + + Returns: + (list): List of IDs that match """ # [ key for key, value in self._ResDict.items() if self._ResDict[key]['state'] == 'error'] @@ -4024,7 +4157,6 @@ cdef class reservation: cdef int __load(self) except? -1: """Load slurm reservation information.""" - cdef: slurm.reserve_info_msg_t *new_reserve_info_ptr = NULL slurm.time_t last_time = NULL @@ -4061,8 +4193,8 @@ cdef class reservation: def get(self): """Get slurm reservation information. - :returns: Data whose key is the Reservation ID - :rtype: `dict` + Returns: + (dict): Data whose key is the Reservation ID """ self.load() self.__get() @@ -4103,29 +4235,43 @@ cdef class reservation: self._ResDict = Reservations def create(self, dict reservation_dict={}): - """Create slurm reservation.""" + """Create slurm reservation. + + Args: + reservation_dict (dict): Reservation information + + Returns: + (str): The name of the reservation created. + """ return slurm_create_reservation(reservation_dict) def delete(self, ResID): """Delete slurm reservation. - :returns: 0 for success or a slurm error code - :rtype: `integer` + Args: + ResID (str): ID of the reservation to delete + + Returns: + (int): 0 for success or a slurm error code """ return slurm_delete_reservation(ResID) def update(self, dict reservation_dict={}): """Update a slurm reservation attributes.
- :returns: 0 for success or -1 for error, and the slurm error code is set appropriately. - :rtype: `integer` + Args: + reservation_dict (dict): Reservation information + + Returns: + (int): 0 for success or -1 for error and slurm error code is set """ return slurm_update_reservation(reservation_dict) def print_reservation_info_msg(self, int oneLiner=0): """Output information about all slurm reservations. - :param int Flags: Print on one line - 0 (Default) or 1 + Args: + oneLiner (int, optional): Print reservation info in one-line """ if self._Res_ptr is not NULL: slurm.slurm_print_reservation_info_msg(slurm.stdout, self._Res_ptr, oneLiner) @@ -4139,11 +4285,12 @@ cdef class reservation: def slurm_create_reservation(dict reservation_dict={}): """Create a slurm reservation. - :param dict reservation_dict: A populated reservation dictionary, - an empty one is created by create_reservation_dict - :returns: 0 for success or -1 for error, and the slurm error code - is set appropriately. - :rtype: `string` + Args: + reservation_dict (dict): A populated reservation dictionary, an empty + one is created by create_reservation_dict + + Returns: + (str): The name of the reservation created. """ cdef: slurm.resv_desc_msg_t resv_msg @@ -4239,11 +4386,13 @@ def slurm_create_reservation(dict reservation_dict={}): def slurm_update_reservation(dict reservation_dict={}): """Update a slurm reservation. - :param dict reservation_dict: A populated reservation dictionary, - an empty one is created by create_reservation_dict - :returns: 0 for success or -1 for error, and the slurm error code - is set appropriately. - :rtype: `integer` + Args: + reservation_dict (dict): A populated reservation dictionary, an empty + one is created by create_reservation_dict + + Returns: + (int): 0 for success or -1 for error, and the slurm error code is set + appropriately. 
""" cdef: slurm.resv_desc_msg_t resv_msg @@ -4262,6 +4411,9 @@ def slurm_update_reservation(dict reservation_dict={}): if time_value != -1: resv_msg.start_time = time_value + if reservation_dict.get('end_time'): + resv_msg.end_time = reservation_dict['end_time'] + if reservation_dict.get('duration'): resv_msg.duration = reservation_dict.get('duration') @@ -4320,6 +4472,9 @@ def slurm_update_reservation(dict reservation_dict={}): resv_msg.flags = int_value errCode = slurm.slurm_update_reservation(&resv_msg) + if errCode != 0: + apiError = slurm.slurm_get_errno() + raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) return errCode @@ -4327,9 +4482,12 @@ def slurm_update_reservation(dict reservation_dict={}): def slurm_delete_reservation(ResID): """Delete a slurm reservation. - :param string ResID: Reservation Identifier - :returns: 0 for success or -1 for error, and the slurm error code is set appropriately. - :rtype: `integer` + Args: + ResID (str): Reservation Identifier + + Returns: + (int): 0 for success or -1 for error, and the slurm error code is set + appropriately. """ cdef slurm.reservation_name_msg_t resv_msg @@ -4351,12 +4509,12 @@ def slurm_delete_reservation(ResID): def create_reservation_dict(): """Create and empty dict for use with create_reservation method. - + Returns a dictionary that can be populated by the user an used for the update_reservation and create_reservation calls. - :returns: Empty Reservation dictionary - :rtype: `dict` + Returns: + (dict): Empty Reservation dictionary """ return { 'start_time': 0, @@ -4380,8 +4538,7 @@ def create_reservation_dict(): cdef class topology: - """Class to access/update slurm topology information.""" - + """Access/update slurm topology information.""" cdef: slurm.topo_info_response_msg_t *_topo_info_ptr dict _TopoDict @@ -4396,8 +4553,8 @@ cdef class topology: def lastUpdate(self): """Get the time (epoch seconds) the retrieved data was updated. 
- :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate @@ -4429,8 +4586,8 @@ cdef class topology: def get(self): """Get slurm topology information. - :returns: Dictionary whose key is the Topology ID - :rtype: `dict` + Returns: + (dict): Dictionary whose key is the Topology ID """ self.__load() self.__get() @@ -4464,9 +4621,8 @@ cdef class topology: self._print_topo_info_msg() cpdef _print_topo_info_msg(self): - """Output information about topology based upon message as loaded using slurm_load_topo. - - :param int Flags: Print on one line - False (Default), True + """Output information about topology based upon message as loaded + using slurm_load_topo. """ if self._topo_info_ptr is not NULL: @@ -4481,6 +4637,7 @@ cdef class topology: cdef class statistics: + """Slurm Controller statistics.""" cdef: slurm.stats_info_request_msg_t _req @@ -4494,10 +4651,11 @@ cdef class statistics: def __dealloc__(self): pass - cpdef dict get(self): + def get(self): """Get slurm statistics information. - :rtype: `dict` + Returns: + (dict): Slurm Controller statistics """ cdef: int errCode @@ -4553,7 +4711,10 @@ cdef class statistics: rpc_type_stats = {} for i in range(self._buf.rpc_type_size): - rpc_type = self.__rpc_num2string(self._buf.rpc_type_id[i]) + try: + rpc_type = self.__rpc_num2string(self._buf.rpc_type_id[i]) + except KeyError: + rpc_type = "UNKNOWN" rpc_type_stats[rpc_type] = {} rpc_type_stats[rpc_type]['id'] = self._buf.rpc_type_id[i] rpc_type_stats[rpc_type]['count'] = self._buf.rpc_type_cnt[i] @@ -4590,10 +4751,10 @@ cdef class statistics: apiError = slurm.slurm_get_errno() raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) - cpdef int reset(self): + def reset(self): """Reset scheduling statistics - This method required root privileges. + This method requires root privileges. 
""" cdef: int apiError @@ -4615,10 +4776,10 @@ cdef class statistics: 1001: "REQUEST_NODE_REGISTRATION_STATUS", 1002: "MESSAGE_NODE_REGISTRATION_STATUS", 1003: "REQUEST_RECONFIGURE", - 1004: "RESPONSE_RECONFIGURE", + 1004: "REQUEST_RECONFIGURE_WITH_CONFIG", 1005: "REQUEST_SHUTDOWN", - 1006: "REQUEST_SHUTDOWN_IMMEDIATE", - 1007: "RESPONSE_SHUTDOWN", + 1006: "DEFUNCT_RPC_1006", + 1007: "DEFUNCT_RPC_1007", 1008: "REQUEST_PING", 1009: "REQUEST_CONTROL", 1010: "REQUEST_SET_DEBUG_LEVEL", @@ -4635,9 +4796,10 @@ cdef class statistics: 1021: "REQUEST_LICENSE_INFO", 1022: "RESPONSE_LICENSE_INFO", 1023: "REQUEST_SET_FS_DAMPENING_FACTOR", + 1024: "RESPONSE_NODE_REGISTRATION", - 1400: "DBD_MESSAGES_START", 1433: "PERSIST_RC", + 2000: "DBD_MESSAGES_END", 2001: "REQUEST_BUILD_INFO", @@ -4650,12 +4812,12 @@ cdef class statistics: 2008: "RESPONSE_NODE_INFO", 2009: "REQUEST_PARTITION_INFO", 2010: "RESPONSE_PARTITION_INFO", - 2011: "REQUEST_ACCTING_INFO", - 2012: "RESPONSE_ACCOUNTING_INFO", + 2011: "DEFUNCT_RPC_2011", + 2012: "DEFUNCT_RPC_2012", 2013: "REQUEST_JOB_ID", 2014: "RESPONSE_JOB_ID", - 2015: "REQUEST_BLOCK_INFO", - 2016: "RESPONSE_BLOCK_INFO", + 2015: "REQUEST_CONFIG", + 2016: "RESPONSE_CONFIG", 2017: "REQUEST_TRIGGER_SET", 2018: "REQUEST_TRIGGER_GET", 2019: "REQUEST_TRIGGER_CLEAR", @@ -4672,22 +4834,22 @@ cdef class statistics: 2030: "REQUEST_TRIGGER_PULL", 2031: "REQUEST_FRONT_END_INFO", 2032: "RESPONSE_FRONT_END_INFO", - 2033: "REQUEST_SPANK_ENVIRONMENT", - 2034: "RESPONCE_SPANK_ENVIRONMENT", + 2033: "DEFUNCT_RPC_2033", + 2034: "DEFUNCT_RPC_2034", 2035: "REQUEST_STATS_INFO", 2036: "RESPONSE_STATS_INFO", 2037: "REQUEST_BURST_BUFFER_INFO", 2038: "RESPONSE_BURST_BUFFER_INFO", 2039: "REQUEST_JOB_USER_INFO", 2040: "REQUEST_NODE_INFO_SINGLE", - 2041: "REQUEST_POWERCAP_INFO", - 2042: "RESPONSE_POWERCAP_INFO", + 2041: "DEFUNCT_RPC_2041", + 2042: "DEFUNCT_RPC_2042", 2043: "REQUEST_ASSOC_MGR_INFO", 2044: "RESPONSE_ASSOC_MGR_INFO", - 2045: "REQUEST_SICP_INFO_DEFUNCT", - 2046: 
"RESPONSE_SICP_INFO_DEFUNCT", - 2047: "REQUEST_LAYOUT_INFO", - 2048: "RESPONSE_LAYOUT_INFO", + 2045: "DEFUNCT_RPC_2045", + 2046: "DEFUNCT_RPC_2046", + 2047: "DEFUNCT_RPC_2047", + 2048: "DEFUNCT_RPC_2048", 2049: "REQUEST_FED_INFO", 2050: "RESPONSE_FED_INFO", 2051: "REQUEST_BATCH_SCRIPT", @@ -4697,6 +4859,11 @@ cdef class statistics: 2055: "REQUEST_BURST_BUFFER_STATUS", 2056: "RESPONSE_BURST_BUFFER_STATUS", + 2200: "REQUEST_CRONTAB", + 2201: "RESPONSE_CRONTAB", + 2202: "REQUEST_UPDATE_CRONTAB", + 2203: "RESPONSE_UPDATE_CRONTAB", + 3001: "REQUEST_UPDATE_JOB", 3002: "REQUEST_UPDATE_NODE", 3003: "REQUEST_CREATE_PARTITION", @@ -4706,10 +4873,12 @@ cdef class statistics: 3007: "RESPONSE_CREATE_RESERVATION", 3008: "REQUEST_DELETE_RESERVATION", 3009: "REQUEST_UPDATE_RESERVATION", - 3010: "REQUEST_UPDATE_BLOCK", + 3010: "DEFUNCT_RPC_3010", 3011: "REQUEST_UPDATE_FRONT_END", - 3012: "REQUEST_UPDATE_LAYOUT", - 3013: "REQUEST_UPDATE_POWERCAP", + 3012: "DEFUNCT_RPC_3012", + 3013: "DEFUNCT_RPC_3013", + 3014: "REQUEST_DELETE_NODE", + 3015: "REQUEST_CREATE_NODE", 4001: "REQUEST_RESOURCE_ALLOCATION", 4002: "RESPONSE_RESOURCE_ALLOCATION", @@ -4717,50 +4886,52 @@ cdef class statistics: 4004: "RESPONSE_SUBMIT_BATCH_JOB", 4005: "REQUEST_BATCH_JOB_LAUNCH", 4006: "REQUEST_CANCEL_JOB", - 4007: "RESPONSE_CANCEL_JOB", - 4008: "REQUEST_JOB_RESOURCE", - 4009: "RESPONSE_JOB_RESOURCE", - 4010: "REQUEST_JOB_ATTACH", - 4011: "RESPONSE_JOB_ATTACH", + 4007: "DEFUNCT_RPC_4007", + 4008: "DEFUNCT_RPC_4008", + 4009: "DEFUNCT_RPC_4009", + 4010: "DEFUNCT_RPC_4010", + 4011: "DEFUNCT_RPC_4011", 4012: "REQUEST_JOB_WILL_RUN", 4013: "RESPONSE_JOB_WILL_RUN", 4014: "REQUEST_JOB_ALLOCATION_INFO", 4015: "RESPONSE_JOB_ALLOCATION_INFO", - 4016: "DEFUNCT_REQUEST_JOB_ALLOCATION_INFO_LITE", - 4017: "DEFUNCT_RESPONSE_JOB_ALLOCATION_INFO_LITE", - 4018: "REQUEST_UPDATE_JOB_TIME", + 4016: "DEFUNCT_RPC_4017", + 4017: "DEFUNCT_RPC_4018", + 4018: "DEFUNCT_RPC_4019", 4019: "REQUEST_JOB_READY", 4020: "RESPONSE_JOB_READY", 4021: 
"REQUEST_JOB_END_TIME", 4022: "REQUEST_JOB_NOTIFY", 4023: "REQUEST_JOB_SBCAST_CRED", 4024: "RESPONSE_JOB_SBCAST_CRED", - 4025: "REQUEST_JOB_PACK_ALLOCATION", - 4026: "RESPONSE_JOB_PACK_ALLOCATION", - 4027: "REQUEST_JOB_PACK_ALLOC_INFO", - 4028: "REQUEST_SUBMIT_BATCH_JOB_PACK", + 4025: "REQUEST_HET_JOB_ALLOCATION", + 4026: "RESPONSE_HET_JOB_ALLOCATION", + 4027: "REQUEST_HET_JOB_ALLOC_INFO", + 4028: "REQUEST_SUBMIT_BATCH_HET_JOB", 4500: "REQUEST_CTLD_MULT_MSG", 4501: "RESPONSE_CTLD_MULT_MSG", 4502: "REQUEST_SIB_MSG", 4503: "REQUEST_SIB_JOB_LOCK", 4504: "REQUEST_SIB_JOB_UNLOCK", + 4505: "REQUEST_SEND_DEP", + 4506: "REQUEST_UPDATE_ORIGIN_DEP", 5001: "REQUEST_JOB_STEP_CREATE", 5002: "RESPONSE_JOB_STEP_CREATE", - 5003: "REQUEST_RUN_JOB_STEP", - 5004: "RESPONSE_RUN_JOB_STEP", + 5003: "DEFUNCT_RPC_5003", + 5004: "DEFUNCT_RPC_5004", 5005: "REQUEST_CANCEL_JOB_STEP", - 5006: "RESPONSE_CANCEL_JOB_STEP", + 5006: "DEFUNCT_RPC_5006", 5007: "REQUEST_UPDATE_JOB_STEP", - 5008: "DEFUNCT_RESPONSE_COMPLETE_JOB_STEP", - 5009: "REQUEST_CHECKPOINT", - 5010: "RESPONSE_CHECKPOINT", - 5011: "REQUEST_CHECKPOINT_COMP", - 5012: "REQUEST_CHECKPOINT_TASK_COMP", - 5013: "RESPONSE_CHECKPOINT_COMP", + 5008: "DEFUNCT_RPC_5008", + 5009: "DEFUNCT_RPC_5009", + 5010: "DEFUNCT_RPC_5010", + 5011: "DEFUNCT_RPC_5011", + 5012: "DEFUNCT_RPC_5012", + 5013: "DEFUNCT_RPC_5013", 5014: "REQUEST_SUSPEND", - 5015: "RESPONSE_SUSPEND", + 5015: "DEFUNCT_RPC_5015", 5016: "REQUEST_STEP_COMPLETE", 5017: "REQUEST_COMPLETE_JOB_ALLOCATION", 5018: "REQUEST_COMPLETE_BATCH_SCRIPT", @@ -4771,51 +4942,60 @@ cdef class statistics: 5023: "REQUEST_JOB_REQUEUE", 5024: "REQUEST_DAEMON_STATUS", 5025: "RESPONSE_SLURMD_STATUS", - 5026: "RESPONSE_SLURMCTLD_STATUS", + 5026: "DEFUNCT_RPC_5026", 5027: "REQUEST_JOB_STEP_PIDS", 5028: "RESPONSE_JOB_STEP_PIDS", 5029: "REQUEST_FORWARD_DATA", - 5030: "REQUEST_COMPLETE_BATCH_JOB", + 5030: "DEFUNCT_RPC_5030", 5031: "REQUEST_SUSPEND_INT", 5032: "REQUEST_KILL_JOB", - 5033: "REQUEST_KILL_JOBSTEP", + 
5033: "DEFUNCT_RPC_5033", 5034: "RESPONSE_JOB_ARRAY_ERRORS", 5035: "REQUEST_NETWORK_CALLERID", 5036: "RESPONSE_NETWORK_CALLERID", - 5037: "REQUEST_STEP_COMPLETE_AGGR", + 5037: "DEFUNCT_RPC_5037", 5038: "REQUEST_TOP_JOB", + 5039: "REQUEST_AUTH_TOKEN", + 5040: "RESPONSE_AUTH_TOKEN", 6001: "REQUEST_LAUNCH_TASKS", 6002: "RESPONSE_LAUNCH_TASKS", 6003: "MESSAGE_TASK_EXIT", 6004: "REQUEST_SIGNAL_TASKS", - 6005: "REQUEST_CHECKPOINT_TASKS", + 6005: "DEFUNCT_RPC_6005", 6006: "REQUEST_TERMINATE_TASKS", 6007: "REQUEST_REATTACH_TASKS", 6008: "RESPONSE_REATTACH_TASKS", 6009: "REQUEST_KILL_TIMELIMIT", - 6010: "DEFUNCT_REQUEST_SIGNAL_JOB", + 6010: "DEFUNCT_RPC_6010", 6011: "REQUEST_TERMINATE_JOB", 6012: "MESSAGE_EPILOG_COMPLETE", 6013: "REQUEST_ABORT_JOB", + 6014: "REQUEST_FILE_BCAST", - 6015: "TASK_USER_MANAGED_IO_STREAM", + 6015: "DEFUNCT_RPC_6015", 6016: "REQUEST_KILL_PREEMPTED", + 6017: "REQUEST_LAUNCH_PROLOG", 6018: "REQUEST_COMPLETE_PROLOG", 6019: "RESPONSE_PROLOG_EXECUTING", + 6500: "REQUEST_PERSIST_INIT", + 7001: "SRUN_PING", 7002: "SRUN_TIMEOUT", 7003: "SRUN_NODE_FAIL", 7004: "SRUN_JOB_COMPLETE", 7005: "SRUN_USER_MSG", - 7006: "SRUN_EXEC", + 7006: "DEFUNCT_RPC_7006", 7007: "SRUN_STEP_MISSING", 7008: "SRUN_REQUEST_SUSPEND", + 7009: "SRUN_STEP_SIGNAL", + + 7010: "SRUN_NET_FORWARD", 7201: "PMI_KVS_PUT_REQ", - 7202: "PMI_KVS_PUT_RESP", + 7202: "DEFUNCT_RPC_7202", 7203: "PMI_KVS_GET_REQ", 7204: "PMI_KVS_GET_RESP", @@ -4831,9 +5011,15 @@ cdef class statistics: 10004: "ACCOUNTING_TRES_CHANGE_DB", 10005: "ACCOUNTING_NODES_CHANGE_DB", - 11001: "MESSAGE_COMPOSITE", - 11002: "RESPONSE_MESSAGE_COMPOSITE"} - + 11001: "SLURMSCRIPTD_REQUEST_FLUSH", + 11002: "SLURMSCRIPTD_REQUEST_FLUSH_JOB", + 11003: "SLURMSCRIPTD_REQUEST_RECONFIG", + 11004: "SLURMSCRIPTD_REQUEST_RUN_SCRIPT", + 11005: "SLURMSCRIPTD_REQUEST_SCRIPT_COMPLETE", + 11006: "SLURMSCRIPTD_REQUEST_UPDATE_DEBUG_FLAGS", + 11007: "SLURMSCRIPTD_REQUEST_UPDATE_LOG", + 11008: "SLURMSCRIPTD_SHUTDOWN", + } return num2string[opcode] @@ 
-4843,7 +5029,7 @@ cdef class statistics: cdef class front_end: - """Class to access/update slurm front end node information.""" + """Access/update slurm front end node information.""" cdef: slurm.time_t Time @@ -4895,24 +5081,24 @@ cdef class front_end: def lastUpdate(self): """Return last time (sepoch seconds) the node data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate def ids(self): """Return the node IDs from retrieved data. - :returns: Dictionary of node IDs - :rtype: `dict` + Returns: + (dict): Dictionary of node IDs """ return list(self._FrontEndDict.keys()) def get(self): """Get front end node information. - :returns: Dictionary whose key is the Topology ID - :rtype: `dict` + Returns: + (dict): Dictionary whose key is the Topology ID """ self.__load() self.__get() @@ -4955,7 +5141,7 @@ cdef class front_end: cdef class qos: - """Class to access/update slurm QOS information.""" + """Access/update slurm QOS information.""" cdef: void *dbconn @@ -4975,7 +5161,6 @@ cdef class qos: def load(self): """Load slurm QOS information.""" - self.__load() cdef int __load(self) except? -1: @@ -4998,24 +5183,24 @@ cdef class qos: def lastUpdate(self): """Return last time (sepoch seconds) the QOS data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + int: epoch seconds """ return self._lastUpdate def ids(self): """Return the QOS IDs from retrieved data. - :returns: Dictionary of QOS IDs - :rtype: `dict` + Returns: + (dict): Dictionary of QOS IDs """ return self._QOSDict.keys() def get(self): """Get slurm QOS information. 
- :returns: Dictionary whose key is the QOS ID - :rtype: `dict` + Returns: + (dict): Dictionary whose key is the QOS ID """ self.__load() self.__get() @@ -5054,17 +5239,17 @@ cdef class qos: QOS_info['grp_tres_run_mins'] = slurm.stringOrNone(qos.grp_tres_run_mins, '') # QOS_info['grp_tres_run_mins_ctld'] QOS_info['grp_wall'] = qos.grp_wall - QOS_info['max_jobs_p'] = qos.max_jobs_pu - QOS_info['max_submit_jobs_p'] = qos.max_submit_jobs_pu + QOS_info['max_jobs_pu'] = qos.max_jobs_pu + QOS_info['max_submit_jobs_pu'] = qos.max_submit_jobs_pu QOS_info['max_tres_mins_pj'] = slurm.stringOrNone(qos.max_tres_mins_pj, '') # QOS_info['max_tres_min_pj_ctld'] QOS_info['max_tres_pj'] = slurm.stringOrNone(qos.max_tres_pj, '') # QOS_info['max_tres_min_pj_ctld'] QOS_info['max_tres_pn'] = slurm.stringOrNone(qos.max_tres_pn, '') # QOS_info['max_tres_min_pn_ctld'] - QOS_info['max_tres_p'] = slurm.stringOrNone(qos.max_tres_pu, '') + QOS_info['max_tres_pu'] = slurm.stringOrNone(qos.max_tres_pu, '') # QOS_info['max_tres_min_pu_ctld'] - QOS_info['max_tres_run_mins_p'] = slurm.stringOrNone( + QOS_info['max_tres_run_mins_pu'] = slurm.stringOrNone( qos.max_tres_run_mins_pu, '') QOS_info['max_wall_pj'] = qos.max_wall_pj @@ -5095,8 +5280,7 @@ cdef class qos: # slurmdbd jobs Class # cdef class slurmdb_jobs: - """Class to access Slurmdbd Jobs information.""" - + """Access Slurmdbd Jobs information.""" cdef: void* db_conn slurm.slurmdb_job_cond_t *job_cond @@ -5109,25 +5293,35 @@ cdef class slurmdb_jobs: slurm.xfree(self.job_cond) slurm.slurmdb_connection_close(&self.db_conn) - def get(self, jobids=[], userids=[], starttime=0, endtime=0, flags = None, db_flags = None, clusters = []): + def get(self, jobids=[], userids=[], starttime=0, endtime=0, flags = None, + db_flags = None, clusters = []): """Get Slurmdb information about some jobs. 
- + Input formats for start and end times: - * today or tomorrow - * midnight, noon, teatime (4PM) - * HH:MM [AM|PM] - * MMDDYY or MM/DD/YY or MM.DD.YY - * YYYY-MM-DD[THH[:MM[:SS]]] - * now + count [minutes | hours | days | weeks] - * + * today or tomorrow + * midnight, noon, teatime (4PM) + * HH:MM [AM|PM] + * MMDDYY or MM/DD/YY or MM.DD.YY + * YYYY-MM-DD[THH[:MM[:SS]]] + * now + count [minutes | hours | days | weeks] + * + Invalid time input results in message to stderr and return value of zero. - :param jobids: Ids of the jobs to search. Defaults to all jobs. - :param starttime: Select jobs eligible after this timestamp - :param endtime: Select jobs eligible before this timestamp - :returns: Dictionary whose key is the JOBS ID - :rtype: `dict` + Args: + jobids (list): Ids of the jobs to search. Defaults to all jobs. + starttime (int, optional): Select jobs eligible after this + timestamp + endtime (int, optional): Select jobs eligible before this + timestamp + userids (list): List of userids + flags (int): Flags + db_flags (int): DB Flags + clusters (list): List of clusters + + Returns: + (dict): Dictionary whose key is the JOBS ID """ cdef: int i = 0 @@ -5229,10 +5423,10 @@ cdef class slurmdb_jobs: if job.req_mem & slurm.MEM_PER_CPU: JOBS_info['req_mem'] = job.req_mem & (~slurm.MEM_PER_CPU) - JOBS_info['req_mem_per_cp'] = True + JOBS_info['req_mem_per_cpu'] = True else: JOBS_info['req_mem'] = job.req_mem - JOBS_info['req_mem_per_cp'] = False + JOBS_info['req_mem_per_cpu'] = False JOBS_info['requid'] = job.requid JOBS_info['resvid'] = job.resvid @@ -5350,8 +5544,7 @@ cdef class slurmdb_jobs: # slurmdbd Reservations Class # cdef class slurmdb_reservations: - """Class to access Slurmdbd reservations information.""" - + """Access Slurmdbd reservations information.""" cdef: void *dbconn slurm.slurmdb_reservation_cond_t *reservation_cond @@ -5363,10 +5556,14 @@ cdef class slurmdb_reservations: slurm.slurmdb_destroy_reservation_cond(self.reservation_cond) def 
set_reservation_condition(self, start_time, end_time): - """Limit the next get() call to reservations that start after and before a certain time. - - :param start_time: Select reservations that start after this timestamp - :param end_time: Select reservations that end before this timestamp + """Limit the next get() call to reservations that start after and + before a certain time. + + Args: + start_time (int): Select reservations that start after this + unix timestamp + end_time (int): Select reservations that end before this unix + timestamp """ if self.reservation_cond == NULL: self.reservation_cond = xmalloc(sizeof(slurm.slurmdb_reservation_cond_t)) @@ -5381,8 +5578,8 @@ cdef class slurmdb_reservations: def get(self): """Get slurm reservations information. - :returns: Dictionary whose keys are the reservations ids - :rtype: `dict` + Returns: + (dict): Dictionary whose keys are the reservations ids """ cdef: slurm.List reservation_list @@ -5451,8 +5648,7 @@ cdef class slurmdb_reservations: # slurmdbd clusters Class # cdef class slurmdb_clusters: - """Class to access Slurmdbd Clusters information.""" - + """Access Slurmdbd Clusters information.""" cdef: void *db_conn slurm.slurmdb_cluster_cond_t *cluster_cond @@ -5470,8 +5666,11 @@ cdef class slurmdb_clusters: """Limit the next get() call to clusters that existed after and before a certain time. - :param start_time: Select clusters that existed after this timestamp - :param end_time: Select clusters that existed before this timestamp + Args: + start_time (int): Select clusters that existed after this unix + timestamp + end_time (int): Select clusters that existed before this unix + timestamp """ if self.cluster_cond == NULL: self.cluster_cond = xmalloc(sizeof(slurm.slurmdb_cluster_cond_t)) @@ -5488,8 +5687,8 @@ cdef class slurmdb_clusters: def get(self): """Get slurm clusters information. 
- :returns: Dictionary whose keys are the clusters ids - :rtype: `dict` + Returns: + (dict): Dictionary whose keys are the clusters ids """ cdef: slurm.List clusters_list @@ -5567,8 +5766,7 @@ cdef class slurmdb_clusters: # slurmdbd Events Class # cdef class slurmdb_events: - """Class to access Slurmdbd events information.""" - + """Access Slurmdbd events information.""" cdef: void *dbconn slurm.slurmdb_event_cond_t *event_cond @@ -5580,10 +5778,12 @@ cdef class slurmdb_events: slurm.slurmdb_destroy_event_cond(self.event_cond) def set_event_condition(self, start_time, end_time): - """Limit the next get() call to conditions that existed after and before a certain time. + """Limit the next get() call to conditions that existed after and + before a certain time. - :param start_time: Select conditions that existed after this timestamp - :param end_time: Select conditions that existed before this timestamp + Args: + start_time (int): Select conditions that existed after this unix timestamp + end_time (int): Select conditions that existed before this unix timestamp """ if self.event_cond == NULL: self.event_cond = xmalloc(sizeof(slurm.slurmdb_event_cond_t)) @@ -5598,8 +5798,8 @@ cdef class slurmdb_events: def get(self): """Get slurm events information. 
- :returns: Dictionary whose keys are the events ids - :rtype: `dict` + Returns: + (dict): Dictionary whose keys are the events ids """ cdef: slurm.List event_list @@ -5645,8 +5845,7 @@ cdef class slurmdb_events: # cdef class slurmdb_reports: - """Class to access Slurmdbd reports.""" - + """Access Slurmdbd reports.""" cdef: void *db_conn slurm.slurmdb_assoc_cond_t *assoc_cond @@ -5657,9 +5856,16 @@ cdef class slurmdb_reports: def __dealloc__(self): slurm.slurmdb_destroy_assoc_cond(self.assoc_cond) - def report_cluster_account_by_user(self, starttime=None, endtime=None): - """ - sreport cluster AccountUtilizationByUser + def report_cluster_account_by_user(self, starttime=None, + endtime=None): + """sreport cluster AccountUtilizationByUser + + Args: + starttime (Union[str, int]): Start time + endtime (Union[str, int]): End time + + Returns: + (dict): sreport information. """ cdef: slurm.List slurmdb_report_cluster_list = NULL @@ -5752,10 +5958,8 @@ cdef class slurmdb_reports: def get_last_slurm_error(): """Get and return the last error from a slurm API call. - :returns: Slurm error number and the associated error string - :rtype: `integer` - :returns: Slurm error string - :rtype: `string` + Returns: + (int): Slurm error number and the associated error string """ rc = slurm.slurm_get_errno() @@ -5767,9 +5971,11 @@ def get_last_slurm_error(): cdef inline dict __get_licenses(char *licenses): """Returns a dict of licenses from the slurm license string. - :param string licenses: String containing license information - :returns: Dictionary of licenses and associated value. - :rtype: `dict` + Args: + licenses (str): String containing license information + + Returns: + dict: Dictionary of licenses and associated value. """ if (licenses is NULL): return {} @@ -5795,9 +6001,11 @@ cdef inline dict __get_licenses(char *licenses): def get_node_use(inx): """Returns a string that represents the block node mode.
- :param int ResType: Slurm block node usage - :returns: Block node usage string - :rtype: `string` + Args: + inx (int): Slurm block node usage + + Returns: + use (str): Block node usage string """ return slurm.slurm_node_state_string(inx) @@ -5805,16 +6013,18 @@ def get_node_use(inx): def get_trigger_res_type(uint16_t inx): """Returns a string that represents the slurm trigger res type. - :param int ResType: Slurm trigger res state - - TRIGGER_RES_TYPE_JOB 1 - - TRIGGER_RES_TYPE_NODE 2 - - TRIGGER_RES_TYPE_SLURMCTLD 3 - - TRIGGER_RES_TYPE_SLURMDBD 4 - - TRIGGER_RES_TYPE_DATABASE 5 - - TRIGGER_RES_TYPE_FRONT_END 6 - - TRIGGER_RES_TYPE_OTHER 7 - :returns: Trigger reservation state string - :rtype: `string` + Args: + inx (int): Slurm trigger res state + * TRIGGER_RES_TYPE_JOB 1 + * TRIGGER_RES_TYPE_NODE 2 + * TRIGGER_RES_TYPE_SLURMCTLD 3 + * TRIGGER_RES_TYPE_SLURMDBD 4 + * TRIGGER_RES_TYPE_DATABASE 5 + * TRIGGER_RES_TYPE_FRONT_END 6 + * TRIGGER_RES_TYPE_OTHER 7 + + Returns: + (str): Trigger reservation state string """ return __get_trigger_res_type(inx) @@ -5842,29 +6052,31 @@ cdef inline object __get_trigger_res_type(uint16_t ResType): def get_trigger_type(uint32_t inx): """Returns a string that represents the state of the slurm trigger.
- :param int TriggerType: Slurm trigger type - - TRIGGER_TYPE_UP 0x00000001 - - TRIGGER_TYPE_DOWN 0x00000002 - - TRIGGER_TYPE_FAIL 0x00000004 - - TRIGGER_TYPE_TIME 0x00000008 - - TRIGGER_TYPE_FINI 0x00000010 - - TRIGGER_TYPE_RECONFIG 0x00000020 - - TRIGGER_TYPE_IDLE 0x00000080 - - TRIGGER_TYPE_DRAINED 0x00000100 - - TRIGGER_TYPE_PRI_CTLD_FAIL 0x00000200 - - TRIGGER_TYPE_PRI_CTLD_RES_OP 0x00000400 - - TRIGGER_TYPE_PRI_CTLD_RES_CTRL 0x00000800 - - TRIGGER_TYPE_PRI_CTLD_ACCT_FULL 0x00001000 - - TRIGGER_TYPE_BU_CTLD_FAIL 0x00002000 - - TRIGGER_TYPE_BU_CTLD_RES_OP 0x00004000 - - TRIGGER_TYPE_BU_CTLD_AS_CTRL 0x00008000 - - TRIGGER_TYPE_PRI_DBD_FAIL 0x00010000 - - TRIGGER_TYPE_PRI_DBD_RES_OP 0x00020000 - - TRIGGER_TYPE_PRI_DB_FAIL 0x00040000 - - TRIGGER_TYPE_PRI_DB_RES_OP 0x00080000 - - TRIGGER_TYPE_BURST_BUFFER 0x00100000 - :returns: Trigger state string - :rtype: `string` + Args: + inx (int): Slurm trigger type + * TRIGGER_TYPE_UP 0x00000001 + * TRIGGER_TYPE_DOWN 0x00000002 + * TRIGGER_TYPE_FAIL 0x00000004 + * TRIGGER_TYPE_TIME 0x00000008 + * TRIGGER_TYPE_FINI 0x00000010 + * TRIGGER_TYPE_RECONFIG 0x00000020 + * TRIGGER_TYPE_IDLE 0x00000080 + * TRIGGER_TYPE_DRAINED 0x00000100 + * TRIGGER_TYPE_PRI_CTLD_FAIL 0x00000200 + * TRIGGER_TYPE_PRI_CTLD_RES_OP 0x00000400 + * TRIGGER_TYPE_PRI_CTLD_RES_CTRL 0x00000800 + * TRIGGER_TYPE_PRI_CTLD_ACCT_FULL 0x00001000 + * TRIGGER_TYPE_BU_CTLD_FAIL 0x00002000 + * TRIGGER_TYPE_BU_CTLD_RES_OP 0x00004000 + * TRIGGER_TYPE_BU_CTLD_AS_CTRL 0x00008000 + * TRIGGER_TYPE_PRI_DBD_FAIL 0x00010000 + * TRIGGER_TYPE_PRI_DBD_RES_OP 0x00020000 + * TRIGGER_TYPE_PRI_DB_FAIL 0x00040000 + * TRIGGER_TYPE_PRI_DB_RES_OP 0x00080000 + * TRIGGER_TYPE_BURST_BUFFER 0x00100000 + + Returns: + (str): Trigger state string """ return __get_trigger_type(inx) @@ -5939,7 +6151,7 @@ cdef inline object __get_trigger_type(uint32_t TriggerType): # - RESERVE_FLAG_TIME_FLOAT 0x00020000 # - RESERVE_FLAG_REPLACE 0x00040000 # :returns: Reservation state string -# :rtype:
`string` +# :rtype: string # """ # try: # return slurm.slurm_reservation_flags_string(inx) @@ -5948,11 +6160,13 @@ cdef inline object __get_trigger_type(uint32_t TriggerType): def get_debug_flags(uint64_t inx): - """ Returns a string that represents the slurm debug flags. + """Returns a list of strings that represent the slurm debug flags. + + Args: + inx (int): Slurm debug flags - :param int flags: Slurm debug flags - :returns: Debug flag string - :rtype: `string` + Returns: + (list): Debug flag strings """ return debug_flags2str(inx) @@ -6016,9 +6230,6 @@ cdef inline list debug_flags2str(uint64_t debug_flags): if (debug_flags & DEBUG_FLAG_DB_WCKEY): debugFlags.append('DB_WCKey') - if (debug_flags & DEBUG_FLAG_ESEARCH): - debugFlags.append('Elasticsearch') - if (debug_flags & DEBUG_FLAG_ENERGY): debugFlags.append('Energy') @@ -6100,9 +6311,11 @@ cdef inline list debug_flags2str(uint64_t debug_flags): def get_node_state(uint32_t inx): """Returns a string that represents the state of the slurm node. - :param int inx: Slurm node state - :returns: Node state string - :rtype: `string` + Args: + inx (int): Slurm node state + + Returns: + state (str): Node state string """ return slurm.slurm_node_state_string(inx) @@ -6110,9 +6323,11 @@ def get_node_state(uint32_t inx): def get_rm_partition_state(int inx): """Returns a string that represents the partition state. - :param int inx: Slurm partition state - :returns: Partition state string - :rtype: `string` + Args: + inx (int): Slurm partition state + + Returns: + (str): Partition state string """ return __get_rm_partition_state(inx) @@ -6140,14 +6355,16 @@ cdef inline object __get_rm_partition_state(int inx): def get_preempt_mode(uint16_t inx): """Returns a string that represents the preempt mode.
- :param int inx: Slurm preempt mode - - PREEMPT_MODE_OFF 0x0000 - - PREEMPT_MODE_SUSPEND 0x0001 - - PREEMPT_MODE_REQUEUE 0x0002 - - PREEMPT_MODE_CANCEL 0x0008 - - PREEMPT_MODE_GANG 0x8000 - :returns: Preempt mode string - :rtype: `string` + Args: + inx (int): Slurm preempt mode + * PREEMPT_MODE_OFF 0x0000 + * PREEMPT_MODE_SUSPEND 0x0001 + * PREEMPT_MODE_REQUEUE 0x0002 + * PREEMPT_MODE_CANCEL 0x0008 + * PREEMPT_MODE_GANG 0x8000 + + Returns: + mode (str): Preempt mode string """ return slurm.slurm_preempt_mode_string(inx) @@ -6155,13 +6372,15 @@ def get_preempt_mode(uint16_t inx): def get_partition_state(uint16_t inx): """Returns a string that represents the state of the slurm partition. - :param int inx: Slurm partition state - - PARTITION_DOWN 0x01 - - PARTITION_UP 0x01 | 0x02 - - PARTITION_DRAIN 0x02 - - PARTITION_INACTIVE 0x00 - :returns: Partition state string - :rtype: `string` + Args: + inx (int): Slurm partition state + * PARTITION_DOWN 0x01 + * PARTITION_UP 0x01 | 0x02 + * PARTITION_DRAIN 0x02 + * PARTITION_INACTIVE 0x00 + + Returns: + (str): Partition state string """ state = "" if inx: @@ -6181,10 +6400,12 @@ def get_partition_state(uint16_t inx): cdef inline object __get_partition_state(int inx, int extended=0): """Returns a string that represents the state of the partition. - :param int inx: Slurm partition type - :param int extended: - :returns: Partition state - :rtype: `string` + Args: + inx (int): Slurm partition type + extended (int): extended flag + + Returns: + str: Partition state """ cdef: int drain_flag = (inx & 0x0200) @@ -6238,9 +6459,12 @@ cdef inline object __get_partition_state(int inx, int extended=0): def get_partition_mode(uint16_t flags=0, uint16_t max_share=0): """Returns a string represents the state of the partition mode. 
- :param int inx: Slurm partition mode - :returns: Partition mode string - :rtype: `string` + Args: + flags (int): Flags + max_share (int): Max share + + Returns: + (dict): Partition mode dict """ return __get_partition_mode(flags, max_share) @@ -6295,22 +6519,24 @@ cdef inline dict __get_partition_mode(uint16_t flags=0, uint16_t max_share=0): def get_job_state(inx): """Return the state of the slurm job state. - :param int inx: Slurm job state - - JOB_PENDING 0 - - JOB_RUNNING 1 - - JOB_SUSPENDED 2 - - JOB_COMPLETE 3 - - JOB_CANCELLED 4 - - JOB_FAILED 5 - - JOB_TIMEOUT 6 - - JOB_NODE_FAIL 7 - - JOB_PREEMPTED 8 - - JOB_BOOT_FAIL 10 - - JOB_DEADLINE 11 - - JOB_OOM 12 - - JOB_END - :returns: Job state string - :rtype: `string` + Args: + inx (int): Slurm job state + * JOB_PENDING 0 + * JOB_RUNNING 1 + * JOB_SUSPENDED 2 + * JOB_COMPLETE 3 + * JOB_CANCELLED 4 + * JOB_FAILED 5 + * JOB_TIMEOUT 6 + * JOB_NODE_FAIL 7 + * JOB_PREEMPTED 8 + * JOB_BOOT_FAIL 10 + * JOB_DEADLINE 11 + * JOB_OOM 12 + * JOB_END + + Returns: + (str): Job state string """ try: job_state = slurm.stringOrNone(slurm.slurm_job_state_string(inx), '') @@ -6322,9 +6548,11 @@ def get_job_state(inx): def get_job_state_reason(inx): """Returns a reason why the slurm job is in a provided state. - :param int inx: Slurm job state reason - :returns: Reason string - :rtype: `string` + Args: + inx (int): Slurm job state reason + + Returns: + (str): Reason string """ job_reason = slurm.stringOrNone(slurm.slurm_job_reason_string(inx), '') return job_reason @@ -6333,9 +6561,11 @@ def get_job_state_reason(inx): def epoch2date(epochSecs): """Convert epoch secs to a python time string. 
- :param int epochSecs: Seconds since epoch - :returns: Date - :rtype: `string` + Args: + epochSecs (int): Seconds since epoch + + Returns: + (str): Date str """ try: dateTime = p_time.gmtime(epochSecs) @@ -6371,7 +6601,7 @@ class Dict(defaultdict): cdef class licenses: - """Class to access slurm controller license information.""" + """Access slurm controller license information.""" cdef: slurm.license_info_msg_t *_msg @@ -6391,20 +6621,20 @@ cdef class licenses: def lastUpdate(self): """Return last time (epoch seconds) license data was updated. - :returns: epoch seconds - :rtype: `integer` + Returns: + (int): Epoch seconds """ return self._lastUpdate def ids(self): """Return the current license names from retrieved license data. - + This method calls slurm_load_licenses to retrieve license information from the controller. slurm_free_license_info_msg is used to free the license message buffer. - :returns: Dictionary of licenses - :rtype: `dict` + Returns: + (dict): Dictionary of licenses """ cdef: int rc @@ -6428,15 +6658,15 @@ cdef class licenses: apiError = slurm.slurm_get_errno() raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) - cpdef get(self): + def get(self): """Get full license information from the slurm controller. This method calls slurm_load_licenses to retrieve license information from the controller. slurm_free_license_info_msg is used to free the license message buffer. 
- :returns: Dictionary whose key is the license name - :rtype: `dict` + Returns: + (dict): Dictionary whose key is the license name """ cdef: int rc @@ -6464,6 +6694,3 @@ cdef class licenses: else: apiError = slurm.slurm_get_errno() raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) - -# Automatically load Slurm configuration data structure at pyslurm module load -slurm_init() diff --git a/pyslurm/settings.pyx b/pyslurm/settings.pyx new file mode 100644 index 00000000..5085a9f5 --- /dev/null +++ b/pyslurm/settings.pyx @@ -0,0 +1,33 @@ +######################################################################### +# settings.pyx - pyslurm global settings +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.core import slurmctld +from pyslurm cimport slurm +from pyslurm.utils cimport cstr + + +LOCAL_CLUSTER = cstr.to_unicode(slurm.slurm_conf.cluster_name) +if not LOCAL_CLUSTER: + slurm_conf = slurmctld.Config.load() + LOCAL_CLUSTER = slurm_conf.cluster diff --git a/pyslurm/slurm/SLURM_DISCLAIMER b/pyslurm/slurm/SLURM_DISCLAIMER new file mode 100644 index 00000000..5fb615d5 --- /dev/null +++ b/pyslurm/slurm/SLURM_DISCLAIMER @@ -0,0 +1,159 @@ +Slurm was produced at Lawrence Livermore National Laboratory in collaboration +with various organizations. + +Copyright (C) 2012-2013 Los Alamos National Security, LLC. +Copyright (C) 2011 Trinity Centre for High Performance Computing +Copyright (C) 2010-2015 SchedMD LLC +Copyright (C) 2009-2013 CEA/DAM/DIF +Copyright (C) 2009-2011 Centro Svizzero di Calcolo Scientifico (CSCS) +Copyright (C) 2008-2011 Lawrence Livermore National Security +Copyright (C) 2008 Vijay Ramasubramanian +Copyright (C) 2007-2008 Red Hat, Inc. +Copyright (C) 2007-2013 National University of Defense Technology, China +Copyright (C) 2007-2015 Bull +Copyright (C) 2005-2008 Hewlett-Packard Development Company, L.P. +Copyright (C) 2004-2009, Marcus Holland-Moritz +Copyright (C) 2002-2007 The Regents of the University of California +Copyright (C) 2002-2003 Linux NetworX +Copyright (C) 2002 University of Chicago +Copyright (C) 2001, Paul Marquess +Copyright (C) 2000 Markus Friedl +Copyright (C) 1999, Kenneth Albanowski +Copyright (C) 1998 Todd C. Miller +Copyright (C) 1996-2003 Maximum Entropy Data Consultants Ltd, +Copyright (C) 1995 Tatu Ylonen , Espoo, Finland +Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc. +Many other organizations contributed code and/or documentation without +including a copyright notice. 
+ +Written by: +Amjad Majid Ali (Colorado State University) +Par Andersson (National Supercomputer Centre, Sweden) +Don Albert (Bull) +Ernest Artiaga (Barcelona Supercomputer Center, Spain) +Danny Auble (LLNL, SchedMD LLC) +Susanne Balle (HP) +Anton Blanchard (Samba) +Janne Blomqvist (Aalto University, Finland) +David Bremer (LLNL) +Jon Bringhurst (LANL) +Bill Brophy (Bull) +Hongjia Cao (National University of Defense Techonogy, China) +Daniel Christians (HP) +Gilles Civario (Bull) +Chuck Clouston (Bull) +Joseph Donaghy (LLNL) +Chris Dunlap (LLNL) +Joey Ekstrom (LLNL/Bringham Young University) +Josh England (TGS Management Corporation) +Kent Engstrom (National Supercomputer Centre, Sweden) +Jim Garlick (LLNL) +Didier Gazen (Laboratoire d'Aerologie, France) +Raphael Geissert (Debian) +Yiannis Georgiou (Bull) +Andriy Grytsenko (Massive Solutions Limited, Ukraine) +Mark Grondona (LLNL) +Takao Hatazaki (HP, Japan) +Matthieu Hautreux (CEA, France) +Chris Holmes (HP) +David Hoppner +Nathan Huff (North Dakota State University) +David Jackson (Adaptive Computing) +Morris Jette (LLNL, SchedMD LLC) +Klaus Joas (University Karlsruhe, Germany) +Greg Johnson (LANL) +Jason King (LLNL) +Aaron Knister (Environmental Protection Agency) +Nancy Kritkausky (Bull) +Roman Kurakin (Institute of Natural Science and Ecology, Russia) +Eric Lin (Bull) +Don Lipari (LLNL) +Puenlap Lee (Bull) +Dennis Leepow +Bernard Li (Genome Sciences Centre, Canada) +Donald Lipari (LLNL) +Steven McDougall (SiCortex) +Donna Mecozzi (LLNL) +Bjorn-Helge Mevik (University of Oslo, Norway) +Chris Morrone (LLNL) +Pere Munt (Barcelona Supercomputer Center, Spain) +Michal Novotny (Masaryk University, Czech Republic) +Bryan O'Sullivan (Pathscale) +Gennaro Oliva (Institute of High Performance Computing and Networking, Italy) +Alejandro Lucero Palau (Barcelona Supercomputer Center, Spain) +Daniel Palermo (HP) +Dan Phung (LLNL/Columbia University) +Ashley Pittman (Quadrics, UK) +Vijay Ramasubramanian (University of 
Maryland) +Krishnakumar Ravi[KK] (HP) +Petter Reinholdtsen (University of Oslo, Norway) +Gerrit Renker (Swiss National Computer Centre) +Andy Riebs (HP) +Asier Roa (Barcelona Supercomputer Center, Spain) +Miguel Ros (Barcelona Supercomputer Center, Spain) +Beat Rubischon (DALCO AG, Switzerland) +Dan Rusak (Bull) +Eygene Ryabinkin (Kurchatov Institute, Russia) +Federico Sacerdoti (D.E. Shaw) +Rod Schultz (Bull) +Tyler Strickland (University of Florida) +Jeff Squyres (LAM MPI) +Prashanth Tamraparni (HP, India) +Jimmy Tang (Trinity College, Ireland) +Kevin Tew (LLNL/Bringham Young University) +Adam Todorski (Rensselaer Polytechnic Institute) +Nathan Weeks (Iowa State University) +Tim Wickberg (Rensselaer Polytechnic Institute) +Ramiro Brito Willmersdorf (Universidade Federal de Pemambuco, Brazil) +Jay Windley (Linux NetworX) +Anne-Marie Wunderlin (Bull) + +CODE-OCEC-09-009. All rights reserved. + +This file is part of Slurm, a resource management program. +For details, see <https://slurm.schedmd.com/>. +Please also read the supplied file: DISCLAIMER. + +Slurm is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. + +Slurm is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along +with Slurm; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +OUR NOTICE AND TERMS OF AND CONDITIONS OF THE GNU GENERAL PUBLIC LICENSE + +Our Preamble Notice + +Auspices + +This work performed under the auspices of the U.S. Department of Energy by +Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344.
+ +Disclaimer + +This work was sponsored by an agency of the United States government. +Neither the United States Government nor Lawrence Livermore National +Security, LLC, nor any of their employees, makes any warranty, express +or implied, or assumes any liability or responsibility for the accuracy, +completeness, or usefulness of any information, apparatus, product, or +process disclosed, or represents that its use would not infringe privately +owned rights. References herein to any specific commercial products, process, +or services by trade names, trademark, manufacturer or otherwise does not +necessarily constitute or imply its endorsement, recommendation, or +favoring by the United States Government or the Lawrence Livermore National +Security, LLC. The views and opinions of authors expressed herein do not +necessarily state or reflect those of the United States government or +Lawrence Livermore National Security, LLC, and shall not be used for +advertising or product endorsement purposes. + +The precise terms and conditions for copying, distribution and modification +is provided in the file named "COPYING" in this directory. diff --git a/pyslurm/slurm/SLURM_LICENSE b/pyslurm/slurm/SLURM_LICENSE new file mode 100644 index 00000000..0fd4db48 --- /dev/null +++ b/pyslurm/slurm/SLURM_LICENSE @@ -0,0 +1,389 @@ + SLURM LICENSE AGREEMENT + +All Slurm code and documentation is available under the GNU General Public +License. Some tools in the "contribs" directory have other licenses. See +the documentation for individual contributed tools for details. + +In addition, as a special exception, the copyright holders give permission +to link the code of portions of this program with the OpenSSL library under +certain conditions as described in each individual source file, and distribute +linked combinations including the two. You must obey the GNU General Public +License in all respects for all of the code used other than OpenSSL. 
If you +modify file(s) with this exception, you may extend this exception to your +version of the file(s), but you are not obligated to do so. If you do not +wish to do so, delete this exception statement from your version. If you +delete this exception statement from all source files in the program, then +also delete it here. + +NO WARRANTY: Because the program is licensed free of charge, there is no +warranty for the program. See section 11 below for full details. + +============================================================================= + +OUR NOTICE AND TERMS OF AND CONDITIONS OF THE GNU GENERAL PUBLIC LICENSE + +Auspices + +Portions of this work were performed under the auspices of the U.S. Department +of Energy by Lawrence Livermore National Laboratory under Contract +DE-AC52-07NA27344. + +Disclaimer + +This work was sponsored by an agency of the United States government. +Neither the United States Government nor Lawrence Livermore National +Security, LLC, nor any of their employees, makes any warranty, express +or implied, or assumes any liability or responsibility for the accuracy, +completeness, or usefulness of any information, apparatus, product, or +process disclosed, or represents that its use would not infringe privately +owned rights. References herein to any specific commercial products, process, +or services by trade names, trademark, manufacturer or otherwise does not +necessarily constitute or imply its endorsement, recommendation, or +favoring by the United States Government or the Lawrence Livermore National +Security, LLC. The views and opinions of authors expressed herein do not +necessarily state or reflect those of the United States government or +Lawrence Livermore National Security, LLC, and shall not be used for +advertising or product endorsement purposes. 
+ +============================================================================= + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License.
diff --git a/pyslurm/slurm/__init__.pxd b/pyslurm/slurm/__init__.pxd index d779bb18..f29bfc00 100644 --- a/pyslurm/slurm/__init__.pxd +++ b/pyslurm/slurm/__init__.pxd @@ -1,36 +1,66 @@ from libcpp cimport bool -from posix.unistd cimport uid_t, pid_t, gid_t -from libc.stdint cimport int8_t, int16_t, int32_t, int64_t -from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t from cpython.version cimport PY_MAJOR_VERSION -from libc.string cimport strlen, memcpy -cdef extern from "" nogil: - ctypedef struct sockaddr_in - ctypedef struct sockaddr_storage +from posix.unistd cimport ( + uid_t, + pid_t, + gid_t, +) + +from libc.stdint cimport ( + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +from libc.string cimport ( + strlen, + memcpy, +) + + +cdef extern from '' nogil: + ctypedef struct sockaddr_storage: + pass + cdef extern from '' nogil: ctypedef struct FILE cdef FILE *stdout -cdef extern from 'time.h' nogil: + +cdef extern from '' nogil: ctypedef long time_t double difftime(time_t time1, time_t time2) time_t time(time_t *t) + cdef extern from '' nogil: cdef FILE *PyFile_AsFile(object file) char *__FILE__ cdef int __LINE__ char *__FUNCTION__ -cdef extern from "" nogil: - ctypedef union pthread_mutex_t + +cdef extern from '' nogil: + ctypedef struct pthread_mutex_t: + pass + + ctypedef struct pthread_cond_t: + pass + + ctypedef struct pthread_t: + pass + cdef extern from *: ctypedef struct slurm_job_credential ctypedef struct switch_jobinfo - ctypedef struct job_resources ctypedef struct select_jobinfo ctypedef struct select_nodeinfo ctypedef struct jobacctinfo @@ -42,8 +72,12 @@ cdef extern from *: ctypedef struct slurm_step_ctx_struct ctypedef struct slurm_ctl_conf_t -# Header definitions combined from slurm.h, slurmdb.h and slurm_errno.h -include "header.pxi" + +# Header definitions +include "slurm_version.h.pxi" +include "slurm_errno.h.pxi" +include "slurm.h.pxi" +include "slurmdb.h.pxi" # Any other definitions 
which are not directly in # the header files, but exported in libslurm.so diff --git a/pyslurm/slurm/extra.pxi b/pyslurm/slurm/extra.pxi index f48572d4..3557b0b9 100644 --- a/pyslurm/slurm/extra.pxi +++ b/pyslurm/slurm/extra.pxi @@ -1,24 +1,184 @@ -# Global Environment -cdef extern char **environ +# +# Structs that are not in the Slurm headers, which need to be redefined +# in order to implement certain features. +# +# For example: to communicate with the slurmctld directly in order +# to retrieve the actual batch-script as a string. +# +# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L51 +ctypedef enum persist_conn_type_t: + PERSIST_TYPE_NONE = 0 + PERSIST_TYPE_DBD + PERSIST_TYPE_FED + PERSIST_TYPE_HA_CTL + PERSIST_TYPE_HA_DBD + PERSIST_TYPE_ACCT_UPDATE + +# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L59 +ctypedef struct persist_msg_t: + void *conn + void *data + uint32_t data_size + uint16_t msg_type + +ctypedef int (*_slurm_persist_conn_t_callback_proc) (void *arg, persist_msg_t *msg, buf_t **out_buffer, uint32_t *uid) + +ctypedef void (*_slurm_persist_conn_t_callback_fini)(void *arg) + +# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L66 +ctypedef struct slurm_persist_conn_t: + void *auth_cred + _slurm_persist_conn_t_callback_proc callback_proc + _slurm_persist_conn_t_callback_fini callback_fini + char *cluster_name + time_t comm_fail_time + uint16_t my_port + int fd + uint16_t flags + bool inited + persist_conn_type_t persist_type + uid_t r_uid + char *rem_host + uint16_t rem_port + time_t *shutdown + pthread_t thread_id + int timeout + slurm_trigger_callbacks_t trigger_callbacks; + uint16_t version + +# https://github.com/SchedMD/slurm/blob/20e2b354168aeb0f76d67f80122d80925c2ef32b/src/common/pack.h#L68 +ctypedef struct buf_t: + uint32_t magic + char *head + 
uint32_t size + uint32_t processed + bool mmaped + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h +ctypedef struct return_code_msg_t: + uint32_t return_code + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L650 +ctypedef struct job_id_msg_t: + uint32_t job_id + uint16_t show_flags + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L216 +# Only partially defined - not everything needed at the moment. +ctypedef enum slurm_msg_type_t: + REQUEST_SHARE_INFO = 2022 + REQUEST_BATCH_SCRIPT = 2051 + RESPONSE_BATCH_SCRIPT = 2052 + RESPONSE_SLURM_RC = 8001 + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L469 +ctypedef struct forward_t: + uint16_t cnt + uint16_t init + char *nodelist + uint32_t timeout + uint16_t tree_width + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L491 +ctypedef struct forward_struct_t: + char *buf + int buf_len + uint16_t fwd_cnt + pthread_mutex_t forward_mutex + pthread_cond_t notify + List ret_list + uint32_t timeout + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L514 +ctypedef struct slurm_msg_t: + slurm_addr_t address + void *auth_cred + int auth_index + uid_t auth_uid + bool auth_uid_set + uid_t restrict_uid + bool restrict_uid_set + uint32_t body_offset + buf_t *buffer + slurm_persist_conn_t *conn + int conn_fd + void *data + uint32_t data_size + uint16_t flags + uint8_t hash_index + uint16_t msg_type + uint16_t protocol_version + forward_t forward + forward_struct_t *forward_struct + slurm_addr_t orig_addr + List ret_list + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.c#L865 +cdef extern
void slurm_free_return_code_msg(return_code_msg_t *msg) + +# https://github.com/SchedMD/slurm/blob/2d2e83674b59410a7ed8ab6fc8d8acfcfa8beaf9/src/common/slurm_protocol_api.c#L2401 +cdef extern int slurm_send_recv_controller_msg(slurm_msg_t *request_msg, + slurm_msg_t *response_msg, + slurmdb_cluster_rec_t *working_cluster_rec) + +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.c#L168 +cdef extern void slurm_msg_t_init(slurm_msg_t *msg) + +# https://github.com/SchedMD/slurm/blob/master/src/common/job_resources.h +ctypedef struct job_resources: + bitstr_t *core_bitmap + bitstr_t *core_bitmap_used + uint32_t cpu_array_cnt + uint16_t *cpu_array_value + uint32_t *cpu_array_reps + uint16_t *cpus + uint16_t *cpus_used + uint16_t *cores_per_socket + uint16_t cr_type + uint64_t *memory_allocated + uint64_t *memory_used + uint32_t nhosts + bitstr_t *node_bitmap + uint32_t node_req + char *nodes + uint32_t ncpus + uint32_t *sock_core_rep_count + uint16_t *sockets_per_node + uint16_t *tasks_per_node + uint16_t threads_per_core + uint8_t whole_node # -# Slurm Memory routines +# TRES # +ctypedef enum tres_types_t: + TRES_CPU = 1 + TRES_MEM + TRES_ENERGY + TRES_NODE + TRES_BILLING + TRES_FS_DISK + TRES_VMEM + TRES_PAGES + TRES_STATIC_CNT -cdef extern void slurm_xfree (void **) -cdef extern void *slurm_xcalloc(size_t, size_t, bool, bool, const char *, int, const char *) +# Global Environment +cdef extern char **environ -cdef inline xfree(void *__p): - slurm_xfree(&__p) +# Local slurm config +cdef extern slurm_conf_t slurm_conf -cdef inline void *xmalloc(size_t __sz): - return slurm_xcalloc(1, __sz, True, False, __FILE__, __LINE__, __FUNCTION__) +# +# Slurm Memory routines +# We simply use the macros from xmalloc.h - more convenient +# -cdef inline void *try_xmalloc(size_t __sz): - return slurm_xcalloc(1, __sz, True, True, __FILE__, __LINE__, __FUNCTION__) +cdef extern from "pyslurm/slurm/xmalloc.h" nogil: + void 
xfree(void *__p) + void *xmalloc(size_t __sz) + void *try_xmalloc(size_t __sz) -cdef inline void xfree_ptr(void *__p): - slurm_xfree(&__p) +cdef extern void slurm_xfree_ptr(void *) # # Slurm xstring functions @@ -49,6 +209,16 @@ cdef extern void slurm_free_job_step_info_members(job_step_info_t *msg) cdef extern char *slurm_job_state_string(uint16_t inx) cdef extern char *slurm_job_reason_string(int inx) cdef extern char *slurm_job_share_string(uint16_t shared) +cdef extern void slurm_free_update_step_msg(step_update_request_msg_t *msg) + +# +# Slurm Node functions +# + +cdef extern int slurm_get_select_nodeinfo(dynamic_plugin_data_t *nodeinfo, select_nodedata_type data_type, node_states state, void *data) +cdef extern char *slurm_node_state_string_complete(uint32_t inx) +cdef extern void slurm_free_update_node_msg(update_node_msg_t *msg) +cdef extern void slurm_free_node_info_members(node_info_t *node) # # Slurm environment functions @@ -63,6 +233,7 @@ cdef extern void slurm_env_array_free(char **env_array) # cdef extern char *slurm_preempt_mode_string (uint16_t preempt_mode) +cdef extern uint16_t slurm_preempt_mode_num (const char *preempt_mode) cdef extern char *slurm_node_state_string (uint32_t inx) cdef extern char *slurm_step_layout_type_name (task_dist_states_t task_dist) cdef extern char *slurm_reservation_flags_string (reserve_info_t *resv_ptr) @@ -71,3 +242,45 @@ cdef extern int slurm_addto_char_list_with_case(List char_list, char *names, boo cdef extern int slurm_addto_step_list(List step_list, char *names) cdef extern int slurmdb_report_set_start_end_time(time_t *start, time_t *end) cdef extern uint16_t slurm_get_track_wckey() +cdef extern void slurm_sprint_cpu_bind_type(char *str, cpu_bind_type_t cpu_bind_type) + +# Slurm bit functions + +cdef extern bitstr_t *slurm_bit_alloc(bitoff_t nbits) +cdef extern void slurm_bit_set(bitstr_t *b, bitoff_t bit) +cdef extern int slurm_bit_test(bitstr_t *b, bitoff_t bit) +cdef extern char *slurm_bit_fmt(char *str, 
int32_t len, bitstr_t *b) +cdef extern void slurm_bit_free(bitstr_t **b) + + +cdef extern from *: + """ + #define bit_free(__b) slurm_bit_free((bitstr_t **)&(__b)) + #define FREE_NULL_BITMAP(_X) \ + do { \ + if (_X) \ + bit_free(_X); \ + _X = NULL; \ + } while(0) \ + """ + void bit_free(bitstr_t *_X) + void FREE_NULL_BITMAP(bitstr_t *_X) + +cdef extern char *slurm_hostlist_deranged_string_malloc(hostlist_t hl) + +# +# slurmdb functions +# + +cdef extern void slurmdb_job_cond_def_start_end(slurmdb_job_cond_t *job_cond) +cdef extern uint64_t slurmdb_find_tres_count_in_string(char *tres_str_in, int id) +cdef extern slurmdb_job_rec_t *slurmdb_create_job_rec() +cdef extern void slurmdb_init_assoc_rec(slurmdb_assoc_rec_t *assoc, bool free_it) +cdef extern void slurmdb_init_tres_cond(slurmdb_tres_cond_t *tres, bool free_it) + +# +# Slurm Partition functions +# + +cdef extern void slurm_free_update_part_msg(update_part_msg_t *msg) +cdef extern void slurm_free_partition_info_members(partition_info_t *node) diff --git a/pyslurm/slurm/header.pxi b/pyslurm/slurm/slurm.h.pxi similarity index 63% rename from pyslurm/slurm/header.pxi rename to pyslurm/slurm/slurm.h.pxi index 8dc36a95..3605e5a7 100644 --- a/pyslurm/slurm/header.pxi +++ b/pyslurm/slurm/slurm.h.pxi @@ -1,272 +1,58 @@ -cdef extern from "slurm/slurm_errno.h": - - uint8_t SLURM_SUCCESS - uint8_t ESPANK_SUCCESS - int8_t SLURM_ERROR - -cdef extern from "slurm/slurm_errno.h": - - ctypedef enum slurm_err_t: - SLURM_UNEXPECTED_MSG_ERROR - SLURM_COMMUNICATIONS_CONNECTION_ERROR - SLURM_COMMUNICATIONS_SEND_ERROR - SLURM_COMMUNICATIONS_RECEIVE_ERROR - SLURM_COMMUNICATIONS_SHUTDOWN_ERROR - SLURM_PROTOCOL_VERSION_ERROR - SLURM_PROTOCOL_IO_STREAM_VERSION_ERROR - SLURM_PROTOCOL_AUTHENTICATION_ERROR - SLURM_PROTOCOL_INSANE_MSG_LENGTH - SLURM_MPI_PLUGIN_NAME_INVALID - SLURM_MPI_PLUGIN_PRELAUNCH_SETUP_FAILED - SLURM_PLUGIN_NAME_INVALID - SLURM_UNKNOWN_FORWARD_ADDR - SLURM_COMMUNICATIONS_MISSING_SOCKET_ERROR - 
SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR - SLURMCTLD_COMMUNICATIONS_SEND_ERROR - SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR - SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR - SLURM_NO_CHANGE_IN_DATA - ESLURM_INVALID_PARTITION_NAME - ESLURM_DEFAULT_PARTITION_NOT_SET - ESLURM_ACCESS_DENIED - ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP - ESLURM_REQUESTED_NODES_NOT_IN_PARTITION - ESLURM_TOO_MANY_REQUESTED_CPUS - ESLURM_INVALID_NODE_COUNT - ESLURM_ERROR_ON_DESC_TO_RECORD_COPY - ESLURM_JOB_MISSING_SIZE_SPECIFICATION - ESLURM_JOB_SCRIPT_MISSING - ESLURM_USER_ID_MISSING - ESLURM_DUPLICATE_JOB_ID - ESLURM_PATHNAME_TOO_LONG - ESLURM_NOT_TOP_PRIORITY - ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE - ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE - ESLURM_NODES_BUSY - ESLURM_INVALID_JOB_ID - ESLURM_INVALID_NODE_NAME - ESLURM_WRITING_TO_FILE - ESLURM_TRANSITION_STATE_NO_UPDATE - ESLURM_ALREADY_DONE - ESLURM_INTERCONNECT_FAILURE - ESLURM_BAD_DIST - ESLURM_JOB_PENDING - ESLURM_BAD_TASK_COUNT - ESLURM_INVALID_JOB_CREDENTIAL - ESLURM_IN_STANDBY_MODE - ESLURM_INVALID_NODE_STATE - ESLURM_INVALID_FEATURE - ESLURM_INVALID_AUTHTYPE_CHANGE - ESLURM_ACTIVE_FEATURE_NOT_SUBSET - ESLURM_INVALID_SCHEDTYPE_CHANGE - ESLURM_INVALID_SELECTTYPE_CHANGE - ESLURM_INVALID_SWITCHTYPE_CHANGE - ESLURM_FRAGMENTATION - ESLURM_NOT_SUPPORTED - ESLURM_DISABLED - ESLURM_DEPENDENCY - ESLURM_BATCH_ONLY - ESLURM_TASKDIST_ARBITRARY_UNSUPPORTED - ESLURM_TASKDIST_REQUIRES_OVERCOMMIT - ESLURM_JOB_HELD - ESLURM_INVALID_CRED_TYPE_CHANGE - ESLURM_INVALID_TASK_MEMORY - ESLURM_INVALID_ACCOUNT - ESLURM_INVALID_PARENT_ACCOUNT - ESLURM_SAME_PARENT_ACCOUNT - ESLURM_INVALID_LICENSES - ESLURM_NEED_RESTART - ESLURM_ACCOUNTING_POLICY - ESLURM_INVALID_TIME_LIMIT - ESLURM_RESERVATION_ACCESS - ESLURM_RESERVATION_INVALID - ESLURM_INVALID_TIME_VALUE - ESLURM_RESERVATION_BUSY - ESLURM_RESERVATION_NOT_USABLE - ESLURM_INVALID_WCKEY - ESLURM_RESERVATION_OVERLAP - ESLURM_PORTS_BUSY - ESLURM_PORTS_INVALID - ESLURM_PROLOG_RUNNING - ESLURM_NO_STEPS - 
ESLURM_INVALID_BLOCK_STATE - ESLURM_INVALID_BLOCK_LAYOUT - ESLURM_INVALID_BLOCK_NAME - ESLURM_INVALID_QOS - ESLURM_QOS_PREEMPTION_LOOP - ESLURM_NODE_NOT_AVAIL - ESLURM_INVALID_CPU_COUNT - ESLURM_PARTITION_NOT_AVAIL - ESLURM_CIRCULAR_DEPENDENCY - ESLURM_INVALID_GRES - ESLURM_JOB_NOT_PENDING - ESLURM_QOS_THRES - ESLURM_PARTITION_IN_USE - ESLURM_STEP_LIMIT - ESLURM_JOB_SUSPENDED - ESLURM_CAN_NOT_START_IMMEDIATELY - ESLURM_INTERCONNECT_BUSY - ESLURM_RESERVATION_EMPTY - ESLURM_INVALID_ARRAY - ESLURM_RESERVATION_NAME_DUP - ESLURM_JOB_STARTED - ESLURM_JOB_FINISHED - ESLURM_JOB_NOT_RUNNING - ESLURM_JOB_NOT_PENDING_NOR_RUNNING - ESLURM_JOB_NOT_SUSPENDED - ESLURM_JOB_NOT_FINISHED - ESLURM_TRIGGER_DUP - ESLURM_INTERNAL - ESLURM_INVALID_BURST_BUFFER_CHANGE - ESLURM_BURST_BUFFER_PERMISSION - ESLURM_BURST_BUFFER_LIMIT - ESLURM_INVALID_BURST_BUFFER_REQUEST - ESLURM_PRIO_RESET_FAIL - ESLURM_CANNOT_MODIFY_CRON_JOB - ESLURM_INVALID_MCS_LABEL - ESLURM_BURST_BUFFER_WAIT - ESLURM_PARTITION_DOWN - ESLURM_DUPLICATE_GRES - ESLURM_JOB_SETTING_DB_INX - ESLURM_RSV_ALREADY_STARTED - ESLURM_SUBMISSIONS_DISABLED - ESLURM_NOT_HET_JOB - ESLURM_NOT_HET_JOB_LEADER - ESLURM_NOT_WHOLE_HET_JOB - ESLURM_CORE_RESERVATION_UPDATE - ESLURM_DUPLICATE_STEP_ID - ESLURM_INVALID_CORE_CNT - ESLURM_X11_NOT_AVAIL - ESLURM_GROUP_ID_MISSING - ESLURM_BATCH_CONSTRAINT - ESLURM_INVALID_TRES - ESLURM_INVALID_TRES_BILLING_WEIGHTS - ESLURM_INVALID_JOB_DEFAULTS - ESLURM_RESERVATION_MAINT - ESLURM_INVALID_GRES_TYPE - ESLURM_REBOOT_IN_PROGRESS - ESLURM_MULTI_KNL_CONSTRAINT - ESLURM_UNSUPPORTED_GRES - ESLURM_INVALID_NICE - ESLURM_INVALID_TIME_MIN_LIMIT - ESLURM_DEFER - ESLURM_CONFIGLESS_DISABLED - ESLURM_ENVIRONMENT_MISSING - ESLURM_RESERVATION_NO_SKIP - ESLURM_RESERVATION_USER_GROUP - ESLURM_PARTITION_ASSOC - ESLURM_IN_STANDBY_USE_BACKUP - ESLURM_BAD_THREAD_PER_CORE - ESLURM_INVALID_PREFER - ESLURM_INSUFFICIENT_GRES - ESPANK_ERROR - ESPANK_BAD_ARG - ESPANK_NOT_TASK - ESPANK_ENV_EXISTS - ESPANK_ENV_NOEXIST - ESPANK_NOSPACE - 
ESPANK_NOT_REMOTE - ESPANK_NOEXIST - ESPANK_NOT_EXECD - ESPANK_NOT_AVAIL - ESPANK_NOT_LOCAL - ESLURMD_PIPE_ERROR_ON_TASK_SPAWN - ESLURMD_KILL_TASK_FAILED - ESLURMD_KILL_JOB_ALREADY_COMPLETE - ESLURMD_INVALID_ACCT_FREQ - ESLURMD_INVALID_JOB_CREDENTIAL - ESLURMD_UID_NOT_FOUND - ESLURMD_GID_NOT_FOUND - ESLURMD_CREDENTIAL_EXPIRED - ESLURMD_CREDENTIAL_REVOKED - ESLURMD_CREDENTIAL_REPLAYED - ESLURMD_CREATE_BATCH_DIR_ERROR - ESLURMD_MODIFY_BATCH_DIR_ERROR - ESLURMD_CREATE_BATCH_SCRIPT_ERROR - ESLURMD_MODIFY_BATCH_SCRIPT_ERROR - ESLURMD_SETUP_ENVIRONMENT_ERROR - ESLURMD_SHARED_MEMORY_ERROR - ESLURMD_SET_UID_OR_GID_ERROR - ESLURMD_SET_SID_ERROR - ESLURMD_CANNOT_SPAWN_IO_THREAD - ESLURMD_FORK_FAILED - ESLURMD_EXECVE_FAILED - ESLURMD_IO_ERROR - ESLURMD_PROLOG_FAILED - ESLURMD_EPILOG_FAILED - ESLURMD_SESSION_KILLED - ESLURMD_TOOMANYSTEPS - ESLURMD_STEP_EXISTS - ESLURMD_JOB_NOTRUNNING - ESLURMD_STEP_SUSPENDED - ESLURMD_STEP_NOTSUSPENDED - ESLURMD_INVALID_SOCKET_NAME_LEN - ESLURMD_CONTAINER_RUNTIME_INVALID - ESLURMD_CPU_BIND_ERROR - ESCRIPT_CHDIR_FAILED - ESCRIPT_OPEN_OUTPUT_FAILED - ESCRIPT_NON_ZERO_RETURN - SLURM_PROTOCOL_SOCKET_IMPL_ZERO_RECV_LENGTH - SLURM_PROTOCOL_SOCKET_IMPL_NEGATIVE_RECV_LENGTH - SLURM_PROTOCOL_SOCKET_IMPL_NOT_ALL_DATA_SENT - ESLURM_PROTOCOL_INCOMPLETE_PACKET - SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT - SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT - ESLURM_AUTH_CRED_INVALID - ESLURM_AUTH_FOPEN_ERROR - ESLURM_AUTH_NET_ERROR - ESLURM_AUTH_UNABLE_TO_SIGN - ESLURM_AUTH_BADARG - ESLURM_AUTH_MEMORY - ESLURM_AUTH_INVALID - ESLURM_AUTH_UNPACK - ESLURM_AUTH_SKIP - ESLURM_DB_CONNECTION - ESLURM_JOBS_RUNNING_ON_ASSOC - ESLURM_CLUSTER_DELETED - ESLURM_ONE_CHANGE - ESLURM_BAD_NAME - ESLURM_OVER_ALLOCATE - ESLURM_RESULT_TOO_LARGE - ESLURM_DB_QUERY_TOO_WIDE - ESLURM_DB_CONNECTION_INVALID - ESLURM_NO_REMOVE_DEFAULT_ACCOUNT - ESLURM_FED_CLUSTER_MAX_CNT - ESLURM_FED_CLUSTER_MULTIPLE_ASSIGNMENT - ESLURM_INVALID_CLUSTER_FEATURE - ESLURM_JOB_NOT_FEDERATED - ESLURM_INVALID_CLUSTER_NAME - 
ESLURM_FED_JOB_LOCK - ESLURM_FED_NO_VALID_CLUSTERS - ESLURM_MISSING_TIME_LIMIT - ESLURM_INVALID_KNL - ESLURM_PLUGIN_INVALID - ESLURM_PLUGIN_INCOMPLETE - ESLURM_REST_INVALID_QUERY - ESLURM_REST_FAIL_PARSING - ESLURM_REST_INVALID_JOBS_DESC - ESLURM_REST_EMPTY_RESULT - ESLURM_DATA_PATH_NOT_FOUND - ESLURM_DATA_PTR_NULL - ESLURM_DATA_CONV_FAILED - ESLURM_DATA_REGEX_COMPILE - ESLURM_DATA_UNKNOWN_MIME_TYPE - ESLURM_DATA_TOO_LARGE - ESLURM_CONTAINER_NOT_CONFIGURED - - char* slurm_strerror(int errnum) - - void slurm_seterrno(int errnum) - - int slurm_get_errno() - - void slurm_perror(char* msg) +############################################################################## +# NOTICE: This File has been generated by scripts/pyslurm_bindgen.py, which +# uses the autopxd2 tool in order to generate Cython compatible definitions +# from the slurm.h C-Header file. Basically, this can be seen as a modified +# version of the original header, with the following changes: +# +# * have the correct cython syntax for type definitions, e.g. "typedef struct +# " is converted to "ctypedef struct " +# * C-Macros are listed with their appropriate uint type +# * Any definitions that cannot be translated are not included in this file +# +# Generated on 2023-05-06T18:02:46.408139 +# +# The Original Copyright notice from slurm.h has been included +# below: +# +############################################################################## +# slurm.h - Definitions for all of the Slurm RPCs +############################################################################# +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008-2010 Lawrence Livermore National Security. +# Portions Copyright (C) 2010-2017 SchedMD LLC . +# Portions Copyright (C) 2012-2013 Los Alamos National Security, LLC. +# Portions Copyright 2013 Hewlett Packard Enterprise Development LP +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette , et. al. 
+# CODE-OCEC-09-009. All rights reserved. +# +# Slurm is licensed under the GNU GPLv2. For the full text of Slurm's License, +# please see here: pyslurm/slurm/SLURM_LICENSE +# +# Please, as mentioned above, also have a look at Slurm's DISCLAIMER under +# pyslurm/slurm/SLURM_DISCLAIMER +############################################################################## +# +# Copyright (C) 2023 PySlurm Developers (Modifications as described above) +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
cdef extern from "slurm/slurm.h": - enum: - SLURM_VERSION_NUMBER - uint8_t SYSTEM_DIMENSIONS uint8_t HIGHEST_DIMENSIONS uint8_t HOST_NAME_MAX @@ -281,6 +67,7 @@ cdef extern from "slurm/slurm.h": uint64_t NO_CONSUME_VAL64 uint16_t MAX_TASKS_PER_NODE uint32_t MAX_JOB_ID + uint8_t MAX_HET_JOB_COMPONENTS uint8_t MAX_FED_CLUSTERS uint32_t SLURM_MAX_NORMAL_STEP_ID uint32_t SLURM_PENDING_STEP @@ -350,6 +137,9 @@ cdef extern from "slurm/slurm.h": uint16_t SLURM_DIST_NODESOCKMASK uint8_t OPEN_MODE_APPEND uint8_t OPEN_MODE_TRUNCATE + uint8_t CPU_BIND_T_TO_MASK + uint32_t CPU_BIND_T_AUTO_TO_MASK + uint16_t CPU_BIND_T_MASK uint32_t CPU_FREQ_RANGE_FLAG uint32_t CPU_FREQ_LOW uint32_t CPU_FREQ_MEDIUM @@ -362,6 +152,8 @@ cdef extern from "slurm/slurm.h": uint32_t CPU_FREQ_USERSPACE uint32_t CPU_FREQ_SCHEDUTIL uint32_t CPU_FREQ_GOV_MASK + uint8_t MEM_BIND_TYPE_MASK + uint8_t MEM_BIND_TYPE_FLAGS_MASK uint8_t NODE_STATE_BASE uint32_t NODE_STATE_FLAGS uint8_t NODE_STATE_NET @@ -415,7 +207,6 @@ cdef extern from "slurm/slurm.h": uint8_t PRIVATE_DATA_USERS uint8_t PRIVATE_DATA_ACCOUNTS uint8_t PRIVATE_DATA_RESERVATIONS - uint8_t PRIVATE_CLOUD_NODES uint16_t PRIVATE_DATA_EVENTS uint8_t PRIORITY_RESET_NONE uint8_t PRIORITY_RESET_NOW @@ -471,7 +262,10 @@ cdef extern from "slurm/slurm.h": uint32_t JOB_PART_ASSIGNED uint64_t BACKFILL_SCHED uint64_t BACKFILL_LAST + uint64_t TASKS_CHANGED uint64_t JOB_SEND_SCRIPT + uint64_t RESET_LIC_TASK + uint64_t RESET_LIC_JOB uint8_t X11_FORWARD_ALL uint8_t X11_FORWARD_BATCH uint8_t X11_FORWARD_FIRST @@ -493,6 +287,7 @@ cdef extern from "slurm/slurm.h": uint8_t PART_FLAG_REQ_RESV uint8_t PART_FLAG_LLN uint8_t PART_FLAG_EXCLUSIVE_USER + uint8_t PART_FLAG_PDOI uint16_t PART_FLAG_DEFAULT_CLR uint16_t PART_FLAG_HIDDEN_CLR uint16_t PART_FLAG_NO_ROOT_CLR @@ -500,6 +295,7 @@ cdef extern from "slurm/slurm.h": uint16_t PART_FLAG_REQ_RESV_CLR uint16_t PART_FLAG_LLN_CLR uint16_t PART_FLAG_EXC_USER_CLR + uint16_t PART_FLAG_PDOI_CLR uint8_t RESERVE_FLAG_MAINT uint8_t 
RESERVE_FLAG_NO_MAINT uint8_t RESERVE_FLAG_DAILY @@ -580,7 +376,7 @@ cdef extern from "slurm/slurm.h": uint64_t DEBUG_FLAG_TIME_CRAY uint64_t DEBUG_FLAG_DB_ARCHIVE uint64_t DEBUG_FLAG_DB_TRES - uint64_t DEBUG_FLAG_ESEARCH + uint64_t DEBUG_FLAG_JOBCOMP uint64_t DEBUG_FLAG_NODE_FEATURES uint64_t DEBUG_FLAG_FEDR uint64_t DEBUG_FLAG_HETJOB @@ -599,9 +395,11 @@ cdef extern from "slurm/slurm.h": uint16_t PREEMPT_MODE_GANG uint8_t RECONFIG_KEEP_PART_INFO uint8_t RECONFIG_KEEP_PART_STAT + uint8_t RECONFIG_KEEP_POWER_SAVE_SETTINGS uint8_t HEALTH_CHECK_NODE_IDLE uint8_t HEALTH_CHECK_NODE_ALLOC uint8_t HEALTH_CHECK_NODE_MIXED + uint8_t HEALTH_CHECK_NODE_NONDRAINED_IDLE uint16_t HEALTH_CHECK_CYCLE uint8_t HEALTH_CHECK_NODE_ANY uint8_t PROLOG_FLAG_ALLOC @@ -610,6 +408,7 @@ cdef extern from "slurm/slurm.h": uint8_t PROLOG_FLAG_SERIAL uint8_t PROLOG_FLAG_X11 uint8_t PROLOG_FLAG_DEFER_BATCH + uint8_t PROLOG_FLAG_FORCE_REQUEUE_ON_FAIL uint8_t CTL_CONF_OR uint8_t CTL_CONF_SJC uint8_t CTL_CONF_DRJ @@ -618,6 +417,7 @@ cdef extern from "slurm/slurm.h": uint8_t CTL_CONF_WCKEY uint8_t CTL_CONF_IPV4_ENABLED uint8_t CTL_CONF_IPV6_ENABLED + uint16_t CTL_CONF_SJX uint16_t CTL_CONF_SJS uint16_t CTL_CONF_SJE uint8_t LOG_FMT_ISO8601_MS @@ -627,6 +427,7 @@ cdef extern from "slurm/slurm.h": uint8_t LOG_FMT_CLOCK uint8_t LOG_FMT_SHORT uint8_t LOG_FMT_THREAD_ID + uint8_t LOG_FMT_RFC3339 uint8_t STAT_COMMAND_RESET uint8_t STAT_COMMAND_GET uint8_t TRIGGER_FLAG_PERM @@ -657,6 +458,8 @@ cdef extern from "slurm/slurm.h": uint32_t TRIGGER_TYPE_PRI_DB_FAIL uint32_t TRIGGER_TYPE_PRI_DB_RES_OP uint32_t TRIGGER_TYPE_BURST_BUFFER + uint32_t TRIGGER_TYPE_DRAINING + uint32_t TRIGGER_TYPE_RESUME uint8_t ASSOC_MGR_INFO_FLAG_ASSOC uint8_t ASSOC_MGR_INFO_FLAG_USERS uint8_t ASSOC_MGR_INFO_FLAG_QOS @@ -669,6 +472,7 @@ cdef extern from "slurm/slurm.h": uint8_t KILL_OOM uint8_t KILL_NO_SIBS uint16_t KILL_JOB_RESV + uint16_t KILL_NO_CRON uint16_t WARN_SENT uint8_t BB_FLAG_DISABLE_PERSISTENT uint8_t 
BB_FLAG_ENABLE_PERSISTENT @@ -720,7 +524,7 @@ cdef extern from "slurm/slurm.h": unsigned char type unsigned char hash[32] - cpdef enum job_states: + cdef enum job_states: JOB_PENDING JOB_RUNNING JOB_SUSPENDED @@ -735,7 +539,7 @@ cdef extern from "slurm/slurm.h": JOB_OOM JOB_END - cpdef enum job_state_reason: + cdef enum job_state_reason: WAIT_NO_REASON WAIT_PRIORITY WAIT_DEPENDENCY @@ -769,8 +573,8 @@ cdef extern from "slurm/slurm.h": WAIT_QOS_JOB_LIMIT WAIT_QOS_RESOURCE_LIMIT WAIT_QOS_TIME_LIMIT - WAIT_BLOCK_MAX_ERR - WAIT_BLOCK_D_ACTION + FAIL_SIGNAL + DEFUNCT_WAIT_34 WAIT_CLEANING WAIT_PROLOG WAIT_QOS @@ -936,25 +740,25 @@ cdef extern from "slurm/slurm.h": WAIT_QOS_MIN_BILLING WAIT_RESV_DELETED - cpdef enum job_acct_types: + cdef enum job_acct_types: JOB_START JOB_STEP JOB_SUSPEND JOB_TERMINATED - cpdef enum auth_plugin_type: + cdef enum auth_plugin_type: AUTH_PLUGIN_NONE AUTH_PLUGIN_MUNGE AUTH_PLUGIN_JWT - cpdef enum hash_plugin_type: + cdef enum hash_plugin_type: HASH_PLUGIN_DEFAULT HASH_PLUGIN_NONE HASH_PLUGIN_K12 HASH_PLUGIN_SHA256 HASH_PLUGIN_CNT - cpdef enum select_plugin_type: + cdef enum select_plugin_type: SELECT_PLUGIN_CONS_RES SELECT_PLUGIN_LINEAR SELECT_PLUGIN_SERIAL @@ -963,27 +767,23 @@ cdef extern from "slurm/slurm.h": SELECT_PLUGIN_CONS_TRES SELECT_PLUGIN_CRAY_CONS_TRES - cpdef enum switch_plugin_type: + cdef enum switch_plugin_type: SWITCH_PLUGIN_NONE SWITCH_PLUGIN_GENERIC SWITCH_PLUGIN_CRAY SWITCH_PLUGIN_SLINGSHOT - cpdef enum select_jobdata_type: - SELECT_JOBDATA_PAGG_ID - SELECT_JOBDATA_PTR - SELECT_JOBDATA_CLEANING + cdef enum select_jobdata_type: SELECT_JOBDATA_NETWORK - SELECT_JOBDATA_RELEASED - cpdef enum select_nodedata_type: + cdef enum select_nodedata_type: SELECT_NODEDATA_SUBCNT SELECT_NODEDATA_PTR SELECT_NODEDATA_MEM_ALLOC SELECT_NODEDATA_TRES_ALLOC_FMT_STR SELECT_NODEDATA_TRES_ALLOC_WEIGHTED - cpdef enum select_print_mode: + cdef enum select_print_mode: SELECT_PRINT_HEAD SELECT_PRINT_DATA SELECT_PRINT_MIXED @@ -1002,7 +802,7 @@ cdef 
extern from "slurm/slurm.h": SELECT_PRINT_RESV_ID SELECT_PRINT_START_LOC - cpdef enum select_node_cnt: + cdef enum select_node_cnt: SELECT_GET_NODE_SCALING SELECT_GET_NODE_CPU_CNT SELECT_GET_MP_CPU_CNT @@ -1011,19 +811,19 @@ cdef extern from "slurm/slurm.h": SELECT_SET_NODE_CNT SELECT_SET_MP_CNT - cpdef enum acct_gather_profile_info: + cdef enum acct_gather_profile_info: ACCT_GATHER_PROFILE_DIR ACCT_GATHER_PROFILE_DEFAULT ACCT_GATHER_PROFILE_RUNNING - cpdef enum jobacct_data_type: + cdef enum jobacct_data_type: JOBACCT_DATA_TOTAL JOBACCT_DATA_PIPE JOBACCT_DATA_RUSAGE JOBACCT_DATA_TOT_VSIZE JOBACCT_DATA_TOT_RSS - cpdef enum acct_energy_type: + cdef enum acct_energy_type: ENERGY_DATA_JOULES_TASK ENERGY_DATA_STRUCT ENERGY_DATA_RECONFIG @@ -1034,7 +834,12 @@ cdef extern from "slurm/slurm.h": ENERGY_DATA_NODE_ENERGY_UP ENERGY_DATA_STEP_PTR - cpdef enum task_dist_states: + ctypedef enum update_mode_t: + UPDATE_SET + UPDATE_ADD + UPDATE_REMOVE + + cdef enum task_dist_states: SLURM_DIST_CYCLIC SLURM_DIST_BLOCK SLURM_DIST_ARBITRARY @@ -1075,7 +880,7 @@ cdef extern from "slurm/slurm.h": ctypedef task_dist_states task_dist_states_t - cpdef enum cpu_bind_type: + cdef enum cpu_bind_type: CPU_BIND_VERBOSE CPU_BIND_TO_THREADS CPU_BIND_TO_CORES @@ -1097,7 +902,7 @@ cdef extern from "slurm/slurm.h": ctypedef cpu_bind_type cpu_bind_type_t - cpdef enum mem_bind_type: + cdef enum mem_bind_type: MEM_BIND_VERBOSE MEM_BIND_NONE MEM_BIND_RANK @@ -1109,14 +914,14 @@ cdef extern from "slurm/slurm.h": ctypedef mem_bind_type mem_bind_type_t - cpdef enum accel_bind_type: + cdef enum accel_bind_type: ACCEL_BIND_VERBOSE ACCEL_BIND_CLOSEST_GPU ACCEL_BIND_CLOSEST_NIC ctypedef accel_bind_type accel_bind_type_t - cpdef enum node_states: + cdef enum node_states: NODE_STATE_UNKNOWN NODE_STATE_DOWN NODE_STATE_IDLE @@ -1136,23 +941,27 @@ cdef extern from "slurm/slurm.h": SSF_MEM_ZERO SSF_OVERLAP_FORCE - void slurm_init(char* conf) + void slurm_init(const char* conf) void slurm_fini() + void 
slurm_client_init_plugins() + + void slurm_client_fini_plugins() + ctypedef hostlist* hostlist_t - hostlist_t slurm_hostlist_create(char* hostlist) + hostlist_t slurm_hostlist_create(const char* hostlist) int slurm_hostlist_count(hostlist_t hl) void slurm_hostlist_destroy(hostlist_t hl) - int slurm_hostlist_find(hostlist_t hl, char* hostname) + int slurm_hostlist_find(hostlist_t hl, const char* hostname) - int slurm_hostlist_push(hostlist_t hl, char* hosts) + int slurm_hostlist_push(hostlist_t hl, const char* hosts) - int slurm_hostlist_push_host(hostlist_t hl, char* host) + int slurm_hostlist_push_host(hostlist_t hl, const char* host) ssize_t slurm_hostlist_ranged_string(hostlist_t hl, size_t n, char* buf) @@ -1166,8 +975,12 @@ cdef extern from "slurm/slurm.h": ctypedef xlist* List + ctypedef xlist list_t + ctypedef listIterator* ListIterator + ctypedef listIterator list_itr_t + ctypedef void (*ListDelF)(void* x) ctypedef int (*ListCmpF)(void* x, void* y) @@ -1176,29 +989,29 @@ cdef extern from "slurm/slurm.h": ctypedef int (*ListForF)(void* x, void* arg) - void* slurm_list_append(List l, void* x) + void slurm_list_append(list_t* l, void* x) - int slurm_list_count(List l) + int slurm_list_count(list_t* l) - List slurm_list_create(ListDelF f) + list_t* slurm_list_create(ListDelF f) - void slurm_list_destroy(List l) + void slurm_list_destroy(list_t* l) - void* slurm_list_find(ListIterator i, ListFindF f, void* key) + void* slurm_list_find(list_itr_t* i, ListFindF f, void* key) - int slurm_list_is_empty(List l) + int slurm_list_is_empty(list_t* l) - ListIterator slurm_list_iterator_create(List l) + list_itr_t* slurm_list_iterator_create(list_t* l) - void slurm_list_iterator_reset(ListIterator i) + void slurm_list_iterator_reset(list_itr_t* i) - void slurm_list_iterator_destroy(ListIterator i) + void slurm_list_iterator_destroy(list_itr_t* i) - void* slurm_list_next(ListIterator i) + void* slurm_list_next(list_itr_t* i) - void slurm_list_sort(List l, ListCmpF f) + 
void slurm_list_sort(list_t* l, ListCmpF f) - void* slurm_list_pop(List l) + void* slurm_list_pop(list_t* l) ctypedef int64_t bitstr_t @@ -1261,6 +1074,7 @@ cdef extern from "slurm/slurm.h": char* comment uint16_t contiguous char* container + char* container_id uint16_t core_spec char* cpu_bind uint16_t cpu_bind_type @@ -1276,8 +1090,8 @@ cdef extern from "slurm/slurm.h": char** environment slurm_hash_t env_hash uint32_t env_size - char* extra char* exc_nodes + char* extra char* features uint64_t fed_siblings_active uint64_t fed_siblings_viable @@ -1286,8 +1100,10 @@ cdef extern from "slurm/slurm.h": uint16_t immediate uint32_t job_id char* job_id_str + char* job_size_str uint16_t kill_on_node_fail char* licenses + char* licenses_tot uint16_t mail_type char* mail_user char* mcs_label @@ -1358,7 +1174,6 @@ cdef extern from "slurm/slurm.h": uint32_t pn_min_tmp_disk char* req_context uint32_t req_switch - dynamic_plugin_data_t* select_jobinfo char* selinux_context char* std_err char* std_in @@ -1397,6 +1212,7 @@ cdef extern from "slurm/slurm.h": char* command char* comment char* container + char* container_id uint16_t contiguous uint16_t core_spec uint16_t cores_per_socket @@ -1416,6 +1232,8 @@ cdef extern from "slurm/slurm.h": char* exc_nodes int32_t* exc_node_inx uint32_t exit_code + char* extra + char* failed_node char* features char* fed_origin_str uint64_t fed_siblings_active @@ -1431,6 +1249,7 @@ cdef extern from "slurm/slurm.h": uint32_t het_job_offset uint32_t job_id job_resources_t* job_resrcs + char* job_size_str uint32_t job_state time_t last_sched_eval char* licenses @@ -1474,7 +1293,6 @@ cdef extern from "slurm/slurm.h": uint16_t restart_cnt char* resv_name char* sched_nodes - dynamic_plugin_data_t* select_jobinfo char* selinux_context uint16_t shared uint16_t show_flags @@ -1484,7 +1302,7 @@ cdef extern from "slurm/slurm.h": time_t start_time uint16_t start_protocol_ver char* state_desc - uint16_t state_reason + uint32_t state_reason char* std_err char* 
std_in char* std_out @@ -1512,29 +1330,34 @@ cdef extern from "slurm/slurm.h": ctypedef slurm_job_info_t job_info_t - cdef struct priority_factors_object: - char* cluster_name - uint32_t job_id - char* partition - uint32_t user_id + ctypedef struct priority_factors_t: + uint32_t nice double priority_age double priority_assoc double priority_fs double priority_js double priority_part double priority_qos - double direct_prio uint32_t priority_site double* priority_tres uint32_t tres_cnt char** tres_names double* tres_weights - uint32_t nice + + cdef struct priority_factors_object: + char* account + char* cluster_name + double direct_prio + uint32_t job_id + char* partition + priority_factors_t* prio_factors + char* qos + uint32_t user_id ctypedef priority_factors_object priority_factors_object_t cdef struct priority_factors_response_msg: - List priority_factors_list + list_t* priority_factors_list ctypedef priority_factors_response_msg priority_factors_response_msg_t @@ -1553,6 +1376,12 @@ cdef extern from "slurm/slurm.h": ctypedef step_update_request_msg step_update_request_msg_t + cdef struct suspend_exc_update_msg: + char* update_str + update_mode_t mode + + ctypedef suspend_exc_update_msg suspend_exc_update_msg_t + ctypedef struct slurm_step_layout_req_t: char* node_list uint16_t* cpus_per_node @@ -1662,7 +1491,7 @@ cdef extern from "slurm/slurm.h": ctypedef srun_step_missing_msg srun_step_missing_msg_t - cpdef enum suspend_opts: + cdef enum suspend_opts: SUSPEND_JOB RESUME_JOB @@ -1700,7 +1529,6 @@ cdef extern from "slurm/slurm.h": char* remote_error_filename char* remote_input_filename slurm_step_io_fds_t local_fds - uint32_t gid bool multi_prog bool no_alloc uint32_t slurmd_debug @@ -1800,6 +1628,7 @@ cdef extern from "slurm/slurm.h": uint32_t array_task_id char* cluster char* container + char* container_id uint32_t cpu_freq_min uint32_t cpu_freq_max uint32_t cpu_freq_gov @@ -1814,7 +1643,6 @@ cdef extern from "slurm/slurm.h": char* partition char* resv_ports 
time_t run_time - dynamic_plugin_data_t* select_jobinfo char* srun_host uint32_t srun_pid time_t start_time @@ -1846,7 +1674,7 @@ cdef extern from "slurm/slurm.h": uint32_t pid_cnt ctypedef struct job_step_pids_response_msg_t: - List pid_list + list_t* pid_list slurm_step_id_t step_id ctypedef struct job_step_stat_t: @@ -1856,7 +1684,7 @@ cdef extern from "slurm/slurm.h": job_step_pids_t* step_pids ctypedef struct job_step_stat_response_msg_t: - List stats_list + list_t* stats_list slurm_step_id_t step_id cdef struct node_info: @@ -1899,6 +1727,8 @@ cdef extern from "slurm/slurm.h": char* reason time_t reason_time uint32_t reason_uid + time_t resume_after + char* resv_name dynamic_plugin_data_t* select_nodeinfo time_t slurmd_start_time uint16_t sockets @@ -2003,9 +1833,10 @@ cdef extern from "slurm/slurm.h": char* deny_qos uint16_t flags uint32_t grace_time - List job_defaults_list + list_t* job_defaults_list char* job_defaults_str uint32_t max_cpus_per_node + uint32_t max_cpus_per_socket uint64_t max_mem_per_cpu uint32_t max_nodes uint16_t max_share @@ -2048,6 +1879,8 @@ cdef extern from "slurm/slurm.h": uint32_t env_size char** environment uint32_t error_code + gid_t gid + char* group_name char* job_submit_user_msg slurm_addr_t* node_addr uint32_t node_cnt @@ -2061,7 +1894,9 @@ cdef extern from "slurm/slurm.h": uint64_t pn_min_memory char* qos char* resv_name - dynamic_plugin_data_t* select_jobinfo + char* tres_per_node + uid_t uid + char* user_name void* working_cluster_rec ctypedef resource_allocation_response_msg resource_allocation_response_msg_t @@ -2078,7 +1913,7 @@ cdef extern from "slurm/slurm.h": char* job_submit_user_msg char* node_list char* part_name - List preemptee_job_id + list_t* preemptee_job_id uint32_t proc_cnt time_t start_time double sys_usage_per @@ -2094,6 +1929,7 @@ cdef extern from "slurm/slurm.h": cdef struct reserve_info: char* accounts char* burst_buffer + char* comment uint32_t core_cnt uint32_t core_spec_cnt resv_core_spec_t* 
core_spec @@ -2126,6 +1962,7 @@ cdef extern from "slurm/slurm.h": cdef struct resv_desc_msg: char* accounts char* burst_buffer + char* comment uint32_t* core_cnt uint32_t duration time_t end_time @@ -2210,6 +2047,7 @@ cdef extern from "slurm/slurm.h": char* fed_params uint32_t first_job_id uint16_t fs_dampening_factor + uint16_t getnameinfo_cache_timeout uint16_t get_env_timeout char* gres_plugins uint16_t group_time @@ -2235,7 +2073,7 @@ cdef extern from "slurm/slurm.h": char* job_container_plugin char* job_credential_private_key char* job_credential_public_certificate - List job_defaults_list + list_t* job_defaults_list uint16_t job_file_append uint16_t job_requeue char* job_submit_plugins @@ -2245,12 +2083,12 @@ cdef extern from "slurm/slurm.h": uint16_t kill_on_bad_exit uint16_t kill_wait char* launch_params - char* launch_type char* licenses uint16_t log_fmt char* mail_domain char* mail_prog uint32_t max_array_sz + uint32_t max_batch_requeue uint32_t max_dbd_msgs uint32_t max_job_cnt uint32_t max_job_id @@ -2276,6 +2114,7 @@ cdef extern from "slurm/slurm.h": char* power_plugin uint32_t preempt_exempt_time uint16_t preempt_mode + char* preempt_params char* preempt_type char* prep_params char* prep_plugins @@ -2336,8 +2175,6 @@ cdef extern from "slurm/slurm.h": uint16_t slurmctld_debug char* slurmctld_logfile char* slurmctld_pidfile - char* slurmctld_plugstack - void* slurmctld_plugstack_conf uint32_t slurmctld_port uint16_t slurmctld_port_count char* slurmctld_primary_off_prog @@ -2359,6 +2196,7 @@ cdef extern from "slurm/slurm.h": char* state_save_location char* suspend_exc_nodes char* suspend_exc_parts + char* suspend_exc_states char* suspend_program uint16_t suspend_rate uint32_t suspend_time @@ -2421,6 +2259,7 @@ cdef extern from "slurm/slurm.h": uint32_t node_state char* reason uint32_t reason_uid + uint32_t resume_after uint32_t weight ctypedef slurm_update_node_msg update_node_msg_t @@ -2540,6 +2379,9 @@ cdef extern from "slurm/slurm.h": uint32_t 
available uint8_t remote uint32_t reserved + uint32_t last_consumed + uint32_t last_deficit + time_t last_update ctypedef slurm_license_info slurm_license_info_t @@ -2554,19 +2396,20 @@ cdef extern from "slurm/slurm.h": uint32_t job_array_count char** job_array_id uint32_t* error_code + char** err_msg ctypedef struct assoc_mgr_info_msg_t: - List assoc_list - List qos_list + list_t* assoc_list + list_t* qos_list uint32_t tres_cnt char** tres_names - List user_list + list_t* user_list ctypedef struct assoc_mgr_info_request_msg_t: - List acct_list + list_t* acct_list uint32_t flags - List qos_list - List user_list + list_t* qos_list + list_t* user_list cdef struct network_callerid_msg: unsigned char ip_src[16] @@ -2583,27 +2426,27 @@ cdef extern from "slurm/slurm.h": ctypedef void (*_slurm_allocate_resources_blocking_pending_callback_ft)(uint32_t job_id) - resource_allocation_response_msg_t* slurm_allocate_resources_blocking(job_desc_msg_t* user_req, time_t timeout, _slurm_allocate_resources_blocking_pending_callback_ft pending_callback) + resource_allocation_response_msg_t* slurm_allocate_resources_blocking(const job_desc_msg_t* user_req, time_t timeout, _slurm_allocate_resources_blocking_pending_callback_ft pending_callback) void slurm_free_resource_allocation_response_msg(resource_allocation_response_msg_t* msg) ctypedef void (*_slurm_allocate_het_job_blocking_pending_callback_ft)(uint32_t job_id) - List slurm_allocate_het_job_blocking(List job_req_list, time_t timeout, _slurm_allocate_het_job_blocking_pending_callback_ft pending_callback) + list_t* slurm_allocate_het_job_blocking(list_t* job_req_list, time_t timeout, _slurm_allocate_het_job_blocking_pending_callback_ft pending_callback) int slurm_allocation_lookup(uint32_t job_id, resource_allocation_response_msg_t** resp) - int slurm_het_job_lookup(uint32_t jobid, List* resp) + int slurm_het_job_lookup(uint32_t jobid, list_t** resp) - char* slurm_read_hostfile(char* filename, int n) + char* 
slurm_read_hostfile(const char* filename, int n) - allocation_msg_thread_t* slurm_allocation_msg_thr_create(uint16_t* port, slurm_allocation_callbacks_t* callbacks) + allocation_msg_thread_t* slurm_allocation_msg_thr_create(uint16_t* port, const slurm_allocation_callbacks_t* callbacks) void slurm_allocation_msg_thr_destroy(allocation_msg_thread_t* msg_thr) int slurm_submit_batch_job(job_desc_msg_t* job_desc_msg, submit_response_msg_t** slurm_alloc_msg) - int slurm_submit_batch_het_job(List job_req_list, submit_response_msg_t** slurm_alloc_msg) + int slurm_submit_batch_het_job(list_t* job_req_list, submit_response_msg_t** slurm_alloc_msg) void slurm_free_submit_response_response_msg(submit_response_msg_t* msg) @@ -2611,7 +2454,7 @@ cdef extern from "slurm/slurm.h": int slurm_job_will_run(job_desc_msg_t* job_desc_msg) - int slurm_het_job_will_run(List job_req_list) + int slurm_het_job_will_run(list_t* job_req_list) int slurm_job_will_run2(job_desc_msg_t* req, will_run_response_msg_t** will_run_resp) @@ -2644,7 +2487,7 @@ cdef extern from "slurm/slurm.h": int slurm_kill_job_step(uint32_t job_id, uint32_t step_id, uint16_t signal) - int slurm_kill_job2(char* job_id, uint16_t signal, uint16_t flags, char* sibling) + int slurm_kill_job2(const char* job_id, uint16_t signal, uint16_t flags, const char* sibling) int slurm_signal_job(uint32_t job_id, uint16_t signal) @@ -2656,9 +2499,9 @@ cdef extern from "slurm/slurm.h": void slurm_step_launch_params_t_init(slurm_step_launch_params_t* ptr) - int slurm_step_launch(slurm_step_ctx_t* ctx, slurm_step_launch_params_t* params, slurm_step_launch_callbacks_t* callbacks) + int slurm_step_launch(slurm_step_ctx_t* ctx, const slurm_step_launch_params_t* params, const slurm_step_launch_callbacks_t* callbacks) - int slurm_step_launch_add(slurm_step_ctx_t* ctx, slurm_step_ctx_t* first_ctx, slurm_step_launch_params_t* params, char* node_list) + int slurm_step_launch_add(slurm_step_ctx_t* ctx, slurm_step_ctx_t* first_ctx, const 
slurm_step_launch_params_t* params, char* node_list) int slurm_step_launch_wait_start(slurm_step_ctx_t* ctx) @@ -2698,11 +2541,11 @@ cdef extern from "slurm/slurm.h": int slurm_job_cpus_allocated_on_node_id(job_resources_t* job_resrcs_ptr, int node_id) - int slurm_job_cpus_allocated_on_node(job_resources_t* job_resrcs_ptr, char* node_name) + int slurm_job_cpus_allocated_on_node(job_resources_t* job_resrcs_ptr, const char* node_name) int slurm_job_cpus_allocated_str_on_node_id(char* cpus, size_t cpus_len, job_resources_t* job_resrcs_ptr, int node_id) - int slurm_job_cpus_allocated_str_on_node(char* cpus, size_t cpus_len, job_resources_t* job_resrcs_ptr, char* node_name) + int slurm_job_cpus_allocated_str_on_node(char* cpus, size_t cpus_len, job_resources_t* job_resrcs_ptr, const char* node_name) void slurm_free_job_info_msg(job_info_msg_t* job_buffer_ptr) @@ -2722,7 +2565,7 @@ cdef extern from "slurm/slurm.h": int slurm_load_job(job_info_msg_t** resp, uint32_t job_id, uint16_t show_flags) - int slurm_load_job_prio(priority_factors_response_msg_t** factors_resp, List job_id_list, char* partitions, List uid_list, uint16_t show_flags) + int slurm_load_job_prio(priority_factors_response_msg_t** factors_resp, uint16_t show_flags) int slurm_load_job_user(job_info_msg_t** job_info_msg_pptr, uint32_t user_id, uint16_t show_flags) @@ -2746,6 +2589,8 @@ cdef extern from "slurm/slurm.h": int slurm_get_job_steps(time_t update_time, uint32_t job_id, uint32_t step_id, job_step_info_response_msg_t** step_response_pptr, uint16_t show_flags) + int slurm_find_step_ids_by_container_id(uint16_t show_flags, uid_t uid, const char* container_id, list_t* steps) + void slurm_free_job_step_info_response_msg(job_step_info_response_msg_t* msg) void slurm_print_job_step_info_msg(FILE* out, job_step_info_response_msg_t* job_step_info_msg_ptr, int one_liner) @@ -2864,8 +2709,16 @@ cdef extern from "slurm/slurm.h": void slurm_free_reservation_info_msg(reserve_info_msg_t* resv_info_ptr) + ctypedef 
struct controller_ping_t: + char* hostname + bool pinged + long latency + int offset + int slurm_ping(int dest) + controller_ping_t* ping_all_controllers() + int slurm_reconfigure() int slurm_shutdown(uint16_t options) @@ -2874,12 +2727,22 @@ cdef extern from "slurm/slurm.h": int slurm_set_debugflags(uint64_t debug_flags_plus, uint64_t debug_flags_minus) + int slurm_set_slurmd_debug_flags(char* node_list, uint64_t debug_flags_plus, uint64_t debug_flags_minus) + + int slurm_set_slurmd_debug_level(char* node_list, uint32_t debug_level) + int slurm_set_debug_level(uint32_t debug_level) int slurm_set_schedlog_level(uint32_t schedlog_level) int slurm_set_fs_dampeningfactor(uint16_t factor) + int slurm_update_suspend_exc_nodes(char* nodes, update_mode_t mode) + + int slurm_update_suspend_exc_parts(char* parts, update_mode_t mode) + + int slurm_update_suspend_exc_states(char* states, update_mode_t mode) + int slurm_suspend(uint32_t job_id) int slurm_suspend2(char* job_id, job_array_resp_msg_t** resp) @@ -2995,1143 +2858,9 @@ cdef extern from "slurm/slurm.h": char* failed_lines uint32_t* jobids uint32_t jobids_count + char* job_submit_user_msg uint32_t return_code - crontab_update_response_msg_t* slurm_update_crontab(uid_t uid, gid_t gid, char* crontab, List jobs) + crontab_update_response_msg_t* slurm_update_crontab(uid_t uid, gid_t gid, char* crontab, list_t* jobs) int slurm_remove_crontab(uid_t uid, gid_t gid) - -cdef extern from "slurm/slurmdb.h": - - uint32_t QOS_FLAG_BASE - uint32_t QOS_FLAG_NOTSET - uint32_t QOS_FLAG_ADD - uint32_t QOS_FLAG_REMOVE - uint8_t QOS_FLAG_PART_MIN_NODE - uint8_t QOS_FLAG_PART_MAX_NODE - uint8_t QOS_FLAG_PART_TIME_LIMIT - uint8_t QOS_FLAG_ENFORCE_USAGE_THRES - uint8_t QOS_FLAG_NO_RESERVE - uint8_t QOS_FLAG_REQ_RESV - uint8_t QOS_FLAG_DENY_LIMIT - uint8_t QOS_FLAG_OVER_PART_QOS - uint16_t QOS_FLAG_NO_DECAY - uint16_t QOS_FLAG_USAGE_FACTOR_SAFE - uint32_t SLURMDB_RES_FLAG_BASE - uint32_t SLURMDB_RES_FLAG_NOTSET - uint32_t 
SLURMDB_RES_FLAG_ADD - uint32_t SLURMDB_RES_FLAG_REMOVE - uint32_t FEDERATION_FLAG_BASE - uint32_t FEDERATION_FLAG_NOTSET - uint32_t FEDERATION_FLAG_ADD - uint32_t FEDERATION_FLAG_REMOVE - uint8_t CLUSTER_FED_STATE_BASE - uint16_t CLUSTER_FED_STATE_FLAGS - uint8_t CLUSTER_FED_STATE_DRAIN - uint8_t CLUSTER_FED_STATE_REMOVE - uint8_t SLURMDB_JOB_FLAG_NONE - uint8_t SLURMDB_JOB_CLEAR_SCHED - uint8_t SLURMDB_JOB_FLAG_NOTSET - uint8_t SLURMDB_JOB_FLAG_SUBMIT - uint8_t SLURMDB_JOB_FLAG_SCHED - uint8_t SLURMDB_JOB_FLAG_BACKFILL - uint8_t SLURMDB_JOB_FLAG_START_R - uint8_t JOBCOND_FLAG_DUP - uint8_t JOBCOND_FLAG_NO_STEP - uint8_t JOBCOND_FLAG_NO_TRUNC - uint8_t JOBCOND_FLAG_RUNAWAY - uint8_t JOBCOND_FLAG_WHOLE_HETJOB - uint8_t JOBCOND_FLAG_NO_WHOLE_HETJOB - uint8_t JOBCOND_FLAG_NO_WAIT - uint8_t JOBCOND_FLAG_NO_DEFAULT_USAGE - uint16_t JOBCOND_FLAG_SCRIPT - uint16_t JOBCOND_FLAG_ENV - uint16_t SLURMDB_PURGE_BASE - uint32_t SLURMDB_PURGE_FLAGS - uint32_t SLURMDB_PURGE_HOURS - uint32_t SLURMDB_PURGE_DAYS - uint32_t SLURMDB_PURGE_MONTHS - uint32_t SLURMDB_PURGE_ARCHIVE - uint32_t SLURMDB_FS_USE_PARENT - uint16_t SLURMDB_CLASSIFIED_FLAG - uint8_t SLURMDB_CLASS_BASE - uint8_t CLUSTER_FLAG_A1 - uint8_t CLUSTER_FLAG_A2 - uint8_t CLUSTER_FLAG_A3 - uint8_t CLUSTER_FLAG_A4 - uint8_t CLUSTER_FLAG_A5 - uint8_t CLUSTER_FLAG_A6 - uint8_t CLUSTER_FLAG_A7 - uint8_t CLUSTER_FLAG_MULTSD - uint16_t CLUSTER_FLAG_A9 - uint16_t CLUSTER_FLAG_FE - uint16_t CLUSTER_FLAG_CRAY - uint16_t CLUSTER_FLAG_FED - uint16_t CLUSTER_FLAG_EXT - uint8_t ASSOC_FLAG_DELETED - uint8_t SLURMDB_EVENT_COND_OPEN - -cdef extern from "slurm/slurmdb.h": - - ctypedef enum slurmdb_admin_level_t: - SLURMDB_ADMIN_NOTSET - SLURMDB_ADMIN_NONE - SLURMDB_ADMIN_OPERATOR - SLURMDB_ADMIN_SUPER_USER - - ctypedef enum slurmdb_classification_type_t: - SLURMDB_CLASS_NONE - SLURMDB_CLASS_CAPABILITY - SLURMDB_CLASS_CAPACITY - SLURMDB_CLASS_CAPAPACITY - - ctypedef enum slurmdb_event_type_t: - SLURMDB_EVENT_ALL - SLURMDB_EVENT_CLUSTER - 
SLURMDB_EVENT_NODE - - ctypedef enum slurmdb_problem_type_t: - SLURMDB_PROBLEM_NOT_SET - SLURMDB_PROBLEM_ACCT_NO_ASSOC - SLURMDB_PROBLEM_ACCT_NO_USERS - SLURMDB_PROBLEM_USER_NO_ASSOC - SLURMDB_PROBLEM_USER_NO_UID - - ctypedef enum slurmdb_report_sort_t: - SLURMDB_REPORT_SORT_TIME - SLURMDB_REPORT_SORT_NAME - - ctypedef enum slurmdb_report_time_format_t: - SLURMDB_REPORT_TIME_SECS - SLURMDB_REPORT_TIME_MINS - SLURMDB_REPORT_TIME_HOURS - SLURMDB_REPORT_TIME_PERCENT - SLURMDB_REPORT_TIME_SECS_PER - SLURMDB_REPORT_TIME_MINS_PER - SLURMDB_REPORT_TIME_HOURS_PER - - ctypedef enum slurmdb_resource_type_t: - SLURMDB_RESOURCE_NOTSET - SLURMDB_RESOURCE_LICENSE - - ctypedef enum slurmdb_update_type_t: - SLURMDB_UPDATE_NOTSET - SLURMDB_ADD_USER - SLURMDB_ADD_ASSOC - SLURMDB_ADD_COORD - SLURMDB_MODIFY_USER - SLURMDB_MODIFY_ASSOC - SLURMDB_REMOVE_USER - SLURMDB_REMOVE_ASSOC - SLURMDB_REMOVE_COORD - SLURMDB_ADD_QOS - SLURMDB_REMOVE_QOS - SLURMDB_MODIFY_QOS - SLURMDB_ADD_WCKEY - SLURMDB_REMOVE_WCKEY - SLURMDB_MODIFY_WCKEY - SLURMDB_ADD_CLUSTER - SLURMDB_REMOVE_CLUSTER - SLURMDB_REMOVE_ASSOC_USAGE - SLURMDB_ADD_RES - SLURMDB_REMOVE_RES - SLURMDB_MODIFY_RES - SLURMDB_REMOVE_QOS_USAGE - SLURMDB_ADD_TRES - SLURMDB_UPDATE_FEDS - - cpdef enum cluster_fed_states: - CLUSTER_FED_STATE_NA - CLUSTER_FED_STATE_ACTIVE - CLUSTER_FED_STATE_INACTIVE - - ctypedef struct slurmdb_tres_rec_t: - uint64_t alloc_secs - uint32_t rec_count - uint64_t count - uint32_t id - char* name - char* type - - ctypedef struct slurmdb_assoc_cond_t: - List acct_list - List cluster_list - List def_qos_id_list - List format_list - List id_list - uint16_t only_defs - List parent_acct_list - List partition_list - List qos_list - time_t usage_end - time_t usage_start - List user_list - uint16_t with_usage - uint16_t with_deleted - uint16_t with_raw_qos - uint16_t with_sub_accts - uint16_t without_parent_info - uint16_t without_parent_limits - - ctypedef struct slurmdb_job_cond_t: - List acct_list - List associd_list - List 
cluster_list - List constraint_list - uint32_t cpus_max - uint32_t cpus_min - uint32_t db_flags - int32_t exitcode - uint32_t flags - List format_list - List groupid_list - List jobname_list - uint32_t nodes_max - uint32_t nodes_min - List partition_list - List qos_list - List reason_list - List resv_list - List resvid_list - List state_list - List step_list - uint32_t timelimit_max - uint32_t timelimit_min - time_t usage_end - time_t usage_start - char* used_nodes - List userid_list - List wckey_list - - ctypedef struct slurmdb_stats_t: - double act_cpufreq - uint64_t consumed_energy - char* tres_usage_in_ave - char* tres_usage_in_max - char* tres_usage_in_max_nodeid - char* tres_usage_in_max_taskid - char* tres_usage_in_min - char* tres_usage_in_min_nodeid - char* tres_usage_in_min_taskid - char* tres_usage_in_tot - char* tres_usage_out_ave - char* tres_usage_out_max - char* tres_usage_out_max_nodeid - char* tres_usage_out_max_taskid - char* tres_usage_out_min - char* tres_usage_out_min_nodeid - char* tres_usage_out_min_taskid - char* tres_usage_out_tot - - ctypedef struct slurmdb_account_cond_t: - slurmdb_assoc_cond_t* assoc_cond - List description_list - List organization_list - uint16_t with_assocs - uint16_t with_coords - uint16_t with_deleted - - cpdef enum: - SLURMDB_ACCT_FLAG_NONE - SLURMDB_ACCT_FLAG_DELETED - - ctypedef struct slurmdb_account_rec_t: - List assoc_list - List coordinators - char* description - uint32_t flags - char* name - char* organization - - ctypedef struct slurmdb_accounting_rec_t: - uint64_t alloc_secs - uint32_t id - time_t period_start - slurmdb_tres_rec_t tres_rec - - ctypedef struct slurmdb_archive_cond_t: - char* archive_dir - char* archive_script - slurmdb_job_cond_t* job_cond - uint32_t purge_event - uint32_t purge_job - uint32_t purge_resv - uint32_t purge_step - uint32_t purge_suspend - uint32_t purge_txn - uint32_t purge_usage - - ctypedef struct slurmdb_archive_rec_t: - char* archive_file - char* insert - - ctypedef struct 
slurmdb_tres_cond_t: - uint64_t count - List format_list - List id_list - List name_list - List type_list - uint16_t with_deleted - - ctypedef slurmdb_assoc_usage slurmdb_assoc_usage_t - - ctypedef slurmdb_bf_usage slurmdb_bf_usage_t - - ctypedef slurmdb_user_rec slurmdb_user_rec_t - - cdef struct slurmdb_assoc_rec: - List accounting_list - char* acct - slurmdb_assoc_rec* assoc_next - slurmdb_assoc_rec* assoc_next_id - slurmdb_bf_usage_t* bf_usage - char* cluster - uint32_t def_qos_id - uint16_t flags - uint32_t grp_jobs - uint32_t grp_jobs_accrue - uint32_t grp_submit_jobs - char* grp_tres - uint64_t* grp_tres_ctld - char* grp_tres_mins - uint64_t* grp_tres_mins_ctld - char* grp_tres_run_mins - uint64_t* grp_tres_run_mins_ctld - uint32_t grp_wall - uint32_t id - uint16_t is_def - slurmdb_assoc_usage_t* leaf_usage - uint32_t lft - uint32_t max_jobs - uint32_t max_jobs_accrue - uint32_t max_submit_jobs - char* max_tres_mins_pj - uint64_t* max_tres_mins_ctld - char* max_tres_run_mins - uint64_t* max_tres_run_mins_ctld - char* max_tres_pj - uint64_t* max_tres_ctld - char* max_tres_pn - uint64_t* max_tres_pn_ctld - uint32_t max_wall_pj - uint32_t min_prio_thresh - char* parent_acct - uint32_t parent_id - char* partition - uint32_t priority - List qos_list - uint32_t rgt - uint32_t shares_raw - uint32_t uid - slurmdb_assoc_usage_t* usage - char* user - slurmdb_user_rec_t* user_rec - - ctypedef slurmdb_assoc_rec slurmdb_assoc_rec_t - - cdef struct slurmdb_assoc_usage: - uint32_t accrue_cnt - List children_list - bitstr_t* grp_node_bitmap - uint16_t* grp_node_job_cnt - uint64_t* grp_used_tres - uint64_t* grp_used_tres_run_secs - double grp_used_wall - double fs_factor - uint32_t level_shares - slurmdb_assoc_rec_t* parent_assoc_ptr - double priority_norm - slurmdb_assoc_rec_t* fs_assoc_ptr - double shares_norm - uint32_t tres_cnt - long double usage_efctv - long double usage_norm - long double usage_raw - long double* usage_tres_raw - uint32_t used_jobs - uint32_t 
used_submit_jobs - long double level_fs - bitstr_t* valid_qos - - cdef struct slurmdb_bf_usage: - uint64_t count - time_t last_sched - - ctypedef struct slurmdb_cluster_cond_t: - uint16_t classification - List cluster_list - List federation_list - uint32_t flags - List format_list - List plugin_id_select_list - List rpc_version_list - time_t usage_end - time_t usage_start - uint16_t with_deleted - uint16_t with_usage - - ctypedef struct slurmdb_cluster_fed_t: - List feature_list - uint32_t id - char* name - void* recv - void* send - uint32_t state - bool sync_recvd - bool sync_sent - - cdef struct slurmdb_cluster_rec: - List accounting_list - uint16_t classification - time_t comm_fail_time - # slurm_addr_t control_addr incomplete type complaint - char* control_host - uint32_t control_port - uint16_t dimensions - int* dim_size - slurmdb_cluster_fed_t fed - uint32_t flags - # pthread_mutex_t lock incomplete type complaint - char* name - char* nodes - uint32_t plugin_id_select - slurmdb_assoc_rec_t* root_assoc - uint16_t rpc_version - List send_rpc - char* tres_str - - ctypedef struct slurmdb_cluster_accounting_rec_t: - uint64_t alloc_secs - uint64_t down_secs - uint64_t idle_secs - uint64_t over_secs - uint64_t pdown_secs - time_t period_start - uint64_t plan_secs - slurmdb_tres_rec_t tres_rec - - ctypedef struct slurmdb_clus_res_rec_t: - char* cluster - uint16_t percent_allowed - - ctypedef struct slurmdb_coord_rec_t: - char* name - uint16_t direct - - ctypedef struct slurmdb_event_cond_t: - List cluster_list - uint32_t cond_flags - uint32_t cpus_max - uint32_t cpus_min - uint16_t event_type - List format_list - char* node_list - time_t period_end - time_t period_start - List reason_list - List reason_uid_list - List state_list - - ctypedef struct slurmdb_event_rec_t: - char* cluster - char* cluster_nodes - uint16_t event_type - char* node_name - time_t period_end - time_t period_start - char* reason - uint32_t reason_uid - uint32_t state - char* tres_str - - 
ctypedef struct slurmdb_federation_cond_t: - List cluster_list - List federation_list - List format_list - uint16_t with_deleted - - ctypedef struct slurmdb_federation_rec_t: - char* name - uint32_t flags - List cluster_list - - ctypedef struct slurmdb_job_rec_t: - char* account - char* admin_comment - uint32_t alloc_nodes - uint32_t array_job_id - uint32_t array_max_tasks - uint32_t array_task_id - char* array_task_str - uint32_t associd - char* blockid - char* cluster - char* constraints - char* container - uint64_t db_index - uint32_t derived_ec - char* derived_es - uint32_t elapsed - time_t eligible - time_t end - char* env - uint32_t exitcode - uint32_t flags - void* first_step_ptr - uint32_t gid - uint32_t het_job_id - uint32_t het_job_offset - uint32_t jobid - char* jobname - uint32_t lft - char* mcs_label - char* nodes - char* partition - uint32_t priority - uint32_t qosid - uint32_t req_cpus - uint64_t req_mem - uint32_t requid - uint32_t resvid - char* resv_name - char* script - uint32_t show_full - time_t start - uint32_t state - uint32_t state_reason_prev - List steps - time_t submit - char* submit_line - uint32_t suspended - char* system_comment - uint64_t sys_cpu_sec - uint64_t sys_cpu_usec - uint32_t timelimit - uint64_t tot_cpu_sec - uint64_t tot_cpu_usec - char* tres_alloc_str - char* tres_req_str - uint32_t uid - char* used_gres - char* user - uint64_t user_cpu_sec - uint64_t user_cpu_usec - char* wckey - uint32_t wckeyid - char* work_dir - - ctypedef struct slurmdb_qos_usage_t: - uint32_t accrue_cnt - List acct_limit_list - List job_list - bitstr_t* grp_node_bitmap - uint16_t* grp_node_job_cnt - uint32_t grp_used_jobs - uint32_t grp_used_submit_jobs - uint64_t* grp_used_tres - uint64_t* grp_used_tres_run_secs - double grp_used_wall - double norm_priority - uint32_t tres_cnt - long double usage_raw - long double* usage_tres_raw - List user_limit_list - - ctypedef struct slurmdb_qos_rec_t: - char* description - uint32_t id - uint32_t flags - 
uint32_t grace_time - uint32_t grp_jobs_accrue - uint32_t grp_jobs - uint32_t grp_submit_jobs - char* grp_tres - uint64_t* grp_tres_ctld - char* grp_tres_mins - uint64_t* grp_tres_mins_ctld - char* grp_tres_run_mins - uint64_t* grp_tres_run_mins_ctld - uint32_t grp_wall - double limit_factor - uint32_t max_jobs_pa - uint32_t max_jobs_pu - uint32_t max_jobs_accrue_pa - uint32_t max_jobs_accrue_pu - uint32_t max_submit_jobs_pa - uint32_t max_submit_jobs_pu - char* max_tres_mins_pj - uint64_t* max_tres_mins_pj_ctld - char* max_tres_pa - uint64_t* max_tres_pa_ctld - char* max_tres_pj - uint64_t* max_tres_pj_ctld - char* max_tres_pn - uint64_t* max_tres_pn_ctld - char* max_tres_pu - uint64_t* max_tres_pu_ctld - char* max_tres_run_mins_pa - uint64_t* max_tres_run_mins_pa_ctld - char* max_tres_run_mins_pu - uint64_t* max_tres_run_mins_pu_ctld - uint32_t max_wall_pj - uint32_t min_prio_thresh - char* min_tres_pj - uint64_t* min_tres_pj_ctld - char* name - bitstr_t* preempt_bitstr - List preempt_list - uint16_t preempt_mode - uint32_t preempt_exempt_time - uint32_t priority - slurmdb_qos_usage_t* usage - double usage_factor - double usage_thres - time_t blocked_until - - ctypedef struct slurmdb_qos_cond_t: - List description_list - List id_list - List format_list - List name_list - uint16_t preempt_mode - uint16_t with_deleted - - ctypedef struct slurmdb_reservation_cond_t: - List cluster_list - uint64_t flags - List format_list - List id_list - List name_list - char* nodes - time_t time_end - time_t time_start - uint16_t with_usage - - ctypedef struct slurmdb_reservation_rec_t: - char* assocs - char* cluster - uint64_t flags - uint32_t id - char* name - char* nodes - char* node_inx - time_t time_end - time_t time_start - time_t time_start_prev - char* tres_str - double unused_wall - List tres_list - - ctypedef struct slurmdb_step_rec_t: - char* container - uint32_t elapsed - time_t end - int32_t exitcode - slurmdb_job_rec_t* job_ptr - uint32_t nnodes - char* nodes - 
uint32_t ntasks - char* pid_str - uint32_t req_cpufreq_min - uint32_t req_cpufreq_max - uint32_t req_cpufreq_gov - uint32_t requid - time_t start - uint32_t state - slurmdb_stats_t stats - slurm_step_id_t step_id - char* stepname - char* submit_line - uint32_t suspended - uint64_t sys_cpu_sec - uint32_t sys_cpu_usec - uint32_t task_dist - uint64_t tot_cpu_sec - uint32_t tot_cpu_usec - char* tres_alloc_str - uint64_t user_cpu_sec - uint32_t user_cpu_usec - - ctypedef struct slurmdb_res_cond_t: - List cluster_list - List description_list - uint32_t flags - List format_list - List id_list - List manager_list - List name_list - List percent_list - List server_list - List type_list - uint16_t with_deleted - uint16_t with_clusters - - ctypedef struct slurmdb_res_rec_t: - List clus_res_list - slurmdb_clus_res_rec_t* clus_res_rec - uint32_t count - char* description - uint32_t flags - uint32_t id - char* manager - char* name - uint16_t percent_used - char* server - uint32_t type - - ctypedef struct slurmdb_txn_cond_t: - List acct_list - List action_list - List actor_list - List cluster_list - List format_list - List id_list - List info_list - List name_list - time_t time_end - time_t time_start - List user_list - uint16_t with_assoc_info - - ctypedef struct slurmdb_txn_rec_t: - char* accts - uint16_t action - char* actor_name - char* clusters - uint32_t id - char* set_info - time_t timestamp - char* users - char* where_query - - ctypedef struct slurmdb_used_limits_t: - uint32_t accrue_cnt - char* acct - uint32_t jobs - uint32_t submit_jobs - uint64_t* tres - uint64_t* tres_run_mins - bitstr_t* node_bitmap - uint16_t* node_job_cnt - uint32_t uid - - ctypedef struct slurmdb_user_cond_t: - uint16_t admin_level - slurmdb_assoc_cond_t* assoc_cond - List def_acct_list - List def_wckey_list - uint16_t with_assocs - uint16_t with_coords - uint16_t with_deleted - uint16_t with_wckeys - uint16_t without_defaults - - cpdef enum: - SLURMDB_USER_FLAG_NONE - SLURMDB_USER_FLAG_DELETED - 
- cdef struct slurmdb_user_rec: - uint16_t admin_level - List assoc_list - slurmdb_bf_usage_t* bf_usage - List coord_accts - char* default_acct - char* default_wckey - uint32_t flags - char* name - char* old_name - uint32_t uid - List wckey_list - - ctypedef struct slurmdb_update_object_t: - List objects - uint16_t type - - ctypedef struct slurmdb_wckey_cond_t: - List cluster_list - List format_list - List id_list - List name_list - uint16_t only_defs - time_t usage_end - time_t usage_start - List user_list - uint16_t with_usage - uint16_t with_deleted - - cpdef enum: - SLURMDB_WCKEY_FLAG_NONE - SLURMDB_WCKEY_FLAG_DELETED - - ctypedef struct slurmdb_wckey_rec_t: - List accounting_list - char* cluster - uint32_t flags - uint32_t id - uint16_t is_def - char* name - uint32_t uid - char* user - - ctypedef struct slurmdb_print_tree_t: - char* name - char* print_name - char* spaces - uint16_t user - - ctypedef struct slurmdb_hierarchical_rec_t: - slurmdb_assoc_rec_t* assoc - char* sort_name - List children - - ctypedef struct slurmdb_report_assoc_rec_t: - char* acct - char* cluster - char* parent_acct - List tres_list - char* user - - ctypedef struct slurmdb_report_user_rec_t: - char* acct - List acct_list - List assoc_list - char* name - List tres_list - uid_t uid - - ctypedef struct slurmdb_report_cluster_rec_t: - List accounting_list - List assoc_list - char* name - List tres_list - List user_list - - ctypedef struct slurmdb_report_job_grouping_t: - uint32_t count - List jobs - uint32_t min_size - uint32_t max_size - List tres_list - - ctypedef struct slurmdb_report_acct_grouping_t: - char* acct - uint32_t count - List groups - uint32_t lft - uint32_t rgt - List tres_list - - ctypedef struct slurmdb_report_cluster_grouping_t: - List acct_list - char* cluster - uint32_t count - List tres_list - - cpdef enum: - DBD_ROLLUP_HOUR - DBD_ROLLUP_DAY - DBD_ROLLUP_MONTH - DBD_ROLLUP_COUNT - - ctypedef struct slurmdb_rollup_stats_t: - char* cluster_name - uint16_t count[4] - 
time_t timestamp[4] - uint64_t time_last[4] - uint64_t time_max[4] - uint64_t time_total[4] - - ctypedef struct slurmdb_rpc_obj_t: - uint32_t cnt - uint32_t id - uint64_t time - uint64_t time_ave - - ctypedef struct slurmdb_stats_rec_t: - slurmdb_rollup_stats_t* dbd_rollup_stats - List rollup_stats - List rpc_list - time_t time_start - List user_list - - slurmdb_cluster_rec_t* working_cluster_rec - - int slurmdb_accounts_add(void* db_conn, List acct_list) - - List slurmdb_accounts_get(void* db_conn, slurmdb_account_cond_t* acct_cond) - - List slurmdb_accounts_modify(void* db_conn, slurmdb_account_cond_t* acct_cond, slurmdb_account_rec_t* acct) - - List slurmdb_accounts_remove(void* db_conn, slurmdb_account_cond_t* acct_cond) - - int slurmdb_archive(void* db_conn, slurmdb_archive_cond_t* arch_cond) - - int slurmdb_archive_load(void* db_conn, slurmdb_archive_rec_t* arch_rec) - - int slurmdb_associations_add(void* db_conn, List assoc_list) - - List slurmdb_associations_get(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) - - List slurmdb_associations_modify(void* db_conn, slurmdb_assoc_cond_t* assoc_cond, slurmdb_assoc_rec_t* assoc) - - List slurmdb_associations_remove(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) - - int slurmdb_clusters_add(void* db_conn, List cluster_list) - - List slurmdb_clusters_get(void* db_conn, slurmdb_cluster_cond_t* cluster_cond) - - List slurmdb_clusters_modify(void* db_conn, slurmdb_cluster_cond_t* cluster_cond, slurmdb_cluster_rec_t* cluster) - - List slurmdb_clusters_remove(void* db_conn, slurmdb_cluster_cond_t* cluster_cond) - - List slurmdb_report_cluster_account_by_user(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) - - List slurmdb_report_cluster_user_by_account(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) - - List slurmdb_report_cluster_wckey_by_user(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) - - List slurmdb_report_cluster_user_by_wckey(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) - - List 
slurmdb_report_job_sizes_grouped_by_account(void* db_conn, slurmdb_job_cond_t* job_cond, List grouping_list, bool flat_view, bool acct_as_parent) - - List slurmdb_report_job_sizes_grouped_by_wckey(void* db_conn, slurmdb_job_cond_t* job_cond, List grouping_list) - - List slurmdb_report_job_sizes_grouped_by_account_then_wckey(void* db_conn, slurmdb_job_cond_t* job_cond, List grouping_list, bool flat_view, bool acct_as_parent) - - List slurmdb_report_user_top_usage(void* db_conn, slurmdb_user_cond_t* user_cond, bool group_accounts) - - void* slurmdb_connection_get(uint16_t* persist_conn_flags) - - int slurmdb_connection_close(void** db_conn) - - int slurmdb_connection_commit(void* db_conn, bool commit) - - int slurmdb_coord_add(void* db_conn, List acct_list, slurmdb_user_cond_t* user_cond) - - List slurmdb_coord_remove(void* db_conn, List acct_list, slurmdb_user_cond_t* user_cond) - - int slurmdb_federations_add(void* db_conn, List federation_list) - - List slurmdb_federations_modify(void* db_conn, slurmdb_federation_cond_t* fed_cond, slurmdb_federation_rec_t* fed) - - List slurmdb_federations_remove(void* db_conn, slurmdb_federation_cond_t* fed_cond) - - List slurmdb_federations_get(void* db_conn, slurmdb_federation_cond_t* fed_cond) - - List slurmdb_job_modify(void* db_conn, slurmdb_job_cond_t* job_cond, slurmdb_job_rec_t* job) - - List slurmdb_jobs_get(void* db_conn, slurmdb_job_cond_t* job_cond) - - int slurmdb_jobs_fix_runaway(void* db_conn, List jobs) - - int slurmdb_jobcomp_init(char* jobcomp_loc) - - int slurmdb_jobcomp_fini() - - List slurmdb_jobcomp_jobs_get(slurmdb_job_cond_t* job_cond) - - int slurmdb_reconfig(void* db_conn) - - int slurmdb_shutdown(void* db_conn) - - int slurmdb_clear_stats(void* db_conn) - - int slurmdb_get_stats(void* db_conn, slurmdb_stats_rec_t** stats_pptr) - - List slurmdb_config_get(void* db_conn) - - List slurmdb_events_get(void* db_conn, slurmdb_event_cond_t* event_cond) - - List slurmdb_problems_get(void* db_conn, 
slurmdb_assoc_cond_t* assoc_cond) - - List slurmdb_reservations_get(void* db_conn, slurmdb_reservation_cond_t* resv_cond) - - List slurmdb_txn_get(void* db_conn, slurmdb_txn_cond_t* txn_cond) - - List slurmdb_get_info_cluster(char* cluster_names) - - int slurmdb_get_first_avail_cluster(job_desc_msg_t* req, char* cluster_names, slurmdb_cluster_rec_t** cluster_rec) - - int slurmdb_get_first_het_job_cluster(List job_req_list, char* cluster_names, slurmdb_cluster_rec_t** cluster_rec) - - void slurmdb_destroy_assoc_usage(void* object) - - void slurmdb_destroy_bf_usage(void* object) - - void slurmdb_destroy_bf_usage_members(void* object) - - void slurmdb_destroy_qos_usage(void* object) - - void slurmdb_destroy_user_rec(void* object) - - void slurmdb_destroy_account_rec(void* object) - - void slurmdb_destroy_coord_rec(void* object) - - void slurmdb_destroy_clus_res_rec(void* object) - - void slurmdb_destroy_cluster_accounting_rec(void* object) - - void slurmdb_destroy_cluster_rec(void* object) - - void slurmdb_destroy_federation_rec(void* object) - - void slurmdb_destroy_accounting_rec(void* object) - - void slurmdb_free_assoc_mgr_state_msg(void* object) - - void slurmdb_free_assoc_rec_members(slurmdb_assoc_rec_t* assoc) - - void slurmdb_destroy_assoc_rec(void* object) - - void slurmdb_destroy_event_rec(void* object) - - void slurmdb_destroy_job_rec(void* object) - - void slurmdb_free_qos_rec_members(slurmdb_qos_rec_t* qos) - - void slurmdb_destroy_qos_rec(void* object) - - void slurmdb_destroy_reservation_rec(void* object) - - void slurmdb_destroy_step_rec(void* object) - - void slurmdb_destroy_res_rec(void* object) - - void slurmdb_destroy_txn_rec(void* object) - - void slurmdb_destroy_wckey_rec(void* object) - - void slurmdb_destroy_archive_rec(void* object) - - void slurmdb_destroy_tres_rec_noalloc(void* object) - - void slurmdb_destroy_tres_rec(void* object) - - void slurmdb_destroy_report_assoc_rec(void* object) - - void slurmdb_destroy_report_user_rec(void* object) 
- - void slurmdb_destroy_report_cluster_rec(void* object) - - void slurmdb_destroy_user_cond(void* object) - - void slurmdb_destroy_account_cond(void* object) - - void slurmdb_destroy_cluster_cond(void* object) - - void slurmdb_destroy_federation_cond(void* object) - - void slurmdb_destroy_tres_cond(void* object) - - void slurmdb_destroy_assoc_cond(void* object) - - void slurmdb_destroy_event_cond(void* object) - - void slurmdb_destroy_job_cond(void* object) - - void slurmdb_destroy_qos_cond(void* object) - - void slurmdb_destroy_reservation_cond(void* object) - - void slurmdb_destroy_res_cond(void* object) - - void slurmdb_destroy_txn_cond(void* object) - - void slurmdb_destroy_wckey_cond(void* object) - - void slurmdb_destroy_archive_cond(void* object) - - void slurmdb_destroy_update_object(void* object) - - void slurmdb_destroy_used_limits(void* object) - - void slurmdb_destroy_print_tree(void* object) - - void slurmdb_destroy_hierarchical_rec(void* object) - - void slurmdb_destroy_report_job_grouping(void* object) - - void slurmdb_destroy_report_acct_grouping(void* object) - - void slurmdb_destroy_report_cluster_grouping(void* object) - - void slurmdb_destroy_rpc_obj(void* object) - - void slurmdb_destroy_rollup_stats(void* object) - - void slurmdb_free_stats_rec_members(void* object) - - void slurmdb_destroy_stats_rec(void* object) - - void slurmdb_free_slurmdb_stats_members(slurmdb_stats_t* stats) - - void slurmdb_destroy_slurmdb_stats(slurmdb_stats_t* stats) - - void slurmdb_init_assoc_rec(slurmdb_assoc_rec_t* assoc, bool free_it) - - void slurmdb_init_clus_res_rec(slurmdb_clus_res_rec_t* clus_res, bool free_it) - - void slurmdb_init_cluster_rec(slurmdb_cluster_rec_t* cluster, bool free_it) - - void slurmdb_init_federation_rec(slurmdb_federation_rec_t* federation, bool free_it) - - void slurmdb_init_qos_rec(slurmdb_qos_rec_t* qos, bool free_it, uint32_t init_val) - - void slurmdb_init_res_rec(slurmdb_res_rec_t* res, bool free_it) - - void 
slurmdb_init_wckey_rec(slurmdb_wckey_rec_t* wckey, bool free_it) - - void slurmdb_init_tres_cond(slurmdb_tres_cond_t* tres, bool free_it) - - void slurmdb_init_cluster_cond(slurmdb_cluster_cond_t* cluster, bool free_it) - - void slurmdb_init_federation_cond(slurmdb_federation_cond_t* federation, bool free_it) - - void slurmdb_init_res_cond(slurmdb_res_cond_t* cluster, bool free_it) - - List slurmdb_get_hierarchical_sorted_assoc_list(List assoc_list, bool use_lft) - - List slurmdb_get_acct_hierarchical_rec_list(List assoc_list) - - char* slurmdb_tree_name_get(char* name, char* parent, List tree_list) - - int slurmdb_res_add(void* db_conn, List res_list) - - List slurmdb_res_get(void* db_conn, slurmdb_res_cond_t* res_cond) - - List slurmdb_res_modify(void* db_conn, slurmdb_res_cond_t* res_cond, slurmdb_res_rec_t* res) - - List slurmdb_res_remove(void* db_conn, slurmdb_res_cond_t* res_cond) - - int slurmdb_qos_add(void* db_conn, List qos_list) - - List slurmdb_qos_get(void* db_conn, slurmdb_qos_cond_t* qos_cond) - - List slurmdb_qos_modify(void* db_conn, slurmdb_qos_cond_t* qos_cond, slurmdb_qos_rec_t* qos) - - List slurmdb_qos_remove(void* db_conn, slurmdb_qos_cond_t* qos_cond) - - int slurmdb_tres_add(void* db_conn, List tres_list) - - List slurmdb_tres_get(void* db_conn, slurmdb_tres_cond_t* tres_cond) - - int slurmdb_usage_get(void* db_conn, void* in_, int type, time_t start, time_t end) - - int slurmdb_usage_roll(void* db_conn, time_t sent_start, time_t sent_end, uint16_t archive_data, List* rollup_stats_list_in) - - int slurmdb_users_add(void* db_conn, List user_list) - - List slurmdb_users_get(void* db_conn, slurmdb_user_cond_t* user_cond) - - List slurmdb_users_modify(void* db_conn, slurmdb_user_cond_t* user_cond, slurmdb_user_rec_t* user) - - List slurmdb_users_remove(void* db_conn, slurmdb_user_cond_t* user_cond) - - int slurmdb_wckeys_add(void* db_conn, List wckey_list) - - List slurmdb_wckeys_get(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) - - List 
slurmdb_wckeys_modify(void* db_conn, slurmdb_wckey_cond_t* wckey_cond, slurmdb_wckey_rec_t* wckey) - - List slurmdb_wckeys_remove(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) diff --git a/pyslurm/slurm/slurm_errno.h.pxi b/pyslurm/slurm/slurm_errno.h.pxi new file mode 100644 index 00000000..3ed2d122 --- /dev/null +++ b/pyslurm/slurm/slurm_errno.h.pxi @@ -0,0 +1,339 @@ +############################################################################## +# NOTICE: This File has been generated by scripts/pyslurm_bindgen.py, which +# uses the autopxd2 tool in order to generate Cython compatible definitions +# from the slurm_errno.h C-Header file. Basically, this can be seen as a modified +# version of the original header, with the following changes: +# +# * have the correct cython syntax for type definitions, e.g. "typedef struct +# " is converted to "ctypedef struct " +# * C-Macros are listed with their appropriate uint type +# * Any definitions that cannot be translated are not included in this file +# +# Generated on 2023-05-06T18:02:46.304407 +# +# The Original Copyright notice from slurm_errno.h has been included +# below: +# +############################################################################## +# slurm_errno.h - error codes and functions for slurm +############################################################################## +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008-2009 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Kevin Tew , +# Jim Garlick , et. al. +# CODE-OCEC-09-009. All rights reserved. +# +# Slurm is licensed under the GNU GPLv2. 
For the full text of Slurm's License, +# please see here: pyslurm/slurm/SLURM_LICENSE +# +# Please, as mentioned above, also have a look at Slurm's DISCLAIMER under +# pyslurm/slurm/SLURM_DISCLAIMER +############################################################################## +# +# Copyright (C) 2023 PySlurm Developers (Modifications as described above) +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +cdef extern from "slurm/slurm_errno.h": + + uint8_t SLURM_SUCCESS + uint8_t ESPANK_SUCCESS + int8_t SLURM_ERROR + +cdef extern from "slurm/slurm_errno.h": + + ctypedef enum slurm_err_t: + SLURM_UNEXPECTED_MSG_ERROR + SLURM_COMMUNICATIONS_CONNECTION_ERROR + SLURM_COMMUNICATIONS_SEND_ERROR + SLURM_COMMUNICATIONS_RECEIVE_ERROR + SLURM_COMMUNICATIONS_SHUTDOWN_ERROR + SLURM_PROTOCOL_VERSION_ERROR + SLURM_PROTOCOL_IO_STREAM_VERSION_ERROR + SLURM_PROTOCOL_AUTHENTICATION_ERROR + SLURM_PROTOCOL_INSANE_MSG_LENGTH + SLURM_MPI_PLUGIN_NAME_INVALID + SLURM_MPI_PLUGIN_PRELAUNCH_SETUP_FAILED + SLURM_PLUGIN_NAME_INVALID + SLURM_UNKNOWN_FORWARD_ADDR + SLURM_COMMUNICATIONS_MISSING_SOCKET_ERROR + SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR + SLURMCTLD_COMMUNICATIONS_SEND_ERROR + SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR + SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR + SLURMCTLD_COMMUNICATIONS_BACKOFF + SLURM_NO_CHANGE_IN_DATA + ESLURM_INVALID_PARTITION_NAME + ESLURM_DEFAULT_PARTITION_NOT_SET + ESLURM_ACCESS_DENIED + ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP + ESLURM_REQUESTED_NODES_NOT_IN_PARTITION + ESLURM_TOO_MANY_REQUESTED_CPUS + ESLURM_INVALID_NODE_COUNT + ESLURM_ERROR_ON_DESC_TO_RECORD_COPY + ESLURM_JOB_MISSING_SIZE_SPECIFICATION + ESLURM_JOB_SCRIPT_MISSING + ESLURM_USER_ID_MISSING + ESLURM_DUPLICATE_JOB_ID + ESLURM_PATHNAME_TOO_LONG + ESLURM_NOT_TOP_PRIORITY + ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE + ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE + ESLURM_NODES_BUSY + ESLURM_INVALID_JOB_ID + ESLURM_INVALID_NODE_NAME + ESLURM_WRITING_TO_FILE + ESLURM_TRANSITION_STATE_NO_UPDATE + ESLURM_ALREADY_DONE + ESLURM_INTERCONNECT_FAILURE + ESLURM_BAD_DIST + ESLURM_JOB_PENDING + ESLURM_BAD_TASK_COUNT + ESLURM_INVALID_JOB_CREDENTIAL + ESLURM_IN_STANDBY_MODE + ESLURM_INVALID_NODE_STATE + ESLURM_INVALID_FEATURE + ESLURM_INVALID_AUTHTYPE_CHANGE + ESLURM_ACTIVE_FEATURE_NOT_SUBSET + ESLURM_INVALID_SCHEDTYPE_CHANGE + ESLURM_INVALID_SELECTTYPE_CHANGE + ESLURM_INVALID_SWITCHTYPE_CHANGE + ESLURM_FRAGMENTATION 
+ ESLURM_NOT_SUPPORTED + ESLURM_DISABLED + ESLURM_DEPENDENCY + ESLURM_BATCH_ONLY + ESLURM_LICENSES_UNAVAILABLE + ESLURM_JOB_HELD + ESLURM_INVALID_CRED_TYPE_CHANGE + ESLURM_INVALID_TASK_MEMORY + ESLURM_INVALID_ACCOUNT + ESLURM_INVALID_PARENT_ACCOUNT + ESLURM_SAME_PARENT_ACCOUNT + ESLURM_INVALID_LICENSES + ESLURM_NEED_RESTART + ESLURM_ACCOUNTING_POLICY + ESLURM_INVALID_TIME_LIMIT + ESLURM_RESERVATION_ACCESS + ESLURM_RESERVATION_INVALID + ESLURM_INVALID_TIME_VALUE + ESLURM_RESERVATION_BUSY + ESLURM_RESERVATION_NOT_USABLE + ESLURM_INVALID_WCKEY + ESLURM_RESERVATION_OVERLAP + ESLURM_PORTS_BUSY + ESLURM_PORTS_INVALID + ESLURM_PROLOG_RUNNING + ESLURM_NO_STEPS + ESLURM_INVALID_QOS + ESLURM_QOS_PREEMPTION_LOOP + ESLURM_NODE_NOT_AVAIL + ESLURM_INVALID_CPU_COUNT + ESLURM_PARTITION_NOT_AVAIL + ESLURM_CIRCULAR_DEPENDENCY + ESLURM_INVALID_GRES + ESLURM_JOB_NOT_PENDING + ESLURM_QOS_THRES + ESLURM_PARTITION_IN_USE + ESLURM_STEP_LIMIT + ESLURM_JOB_SUSPENDED + ESLURM_CAN_NOT_START_IMMEDIATELY + ESLURM_INTERCONNECT_BUSY + ESLURM_RESERVATION_EMPTY + ESLURM_INVALID_ARRAY + ESLURM_RESERVATION_NAME_DUP + ESLURM_JOB_STARTED + ESLURM_JOB_FINISHED + ESLURM_JOB_NOT_RUNNING + ESLURM_JOB_NOT_PENDING_NOR_RUNNING + ESLURM_JOB_NOT_SUSPENDED + ESLURM_JOB_NOT_FINISHED + ESLURM_TRIGGER_DUP + ESLURM_INTERNAL + ESLURM_INVALID_BURST_BUFFER_CHANGE + ESLURM_BURST_BUFFER_PERMISSION + ESLURM_BURST_BUFFER_LIMIT + ESLURM_INVALID_BURST_BUFFER_REQUEST + ESLURM_PRIO_RESET_FAIL + ESLURM_CANNOT_MODIFY_CRON_JOB + ESLURM_INVALID_JOB_CONTAINER_CHANGE + ESLURM_CANNOT_CANCEL_CRON_JOB + ESLURM_INVALID_MCS_LABEL + ESLURM_BURST_BUFFER_WAIT + ESLURM_PARTITION_DOWN + ESLURM_DUPLICATE_GRES + ESLURM_JOB_SETTING_DB_INX + ESLURM_RSV_ALREADY_STARTED + ESLURM_SUBMISSIONS_DISABLED + ESLURM_NOT_HET_JOB + ESLURM_NOT_HET_JOB_LEADER + ESLURM_NOT_WHOLE_HET_JOB + ESLURM_CORE_RESERVATION_UPDATE + ESLURM_DUPLICATE_STEP_ID + ESLURM_INVALID_CORE_CNT + ESLURM_X11_NOT_AVAIL + ESLURM_GROUP_ID_MISSING + ESLURM_BATCH_CONSTRAINT + 
ESLURM_INVALID_TRES + ESLURM_INVALID_TRES_BILLING_WEIGHTS + ESLURM_INVALID_JOB_DEFAULTS + ESLURM_RESERVATION_MAINT + ESLURM_INVALID_GRES_TYPE + ESLURM_REBOOT_IN_PROGRESS + ESLURM_MULTI_KNL_CONSTRAINT + ESLURM_UNSUPPORTED_GRES + ESLURM_INVALID_NICE + ESLURM_INVALID_TIME_MIN_LIMIT + ESLURM_DEFER + ESLURM_CONFIGLESS_DISABLED + ESLURM_ENVIRONMENT_MISSING + ESLURM_RESERVATION_NO_SKIP + ESLURM_RESERVATION_USER_GROUP + ESLURM_PARTITION_ASSOC + ESLURM_IN_STANDBY_USE_BACKUP + ESLURM_BAD_THREAD_PER_CORE + ESLURM_INVALID_PREFER + ESLURM_INSUFFICIENT_GRES + ESLURM_INVALID_CONTAINER_ID + ESLURM_EMPTY_JOB_ID + ESLURM_INVALID_JOB_ID_ZERO + ESLURM_INVALID_JOB_ID_NEGATIVE + ESLURM_INVALID_JOB_ID_TOO_LARGE + ESLURM_INVALID_JOB_ID_NON_NUMERIC + ESLURM_EMPTY_JOB_ARRAY_ID + ESLURM_INVALID_JOB_ARRAY_ID_NEGATIVE + ESLURM_INVALID_JOB_ARRAY_ID_TOO_LARGE + ESLURM_INVALID_JOB_ARRAY_ID_NON_NUMERIC + ESLURM_INVALID_HET_JOB_AND_ARRAY + ESLURM_EMPTY_HET_JOB_COMP + ESLURM_INVALID_HET_JOB_COMP_NEGATIVE + ESLURM_INVALID_HET_JOB_COMP_TOO_LARGE + ESLURM_INVALID_HET_JOB_COMP_NON_NUMERIC + ESLURM_EMPTY_STEP_ID + ESLURM_INVALID_STEP_ID_NEGATIVE + ESLURM_INVALID_STEP_ID_TOO_LARGE + ESLURM_INVALID_STEP_ID_NON_NUMERIC + ESLURM_EMPTY_HET_STEP + ESLURM_INVALID_HET_STEP_ZERO + ESLURM_INVALID_HET_STEP_NEGATIVE + ESLURM_INVALID_HET_STEP_TOO_LARGE + ESLURM_INVALID_HET_STEP_NON_NUMERIC + ESLURM_INVALID_HET_STEP_JOB + ESLURM_JOB_TIMEOUT_KILLED + ESLURM_JOB_NODE_FAIL_KILLED + ESPANK_ERROR + ESPANK_BAD_ARG + ESPANK_NOT_TASK + ESPANK_ENV_EXISTS + ESPANK_ENV_NOEXIST + ESPANK_NOSPACE + ESPANK_NOT_REMOTE + ESPANK_NOEXIST + ESPANK_NOT_EXECD + ESPANK_NOT_AVAIL + ESPANK_NOT_LOCAL + ESLURMD_KILL_TASK_FAILED + ESLURMD_KILL_JOB_ALREADY_COMPLETE + ESLURMD_INVALID_ACCT_FREQ + ESLURMD_INVALID_JOB_CREDENTIAL + ESLURMD_CREDENTIAL_EXPIRED + ESLURMD_CREDENTIAL_REVOKED + ESLURMD_CREDENTIAL_REPLAYED + ESLURMD_CREATE_BATCH_DIR_ERROR + ESLURMD_SETUP_ENVIRONMENT_ERROR + ESLURMD_SET_UID_OR_GID_ERROR + ESLURMD_EXECVE_FAILED + 
ESLURMD_IO_ERROR + ESLURMD_PROLOG_FAILED + ESLURMD_EPILOG_FAILED + ESLURMD_TOOMANYSTEPS + ESLURMD_STEP_EXISTS + ESLURMD_JOB_NOTRUNNING + ESLURMD_STEP_SUSPENDED + ESLURMD_STEP_NOTSUSPENDED + ESLURMD_INVALID_SOCKET_NAME_LEN + ESLURMD_CONTAINER_RUNTIME_INVALID + ESLURMD_CPU_BIND_ERROR + ESLURMD_CPU_LAYOUT_ERROR + ESLURM_PROTOCOL_INCOMPLETE_PACKET + SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT + SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT + ESLURM_AUTH_CRED_INVALID + ESLURM_AUTH_BADARG + ESLURM_AUTH_UNPACK + ESLURM_AUTH_SKIP + ESLURM_AUTH_UNABLE_TO_GENERATE_TOKEN + ESLURM_DB_CONNECTION + ESLURM_JOBS_RUNNING_ON_ASSOC + ESLURM_CLUSTER_DELETED + ESLURM_ONE_CHANGE + ESLURM_BAD_NAME + ESLURM_OVER_ALLOCATE + ESLURM_RESULT_TOO_LARGE + ESLURM_DB_QUERY_TOO_WIDE + ESLURM_DB_CONNECTION_INVALID + ESLURM_NO_REMOVE_DEFAULT_ACCOUNT + ESLURM_FED_CLUSTER_MAX_CNT + ESLURM_FED_CLUSTER_MULTIPLE_ASSIGNMENT + ESLURM_INVALID_CLUSTER_FEATURE + ESLURM_JOB_NOT_FEDERATED + ESLURM_INVALID_CLUSTER_NAME + ESLURM_FED_JOB_LOCK + ESLURM_FED_NO_VALID_CLUSTERS + ESLURM_MISSING_TIME_LIMIT + ESLURM_INVALID_KNL + ESLURM_PLUGIN_INVALID + ESLURM_PLUGIN_INCOMPLETE + ESLURM_PLUGIN_NOT_LOADED + ESLURM_REST_INVALID_QUERY + ESLURM_REST_FAIL_PARSING + ESLURM_REST_INVALID_JOBS_DESC + ESLURM_REST_EMPTY_RESULT + ESLURM_REST_MISSING_UID + ESLURM_REST_MISSING_GID + ESLURM_DATA_PATH_NOT_FOUND + ESLURM_DATA_PTR_NULL + ESLURM_DATA_CONV_FAILED + ESLURM_DATA_REGEX_COMPILE + ESLURM_DATA_UNKNOWN_MIME_TYPE + ESLURM_DATA_TOO_LARGE + ESLURM_DATA_FLAGS_INVALID_TYPE + ESLURM_DATA_FLAGS_INVALID + ESLURM_DATA_EXPECTED_LIST + ESLURM_DATA_EXPECTED_DICT + ESLURM_DATA_AMBIGUOUS_MODIFY + ESLURM_DATA_AMBIGUOUS_QUERY + ESLURM_DATA_PARSE_NOTHING + ESLURM_CONTAINER_NOT_CONFIGURED + + ctypedef struct slurm_errtab_t: + int xe_number + char* xe_name + char* xe_message + + slurm_errtab_t slurm_errtab[] + + unsigned int slurm_errtab_size + + char* slurm_strerror(int errnum) + + void slurm_seterrno(int errnum) + + int slurm_get_errno() + + void slurm_perror(const 
char* msg) diff --git a/pyslurm/slurm/slurm_version.h.pxi b/pyslurm/slurm/slurm_version.h.pxi new file mode 100644 index 00000000..f9c4f5c6 --- /dev/null +++ b/pyslurm/slurm/slurm_version.h.pxi @@ -0,0 +1,21 @@ +# Copyright (C) 2023 PySlurm Developers +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +cdef extern from "slurm/slurm_version.h": + + uint32_t SLURM_VERSION_NUMBER diff --git a/pyslurm/slurm/slurmdb.h.pxi b/pyslurm/slurm/slurmdb.h.pxi new file mode 100644 index 00000000..d4c16e4e --- /dev/null +++ b/pyslurm/slurm/slurmdb.h.pxi @@ -0,0 +1,1195 @@ +############################################################################## +# NOTICE: This File has been generated by scripts/pyslurm_bindgen.py, which +# uses the autopxd2 tool in order to generate Cython compatible definitions +# from the slurmdb.h C-Header file. Basically, this can be seen as a modified +# version of the original header, with the following changes: +# +# * have the correct cython syntax for type definitions, e.g. 
"typedef struct +# " is converted to "ctypedef struct " +# * C-Macros are listed with their appropriate uint type +# * Any definitions that cannot be translated are not included in this file +# +# Generated on 2023-05-06T18:02:46.554956 +# +# The Original Copyright notice from slurmdb.h has been included +# below: +# +############################################################################## +# slurmdb.h - Interface codes and functions for slurm +############################################################################## +# Copyright (C) 2010 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Danny Auble da@llnl.gov, et. al. +# CODE-OCEC-09-009. All rights reserved. +# +# Slurm is licensed under the GNU GPLv2. For the full text of Slurm's License, +# please see here: pyslurm/slurm/SLURM_LICENSE +# +# Please, as mentioned above, also have a look at Slurm's DISCLAIMER under +# pyslurm/slurm/SLURM_DISCLAIMER +############################################################################## +# +# Copyright (C) 2023 PySlurm Developers (Modifications as described above) +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +cdef extern from "slurm/slurmdb.h": + + uint32_t QOS_FLAG_BASE + uint32_t QOS_FLAG_NOTSET + uint32_t QOS_FLAG_ADD + uint32_t QOS_FLAG_REMOVE + uint8_t QOS_FLAG_PART_MIN_NODE + uint8_t QOS_FLAG_PART_MAX_NODE + uint8_t QOS_FLAG_PART_TIME_LIMIT + uint8_t QOS_FLAG_ENFORCE_USAGE_THRES + uint8_t QOS_FLAG_NO_RESERVE + uint8_t QOS_FLAG_REQ_RESV + uint8_t QOS_FLAG_DENY_LIMIT + uint8_t QOS_FLAG_OVER_PART_QOS + uint16_t QOS_FLAG_NO_DECAY + uint16_t QOS_FLAG_USAGE_FACTOR_SAFE + uint32_t SLURMDB_RES_FLAG_BASE + uint32_t SLURMDB_RES_FLAG_NOTSET + uint32_t SLURMDB_RES_FLAG_ADD + uint32_t SLURMDB_RES_FLAG_REMOVE + uint8_t SLURMDB_RES_FLAG_ABSOLUTE + uint32_t FEDERATION_FLAG_BASE + uint32_t FEDERATION_FLAG_NOTSET + uint32_t FEDERATION_FLAG_ADD + uint32_t FEDERATION_FLAG_REMOVE + uint8_t CLUSTER_FED_STATE_BASE + uint16_t CLUSTER_FED_STATE_FLAGS + uint8_t CLUSTER_FED_STATE_DRAIN + uint8_t CLUSTER_FED_STATE_REMOVE + uint8_t SLURMDB_JOB_FLAG_NONE + uint8_t SLURMDB_JOB_CLEAR_SCHED + uint8_t SLURMDB_JOB_FLAG_NOTSET + uint8_t SLURMDB_JOB_FLAG_SUBMIT + uint8_t SLURMDB_JOB_FLAG_SCHED + uint8_t SLURMDB_JOB_FLAG_BACKFILL + uint8_t SLURMDB_JOB_FLAG_START_R + uint8_t JOBCOND_FLAG_DUP + uint8_t JOBCOND_FLAG_NO_STEP + uint8_t JOBCOND_FLAG_NO_TRUNC + uint8_t JOBCOND_FLAG_RUNAWAY + uint8_t JOBCOND_FLAG_WHOLE_HETJOB + uint8_t JOBCOND_FLAG_NO_WHOLE_HETJOB + uint8_t JOBCOND_FLAG_NO_WAIT + uint8_t JOBCOND_FLAG_NO_DEFAULT_USAGE + uint16_t JOBCOND_FLAG_SCRIPT + uint16_t JOBCOND_FLAG_ENV + uint16_t SLURMDB_PURGE_BASE + uint32_t SLURMDB_PURGE_FLAGS + uint32_t SLURMDB_PURGE_HOURS + uint32_t SLURMDB_PURGE_DAYS + uint32_t SLURMDB_PURGE_MONTHS + uint32_t SLURMDB_PURGE_ARCHIVE + uint32_t SLURMDB_FS_USE_PARENT + uint16_t SLURMDB_CLASSIFIED_FLAG + uint8_t SLURMDB_CLASS_BASE + uint8_t CLUSTER_FLAG_REGISTER + uint8_t CLUSTER_FLAG_A2 + uint8_t CLUSTER_FLAG_A3 + uint8_t CLUSTER_FLAG_A4 + uint8_t CLUSTER_FLAG_A5 + uint8_t CLUSTER_FLAG_A6 + uint8_t CLUSTER_FLAG_A7 + uint8_t CLUSTER_FLAG_MULTSD + uint16_t 
CLUSTER_FLAG_A9 + uint16_t CLUSTER_FLAG_FE + uint16_t CLUSTER_FLAG_CRAY + uint16_t CLUSTER_FLAG_FED + uint16_t CLUSTER_FLAG_EXT + uint8_t ASSOC_FLAG_DELETED + uint8_t ASSOC_FLAG_NO_UPDATE + uint8_t SLURMDB_EVENT_COND_OPEN + uint8_t DB_CONN_FLAG_CLUSTER_DEL + uint8_t DB_CONN_FLAG_ROLLBACK + +cdef extern from "slurm/slurmdb.h": + + ctypedef enum slurmdb_admin_level_t: + SLURMDB_ADMIN_NOTSET + SLURMDB_ADMIN_NONE + SLURMDB_ADMIN_OPERATOR + SLURMDB_ADMIN_SUPER_USER + + ctypedef enum slurmdb_classification_type_t: + SLURMDB_CLASS_NONE + SLURMDB_CLASS_CAPABILITY + SLURMDB_CLASS_CAPACITY + SLURMDB_CLASS_CAPAPACITY + + ctypedef enum slurmdb_event_type_t: + SLURMDB_EVENT_ALL + SLURMDB_EVENT_CLUSTER + SLURMDB_EVENT_NODE + + ctypedef enum slurmdb_problem_type_t: + SLURMDB_PROBLEM_NOT_SET + SLURMDB_PROBLEM_ACCT_NO_ASSOC + SLURMDB_PROBLEM_ACCT_NO_USERS + SLURMDB_PROBLEM_USER_NO_ASSOC + SLURMDB_PROBLEM_USER_NO_UID + + ctypedef enum slurmdb_report_sort_t: + SLURMDB_REPORT_SORT_TIME + SLURMDB_REPORT_SORT_NAME + + ctypedef enum slurmdb_report_time_format_t: + SLURMDB_REPORT_TIME_SECS + SLURMDB_REPORT_TIME_MINS + SLURMDB_REPORT_TIME_HOURS + SLURMDB_REPORT_TIME_PERCENT + SLURMDB_REPORT_TIME_SECS_PER + SLURMDB_REPORT_TIME_MINS_PER + SLURMDB_REPORT_TIME_HOURS_PER + + ctypedef enum slurmdb_resource_type_t: + SLURMDB_RESOURCE_NOTSET + SLURMDB_RESOURCE_LICENSE + + ctypedef enum slurmdb_update_type_t: + SLURMDB_UPDATE_NOTSET + SLURMDB_ADD_USER + SLURMDB_ADD_ASSOC + SLURMDB_ADD_COORD + SLURMDB_MODIFY_USER + SLURMDB_MODIFY_ASSOC + SLURMDB_REMOVE_USER + SLURMDB_REMOVE_ASSOC + SLURMDB_REMOVE_COORD + SLURMDB_ADD_QOS + SLURMDB_REMOVE_QOS + SLURMDB_MODIFY_QOS + SLURMDB_ADD_WCKEY + SLURMDB_REMOVE_WCKEY + SLURMDB_MODIFY_WCKEY + SLURMDB_ADD_CLUSTER + SLURMDB_REMOVE_CLUSTER + SLURMDB_REMOVE_ASSOC_USAGE + SLURMDB_ADD_RES + SLURMDB_REMOVE_RES + SLURMDB_MODIFY_RES + SLURMDB_REMOVE_QOS_USAGE + SLURMDB_ADD_TRES + SLURMDB_UPDATE_FEDS + + cdef enum cluster_fed_states: + CLUSTER_FED_STATE_NA + 
CLUSTER_FED_STATE_ACTIVE + CLUSTER_FED_STATE_INACTIVE + + ctypedef struct slurmdb_tres_rec_t: + uint64_t alloc_secs + uint32_t rec_count + uint64_t count + uint32_t id + char* name + char* type + + ctypedef struct slurmdb_assoc_cond_t: + List acct_list + List cluster_list + List def_qos_id_list + List format_list + List id_list + uint16_t only_defs + List parent_acct_list + List partition_list + List qos_list + time_t usage_end + time_t usage_start + List user_list + uint16_t with_usage + uint16_t with_deleted + uint16_t with_raw_qos + uint16_t with_sub_accts + uint16_t without_parent_info + uint16_t without_parent_limits + + ctypedef struct slurmdb_job_cond_t: + List acct_list + List associd_list + List cluster_list + List constraint_list + uint32_t cpus_max + uint32_t cpus_min + uint32_t db_flags + int32_t exitcode + uint32_t flags + List format_list + List groupid_list + List jobname_list + uint32_t nodes_max + uint32_t nodes_min + List partition_list + List qos_list + List reason_list + List resv_list + List resvid_list + List state_list + List step_list + uint32_t timelimit_max + uint32_t timelimit_min + time_t usage_end + time_t usage_start + char* used_nodes + List userid_list + List wckey_list + + ctypedef struct slurmdb_stats_t: + double act_cpufreq + uint64_t consumed_energy + char* tres_usage_in_ave + char* tres_usage_in_max + char* tres_usage_in_max_nodeid + char* tres_usage_in_max_taskid + char* tres_usage_in_min + char* tres_usage_in_min_nodeid + char* tres_usage_in_min_taskid + char* tres_usage_in_tot + char* tres_usage_out_ave + char* tres_usage_out_max + char* tres_usage_out_max_nodeid + char* tres_usage_out_max_taskid + char* tres_usage_out_min + char* tres_usage_out_min_nodeid + char* tres_usage_out_min_taskid + char* tres_usage_out_tot + + ctypedef struct slurmdb_account_cond_t: + slurmdb_assoc_cond_t* assoc_cond + List description_list + List organization_list + uint16_t with_assocs + uint16_t with_coords + uint16_t with_deleted + + cdef enum: 
+ SLURMDB_ACCT_FLAG_NONE + SLURMDB_ACCT_FLAG_DELETED + + ctypedef struct slurmdb_account_rec_t: + List assoc_list + List coordinators + char* description + uint32_t flags + char* name + char* organization + + ctypedef struct slurmdb_accounting_rec_t: + uint64_t alloc_secs + uint32_t id + time_t period_start + slurmdb_tres_rec_t tres_rec + + ctypedef struct slurmdb_archive_cond_t: + char* archive_dir + char* archive_script + slurmdb_job_cond_t* job_cond + uint32_t purge_event + uint32_t purge_job + uint32_t purge_resv + uint32_t purge_step + uint32_t purge_suspend + uint32_t purge_txn + uint32_t purge_usage + + ctypedef struct slurmdb_archive_rec_t: + char* archive_file + char* insert + + ctypedef struct slurmdb_tres_cond_t: + uint64_t count + List format_list + List id_list + List name_list + List type_list + uint16_t with_deleted + + ctypedef slurmdb_assoc_usage slurmdb_assoc_usage_t + + ctypedef slurmdb_bf_usage slurmdb_bf_usage_t + + ctypedef slurmdb_user_rec slurmdb_user_rec_t + + cdef struct slurmdb_assoc_rec: + List accounting_list + char* acct + slurmdb_assoc_rec* assoc_next + slurmdb_assoc_rec* assoc_next_id + slurmdb_bf_usage_t* bf_usage + char* cluster + char* comment + uint32_t def_qos_id + uint16_t flags + uint32_t grp_jobs + uint32_t grp_jobs_accrue + uint32_t grp_submit_jobs + char* grp_tres + uint64_t* grp_tres_ctld + char* grp_tres_mins + uint64_t* grp_tres_mins_ctld + char* grp_tres_run_mins + uint64_t* grp_tres_run_mins_ctld + uint32_t grp_wall + uint32_t id + uint16_t is_def + slurmdb_assoc_usage_t* leaf_usage + uint32_t lft + uint32_t max_jobs + uint32_t max_jobs_accrue + uint32_t max_submit_jobs + char* max_tres_mins_pj + uint64_t* max_tres_mins_ctld + char* max_tres_run_mins + uint64_t* max_tres_run_mins_ctld + char* max_tres_pj + uint64_t* max_tres_ctld + char* max_tres_pn + uint64_t* max_tres_pn_ctld + uint32_t max_wall_pj + uint32_t min_prio_thresh + char* parent_acct + uint32_t parent_id + char* partition + uint32_t priority + List 
qos_list + uint32_t rgt + uint32_t shares_raw + uint32_t uid + slurmdb_assoc_usage_t* usage + char* user + slurmdb_user_rec_t* user_rec + + ctypedef slurmdb_assoc_rec slurmdb_assoc_rec_t + + cdef struct slurmdb_assoc_usage: + uint32_t accrue_cnt + List children_list + bitstr_t* grp_node_bitmap + uint16_t* grp_node_job_cnt + uint64_t* grp_used_tres + uint64_t* grp_used_tres_run_secs + double grp_used_wall + double fs_factor + uint32_t level_shares + slurmdb_assoc_rec_t* parent_assoc_ptr + double priority_norm + slurmdb_assoc_rec_t* fs_assoc_ptr + double shares_norm + uint32_t tres_cnt + long double usage_efctv + long double usage_norm + long double usage_raw + long double* usage_tres_raw + uint32_t used_jobs + uint32_t used_submit_jobs + long double level_fs + bitstr_t* valid_qos + + cdef struct slurmdb_bf_usage: + uint64_t count + time_t last_sched + + ctypedef struct slurmdb_cluster_cond_t: + uint16_t classification + List cluster_list + List federation_list + uint32_t flags + List format_list + List plugin_id_select_list + List rpc_version_list + time_t usage_end + time_t usage_start + uint16_t with_deleted + uint16_t with_usage + + ctypedef struct slurmdb_cluster_fed_t: + List feature_list + uint32_t id + char* name + void* recv + void* send + uint32_t state + bool sync_recvd + bool sync_sent + + cdef struct slurmdb_cluster_rec: + List accounting_list + uint16_t classification + time_t comm_fail_time + slurm_addr_t control_addr + char* control_host + uint32_t control_port + uint16_t dimensions + int* dim_size + slurmdb_cluster_fed_t fed + uint32_t flags + pthread_mutex_t lock + char* name + char* nodes + uint32_t plugin_id_select + slurmdb_assoc_rec_t* root_assoc + uint16_t rpc_version + List send_rpc + char* tres_str + + ctypedef struct slurmdb_cluster_accounting_rec_t: + uint64_t alloc_secs + uint64_t down_secs + uint64_t idle_secs + uint64_t over_secs + uint64_t pdown_secs + time_t period_start + uint64_t plan_secs + slurmdb_tres_rec_t tres_rec + + ctypedef 
struct slurmdb_clus_res_rec_t: + char* cluster + uint32_t allowed + + ctypedef struct slurmdb_coord_rec_t: + char* name + uint16_t direct + + ctypedef struct slurmdb_event_cond_t: + List cluster_list + uint32_t cond_flags + uint32_t cpus_max + uint32_t cpus_min + uint16_t event_type + List format_list + char* node_list + time_t period_end + time_t period_start + List reason_list + List reason_uid_list + List state_list + + ctypedef struct slurmdb_event_rec_t: + char* cluster + char* cluster_nodes + uint16_t event_type + char* node_name + time_t period_end + time_t period_start + char* reason + uint32_t reason_uid + uint32_t state + char* tres_str + + ctypedef struct slurmdb_federation_cond_t: + List cluster_list + List federation_list + List format_list + uint16_t with_deleted + + ctypedef struct slurmdb_federation_rec_t: + char* name + uint32_t flags + List cluster_list + + ctypedef struct slurmdb_job_rec_t: + char* account + char* admin_comment + uint32_t alloc_nodes + uint32_t array_job_id + uint32_t array_max_tasks + uint32_t array_task_id + char* array_task_str + uint32_t associd + char* blockid + char* cluster + char* constraints + char* container + uint64_t db_index + uint32_t derived_ec + char* derived_es + uint32_t elapsed + time_t eligible + time_t end + char* env + uint32_t exitcode + char* extra + char* failed_node + uint32_t flags + void* first_step_ptr + uint32_t gid + uint32_t het_job_id + uint32_t het_job_offset + uint32_t jobid + char* jobname + uint32_t lft + char* licenses + char* mcs_label + char* nodes + char* partition + uint32_t priority + uint32_t qosid + uint32_t req_cpus + uint64_t req_mem + uint32_t requid + uint32_t resvid + char* resv_name + char* script + uint32_t show_full + time_t start + uint32_t state + uint32_t state_reason_prev + List steps + time_t submit + char* submit_line + uint32_t suspended + char* system_comment + uint64_t sys_cpu_sec + uint64_t sys_cpu_usec + uint32_t timelimit + uint64_t tot_cpu_sec + uint64_t 
tot_cpu_usec + char* tres_alloc_str + char* tres_req_str + uint32_t uid + char* used_gres + char* user + uint64_t user_cpu_sec + uint64_t user_cpu_usec + char* wckey + uint32_t wckeyid + char* work_dir + + ctypedef struct slurmdb_qos_usage_t: + uint32_t accrue_cnt + List acct_limit_list + List job_list + bitstr_t* grp_node_bitmap + uint16_t* grp_node_job_cnt + uint32_t grp_used_jobs + uint32_t grp_used_submit_jobs + uint64_t* grp_used_tres + uint64_t* grp_used_tres_run_secs + double grp_used_wall + double norm_priority + uint32_t tres_cnt + long double usage_raw + long double* usage_tres_raw + List user_limit_list + + ctypedef struct slurmdb_qos_rec_t: + char* description + uint32_t id + uint32_t flags + uint32_t grace_time + uint32_t grp_jobs_accrue + uint32_t grp_jobs + uint32_t grp_submit_jobs + char* grp_tres + uint64_t* grp_tres_ctld + char* grp_tres_mins + uint64_t* grp_tres_mins_ctld + char* grp_tres_run_mins + uint64_t* grp_tres_run_mins_ctld + uint32_t grp_wall + double limit_factor + uint32_t max_jobs_pa + uint32_t max_jobs_pu + uint32_t max_jobs_accrue_pa + uint32_t max_jobs_accrue_pu + uint32_t max_submit_jobs_pa + uint32_t max_submit_jobs_pu + char* max_tres_mins_pj + uint64_t* max_tres_mins_pj_ctld + char* max_tres_pa + uint64_t* max_tres_pa_ctld + char* max_tres_pj + uint64_t* max_tres_pj_ctld + char* max_tres_pn + uint64_t* max_tres_pn_ctld + char* max_tres_pu + uint64_t* max_tres_pu_ctld + char* max_tres_run_mins_pa + uint64_t* max_tres_run_mins_pa_ctld + char* max_tres_run_mins_pu + uint64_t* max_tres_run_mins_pu_ctld + uint32_t max_wall_pj + uint32_t min_prio_thresh + char* min_tres_pj + uint64_t* min_tres_pj_ctld + char* name + bitstr_t* preempt_bitstr + List preempt_list + uint16_t preempt_mode + uint32_t preempt_exempt_time + uint32_t priority + slurmdb_qos_usage_t* usage + double usage_factor + double usage_thres + time_t blocked_until + + ctypedef struct slurmdb_qos_cond_t: + List description_list + List id_list + List format_list + List 
name_list + uint16_t preempt_mode + uint16_t with_deleted + + ctypedef struct slurmdb_reservation_cond_t: + List cluster_list + uint64_t flags + List format_list + List id_list + List name_list + char* nodes + time_t time_end + time_t time_start + uint16_t with_usage + + ctypedef struct slurmdb_reservation_rec_t: + char* assocs + char* cluster + char* comment + uint64_t flags + uint32_t id + char* name + char* nodes + char* node_inx + time_t time_end + time_t time_start + time_t time_start_prev + char* tres_str + double unused_wall + List tres_list + + ctypedef struct slurmdb_step_rec_t: + char* container + uint32_t elapsed + time_t end + int32_t exitcode + slurmdb_job_rec_t* job_ptr + uint32_t nnodes + char* nodes + uint32_t ntasks + char* pid_str + uint32_t req_cpufreq_min + uint32_t req_cpufreq_max + uint32_t req_cpufreq_gov + uint32_t requid + time_t start + uint32_t state + slurmdb_stats_t stats + slurm_step_id_t step_id + char* stepname + char* submit_line + uint32_t suspended + uint64_t sys_cpu_sec + uint32_t sys_cpu_usec + uint32_t task_dist + uint64_t tot_cpu_sec + uint32_t tot_cpu_usec + char* tres_alloc_str + uint64_t user_cpu_sec + uint32_t user_cpu_usec + + ctypedef struct slurmdb_res_cond_t: + list_t* allowed_list + List cluster_list + List description_list + uint32_t flags + List format_list + List id_list + List manager_list + List name_list + List server_list + List type_list + uint16_t with_deleted + uint16_t with_clusters + + ctypedef struct slurmdb_res_rec_t: + uint32_t allocated + uint32_t last_consumed + List clus_res_list + slurmdb_clus_res_rec_t* clus_res_rec + uint32_t count + char* description + uint32_t flags + uint32_t id + time_t last_update + char* manager + char* name + char* server + uint32_t type + + ctypedef struct slurmdb_txn_cond_t: + List acct_list + List action_list + List actor_list + List cluster_list + List format_list + List id_list + List info_list + List name_list + time_t time_end + time_t time_start + List user_list + 
uint16_t with_assoc_info + + ctypedef struct slurmdb_txn_rec_t: + char* accts + uint16_t action + char* actor_name + char* clusters + uint32_t id + char* set_info + time_t timestamp + char* users + char* where_query + + ctypedef struct slurmdb_used_limits_t: + uint32_t accrue_cnt + char* acct + uint32_t jobs + uint32_t submit_jobs + uint64_t* tres + uint64_t* tres_run_mins + bitstr_t* node_bitmap + uint16_t* node_job_cnt + uint32_t uid + + ctypedef struct slurmdb_user_cond_t: + uint16_t admin_level + slurmdb_assoc_cond_t* assoc_cond + List def_acct_list + List def_wckey_list + uint16_t with_assocs + uint16_t with_coords + uint16_t with_deleted + uint16_t with_wckeys + uint16_t without_defaults + + cdef enum: + SLURMDB_USER_FLAG_NONE + SLURMDB_USER_FLAG_DELETED + + cdef struct slurmdb_user_rec: + uint16_t admin_level + List assoc_list + slurmdb_bf_usage_t* bf_usage + List coord_accts + char* default_acct + char* default_wckey + uint32_t flags + char* name + char* old_name + uint32_t uid + List wckey_list + + ctypedef struct slurmdb_update_object_t: + List objects + uint16_t type + + ctypedef struct slurmdb_wckey_cond_t: + List cluster_list + List format_list + List id_list + List name_list + uint16_t only_defs + time_t usage_end + time_t usage_start + List user_list + uint16_t with_usage + uint16_t with_deleted + + cdef enum: + SLURMDB_WCKEY_FLAG_NONE + SLURMDB_WCKEY_FLAG_DELETED + + ctypedef struct slurmdb_wckey_rec_t: + List accounting_list + char* cluster + uint32_t flags + uint32_t id + uint16_t is_def + char* name + uint32_t uid + char* user + + ctypedef struct slurmdb_print_tree_t: + char* name + char* print_name + char* spaces + uint16_t user + + ctypedef struct slurmdb_hierarchical_rec_t: + slurmdb_assoc_rec_t* assoc + char* key + char* sort_name + List children + + ctypedef struct slurmdb_report_assoc_rec_t: + char* acct + char* cluster + char* parent_acct + List tres_list + char* user + + ctypedef struct slurmdb_report_user_rec_t: + char* acct + List 
acct_list + List assoc_list + char* name + List tres_list + uid_t uid + + ctypedef struct slurmdb_report_cluster_rec_t: + List accounting_list + List assoc_list + char* name + List tres_list + List user_list + + ctypedef struct slurmdb_report_job_grouping_t: + uint32_t count + List jobs + uint32_t min_size + uint32_t max_size + List tres_list + + ctypedef struct slurmdb_report_acct_grouping_t: + char* acct + uint32_t count + List groups + uint32_t lft + uint32_t rgt + List tres_list + + ctypedef struct slurmdb_report_cluster_grouping_t: + List acct_list + char* cluster + uint32_t count + List tres_list + + cdef enum: + DBD_ROLLUP_HOUR + DBD_ROLLUP_DAY + DBD_ROLLUP_MONTH + DBD_ROLLUP_COUNT + + ctypedef struct slurmdb_rollup_stats_t: + char* cluster_name + uint16_t count[4] + time_t timestamp[4] + uint64_t time_last[4] + uint64_t time_max[4] + uint64_t time_total[4] + + ctypedef struct slurmdb_rpc_obj_t: + uint32_t cnt + uint32_t id + uint64_t time + uint64_t time_ave + + ctypedef struct slurmdb_stats_rec_t: + slurmdb_rollup_stats_t* dbd_rollup_stats + List rollup_stats + List rpc_list + time_t time_start + List user_list + + slurmdb_cluster_rec_t* working_cluster_rec + + int slurmdb_accounts_add(void* db_conn, List acct_list) + + List slurmdb_accounts_get(void* db_conn, slurmdb_account_cond_t* acct_cond) + + List slurmdb_accounts_modify(void* db_conn, slurmdb_account_cond_t* acct_cond, slurmdb_account_rec_t* acct) + + List slurmdb_accounts_remove(void* db_conn, slurmdb_account_cond_t* acct_cond) + + int slurmdb_archive(void* db_conn, slurmdb_archive_cond_t* arch_cond) + + int slurmdb_archive_load(void* db_conn, slurmdb_archive_rec_t* arch_rec) + + int slurmdb_associations_add(void* db_conn, List assoc_list) + + List slurmdb_associations_get(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) + + List slurmdb_associations_modify(void* db_conn, slurmdb_assoc_cond_t* assoc_cond, slurmdb_assoc_rec_t* assoc) + + List slurmdb_associations_remove(void* db_conn, 
slurmdb_assoc_cond_t* assoc_cond) + + int slurmdb_clusters_add(void* db_conn, List cluster_list) + + List slurmdb_clusters_get(void* db_conn, slurmdb_cluster_cond_t* cluster_cond) + + List slurmdb_clusters_modify(void* db_conn, slurmdb_cluster_cond_t* cluster_cond, slurmdb_cluster_rec_t* cluster) + + List slurmdb_clusters_remove(void* db_conn, slurmdb_cluster_cond_t* cluster_cond) + + List slurmdb_report_cluster_account_by_user(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) + + List slurmdb_report_cluster_user_by_account(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) + + List slurmdb_report_cluster_wckey_by_user(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) + + List slurmdb_report_cluster_user_by_wckey(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) + + List slurmdb_report_job_sizes_grouped_by_account(void* db_conn, slurmdb_job_cond_t* job_cond, List grouping_list, bool flat_view, bool acct_as_parent) + + List slurmdb_report_job_sizes_grouped_by_wckey(void* db_conn, slurmdb_job_cond_t* job_cond, List grouping_list) + + List slurmdb_report_job_sizes_grouped_by_account_then_wckey(void* db_conn, slurmdb_job_cond_t* job_cond, List grouping_list, bool flat_view, bool acct_as_parent) + + List slurmdb_report_user_top_usage(void* db_conn, slurmdb_user_cond_t* user_cond, bool group_accounts) + + void* slurmdb_connection_get(uint16_t* persist_conn_flags) + + int slurmdb_connection_close(void** db_conn) + + int slurmdb_connection_commit(void* db_conn, bool commit) + + int slurmdb_coord_add(void* db_conn, List acct_list, slurmdb_user_cond_t* user_cond) + + List slurmdb_coord_remove(void* db_conn, List acct_list, slurmdb_user_cond_t* user_cond) + + int slurmdb_federations_add(void* db_conn, List federation_list) + + List slurmdb_federations_modify(void* db_conn, slurmdb_federation_cond_t* fed_cond, slurmdb_federation_rec_t* fed) + + List slurmdb_federations_remove(void* db_conn, slurmdb_federation_cond_t* fed_cond) + + List slurmdb_federations_get(void* db_conn, 
slurmdb_federation_cond_t* fed_cond) + + List slurmdb_job_modify(void* db_conn, slurmdb_job_cond_t* job_cond, slurmdb_job_rec_t* job) + + List slurmdb_jobs_get(void* db_conn, slurmdb_job_cond_t* job_cond) + + int slurmdb_jobs_fix_runaway(void* db_conn, List jobs) + + int slurmdb_jobcomp_init() + + int slurmdb_jobcomp_fini() + + List slurmdb_jobcomp_jobs_get(slurmdb_job_cond_t* job_cond) + + int slurmdb_reconfig(void* db_conn) + + int slurmdb_shutdown(void* db_conn) + + int slurmdb_clear_stats(void* db_conn) + + int slurmdb_get_stats(void* db_conn, slurmdb_stats_rec_t** stats_pptr) + + List slurmdb_config_get(void* db_conn) + + List slurmdb_events_get(void* db_conn, slurmdb_event_cond_t* event_cond) + + List slurmdb_problems_get(void* db_conn, slurmdb_assoc_cond_t* assoc_cond) + + List slurmdb_reservations_get(void* db_conn, slurmdb_reservation_cond_t* resv_cond) + + List slurmdb_txn_get(void* db_conn, slurmdb_txn_cond_t* txn_cond) + + List slurmdb_get_info_cluster(char* cluster_names) + + int slurmdb_get_first_avail_cluster(job_desc_msg_t* req, char* cluster_names, slurmdb_cluster_rec_t** cluster_rec) + + int slurmdb_get_first_het_job_cluster(List job_req_list, char* cluster_names, slurmdb_cluster_rec_t** cluster_rec) + + void slurmdb_destroy_assoc_usage(void* object) + + void slurmdb_destroy_bf_usage(void* object) + + void slurmdb_destroy_bf_usage_members(void* object) + + void slurmdb_destroy_qos_usage(void* object) + + void slurmdb_destroy_user_rec(void* object) + + void slurmdb_destroy_account_rec(void* object) + + void slurmdb_destroy_coord_rec(void* object) + + void slurmdb_destroy_clus_res_rec(void* object) + + void slurmdb_destroy_cluster_accounting_rec(void* object) + + void slurmdb_destroy_cluster_rec(void* object) + + void slurmdb_destroy_federation_rec(void* object) + + void slurmdb_destroy_accounting_rec(void* object) + + void slurmdb_free_assoc_mgr_state_msg(void* object) + + void slurmdb_free_assoc_rec_members(slurmdb_assoc_rec_t* assoc) + + void 
slurmdb_destroy_assoc_rec(void* object) + + void slurmdb_destroy_event_rec(void* object) + + void slurmdb_destroy_job_rec(void* object) + + void slurmdb_free_qos_rec_members(slurmdb_qos_rec_t* qos) + + void slurmdb_destroy_qos_rec(void* object) + + void slurmdb_destroy_reservation_rec(void* object) + + void slurmdb_destroy_step_rec(void* object) + + void slurmdb_destroy_res_rec(void* object) + + void slurmdb_destroy_txn_rec(void* object) + + void slurmdb_destroy_wckey_rec(void* object) + + void slurmdb_destroy_archive_rec(void* object) + + void slurmdb_destroy_tres_rec_noalloc(void* object) + + void slurmdb_destroy_tres_rec(void* object) + + void slurmdb_destroy_report_assoc_rec(void* object) + + void slurmdb_destroy_report_user_rec(void* object) + + void slurmdb_destroy_report_cluster_rec(void* object) + + void slurmdb_destroy_user_cond(void* object) + + void slurmdb_destroy_account_cond(void* object) + + void slurmdb_destroy_cluster_cond(void* object) + + void slurmdb_destroy_federation_cond(void* object) + + void slurmdb_destroy_tres_cond(void* object) + + void slurmdb_destroy_assoc_cond(void* object) + + void slurmdb_destroy_event_cond(void* object) + + void slurmdb_destroy_job_cond(void* object) + + void slurmdb_destroy_qos_cond(void* object) + + void slurmdb_destroy_reservation_cond(void* object) + + void slurmdb_destroy_res_cond(void* object) + + void slurmdb_destroy_txn_cond(void* object) + + void slurmdb_destroy_wckey_cond(void* object) + + void slurmdb_destroy_archive_cond(void* object) + + void slurmdb_destroy_update_object(void* object) + + void slurmdb_destroy_used_limits(void* object) + + void slurmdb_destroy_print_tree(void* object) + + void slurmdb_destroy_hierarchical_rec(void* object) + + void slurmdb_destroy_report_job_grouping(void* object) + + void slurmdb_destroy_report_acct_grouping(void* object) + + void slurmdb_destroy_report_cluster_grouping(void* object) + + void slurmdb_destroy_rpc_obj(void* object) + + void 
slurmdb_destroy_rollup_stats(void* object) + + void slurmdb_free_stats_rec_members(void* object) + + void slurmdb_destroy_stats_rec(void* object) + + void slurmdb_free_slurmdb_stats_members(slurmdb_stats_t* stats) + + void slurmdb_destroy_slurmdb_stats(slurmdb_stats_t* stats) + + void slurmdb_init_assoc_rec(slurmdb_assoc_rec_t* assoc, bool free_it) + + void slurmdb_init_clus_res_rec(slurmdb_clus_res_rec_t* clus_res, bool free_it) + + void slurmdb_init_cluster_rec(slurmdb_cluster_rec_t* cluster, bool free_it) + + void slurmdb_init_federation_rec(slurmdb_federation_rec_t* federation, bool free_it) + + void slurmdb_init_qos_rec(slurmdb_qos_rec_t* qos, bool free_it, uint32_t init_val) + + void slurmdb_init_res_rec(slurmdb_res_rec_t* res, bool free_it) + + void slurmdb_init_wckey_rec(slurmdb_wckey_rec_t* wckey, bool free_it) + + void slurmdb_init_tres_cond(slurmdb_tres_cond_t* tres, bool free_it) + + void slurmdb_init_cluster_cond(slurmdb_cluster_cond_t* cluster, bool free_it) + + void slurmdb_init_federation_cond(slurmdb_federation_cond_t* federation, bool free_it) + + void slurmdb_init_res_cond(slurmdb_res_cond_t* cluster, bool free_it) + + List slurmdb_get_hierarchical_sorted_assoc_list(List assoc_list, bool use_lft) + + List slurmdb_get_acct_hierarchical_rec_list(List assoc_list) + + char* slurmdb_tree_name_get(char* name, char* parent, List tree_list) + + int slurmdb_res_add(void* db_conn, List res_list) + + List slurmdb_res_get(void* db_conn, slurmdb_res_cond_t* res_cond) + + List slurmdb_res_modify(void* db_conn, slurmdb_res_cond_t* res_cond, slurmdb_res_rec_t* res) + + List slurmdb_res_remove(void* db_conn, slurmdb_res_cond_t* res_cond) + + int slurmdb_qos_add(void* db_conn, List qos_list) + + List slurmdb_qos_get(void* db_conn, slurmdb_qos_cond_t* qos_cond) + + List slurmdb_qos_modify(void* db_conn, slurmdb_qos_cond_t* qos_cond, slurmdb_qos_rec_t* qos) + + List slurmdb_qos_remove(void* db_conn, slurmdb_qos_cond_t* qos_cond) + + int slurmdb_tres_add(void* 
db_conn, List tres_list) + + List slurmdb_tres_get(void* db_conn, slurmdb_tres_cond_t* tres_cond) + + int slurmdb_usage_get(void* db_conn, void* in_, int type, time_t start, time_t end) + + int slurmdb_usage_roll(void* db_conn, time_t sent_start, time_t sent_end, uint16_t archive_data, List* rollup_stats_list_in) + + int slurmdb_users_add(void* db_conn, List user_list) + + List slurmdb_users_get(void* db_conn, slurmdb_user_cond_t* user_cond) + + List slurmdb_users_modify(void* db_conn, slurmdb_user_cond_t* user_cond, slurmdb_user_rec_t* user) + + List slurmdb_users_remove(void* db_conn, slurmdb_user_cond_t* user_cond) + + int slurmdb_wckeys_add(void* db_conn, List wckey_list) + + List slurmdb_wckeys_get(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) + + List slurmdb_wckeys_modify(void* db_conn, slurmdb_wckey_cond_t* wckey_cond, slurmdb_wckey_rec_t* wckey) + + List slurmdb_wckeys_remove(void* db_conn, slurmdb_wckey_cond_t* wckey_cond) diff --git a/pyslurm/slurm/xmalloc.h b/pyslurm/slurm/xmalloc.h new file mode 100644 index 00000000..f1db7b5f --- /dev/null +++ b/pyslurm/slurm/xmalloc.h @@ -0,0 +1,117 @@ +/*****************************************************************************\ + * xmalloc.h - enhanced malloc routines for slurm + * - default: never return if errors are encountered. + * - attempt to report file, line, and calling function on assertion failure + * - use configurable slurm log facility for reporting errors + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Jim Garlick and + * Mark Grondona + * CODE-OCEC-09-009. All rights reserved. + * + * This file is part of Slurm, a resource management program. + * For details, see <https://slurm.schedmd.com/>. + * Please also read the included file: DISCLAIMER.
+ * + * Slurm is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Slurm; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + ***************************************************************************** + * Description: + * + * void *xmalloc(size_t size); + * void xrealloc(void *p, size_t newsize); + * void xfree(void *p); + * int xsize(void *p); + * + * xmalloc(size) allocates size bytes and returns a pointer to the allocated + * memory. The memory is set to zero. xmalloc() will not return unless + * there are no errors. The memory must be freed using xfree(). 
+ * + * xrealloc(p, newsize) changes the size of the block pointed to by p to the + * value of newsize. Newly allocated memory is zeroed. If p is NULL, + * xrealloc() performs the same function as `p = xmalloc(newsize)'. If p + * is not NULL, it is required to have been initialized with a call to + * [try_]xmalloc() or [try_]xrealloc(). + * + * xfree(p) frees the memory block pointed to by p. The memory must have been + * initialized with a call to [try_]xmalloc() or [try_]xrealloc(). + * + * xsize(p) returns the current size of the memory allocation pointed to by + * p. The memory must have been allocated with [try_]xmalloc() or + * [try_]xrealloc(). + * +\*****************************************************************************/ + +#ifndef _XMALLOC_H +#define _XMALLOC_H + +#include +#include + +#define xcalloc(__cnt, __sz) \ + slurm_xcalloc(__cnt, __sz, true, false, __FILE__, __LINE__, __func__) + +#define try_xcalloc(__cnt, __sz) \ + slurm_xcalloc(__cnt, __sz, true, true, __FILE__, __LINE__, __func__) + +#define xcalloc_nz(__cnt, __sz) \ + slurm_xcalloc(__cnt, __sz, false, false, __FILE__, __LINE__, __func__) + +#define xmalloc(__sz) \ + slurm_xcalloc(1, __sz, true, false, __FILE__, __LINE__, __func__) + +#define try_xmalloc(__sz) \ + slurm_xcalloc(1, __sz, true, true, __FILE__, __LINE__, __func__) + +#define xmalloc_nz(__sz) \ + slurm_xcalloc(1, __sz, false, false, __FILE__, __LINE__, __func__) + +#define xfree(__p) slurm_xfree((void **)&(__p)) + +#define xfree_array(__p) slurm_xfree_array((void ***)&(__p)) + +#define xrecalloc(__p, __cnt, __sz) \ + slurm_xrecalloc((void **)&(__p), __cnt, __sz, true, false, __FILE__, __LINE__, __func__) + +#define xrealloc(__p, __sz) \ + slurm_xrecalloc((void **)&(__p), 1, __sz, true, false, __FILE__, __LINE__, __func__) + +#define try_xrealloc(__p, __sz) \ + slurm_xrecalloc((void **)&(__p), 1, __sz, true, true, __FILE__, __LINE__, __func__) + +#define xrealloc_nz(__p, __sz) \ + slurm_xrecalloc((void **)&(__p), 1, __sz, 
false, false, __FILE__, __LINE__, __func__) + +void *slurm_xcalloc(size_t, size_t, bool, bool, const char *, int, const char *); +void slurm_xfree(void **); +void slurm_xfree_array(void ***); +void *slurm_xrecalloc(void **, size_t, size_t, bool, bool, const char *, int, const char *); + +size_t xsize(void *item); + +void xfree_ptr(void *); + +#endif /* !_XMALLOC_H */ diff --git a/pyslurm/utils/__init__.pxd b/pyslurm/utils/__init__.pxd new file mode 100644 index 00000000..7a22bfae --- /dev/null +++ b/pyslurm/utils/__init__.pxd @@ -0,0 +1,2 @@ +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 diff --git a/pyslurm/utils/__init__.py b/pyslurm/utils/__init__.py new file mode 100644 index 00000000..eae6e6ed --- /dev/null +++ b/pyslurm/utils/__init__.py @@ -0,0 +1,44 @@ +######################################################################### +# utils.py - pyslurm utility functions +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+"""pyslurm utility functions""" + +# Utility time functions +from .ctime import ( + timestr_to_secs, + timestr_to_mins, + secs_to_timestr, + mins_to_timestr, + date_to_timestamp, + timestamp_to_date, +) + +# General utility functions +from .helpers import ( + uid_to_name, + gid_to_name, + user_to_uid, + group_to_gid, + expand_range_str, + humanize, + dehumanize, + nodelist_from_range_str, + nodelist_to_range_str, +) diff --git a/pyslurm/utils/cstr.pxd b/pyslurm/utils/cstr.pxd new file mode 100644 index 00000000..e8014a5f --- /dev/null +++ b/pyslurm/utils/cstr.pxd @@ -0,0 +1,39 @@ +######################################################################### +# common/cstr.pxd - slurm string functions +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from pyslurm cimport slurm
+from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc
+from libc.string cimport memcpy, strlen
+
+cdef char *from_unicode(s)
+cdef to_unicode(char *s, default=*)
+cdef fmalloc(char **old, val)
+cdef fmalloc2(char **p1, char **p2, val)
+cdef free_array(char **arr, count)
+cpdef list to_list(char *str_list, default=*)
+cdef from_list(char **old, vals, delim=*)
+cdef from_list2(char **p1, char **p2, vals, delim=*)
+cpdef dict to_dict(char *str_dict, str delim1=*, str delim2=*)
+cdef from_dict(char **old, vals, prepend=*, str delim1=*, str delim2=*)
+cpdef dict to_gres_dict(char *gres)
diff --git a/pyslurm/utils/cstr.pyx b/pyslurm/utils/cstr.pyx
new file mode 100644
index 00000000..0b8aa2a8
--- /dev/null
+++ b/pyslurm/utils/cstr.pyx
@@ -0,0 +1,296 @@
+#########################################################################
+# common/cstr.pyx - pyslurm string functions
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+import re
+
+cdef bytes NULL_BYTE = "\0".encode("ascii")
+cdef bytes NONE_BYTE = "None".encode("ascii")
+
+cdef char *from_unicode(s):
+    """Convert Python3 str (unicode) to char* (no malloc)
+
+    Note:
+        The lifetime of this char* depends on the lifetime of the equivalent
+        python-object passed in. If the python-object is gone, the char* cannot
+        be used safely anymore.
+    """
+    if not s:
+        return NULL
+
+    _s = str(s)
+    return _s
+
+
+cdef to_unicode(char *_str, default=None):
+    """Convert a char* to Python3 str (unicode)"""
+    if _str and _str[0] != NULL_BYTE:
+        if _str == NONE_BYTE:
+            return default
+
+        return _str
+    else:
+        return default
+
+
+cdef fmalloc2(char **p1, char **p2, val):
+    """Like fmalloc, but copies the value to 2 char pointers."""
+    fmalloc(p1, val)
+    fmalloc(p2, val)
+
+
+cdef fmalloc(char **old, val):
+    """Try to free first and then create xmalloc'ed char* from str.
+
+    Note: Uses Slurm's memory allocator.
+    """
+    # TODO: Consider doing some size checks on the input by having an extra
+    # argument like "max_size" which is configurable. Otherwise infinitely huge
+    # strings could just be passed in and consume a lot of memory which would
+    # allow for a denial of service attack on services that use pyslurm.
+    cdef:
+        const char *tmp = NULL
+        size_t siz
+
+    # Free the previous allocation (if necessary)
+    xfree(old[0])
+
+    # Consider: Maybe every string containing a \0 should just
+    # be rejected with an Exception instead of silently cutting
+    # everything after \0 off?
+
+    if val and val[0] != "\0":
+        # Let Cython convert the Python-string to a char*
+        # which will be NUL-terminated.
+        tmp = val
+
+        # Get the length of the char*, include space for NUL character
+        siz = strlen(tmp) + 1
+
+        old[0] = <char*>slurm.try_xmalloc(siz)
+        if not old[0]:
+            raise MemoryError("xmalloc failed for char*")
+
+        memcpy(old[0], tmp, siz)
+    else:
+        old[0] = NULL
+
+
+cpdef list to_list(char *str_list, default=[]):
+    """Convert C-String to a list."""
+    cdef str ret = to_unicode(str_list)
+
+    if not ret:
+        # Hand out a copy, so that callers mutating the result cannot alter
+        # the shared default list for subsequent calls.
+        return default if default is None else list(default)
+
+    return ret.split(",")
+
+
+def list_to_str(vals, delim=","):
+    """Convert list to a C-String."""
+    cdef object final = vals
+
+    if vals and not isinstance(vals, str):
+        final = delim.join(vals)
+
+    return final
+
+
+cdef from_list(char **old, vals, delim=","):
+    """Join vals with delim and store the result in old via fmalloc."""
+    fmalloc(old, list_to_str(vals, delim))
+
+
+cdef from_list2(char **p1, char **p2, vals, delim=","):
+    """Like from_list, but stores the result in 2 char pointers."""
+    from_list(p1, vals, delim)
+    from_list(p2, vals, delim)
+
+
+cpdef dict to_dict(char *str_dict, str delim1=",", str delim2="="):
+    """Convert a char* key=value pair to dict.
+
+    With a char* Slurm represents key-values pairs usually in the form of:
+        key1=value1,key2=value2
+    which can easily be converted to a dict.
+    """
+    cdef:
+        str _str_dict = to_unicode(str_dict)
+        str key, val
+        dict out = {}
+
+    if not _str_dict:
+        return out
+
+    for kv in _str_dict.split(delim1):
+        if delim2 in kv:
+            key, val = kv.split(delim2, 1)
+            out[key] = int(val) if val.isdigit() else val
+
+    return out
+
+
+def validate_str_key_value_format(val, delim1=",", delim2="="):
+    """Split a key/value str into a dict, raising ValueError without delim2."""
+    cdef dict out = {}
+
+    for kv in val.split(delim1):
+        if delim2 in kv:
+            k, v = kv.split(delim2, 1)
+            out[k] = v
+        else:
+            raise ValueError(
+                f"Invalid format for key-value pair {kv}. "
+                f"Expected {delim2} as seperator."
+            )
+
+    return out
+
+
+def dict_to_str(vals, prepend=None, delim1=",", delim2="="):
+    """Convert a dict (or str) to Slurm Key-Value pair.
+
+    Slurm predominantly uses a format of:
+        key1=value1,key2=value2,...
+
+    for Key/Value type things, which can be easily created from a dict.
+
+    A String which already has this form can also be passed in. The correct
+    format of this string will then be validated.
+    """
+    cdef:
+        tmp_dict = {} if not vals else vals
+        list tmp = []
+
+    if not vals:
+        return None
+
+    if isinstance(vals, str):
+        tmp_dict = validate_str_key_value_format(vals, delim1, delim2)
+
+    for k, v in tmp_dict.items():
+        if ((delim1 in str(k) or delim2 in str(k)) or
+                delim1 in str(v) or delim2 in str(v)):
+            raise ValueError(
+                f"Key or Value cannot contain either {delim1} or {delim2}. "
+                f"Got Key: {k} and Value: {v}."
+            )
+
+        tmp.append(f"{'' if not prepend else prepend}{k}{delim2}{v}")
+
+    return delim1.join(tmp)
+
+
+cdef from_dict(char **old, vals, prepend=None,
+               str delim1=",", str delim2="="):
+    """Convert vals with dict_to_str and store the result in old via fmalloc."""
+    fmalloc(old, dict_to_str(vals, prepend, delim1, delim2))
+
+
+cpdef dict to_gres_dict(char *gres):
+    """Parse a GRES string."""
+    cdef:
+        dict output = {}
+        str gres_str = to_unicode(gres)
+        str gres_delim = "gres:"
+
+    if not gres_str or gres_str == "(null)":
+        return {}
+
+    for item in re.split(",(?=[^,]+?:)", gres_str):
+
+        # Remove the additional "gres" specifier if it exists
+        if gres_delim in item:
+            item = item.replace(gres_delim, "")
+
+        gres_splitted = re.split(
+            ":(?=[^:]+?)",
+            item.replace("(", ":", 1).replace(")", "")
+        )
+
+        name, typ, cnt = gres_splitted[0], gres_splitted[1], 0
+
+        # Check if we have a gres type.
+        if typ.isdigit():
+            cnt = typ
+            typ = None
+        else:
+            cnt = gres_splitted[2]
+
+        # Dict Key-Name depends on if we have a gres type or not
+        name_and_typ = f"{name}:{typ}" if typ else name
+
+        if not "IDX" in gres_splitted:
+            # Check if we need to parse the exact GRES index when coming from
+            # job_resources_t.
+            output[name_and_typ] = int(cnt)
+        else:
+            # Cover cases with IDX
+            idx = gres_splitted[3] if not typ else gres_splitted[4]
+            output[name_and_typ] = {
+                "count": int(cnt),
+                "indexes": idx,
+            }
+
+    return output
+
+
+def from_gres_dict(vals, typ=""):
+    """Convert a GRES dict, str or plain count to a "gres:..." str."""
+    final = []
+    gres_dict = {} if not vals else vals
+
+    if not vals:
+        return None
+
+    if isinstance(vals, str) and not vals.isdigit():
+        gres_dict = {}
+        gres_list = vals.replace("gres:", "")
+        for gres_str in gres_list.split(","):
+            gres_and_type, cnt = gres_str.rsplit(":", 1)
+            gres_dict.update({gres_and_type: int(cnt)})
+    elif not isinstance(vals, dict):
+        return f"gres:{typ}:{int(vals)}"
+
+    for gres_and_type, cnt in gres_dict.items():
+        # Error immediately on specifications that contain more than one
+        # colon, as it is wrong.
+        if len(gres_and_type.split(":")) > 2:
+            raise ValueError(f"Invalid specifier: '{gres_and_type}'")
+
+        if typ not in gres_and_type:
+            gres_and_type = f"{typ}:{gres_and_type}"
+
+        final.append(f"gres:{gres_and_type}:{int(cnt)}")
+
+    return ",".join(final)
+
+
+cdef free_array(char **arr, count):
+    """xfree the first count entries of arr and then arr itself."""
+    for i in range(count):
+        xfree(arr[i])
+
+    xfree(arr)
diff --git a/pyslurm/utils/ctime.pxd b/pyslurm/utils/ctime.pxd
new file mode 100644
index 00000000..b9bde543
--- /dev/null
+++ b/pyslurm/utils/ctime.pxd
@@ -0,0 +1,32 @@
+#########################################################################
+# ctime.pxd - wrappers around slurm time functions
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from pyslurm cimport slurm
+from pyslurm.utils cimport cstr
+from libc.stdint cimport uint32_t
+
+cdef extern from 'time.h' nogil:
+    ctypedef long time_t
+    double difftime(time_t time1, time_t time2)
+    time_t time(time_t *t)
diff --git a/pyslurm/utils/ctime.pyx b/pyslurm/utils/ctime.pyx
new file mode 100644
index 00000000..45d7c8e2
--- /dev/null
+++ b/pyslurm/utils/ctime.pyx
@@ -0,0 +1,224 @@
+#########################################################################
+# ctime.pyx - wrappers around slurm time functions
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+import datetime
+from pyslurm.constants import UNLIMITED
+
+
+def timestr_to_secs(timestr):
+    """Convert Slurm Timestring to seconds
+
+    Args:
+        timestr (str):
+            A Timestring compatible with Slurms time functions.
+
+    Returns:
+        (int): Amount of time in seconds
+    """
+    cdef:
+        char *tmp = NULL
+        uint32_t secs
+
+    if timestr is None:
+        return slurm.NO_VAL
+    elif str(timestr).isdigit():
+        # Plain numbers (int or digit-only str) are seconds. This has to be
+        # checked before calling str-only methods like casefold().
+        timestr = "00:00:{}".format(timestr)
+    elif timestr == UNLIMITED or timestr.casefold() == "unlimited":
+        return slurm.INFINITE
+
+    tmp = cstr.from_unicode(timestr)
+    secs = slurm.slurm_time_str2secs(tmp)
+
+    if secs == slurm.NO_VAL:
+        raise ValueError(f"Invalid Time Specification: {timestr}.")
+
+    return secs
+
+
+def timestr_to_mins(timestr):
+    """Convert Slurm Timestring to minutes
+
+    Args:
+        timestr (str):
+            A Timestring compatible with Slurms time functions.
+
+    Returns:
+        (int): Amount of time in minutes
+    """
+    cdef:
+        char *tmp = NULL
+        uint32_t mins
+
+    if timestr is None:
+        return slurm.NO_VAL
+    elif str(timestr).isdigit():
+        return int(timestr)
+    elif timestr == UNLIMITED or timestr.casefold() == "unlimited":
+        return slurm.INFINITE
+
+    tmp = cstr.from_unicode(timestr)
+    mins = slurm.slurm_time_str2mins(tmp)
+
+    if mins == slurm.NO_VAL:
+        raise ValueError(f"Invalid Time Specification: {timestr}.")
+
+    return mins
+
+
+def secs_to_timestr(secs, default=None):
+    """Parse time in seconds to Slurm Timestring
+
+    Args:
+        secs (int):
+            Amount of seconds to convert
+        default (Any, optional):
+            Value to return when secs is None or NO_VAL.
+
+    Returns:
+        (str): A Slurm timestring
+    """
+    cdef char time_line[32]
+
+    if secs == slurm.NO_VAL or secs is None:
+        return default
+    elif secs != slurm.INFINITE:
+        slurm.slurm_secs2time_str(
+            secs,
+            time_line,
+            sizeof(time_line)
+        )
+
+        tmp = cstr.to_unicode(time_line)
+        if tmp == "00:00:00":
+            return None
+        else:
+            return tmp
+    else:
+        return UNLIMITED
+
+
+def mins_to_timestr(mins, default=None):
+    """Parse time in minutes to Slurm Timestring
+
+    Args:
+        mins (int):
+            Amount of minutes to convert
+        default (Any, optional):
+            Value to return when mins is None or NO_VAL.
+
+    Returns:
+        (str): A Slurm timestring
+    """
+    cdef char time_line[32]
+
+    if mins == slurm.NO_VAL or mins is None:
+        return default
+    elif mins != slurm.INFINITE:
+        slurm.slurm_mins2time_str(
+            mins,
+            time_line,
+            sizeof(time_line)
+        )
+
+        tmp = cstr.to_unicode(time_line)
+        if tmp == "00:00:00":
+            return None
+        else:
+            return tmp
+    else:
+        return UNLIMITED
+
+
+def date_to_timestamp(date, on_nodate=0):
+    """Parse Date to Unix timestamp
+
+    Args:
+        date (Union[str, int, datetime.datetime]):
+            A date to convert to a Unix timestamp.
+        on_nodate (int, optional):
+            Value to return when no date is given. Defaults to 0.
+
+    Returns:
+        (int): A unix timestamp
+    """
+    cdef:
+        time_t tmp_time
+        char* tmp_char = NULL
+
+    if not date:
+        # time_t of 0, so the option will be ignored by slurmctld
+        return on_nodate
+    elif str(date).isdigit():
+        # Allow the user to pass a timestamp directly.
+        return int(date)
+    elif isinstance(date, datetime.datetime):
+        # Allow the user to pass a datetime.datetime object.
+        return int(date.timestamp())
+
+    tmp_char = cstr.from_unicode(date)
+    tmp_time = slurm.slurm_parse_time(tmp_char, 0)
+
+    if not tmp_time:
+        raise ValueError(f"Invalid Time Specification: {date}")
+
+    return tmp_time
+
+
+def timestamp_to_date(timestamp):
+    """Parse Unix timestamp to Slurm Date-string
+
+    Args:
+        timestamp (int):
+            A Unix timestamp that should be converted.
+
+    Returns:
+        (str): A Slurm date timestring
+    """
+    cdef:
+        char time_str[32]
+        time_t _time = timestamp
+
+    if _time == slurm.NO_VAL:
+        return None
+
+    # slurm_make_time_str returns 'Unknown' if 0 or slurm.INFINITE
+    slurm.slurm_make_time_str(&_time, time_str, sizeof(time_str))
+
+    ret = cstr.to_unicode(time_str)
+    if ret == "Unknown":
+        return None
+
+    return ret
+
+
+def _raw_time(time, on_noval=None, on_inf=None):
+    """Return time, mapping NO_VAL/0 to on_noval and INFINITE to on_inf."""
+    if time == slurm.NO_VAL or time == 0:
+        return on_noval
+    elif time == slurm.INFINITE:
+        return on_inf
+    else:
+        return time
diff --git a/pyslurm/utils/helpers.pxd b/pyslurm/utils/helpers.pxd
new file mode 100644
index 00000000..3f73c375
--- /dev/null
+++ b/pyslurm/utils/helpers.pxd
@@ -0,0 +1,33 @@
+#########################################################################
+# helpers.pxd - basic helper functions
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from pyslurm cimport slurm
+from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc
+from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
+from pyslurm.utils cimport cstr
+from libc.stdlib cimport free
+
+cpdef uid_to_name(uint32_t uid, err_on_invalid=*, dict lookup=*)
+cpdef gid_to_name(uint32_t gid, err_on_invalid=*, dict lookup=*)
+cpdef gres_from_tres_dict(dict tres_dict)
diff --git a/pyslurm/utils/helpers.pyx b/pyslurm/utils/helpers.pyx
new file mode 100644
index 00000000..4d5f6d0c
--- /dev/null
+++ b/pyslurm/utils/helpers.pyx
@@ -0,0 +1,399 @@
+#########################################################################
+# helpers.pyx - basic helper functions
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# PySlurm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# cython: c_string_type=unicode, c_string_encoding=default
+# cython: language_level=3
+
+from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS
+from grp import getgrgid, getgrnam, getgrall
+from pwd import getpwuid, getpwnam, getpwall
+from os import getuid, getgid
+from itertools import chain
+import re
+import signal
+from pyslurm.constants import UNLIMITED
+
+
+MEMORY_UNITS = {
+    "K": 2**10.0,
+    "M": 2**20.0,
+    "G": 2**30.0,
+    "T": 2**40.0,
+    "P": 2**50.0,
+    "E": 2**60.0,
+    "Z": 2**70.0
+}
+
+
+cpdef uid_to_name(uint32_t uid, err_on_invalid=True, dict lookup={}):
+    """Translate UID to a User-Name."""
+    if uid == slurm.NO_VAL or uid == slurm.INFINITE:
+        return None
+
+    if lookup:
+        try:
+            name = lookup[uid]
+            return name
+        except KeyError as e:
+            if err_on_invalid:
+                raise e
+    else:
+        try:
+            name = getpwuid(uid).pw_name
+            return name
+        except KeyError as e:
+            if err_on_invalid:
+                raise e
+
+    return None
+
+
+cpdef gid_to_name(uint32_t gid, err_on_invalid=True, dict lookup={}):
+    """Translate a GID to a Group-Name."""
+    if gid == slurm.NO_VAL or gid == slurm.INFINITE:
+        return None
+
+    if lookup:
+        try:
+            name = lookup[gid]
+            return name
+        except KeyError as e:
+            if err_on_invalid:
+                raise e
+    else:
+        try:
+            name = getgrgid(gid).gr_name
+            return name
+        except KeyError as e:
+            if err_on_invalid:
+                raise e
+
+    return None
+
+
+def user_to_uid(user, err_on_invalid=True):
+    """Translate User-Name to a uid."""
+    if user is None:
+        return slurm.NO_VAL
+
+    try:
+        if isinstance(user, str) and not user.isdigit():
+            return getpwnam(user).pw_uid
+
+        return getpwuid(int(user)).pw_uid
+    except KeyError as e:
+        if err_on_invalid:
+            raise e
+
+    return getuid()
+
+
+def group_to_gid(group, err_on_invalid=True):
+    """Translate a Group-Name to a gid."""
+    if group is None:
+        return slurm.NO_VAL
+
+    try:
+        if isinstance(group, str) and not group.isdigit():
+            return getgrnam(group).gr_gid
+
+        return getgrgid(int(group)).gr_gid
+    except KeyError as e:
+        if err_on_invalid:
+            raise e
+
+    return getgid()
+
+
+def _getgrall_to_dict():
+    """Return a dict mapping every known gid to its group name."""
+    cdef list groups = getgrall()
+    cdef dict grp_info = {item.gr_gid: item.gr_name for item in groups}
+    return grp_info
+
+
+def _getpwall_to_dict():
+    """Return a dict mapping every known uid to its user name."""
+    cdef list passwd = getpwall()
+    cdef dict pw_info = {item.pw_uid: item.pw_name for item in passwd}
+    return pw_info
+
+
+def expand_range_str(range_str):
+    """Expand a ranged string of numbers to a list of unique values.
+
+    Args:
+        range_str (str):
+            A range string, which can for example look like this:
+            "1,2,3-10,11,15-20"
+
+    Returns:
+        (list): List of unique values
+    """
+    ret = []
+    for mrange in range_str.split(","):
+        start, sep, end = mrange.partition("-")
+        start = int(start)
+
+        if sep:
+            ret += range(start, int(end)+1)
+        else:
+            ret.append(start)
+
+    return ret
+
+
+def nodelist_from_range_str(nodelist):
+    """Convert a bracketed nodelist str with ranges to a list.
+
+    Args:
+        nodelist (Union[str, list]):
+            Comma-separated str or list with potentially bracketed hostnames
+            and ranges.
+
+    Returns:
+        (list): List of all nodenames, or an empty list on failure
+    """
+    if isinstance(nodelist, list):
+        nodelist = ",".join(nodelist)
+
+    cdef:
+        char *nl = nodelist
+        slurm.hostlist_t hl
+        char *hl_unranged = NULL
+
+    hl = slurm.slurm_hostlist_create(nl)
+    if not hl:
+        return []
+
+    hl_unranged = slurm.slurm_hostlist_deranged_string_malloc(hl)
+    out = cstr.to_list(hl_unranged)
+
+    free(hl_unranged)
+    slurm.slurm_hostlist_destroy(hl)
+
+    return out
+
+
+def nodelist_to_range_str(nodelist):
+    """Convert a list of nodes to a bracketed str with ranges.
+
+    Args:
+        nodelist (Union[str, list]):
+            Comma-separated str or list with unique, unbracketed nodenames.
+
+    Returns:
+        (str): Bracketed, ranged nodelist or None on failure.
+    """
+    if isinstance(nodelist, list):
+        nodelist = ",".join(nodelist)
+
+    cdef:
+        char *nl = nodelist
+        slurm.hostlist_t hl
+        char *hl_ranged = NULL
+
+    hl = slurm.slurm_hostlist_create(nl)
+    if not hl:
+        return None
+
+    hl_ranged = slurm.slurm_hostlist_ranged_string_malloc(hl)
+    out = cstr.to_unicode(hl_ranged)
+
+    free(hl_ranged)
+    slurm.slurm_hostlist_destroy(hl)
+
+    return out
+
+
+def humanize(num, decimals=1):
+    """Humanize a number.
+
+    This will convert the number to a string and add appropriate suffixes like
+    M,G,T,P,...
+
+    Args:
+        num (int):
+            Number to humanize
+        decimals (int, optional):
+            Amount of decimals the humanized string should have.
+
+    Returns:
+        (str): Humanized number with appropriate suffix.
+    """
+    if num is None or num == "unlimited" or num == UNLIMITED:
+        return num
+
+    num = int(num)
+    for unit in ["M", "G", "T", "P", "E", "Z"]:
+        if abs(num) < 1024.0:
+            return f"{num:3.{decimals}f}{unit}"
+        num /= 1024.0
+
+    return f"{num:.{decimals}f}Y"
+
+
+def dehumanize(humanized_str, target="M", decimals=0):
+    """Dehumanize a previously humanized value.
+
+    Args:
+        humanized_str (str):
+            A humanized str, for example "5M" or "10T"
+        target (str):
+            Target unit. The default is "M" (Mebibytes). Allowed values are
+            K,M,G,T,P,E,Z
+        decimals (int):
+            Amount of decimal places the result should have. Default is 0
+
+    Returns:
+        (int): Dehumanized value
+    """
+    if not humanized_str:
+        return None
+
+    # Join without a separator, so whitespace is not matched as a unit.
+    units_str = "".join(MEMORY_UNITS.keys())
+    splitted = re.split(f'([{units_str}])', str(humanized_str))
+
+    if len(splitted) == 1:
+        try:
+            return int(humanized_str)
+        except ValueError as e:
+            raise ValueError(f"Invalid value specified: {humanized_str}") from e
+
+    val = float(splitted[0])
+    unit = splitted[1]
+
+    val_in_bytes = val * MEMORY_UNITS[unit]
+    val_in_target_size = float(val_in_bytes / MEMORY_UNITS[target])
+
+    if not decimals:
+        return round(val_in_target_size)
+    else:
+        return float(f"{val_in_target_size:.{decimals}f}")
+
+
+def signal_to_num(sig):
+    """Translate a signal name or number to its number, None if sig is falsy."""
+    if not sig:
+        return None
+
+    try:
+        if str(sig).isnumeric():
+            _sig = signal.Signals(int(sig)).value
+        else:
+            _sig = signal.Signals[sig].value
+    except Exception:
+        raise ValueError(f"Invalid Signal: {sig}.") from None
+
+    return _sig
+
+
+def cpubind_to_num(cpu_bind):
+    """Translate a comma-separated cpu-bind str to CPU_BIND flags."""
+    cdef uint32_t flags = 0
+
+    if not cpu_bind:
+        return flags
+
+    cpu_bind = cpu_bind.casefold().split(",")
+
+    if "none" in cpu_bind:
+        flags |= slurm.CPU_BIND_NONE
+    elif "sockets" in cpu_bind:
+        flags |= slurm.CPU_BIND_TO_SOCKETS
+    elif "ldoms" in cpu_bind:
+        flags |= slurm.CPU_BIND_TO_LDOMS
+    elif "cores" in cpu_bind:
+        flags |= slurm.CPU_BIND_TO_CORES
+    elif "threads" in cpu_bind:
+        flags |= slurm.CPU_BIND_TO_THREADS
+    elif "off" in cpu_bind:
+        flags |= slurm.CPU_BIND_OFF
+    if "verbose" in cpu_bind:
+        flags |= slurm.CPU_BIND_VERBOSE
+
+    return flags
+
+
+def instance_to_dict(inst):
+    """Collect all public, non-callable attributes of inst in a dict."""
+    cdef dict out = {}
+    for attr in dir(inst):
+        val = getattr(inst, attr)
+        if attr.startswith("_") or callable(val):
+            # Ignore everything starting with "_" and all functions.
+            continue
+        out[attr] = val
+
+    return out
+
+
+def _get_exit_code(exit_code):
+    """Split a raw wait status into a tuple of (exit_state, signal)."""
+    exit_state = sig = 0
+    if exit_code != slurm.NO_VAL:
+        if WIFSIGNALED(exit_code):
+            exit_state, sig = 0, WTERMSIG(exit_code)
+        elif WIFEXITED(exit_code):
+            exit_state, sig = WEXITSTATUS(exit_code), 0
+            if exit_state >= 128:
+                exit_state -= 128
+
+    return exit_state, sig
+
+
+def humanize_step_id(sid):
+    """Translate the special Slurm step ids to their names."""
+    if sid == slurm.SLURM_BATCH_SCRIPT:
+        return "batch"
+    elif sid == slurm.SLURM_EXTERN_CONT:
+        return "extern"
+    elif sid == slurm.SLURM_INTERACTIVE_STEP:
+        return "interactive"
+    elif sid == slurm.SLURM_PENDING_STEP:
+        return "pending"
+    else:
+        return sid
+
+
+def dehumanize_step_id(sid):
+    """Translate a step name to its special Slurm step id, else int(sid)."""
+    if sid == "batch":
+        return slurm.SLURM_BATCH_SCRIPT
+    elif sid == "extern":
+        return slurm.SLURM_EXTERN_CONT
+    elif sid == "interactive":
+        return slurm.SLURM_INTERACTIVE_STEP
+    elif sid == "pending":
+        return slurm.SLURM_PENDING_STEP
+    else:
+        return int(sid)
+
+
+cpdef gres_from_tres_dict(dict tres_dict):
+    """Return the "gres/" entries of tres_dict with that prefix removed."""
+    gres_prefix = "gres/"
+    return {
+        k.replace(gres_prefix, ""):v
+        for k, v in tres_dict.items()
+        if gres_prefix in k
+    }
diff --git a/pyslurm/utils/uint.pxd b/pyslurm/utils/uint.pxd
new file mode 100644
index 00000000..3d8f50e5
--- /dev/null
+++ b/pyslurm/utils/uint.pxd
@@ -0,0 +1,47 @@
+#########################################################################
+# common/uint.pxd - functions dealing with parsing uint types
+#########################################################################
+# Copyright (C) 2023 Toni Harzendorf
+#
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+ +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t + +cpdef u8(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u16(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u32(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u64(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u8_parse(uint8_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cpdef u16_parse(uint16_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cpdef u32_parse(uint32_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cpdef u64_parse(uint64_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cpdef u8_bool(val) +cpdef u16_bool(val) +cdef uint_set_bool_flag(flags, boolean, true_flag, false_flag=*) +cdef uint_parse_bool_flag(flags, flag, no_val) +cdef uint_parse_bool(val, no_val) +cdef uint_bool(val, no_val) +cdef u8_parse_bool(uint8_t val) +cdef u16_parse_bool(uint16_t val) +cdef u64_parse_bool_flag(uint64_t flags, flag) +cdef u64_set_bool_flag(uint64_t *flags, boolean, true_flag, false_flag=*) +cdef u16_parse_bool_flag(uint16_t flags, flag) +cdef u16_set_bool_flag(uint16_t *flags, boolean, true_flag, false_flag=*) diff --git a/pyslurm/utils/uint.pyx b/pyslurm/utils/uint.pyx new file mode 100644 index 00000000..0dae7779 --- /dev/null +++ b/pyslurm/utils/uint.pyx @@ -0,0 +1,188 @@ +######################################################################### +# common/uint.pyx - 
functions dealing with parsing uint types +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.constants import UNLIMITED + + +cpdef u8(val, inf=False, noval=slurm.NO_VAL8, on_noval=slurm.NO_VAL8, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint8_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and (val == UNLIMITED or val == "unlimited"): + return slurm.INFINITE8 + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u8_parse(uint8_t val, on_inf=UNLIMITED, on_noval=None, noval=slurm.NO_VAL8, zero_is_noval=True): + """Convert uint8_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE8: + return on_inf + else: + return val + + +cpdef u16(val, inf=False, noval=slurm.NO_VAL16, on_noval=slurm.NO_VAL16, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint16_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and (val 
== UNLIMITED or val == "unlimited"): + return slurm.INFINITE16 + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u16_parse(uint16_t val, on_inf=UNLIMITED, on_noval=None, noval=slurm.NO_VAL16, zero_is_noval=True): + """Convert uint16_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE16: + return on_inf + else: + return val + + +cpdef u32(val, inf=False, noval=slurm.NO_VAL, on_noval=slurm.NO_VAL, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint32_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and (val == UNLIMITED or val == "unlimited"): + return slurm.INFINITE + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u32_parse(uint32_t val, on_inf=UNLIMITED, on_noval=None, noval=slurm.NO_VAL, zero_is_noval=True): + """Convert uint32_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE: + return on_inf + else: + return val + + +cpdef u64(val, inf=False, noval=slurm.NO_VAL64, on_noval=slurm.NO_VAL64, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint64_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and (val == UNLIMITED or val == "unlimited"): + return slurm.INFINITE64 + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u64_parse(uint64_t val, on_inf=UNLIMITED, on_noval=None, noval=slurm.NO_VAL64, zero_is_noval=True): + """Convert uint64_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE64: + return on_inf + else: + return val + + +cdef uint_set_bool_flag(flags, boolean, true_flag, false_flag=0): + if 
boolean: + if false_flag: + flags &= ~false_flag + flags |= true_flag + elif boolean is not None: + if false_flag: + flags |= false_flag + flags &= ~true_flag + + return flags + + +cdef uint_parse_bool_flag(flags, flag, no_val): + if flags == no_val: + return False + + if flags & flag: + return True + else: + return False + + +cdef uint_parse_bool(val, no_val): + if not val or val == no_val: + return False + + return True + + +cdef uint_bool(val, no_val): + if val is None: + return no_val + elif val: + return 1 + else: + return 0 + + +cpdef u8_bool(val): + return uint_bool(val, slurm.NO_VAL8) + + +cpdef u16_bool(val): + return uint_bool(val, slurm.NO_VAL16) + + +cdef u8_parse_bool(uint8_t val): + return uint_parse_bool(val, slurm.NO_VAL8) + + +cdef u16_parse_bool(uint16_t val): + return uint_parse_bool(val, slurm.NO_VAL16) + + +cdef u16_set_bool_flag(uint16_t *flags, boolean, true_flag, false_flag=0): + flags[0] = uint_set_bool_flag(flags[0], boolean, true_flag, false_flag) + + +cdef u64_set_bool_flag(uint64_t *flags, boolean, true_flag, false_flag=0): + flags[0] = uint_set_bool_flag(flags[0], boolean, true_flag, false_flag) + + +cdef u16_parse_bool_flag(uint16_t flags, flag): + return uint_parse_bool_flag(flags, flag, slurm.NO_VAL16) + + +cdef u64_parse_bool_flag(uint64_t flags, flag): + return uint_parse_bool_flag(flags, flag, slurm.NO_VAL64) diff --git a/pyslurm/xcollections.pxd b/pyslurm/xcollections.pxd new file mode 100644 index 00000000..98dfa713 --- /dev/null +++ b/pyslurm/xcollections.pxd @@ -0,0 +1,97 @@ +######################################################################### +# collections.pxd - pyslurm custom collections +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either 
version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + + +cdef class MultiClusterMap: + """Mapping of Multi-Cluster Data for a Collection. + + !!! note "TL;DR" + + If you have no need to write Multi-Cluster capable code and just work + on a single Cluster, Collections inheriting from this Class behave + just like a normal `dict`. + + This class enables collections to hold data from multiple Clusters if + applicable. + For quite a few Entities in Slurm it is possible to gather data from + multiple Clusters. For example, with `sacct`, you can easily query Jobs + running on different Clusters - provided your Cluster is joined in a + Federation or simply part of a multi Cluster Setup. + + Collections like [pyslurm.db.Jobs][] inherit from this Class to enable + holding such data from multiple Clusters. Internally, the data is + structured in a `dict` like this (with [pyslurm.db.Jobs][] as an example): + + ```python + data = { + "LOCAL_CLUSTER": { + 1: pyslurm.db.Job(1), + 2: pyslurm.db.Job(2), + ... + }, + "OTHER_REMOTE_CLUSTER": { + 100: pyslurm.db.Job(100), + 101, pyslurm.db.Job(101) + ... + }, + ... + } + ``` + + When a collection inherits from this class, its functionality will + basically simulate a standard `dict` - with a few extensions to enable + multi-cluster code. 
+ By default, even if your Collections contains Data from multiple Clusters, + any operation will be targeted on the local Cluster data, if available. + + For example, with the data from above: + + ```python + job = data[1] + ``` + + `job` would then hold the instance for `pyslurm.db.Job(1)` from the + `LOCAL_CLUSTER` data. + + Alternatively, data can also be accessed like this: + + ```python + job = data["OTHER_REMOTE_CLUSTER"][100] + ``` + + Here, you are directly specifying which Cluster data you want to access, + and you will get the instance for `pyslurm.db.Job(100)` from the + `OTHER_REMOTE_CLUSTER` data. + + Similarly, every method (where applicable) from a standard dict is + extended with multi-cluster functionality (check out the examples on the + methods) + """ + cdef public dict data + + cdef: + _typ + _key_type + _val_type + _id_attr diff --git a/pyslurm/xcollections.pyx b/pyslurm/xcollections.pyx new file mode 100644 index 00000000..a0ce2e6b --- /dev/null +++ b/pyslurm/xcollections.pyx @@ -0,0 +1,624 @@ +######################################################################### +# collections.pyx - pyslurm custom collections +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +"""Custom Collection utilities""" + +from pyslurm.settings import LOCAL_CLUSTER +import json +from typing import Union, Any + + +class BaseView: + """Base View for all other Views""" + def __init__(self, mcm): + self._mcm = mcm + self._data = mcm.data + + def __len__(self): + return len(self._mcm) + + def __repr__(self): + data = ", ".join(map(repr, self)) + return f'{self.__class__.__name__}([{data}])' + + +class ValuesView(BaseView): + """A simple Value View + + When iterating over an instance of this View, this will yield all values + from all clusters. + """ + def __contains__(self, val): + try: + item = self._mcm.get( + key=self._mcm._item_id(val), + cluster=val.cluster + ) + return item is val or item == val + except AttributeError: + pass + + return False + + def __iter__(self): + for cluster in self._mcm.data.values(): + for item in cluster.values(): + yield item + + +class ClustersView(BaseView): + """A simple Cluster-Keys View + + When iterating over an instance of this View, it will yield all the + Cluster names of the collection. + """ + def __contains__(self, item): + return item in self._data + + def __len__(self): + return len(self._data) + + def __iter__(self): + yield from self._data + + +class MCKeysView(BaseView): + """A Multi-Cluster Keys View + + Unlike KeysView, when iterating over an MCKeysView instance, this will + yield a 2-tuple in the form `(cluster, key)`. + + Similarly, when checking whether this View contains a Key with the `in` + operator, a 2-tuple must be used in the form described above. 
+    """
+    def __contains__(self, item):
+        cluster, key, = item
+        # A cluster that is not part of the collection cannot contain the key;
+        # answer False instead of leaking a KeyError out of the `in` operator.
+        return key in self._data.get(cluster, {})
+
+    def __iter__(self):
+        for cluster, keys in self._data.items():
+            for key in keys:
+                yield (cluster, key)
+
+
+class KeysView(BaseView):
+    """A simple Keys View of a collection
+
+    When iterating, this yields all the keys found from each Cluster in the
+    collection. Note that unlike the KeysView from a `dict`, the keys here
+    aren't unique and may appear multiple times.
+
+    If you indeed have multiple Clusters in a collection and need to tell the
+    keys apart, use the `with_cluster()` function.
+    """
+    def __contains__(self, item):
+        return item in self._mcm
+
+    def __iter__(self):
+        for cluster, keys in self._data.items():
+            yield from keys
+
+    def with_cluster(self):
+        """Return a Multi-Cluster Keys View.
+
+        Returns:
+            (MCKeysView): Multi-Cluster Keys View.
+        """
+        return MCKeysView(self._mcm)
+
+
+class ItemsView(BaseView):
+    """A simple Items View of a collection.
+
+    Returns a 2-tuple in the form of `(key, value)` when iterating.
+
+    Similarly, when checking whether this View contains an Item with the `in`
+    operator, a 2-tuple must be used.
+    """
+    def __contains__(self, item):
+        key, val = item
+
+        try:
+            # The cluster is an attribute of the value, not of the
+            # `(key, value)` tuple that was passed in. Values without a
+            # `cluster` attribute are reported as "not contained".
+            out = self._mcm.data[val.cluster][key]
+        except (KeyError, AttributeError):
+            return False
+        else:
+            return out is val or out == val
+
+    def __iter__(self):
+        for cluster, data in self._mcm.data.items():
+            for key in data:
+                yield (key, data[key])
+
+    def with_cluster(self):
+        """Return a Multi-Cluster Items View.
+
+        Returns:
+            (MCItemsView): Multi-Cluster Items View.
+        """
+        return MCItemsView(self._mcm)
+
+
+class MCItemsView(BaseView):
+    """A Multi-Cluster Items View.
+
+    This differs from ItemsView in that it returns a 3-tuple in the form of
+    `(cluster, key, value)` when iterating.
+
+    Similarly, when checking whether this View contains an Item with the `in`
+    operator, a 3-tuple must be used.
+    """
+    def __contains__(self, item):
+        cluster, key, val = item
+
+        try:
+            out = self._mcm.data[cluster][key]
+        except KeyError:
+            return False
+        else:
+            return out is val or out == val
+
+    def __iter__(self):
+        for cluster, data in self._mcm.data.items():
+            for key in data:
+                yield (cluster, key, data[key])
+
+
+cdef class MultiClusterMap:
+
+    def __init__(self, data, typ=None, val_type=None,
+                 key_type=None, id_attr=None, init_data=True):
+        # With init_data=False the given mapping is adopted as-is (it must
+        # already be in the `{cluster: {key: value}}` layout).
+        self.data = {} if init_data else data
+        self._typ = typ
+        self._key_type = key_type
+        self._val_type = val_type
+        self._id_attr = id_attr
+        if init_data:
+            self._init_data(data)
+
+    def _init_data(self, data):
+        # Accepts a list of keys/values, a comma separated string of keys, or
+        # a dict; everything that carries no cluster information is stored
+        # under LOCAL_CLUSTER.
+        if isinstance(data, list):
+            for item in data:
+                if isinstance(item, self._key_type):
+                    item = self._val_type(item)
+                if LOCAL_CLUSTER not in self.data:
+                    self.data[LOCAL_CLUSTER] = {}
+
+                self.data[LOCAL_CLUSTER].update({self._item_id(item): item})
+        elif isinstance(data, str):
+            itemlist = data.split(",")
+            items = {self._key_type(item):self._val_type(item)
+                     for item in itemlist}
+            self.data[LOCAL_CLUSTER] = items
+        elif isinstance(data, dict):
+            self.update(data)
+        elif data is not None:
+            raise TypeError(f"Invalid Type: {type(data).__name__}")
+
+    def _check_for_value(self, val_id, cluster):
+        cluster_data = self.data.get(cluster)
+        if cluster_data and val_id in cluster_data:
+            return True
+        return False
+
+    def _get_cluster(self):
+        # Default cluster for lookups that do not name one: the local cluster
+        # if the collection is empty or has data for it, otherwise the first
+        # cluster stored in the collection.
+        cluster = None
+        if not self.data or LOCAL_CLUSTER in self.data:
+            cluster = LOCAL_CLUSTER
+        else:
+            try:
+                # Iterate the cluster names. `self.keys()` yields the item
+                # keys (e.g. job ids), which are not valid cluster names.
+                cluster = next(iter(self.data))
+            except StopIteration:
+                raise KeyError("Collection is Empty") from None
+
+        return cluster
+
+    def _get_key_and_cluster(self, item):
+        # `item` may be a value instance, a `(cluster, key)` 2-tuple or a
+        # plain key; always returns `(cluster, key)`.
+        if isinstance(item, self._val_type):
+            cluster, key = item.cluster, self._item_id(item)
+        elif isinstance(item, tuple) and len(item) == 2:
+            cluster, key = item
+        else:
+            cluster, key = self._get_cluster(), item
+
+        return cluster, key
+
+    def _check_val_type(self, item):
+        if not isinstance(item,
self._val_type):
+            raise TypeError(f"Invalid Type: {type(item).__name__}. "
+                            f"{self._val_type.__name__} is required.")
+
+    def _item_id(self, item):
+        return self._id_attr.__get__(item)
+
+    def _iter_clusters_dict(self, other):
+        # Yields `(cluster, mapping)` pairs and makes sure each yielded
+        # cluster exists in `self.data`. As soon as a value of `other` is not
+        # iterable, `other` as a whole is treated as flat `{key: value}` data
+        # for the default cluster; otherwise each key is taken as a cluster.
+        for key in other:
+            try:
+                iter(other[key])
+            except TypeError:
+                try:
+                    cluster = self._get_cluster()
+                except KeyError:
+                    cluster = LOCAL_CLUSTER
+
+                if not cluster in self.data:
+                    self.data[cluster] = {}
+                yield (cluster, other)
+                break
+            else:
+                cluster = key
+                if not cluster in self.data:
+                    self.data[cluster] = {}
+                yield (cluster, other[cluster])
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return self.data == other.data
+        return NotImplemented
+
+    def __getitem__(self, item):
+        # A cluster name returns the whole per-cluster dict.
+        if item in self.data:
+            return self.data[item]
+
+        cluster, key = self._get_key_and_cluster(item)
+        return self.data[cluster][key]
+
+    def __setitem__(self, where, item):
+        if where in self.data:
+            self.data[where] = item
+        else:
+            cluster, key = self._get_key_and_cluster(where)
+            # Create the cluster on first assignment, like a plain dict
+            # creates missing keys, instead of raising KeyError.
+            self.data.setdefault(cluster, {})[key] = item
+
+    def __delitem__(self, item):
+        if item in self.data:
+            del self.data[item]
+        else:
+            cluster, key = self._get_key_and_cluster(item)
+            del self.data[cluster][key]
+
+    def __len__(self):
+        return sum(len(data) for data in self.data.values())
+
+    def __repr__(self):
+        data = ", ".join(map(repr, self.data.values()))
+        return f'pyslurm.{self._typ}({data})'
+
+    def __contains__(self, item):
+        if isinstance(item, self._val_type):
+            item = (item.cluster, self._item_id(item))
+            return self.get(item, default=None) is not None
+        elif isinstance(item, self._key_type):
+            # A bare key is looked up in every cluster; stop at first match.
+            for data in self.data.values():
+                if item in data:
+                    return True
+            return False
+        elif isinstance(item, tuple):
+            return self.get(item, default=None) is not None
+
+        return False
+
+    def __iter__(self):
+        return iter(self.keys())
+
+    def __bool__(self):
+        return bool(self.data)
+
+    def __copy__(self):
+        return self.copy()
+
+    def __or__(self, other):
+        if isinstance(other, MultiClusterMap):
+            if isinstance(self, dict):
+                return NotImplemented
+
+            out = self.copy()
+            out |= other
+            return out
+        elif isinstance(other, dict):
+            out = self.copy()
+            # Work on the copy only, so that `self` is left untouched.
+            for cluster, data in out._iter_clusters_dict(other):
+                out.data[cluster] = out.data[cluster] | data
+            return out
+        return NotImplemented
+
+    def __ror__(self, other):
+        if isinstance(other, MultiClusterMap):
+            out = other.copy()
+            out |= self
+            return out
+        elif isinstance(other, dict):
+            out = self.copy()
+            # Work on the copy only, so that `self` is left untouched.
+            for cluster, data in out._iter_clusters_dict(other):
+                out.data[cluster] = data | out.data[cluster]
+            return out
+        return NotImplemented
+
+    def __ior__(self, other):
+        if isinstance(other, MultiClusterMap):
+            for cluster in other.clusters():
+                if not cluster in self.data:
+                    self.data[cluster] = {}
+                self.data[cluster] |= other.data[cluster]
+        else:
+            for cluster, data in self._iter_clusters_dict(other):
+                self.data[cluster] |= data
+        return self
+
+    def copy(self):
+        """Return a Copy of this instance.
+
+        The per-cluster dicts are copied as well, so adding or removing items
+        on the copy does not affect the original. The items themselves are
+        shared, like with `dict.copy()`.
+        """
+        out = self.__class__.__new__(self.__class__)
+        # Call the base initializer explicitly: `super(self.__class__, ...)`
+        # resolves to the wrong class once a subclass is subclassed again.
+        MultiClusterMap.__init__(
+            out,
+            data={cluster: dict(items) for cluster, items in self.data.items()},
+            typ=self._typ,
+            key_type=self._key_type,
+            val_type=self._val_type,
+            id_attr=self._id_attr,
+            init_data=False,
+        )
+        return out
+
+    def get(self, key, default=None):
+        """Get the specific value for a Key
+
+        This behaves like `dict`'s `get` method, with the difference that you
+        can additionally pass in a 2-tuple in the form of `(cluster, key)` as
+        the key, which can be helpful if this collection contains data from
+        multiple Clusters.
+
+        If just a key without notion of the Cluster is given, access to the
+        local cluster data is implied. If this collection does however not
+        contain data from the local cluster, the first cluster detected
+        according to `next(iter(self.keys()))` will be used.
+
+        Examples:
+            Get a Job from the LOCAL_CLUSTER
+
+            >>> job_id = 1
+            >>> job = data.get(job_id)
+
+            Get a Job from another Cluster in the Collection, by providing a
+            2-tuple with the cluster identifier:
+
+            >>> job_id = 1
+            >>> job = data.get(("REMOTE_CLUSTER", job_id))
+        """
+        cluster, key = self._get_key_and_cluster(key)
+        return self.data.get(cluster, {}).get(key, default)
+
+    def add(self, item):
+        """Add an Item to the collection
+
+        Note that a collection can only hold its specific type.
+        For example, a collection of [pyslurm.db.Jobs][] can only hold
+        [pyslurm.db.Job][] objects. Trying to add anything other than the
+        accepted type will raise a TypeError.
+
+        Args:
+            item (Any):
+                Item to add to the collection.
+
+        Raises:
+            TypeError: When an item with an unexpected type not belonging to
+                the collection was added.
+
+        Examples:
+            Add a `pyslurm.db.Job` instance to the `pyslurm.db.Jobs`
+            collection.
+
+            >>> import pyslurm
+            >>> jobs = pyslurm.db.Jobs()
+            >>> job = pyslurm.db.Job(1)
+            >>> jobs.add(job)
+            >>> print(jobs)
+            pyslurm.db.Jobs({1: pyslurm.db.Job(1)})
+        """
+        # Validate first: a foreign object may not even have a `cluster`
+        # attribute, and the documented error for that case is TypeError.
+        self._check_val_type(item)
+        if item.cluster not in self.data:
+            self.data[item.cluster] = {}
+
+        self.data[item.cluster][self._item_id(item)] = item
+
+    def to_json(self, multi_cluster=False):
+        """Convert the collection to JSON.
+
+        Args:
+            multi_cluster (bool):
+                If True, the data of all Clusters is included, keyed by
+                Cluster name. Otherwise only the data of the default Cluster
+                (see `_get_cluster`) is serialized.
+
+        Returns:
+            (str): JSON formatted string from `json.dumps()`
+        """
+        if not self.data:
+            return '{}'
+
+        data = multi_dict_recursive(self)
+        if multi_cluster:
+            return json.dumps(data)
+        else:
+            cluster = self._get_cluster()
+            return json.dumps(data[cluster])
+
+    def keys(self):
+        """Return a View of all the Keys in this collection
+
+        Returns:
+            (KeysView): View of all Keys
+
+        Examples:
+            Iterate over all Keys from all Clusters:
+
+            >>> for key in collection.keys():
+            ...     print(key)
+
+            Iterate over all Keys from all Clusters with the name of the
+            Cluster additionally provided:
+
+            >>> for cluster, key in collection.keys().with_cluster():
+            ...
print(cluster, key) + """ + return KeysView(self) + + def items(self): + """Return a View of all the Values in this collection + + Returns: + (ItemsView): View of all Items + + Examples: + Iterate over all Items from all Clusters: + + >>> for key, value in collection.items() + ... print(key, value) + + Iterate over all Items from all Clusters with the name of the + Cluster additionally provided: + + >>> for cluster, key, value in collection.items().with_cluster() + ... print(cluster, key, value) + """ + return ItemsView(self) + + def values(self): + """Return a View of all the Values in this collection + + Returns: + (ValuesView): View of all Values + + Examples: + Iterate over all Values from all Clusters: + + >>> for value in collection.values() + ... print(value) + """ + return ValuesView(self) + + def clusters(self): + """Return a View of all the Clusters in this collection + + Returns: + (ClustersView): View of Cluster keys + + Examples: + Iterate over all Cluster-Names the Collection contains: + + >>> for cluster in collection.clusters() + ... print(cluster) + """ + return ClustersView(self) + + def popitem(self): + """Remove and return a `(key, value)` pair as a 2-tuple""" + try: + item = next(iter(self.values())) + except StopIteration: + raise KeyError from None + + key = self._item_id(item) + del self.data[item.cluster][key] + return (key, item) + + def clear(self): + """Clear the collection""" + self.data.clear() + + def pop(self, key, default=None): + """Remove key from the collection and return the value + + This behaves like `dict`'s `pop` method, with the difference that you + can additionally pass in a 2-tuple in the form of `(cluster, key)` as + the key, which can be helpful if this collection contains data from + multiple Clusters. + + If just a key without notion of the Cluster is given, access to the + local cluster data is implied. 
If this collection does however not
+        contain data from the local cluster, the first cluster detected
+        according to `next(iter(self.keys()))` will be used.
+        """
+        # Resolve `(cluster, key)` tuples and value instances to the real
+        # inner key first; deleting with the raw argument raises KeyError.
+        cluster, key = self._get_key_and_cluster(key)
+        cluster_data = self.data.get(cluster)
+        if cluster_data is None or key not in cluster_data:
+            return default
+
+        item = cluster_data.pop(key)
+        if not cluster_data:
+            # Do not keep empty clusters around.
+            del self.data[cluster]
+
+        return item
+
+    def update(self, data={}, **kwargs):
+        """Update the collection.
+
+        This functions like `dict`'s `update` method.
+        """
+        # The default `{}` is only read, never modified.
+        for cluster, data in self._iter_clusters_dict(data):
+            self.data[cluster].update(data)
+
+        for cluster, data in self._iter_clusters_dict(kwargs):
+            self.data[cluster].update(data)
+
+
+def multi_reload(cur, frozen=True):
+    """Refresh the items of a multi-cluster collection in place.
+
+    A new collection is loaded via `cur.__class__.load()`. Items of `cur`
+    that still exist are replaced by their reloaded counterpart.
+
+    Args:
+        cur: The collection to reload.
+        frozen (bool):
+            If True, the set of items in `cur` is left unchanged. If False,
+            items that vanished are removed from `cur` and items that are
+            new are added to it.
+
+    Returns:
+        The collection passed in as `cur`.
+    """
+    if not cur:
+        return cur
+
+    new = cur.__class__.load()
+    for cluster, item in list(cur.keys().with_cluster()):
+        if item in new.data.get(cluster, {}):
+            # Pop from the matching cluster. `new.pop(item, cluster)` would
+            # pass the cluster as the *default* value and look `item` up in
+            # the default cluster only.
+            cur[cluster][item] = new.data[cluster].pop(item)
+        elif not frozen:
+            del cur[cluster][item]
+
+    if not frozen:
+        # Everything still left in `new` was not matched above.
+        for cluster, item in new.keys().with_cluster():
+            if item not in cur.data.get(cluster, {}):
+                cur.data.setdefault(cluster, {})[item] = new.data[cluster][item]
+
+    return cur
+
+
+def dict_recursive(collection):
+    # Items that have no `to_dict` method are left out of the result.
+    cdef dict out = {}
+    for item_id, item in collection.items():
+        if hasattr(item, "to_dict"):
+            out[item_id] = item.to_dict()
+    return out
+
+
+def to_json(collection):
+    return json.dumps(dict_recursive(collection))
+
+
+def multi_dict_recursive(collection):
+    cdef dict out = collection.data.copy()
+    for cluster, data in collection.data.items():
+        out[cluster] = dict_recursive(data)
+    return out
+
+
+def sum_property(collection, prop, startval=0):
+    # Adds up `prop` over all items; items where it is None are skipped.
+    out = startval
+    for item in collection.values():
+        data = prop.__get__(item)
+        if data is not None:
+            out += data
+
+    return out
diff --git a/scripts/build.sh b/scripts/build.sh index 444fd108..b3e389c8 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -1,13 +1,25 @@ #!/bin/bash set -e -################################### -# 
Build PySlurm -################################### +usage() { echo "Usage: $0 [-j jobs]" 1>&2; exit 1; } -cd pyslurm -echo "---> Building PySlurm..." -python$PYTHON setup.py build +# Option to allow parallel build +OPT_JOBS=1 -echo "---> Installing PySlurm..." -python$PYTHON setup.py install +PYTHON_VERSION=3 + +while getopts ":j:" o; do + case "${o}" in + j) + OPT_JOBS=${OPTARG} + ;; + *) + usage + ;; + esac +done + +shift $((OPTIND-1)) + +python"$PYTHON_VERSION" setup.py build -j "$OPT_JOBS" +python"$PYTHON_VERSION" setup.py install diff --git a/scripts/builddocs.sh b/scripts/builddocs.sh index 5e555bfd..b56482f1 100755 --- a/scripts/builddocs.sh +++ b/scripts/builddocs.sh @@ -1,24 +1,6 @@ #!/bin/bash -#set -e -########################################### -# Build the docs and push to GitHub Pages # -########################################### - -# Build docs for all jobs within build -pip$PYTHON install Sphinx>=1.1 -make BUILDDIR=/root/docs -C /pyslurm/doc/ html - -# Only push to GitHub Pages once per build -if [[ "$PYTHON" == "2.7" && - "$CYTHON" == "0.27.3" && - "$SLURM" == "17.11.8" && - "$BRANCH" == "master" ]] -then - git clone https://github.com/pyslurm/pyslurm.github.io.git - rsync -av --delete --exclude=.git /root/docs/html/ /pyslurm.github.io/ - cd pyslurm.github.io - git add . - git -c user.name="Travis" -c user.email="Travis" commit -m 'Updated docs' - git push -q https://giovtorres:$GITHUB_TOKEN@github.com/pyslurm/pyslurm.github.io &2>/dev/null -fi +python setup.py clean +pip install -r doc_requirements.txt +pip install --no-build-isolation -e . 
+mkdocs build diff --git a/scripts/pyslurm_bindgen.py b/scripts/pyslurm_bindgen.py index 3d952cf9..82eb157c 100755 --- a/scripts/pyslurm_bindgen.py +++ b/scripts/pyslurm_bindgen.py @@ -1,7 +1,28 @@ #!/usr/bin/env python3 +######################################################################### +# pyslurm_bindgen.py - generate cython compatible bindings for Slurm +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
import autopxd import click +from datetime import datetime import os import re import pathlib @@ -31,6 +52,19 @@ def get_data_type(val): raise ValueError("Cannot get data type for value: {}".format(val)) +def capture_copyright(hdr_file): + out = [] + for line in hdr_file: + if line.startswith("/"): + line = line.replace("/", "#").replace("\\", "") + line = line.replace("*", "#").lstrip() + out.append(line) + if "CODE-OCEC" in line: + break + + return "".join(out) + + def try_get_macro_value(s): if s.startswith("SLURM_BIT"): val = int(s[s.find("(")+1:s.find(")")]) @@ -51,11 +85,22 @@ def try_get_macro_value(s): return None +def write_to_file(content, hdr): + c = click.get_current_context() + output_dir = c.params["output_dir"] + + output_file = os.path.join(output_dir, hdr + ".pxi") + with open(output_file, "w") as ofile: + ofile.write(content) + + def translate_slurm_header(hdr_dir, hdr): hdr_path = os.path.join(hdr_dir, hdr) with open(hdr_path) as f: - translate_hdr_macros(f.readlines(), hdr) + lines = f.readlines() + copyright_notice = capture_copyright(lines) + macros = "".join(translate_hdr_macros(lines, hdr)) c = click.get_current_context() if c.params["show_unparsed_macros"] or c.params["generate_python_const"]: @@ -70,7 +115,59 @@ def translate_slurm_header(hdr_dir, hdr): ) ) - print(str(codegen)) + disclaimer = f"""\ +############################################################################## +# NOTICE: This File has been generated by scripts/pyslurm_bindgen.py, which +# uses the autopxd2 tool in order to generate Cython compatible definitions +# from the {hdr} C-Header file. Basically, this can be seen as a modified +# version of the original header, with the following changes: +# +# * have the correct cython syntax for type definitions, e.g. 
"typedef struct +# " is converted to "ctypedef struct " +# * C-Macros are listed with their appropriate uint type +# * Any definitions that cannot be translated are not included in this file +# +# Generated on {datetime.now().isoformat()} +# +# The Original Copyright notice from {hdr} has been included +# below: +# +{copyright_notice}# +# Slurm is licensed under the GNU GPLv2. For the full text of Slurm's License, +# please see here: pyslurm/slurm/SLURM_LICENSE +# +# Please, as mentioned above, also have a look at Slurm's DISCLAIMER under +# pyslurm/slurm/SLURM_DISCLAIMER +############################################################################## +""" + + pyslurm_copyright = """# +# Copyright (C) 2023 PySlurm Developers (Modifications as described above) +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" + c = click.get_current_context() + code = disclaimer + pyslurm_copyright + macros + "\n" + str(codegen) + code = code.replace("cpdef", "cdef") + if c.params["stdout"]: + print(code) + else: + write_to_file(code, hdr) def handle_special_cases(name, hdr): @@ -122,17 +219,19 @@ def translate_hdr_macros(s, hdr): print("") return + out = [] if vals: if c.params["generate_python_const"]: for name, ty in vals.items(): print("{} = slurm.{}".format(name, name)) else: - print("cdef extern from \"{}\":".format("slurm/" + hdr)) - print("") + hdr_file = "slurm/" + hdr + out.append(f"cdef extern from \"{hdr_file}\":\n") + out.append("\n") for name, ty in vals.items(): - print(" {} {}".format(ty, name)) - print("") + out.append(f" {ty} {name}\n") + return out def setup_include_path(hdr_dir): include_dir = pathlib.Path(hdr_dir).parent.as_posix() @@ -164,7 +263,22 @@ def setup_include_path(hdr_dir): is_flag=True, help="Generate variables acting as constants from Slurm macros.", ) -def main(slurm_header_dir, show_unparsed_macros, generate_python_const): +@click.option( + "--output-dir", + "-o", + metavar="", + default="pyslurm/slurm", + help="Output Directory for the files", +) +@click.option( + "--stdout", + "-s", + default=False, + is_flag=True, + help="Instead of writing everything to files, just print to stdout.", +) +def main(slurm_header_dir, show_unparsed_macros, + generate_python_const, output_dir, stdout): setup_include_path(slurm_header_dir) translate_slurm_header(slurm_header_dir, "slurm_errno.h") translate_slurm_header(slurm_header_dir, "slurm.h") diff --git a/scripts/run_tests_in_container.py b/scripts/run_tests_in_container.py index 4110bcfd..2328633f 100644 --- a/scripts/run_tests_in_container.py +++ b/scripts/run_tests_in_container.py @@ -8,7 +8,7 @@ "3.6": "3.6.15", "3.7": "3.7.12", "3.8": "3.8.12", - "3.9": "3.9.9", + "3.9": "3.9.18", "3.10": "3.10.0", } @@ -17,7 +17,7 @@ def test_run(): host = testinfra.get_host(f"docker://slurmctl") python = 
f'python{os.environ.get("PYTHON")}' host.run(f'pyenv global {version_map[os.environ.get("PYTHON")]}') - print(host.check_output(f"{python} setup.py build")) + print(host.check_output(f"{python} setup.py build -v")) print(host.check_output(f"{python} setup.py install")) print(host.check_output("./scripts/configure.sh")) print(host.check_output(f"{python} -m pip uninstall --yes pytest")) diff --git a/scripts/slurm_msg_type_dict.py b/scripts/slurm_msg_type_dict.py new file mode 100755 index 00000000..dbd2d0dc --- /dev/null +++ b/scripts/slurm_msg_type_dict.py @@ -0,0 +1,46 @@ +#! /usr/bin/env python3 +""" +Parse $slurmrepo/src/common/slurm_protocol_defs.h and create +a small C program that generates a mapping of the numeric +slurm msg types to their symbolic names. + +Example: + ./slurm_msg_type_dict.py $slurmrepo/src/common/slurm_protocol_defs.h > msgdict.c + gcc -o msgdict msgdict.c + ./msgdict +""" + +import re +import sys +import argparse + +def generate_c(header_file_name): + typedef_re = re.compile(r"\s*typedef\s+enum\s*{(.*?)}\s*slurm_msg_type_t\s*;", re.DOTALL) + symbol_re = re.compile(r"^\s*([A-Z0-9_]+)\s*[,=\n]") + + with open(header_file_name, mode="r", encoding="utf-8") as header_file: + header = header_file.read() + typedef = typedef_re.search(header) + if typedef is None: + print("could not identify the slurm_msg_type_t typedef in the header file") + sys.exit(1) + + print("""#include """) + print(typedef.group(0)) + print("""\n\nint main(void) {""") + for line in typedef.group(1).split("\n"): + symbol = symbol_re.match(line) + if symbol is not None: + print(f""" printf("%d: \\\"%s\\\",\\n", {symbol.group(1)}, "{symbol.group(1)}");""") + else: + print(f""" printf("\\n");""") + print(""" return 0;\n}""") + +def main(): + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument("header", help="$slurmrepo/src/common/slurm_protocol_defs.h") + args = parser.parse_args() + generate_c(args.header) 
+ +if __name__ == "__main__": + main() diff --git a/setup.cfg b/setup.cfg index f23c586d..ba3ad0b6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,29 +1,19 @@ -[aliases] -doc=build_sphinx -docs=build_sphinx +[options] +packages = find: + +[options.packages.find] +include = pyslurm, pyslurm.* [bdist_rpm] release = 1 packager = Giovanni Torres -doc_files = CONTRIBUTORS.rst - README.rst - THANKS.rst - doc/ +doc_files = README.md examples/ -build_requires = python-devel >= 2.7 - Cython >= 0.19 - python-sphinx >= 1.1 - slurm-devel >= 17.11.5 - python-nose -requires = slurm-slurmd slurm-slurmdbd +build_requires = python3-devel >= 3.6 + slurm-devel >= 23.02.0 +requires = slurm use_bzip2 = 1 -[build_sphinx] -builder = man -source-dir = doc/source -build-dir = doc/build -all_files = 1 - [flake8] max-line-length = 88 extend-ignore = E203 diff --git a/setup.py b/setup.py index 4ddce583..f3fedd27 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,8 @@ # Keep in sync with pyproject.toml CYTHON_VERSION_MIN = "0.29.30" -SLURM_RELEASE = "22.5" -PYSLURM_PATCH_RELEASE = "0" +SLURM_RELEASE = "23.2" +PYSLURM_PATCH_RELEASE = "2" SLURM_SHARED_LIB = "libslurm.so" CURRENT_DIR = pathlib.Path(__file__).parent @@ -33,7 +33,6 @@ url="https://github.com/PySlurm/pyslurm", platforms=["Linux"], keywords=["HPC", "Batch Scheduler", "Resource Manager", "Slurm", "Cython"], - packages=["pyslurm"], classifiers=[ "Development Status :: 5 - Production/Stable", "Environment :: Console", @@ -157,7 +156,7 @@ def cleanup_build(): info("Removing build/") remove_tree("build", verbose=1) - files = find_files_with_extension("pyslurm", {".c", ".pyc"}) + files = find_files_with_extension("pyslurm", {".c", ".pyc", ".so"}) for file in files: if file.is_file(): @@ -263,7 +262,7 @@ def cythongen(): else: if LooseVersion(cython_version) < LooseVersion(CYTHON_VERSION_MIN): msg = f"Please use Cython version >= {CYTHON_VERSION_MIN}" - raise RuntimeError(msg) + #raise RuntimeError(msg) # Clean up temporary build objects first 
@@ -300,9 +299,8 @@ def parse_setuppy_commands(): cleanup_build() return False - build_cmd = ('install', 'sdist', 'build', 'build_ext', 'build_py', - 'build_clib', 'build_scripts', 'bdist_wheel', 'bdist_rpm', - 'build_src', 'bdist_egg', 'develop') + build_cmd = ('build', 'build_ext', 'build_py', 'build_clib', + 'build_scripts', 'bdist_wheel', 'build_src', 'bdist_egg', 'develop') for cmd in build_cmd: if cmd in args: @@ -318,10 +316,14 @@ def setup_package(): build_it = parse_setuppy_commands() if build_it: - if "sdist" not in sys.argv: - parse_slurm_args() - slurm_sanity_checks() - cythongen() + parse_slurm_args() + slurm_sanity_checks() + cythongen() + + if "install" in sys.argv: + parse_slurm_args() + slurm_sanity_checks() + metadata["ext_modules"] = make_extensions() setup(**metadata) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 00000000..bf70149c --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,44 @@ +######################################################################### +# conftest.py - pytest fixtures +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +import pytest +from pyslurm import ( + Job, + JobSubmitDescription, +) +from util import create_simple_job_desc + + +@pytest.fixture +def submit_job(): + + jobs = [] + def _job(script=None, **kwargs): + job_desc = create_simple_job_desc(script, **kwargs) + job = Job(job_desc.submit()) + + jobs.append(job) + return job + + yield _job + + for j in jobs: + j.cancel() diff --git a/tests/integration/test_db_connection.py b/tests/integration/test_db_connection.py new file mode 100644 index 00000000..876ec63d --- /dev/null +++ b/tests/integration/test_db_connection.py @@ -0,0 +1,56 @@ +######################################################################### +# test_db_connection.py - database connection api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+"""test_db_connection.py - Test database connection api functionalities.""" + +import pytest +import pyslurm + + +def test_create_instance(): + with pytest.raises(RuntimeError): + pyslurm.db.Connection() + + +def test_open(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + + +def test_close(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + + conn.close() + assert not conn.is_open + # no-op + conn.close() + + +def test_commit(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + conn.commit() + + +def test_rollback(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + conn.rollback() diff --git a/tests/integration/test_db_job.py b/tests/integration/test_db_job.py new file mode 100644 index 00000000..310df51f --- /dev/null +++ b/tests/integration/test_db_job.py @@ -0,0 +1,139 @@ +######################################################################### +# test_db_job.py - database job api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""test_db_job.py - Integration test database job api functionalities.""" + +import pytest +import pyslurm +import time +import util +import json + + +# TODO: Instead of submitting new Jobs and waiting to test Database API +# functionality, we could just fill a slurm database with data on a host, then +# dump the slurm_acct_db to a SQL file and import it in the test environment +# before the integration tests are run. +# Just a few Jobs and other stuff is enough to keep it small, so it could also +# be put in the repository and uploaded to github. + + +def test_load_single(submit_job): + job = submit_job() + util.wait() + db_job = pyslurm.db.Job.load(job.id) + + assert db_job.id == job.id + + with pytest.raises(pyslurm.RPCError): + pyslurm.db.Job.load(0) + + +def test_parse_all(submit_job): + job = submit_job() + util.wait() + db_job = pyslurm.db.Job.load(job.id) + job_dict = db_job.to_dict() + + assert job_dict["stats"] + assert job_dict["steps"] + + +def test_to_json(submit_job): + job = submit_job() + util.wait() + + jfilter = pyslurm.db.JobFilter(ids=[job.id]) + jobs = pyslurm.db.Jobs.load(jfilter) + + json_data = jobs.to_json() + dict_data = json.loads(json_data) + assert dict_data + assert json_data + assert len(dict_data) == 1 + + +def test_modify(submit_job): + job = submit_job() + util.wait(5) + + jfilter = pyslurm.db.JobFilter(ids=[job.id]) + changes = pyslurm.db.Job(comment="test comment") + pyslurm.db.Jobs.modify(jfilter, changes) + + job = pyslurm.db.Job.load(job.id) + assert job.comment == "test comment" + + +def test_modify_with_existing_conn(submit_job): + job = submit_job() + util.wait(5) + + conn = pyslurm.db.Connection.open() + jfilter = pyslurm.db.JobFilter(ids=[job.id]) + changes = pyslurm.db.Job(comment="test comment") + pyslurm.db.Jobs.modify(jfilter, changes, conn) + + job = pyslurm.db.Job.load(job.id) + assert job.comment != "test comment" + + conn.commit() + job = pyslurm.db.Job.load(job.id) + assert job.comment == "test comment" + + +def
test_if_steps_exist(submit_job): + # TODO + pass + + +def test_load_with_filter_node(submit_job): + # TODO + pass + + +def test_load_with_filter_qos(submit_job): + # TODO + pass + + +def test_load_with_filter_cluster(submit_job): + # TODO + pass + + +def test_load_with_filter_multiple(submit_job): + # TODO + pass + + +def test_load_with_script(submit_job): + script = util.create_job_script() + job = submit_job(script=script) + util.wait(5) + db_job = pyslurm.db.Job.load(job.id, with_script=True) + assert db_job.script == script + + +def test_load_with_env(submit_job): + job = submit_job() + util.wait(5) + db_job = pyslurm.db.Job.load(job.id, with_env=True) + assert db_job.environment diff --git a/tests/integration/test_db_qos.py b/tests/integration/test_db_qos.py new file mode 100644 index 00000000..e1cde024 --- /dev/null +++ b/tests/integration/test_db_qos.py @@ -0,0 +1,55 @@ +######################################################################### +# test_db_qos.py - database qos api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+"""test_db_qos.py - Integration test database qos api functionalities.""" + +import pytest +import pyslurm +import time +import util + + +def test_load_single(): + qos = pyslurm.db.QualityOfService.load("normal") + + assert qos.name == "normal" + assert qos.id == 1 + + with pytest.raises(pyslurm.RPCError): + pyslurm.db.QualityOfService.load("qos_non_existent") + + +def test_parse_all(submit_job): + qos = pyslurm.db.QualityOfService.load("normal") + qos_dict = qos.to_dict() + + assert qos_dict + assert qos_dict["name"] == qos.name + + +def test_load_all(): + qos = pyslurm.db.QualitiesOfService.load() + assert qos + + +def test_load_with_filter_name(): + qfilter = pyslurm.db.QualityOfServiceFilter(names=["non_existent"]) + qos = pyslurm.db.QualitiesOfService.load(qfilter) + assert not qos diff --git a/tests/integration/test_job.py b/tests/integration/test_job.py new file mode 100644 index 00000000..8c9d4750 --- /dev/null +++ b/tests/integration/test_job.py @@ -0,0 +1,190 @@ +######################################################################### +# test_job.py - job api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+"""test_job.py - Integration test job api functionalities.""" + +import time +import pytest +import pyslurm +import json +import util +from util import create_simple_job_desc +from pyslurm import ( + Job, + Jobs, + JobSubmitDescription, + RPCError, +) + + +def test_parse_all(submit_job): + job = submit_job() + Job.load(job.id).to_dict() + + +def test_load(submit_job): + job = submit_job() + jid = job.id + + # Nothing has been loaded at this point, just make sure everything is + # on default values. + assert job.ntasks == 1 + assert job.cpus_per_task == 1 + assert job.time_limit == None + + # Now load the job info + job = Job.load(jid) + + assert job.id == jid + assert job.ntasks == 2 + assert job.cpus_per_task == 3 + assert job.time_limit == 1440 + + with pytest.raises(RPCError): + Job.load(99999) + + +def test_cancel(submit_job): + job = submit_job() + job.cancel() + # make sure the job is actually cancelled + util.wait() + assert Job.load(job.id).state == "CANCELLED" + + +def test_send_signal(submit_job): + job = submit_job() + + util.wait() + assert Job.load(job.id).state == "RUNNING" + + # Send a SIGKILL (basically cancelling the Job) + job.send_signal(9) + + # make sure the job is actually cancelled + util.wait() + assert Job.load(job.id).state == "CANCELLED" + + +def test_suspend_unsuspend(submit_job): + job = submit_job() + + util.wait() + job.suspend() + assert Job.load(job.id).state == "SUSPENDED" + + job.unsuspend() + # make sure the job is actually running again + util.wait() + assert Job.load(job.id).state == "RUNNING" + + +# Don't need to test hold/resume, since it uses just job.modify() to set +# priority to 0/INFINITE. 
+def test_modify(submit_job): + job = submit_job(priority=0) + job = Job(job.id) + + changes = JobSubmitDescription( + time_limit = "2-00:00:00", + ntasks = 5, + cpus_per_task = 4, + ) + + job.modify(changes) + job = Job.load(job.id) + + assert job.time_limit == 2880 + assert job.ntasks == 5 + assert job.cpus_per_task == 4 + + +def test_requeue(submit_job): + job = submit_job() + job = Job.load(job.id) + + assert job.requeue_count == 0 + + util.wait() + job.requeue() + job = Job.load(job.id) + + assert job.requeue_count == 1 + + +def test_notify(submit_job): + job = submit_job() + util.wait() + + # Could check the logfile, but we just assume for now + # that when this function raises no Exception, everything worked. + job.notify("Hello Friends!") + + +def test_get_batch_script(submit_job): + script_body = create_simple_job_desc().script + job = submit_job() + + assert script_body == job.get_batch_script() + + +def test_get_job_queue(submit_job): + # Submit 10 jobs, gather the job_ids in a list + job_list = [submit_job() for i in range(10)] + + jobs = Jobs.load() + for job in job_list: + # Check to see if all the Jobs we submitted exist + assert job.id in jobs + assert isinstance(jobs[job.id], Job) + + +def test_load_steps(submit_job): + job_list = [submit_job() for i in range(3)] + util.wait() + + jobs = Jobs.load() + jobs.load_steps() + + for _job in job_list: + job = jobs[_job.id] + assert job.state == "RUNNING" + assert job.steps + assert isinstance(job.steps, pyslurm.JobSteps) + assert job.steps.get("batch") + + +def test_to_json(submit_job): + job_list = [submit_job() for i in range(3)] + util.wait() + + jobs = Jobs.load() + jobs.load_steps() + + json_data = jobs.to_json() + dict_data = json.loads(json_data) + assert dict_data + assert json_data + assert len(dict_data) >= 3 + + +def test_get_resource_layout_per_node(submit_job): + # TODO + assert True diff --git a/tests/integration/test_job_steps.py b/tests/integration/test_job_steps.py new file mode 100644 
index 00000000..e61f9ad1 --- /dev/null +++ b/tests/integration/test_job_steps.py @@ -0,0 +1,177 @@ +######################################################################### +# test_job_steps.py - job steps api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_job_steps.py - Test the job steps api functions.""" + +import pytest +import time +from pyslurm import ( + JobStep, + JobSteps, + RPCError, +) +import util + + +def create_job_script_multi_step(steps=None): + default = f""" + srun -n1 -N1 -c2 \ + -J step_zero --distribution=block:cyclic:block,Pack \ + sleep 300 & + srun -n1 -N1 -c3 \ + -t 10 -J step_one --distribution=block:cyclic:block,Pack \ + sleep 300 &""" + + job_script = f"""\ +#!/bin/bash + +echo "Got args: $@" + +/usr/bin/env + +{default if steps is None else steps} +wait +""" + return job_script + + +def test_load(submit_job): + job = submit_job(script=create_job_script_multi_step()) + + # Load the step info, waiting one second to make sure the Step + # actually exists. 
+ util.wait() + step = JobStep.load(job.id, "batch") + + assert step.id == "batch" + assert step.job_id == job.id + assert step.name == "batch" + # Job was submitted with ntasks=2, but the batch step always has just 1. + assert step.ntasks == 1 + # Job was submitted with a time-limit of 1 day, but it seems this doesn't + # propagate through for the steps if not set explicitly. + assert step.time_limit is None + + # Now try to load the first and second Step started by srun + step_zero = JobStep.load(job, 0) + step_one = JobStep.load(job, 1) + + # It is possible that the srun executed as the second command will + # become the Step with ID '0' - so we just swap it. + if step_zero.name == "step_one": + tmp = step_zero + step_zero = step_one + step_one = tmp + + assert step_one.id == 0 + assert step_zero.id == 1 + + step = step_zero + assert step.job_id == job.id + assert step.name == "step_zero" + assert step.ntasks == 1 + assert step.alloc_cpus == 2 + assert step.time_limit is None + + step = step_one + assert step.job_id == job.id + assert step.name == "step_one" + assert step.ntasks == 1 + assert step.alloc_cpus == 3 + assert step.time_limit == 10 + + +def test_collection(submit_job): + job = submit_job(script=create_job_script_multi_step()) + + util.wait() + steps = JobSteps.load(job) + + assert steps + # We have 3 Steps: batch, 0 and 1 + assert len(steps) == 3 + assert ("batch" in steps and + 0 in steps and + 1 in steps) + + +def test_cancel(submit_job): + job = submit_job(script=create_job_script_multi_step()) + + util.wait() + steps = JobSteps.load(job) + assert len(steps) == 3 + assert ("batch" in steps and + 0 in steps and + 1 in steps) + + steps[0].cancel() + + util.wait() + steps = JobSteps.load(job) + assert len(steps) == 2 + assert ("batch" in steps and + 1 in steps) + + +def test_modify(submit_job): + steps = "srun -t 20 sleep 100" + job = submit_job(script=create_job_script_multi_step(steps)) + + util.wait() + step = JobStep.load(job, 0) + assert 
step.time_limit == 20 + + step.modify(JobStep(time_limit="00:05:00")) + assert JobStep.load(job, 0).time_limit == 5 + + step.modify(JobStep(time_limit="00:15:00")) + assert JobStep.load(job, 0).time_limit == 15 + + +def test_send_signal(submit_job): + steps = "srun -t 10 sleep 100" + job = submit_job(script=create_job_script_multi_step(steps)) + + util.wait() + step = JobStep.load(job, 0) + assert step.state == "RUNNING" + + # Send a SIGTERM (basically cancelling the Job) + step.send_signal(15) + + # Make sure the job is actually cancelled. + # If a RPCError is raised, this means the Step got cancelled. + util.wait() + with pytest.raises(RPCError): + step = JobStep.load(job, 0) + + +def test_load_with_wrong_step_id(submit_job): + job = submit_job() + + with pytest.raises(RPCError): + JobStep.load(job, 3) + + +def test_parse_all(submit_job): + job = submit_job() + util.wait() + JobStep.load(job, "batch").to_dict() diff --git a/tests/integration/test_job_submit.py b/tests/integration/test_job_submit.py new file mode 100644 index 00000000..0626d1c1 --- /dev/null +++ b/tests/integration/test_job_submit.py @@ -0,0 +1,53 @@ +######################################################################### +# test_job_submit.py - job submit api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_job_submit.py - Test the job submit api functions.""" + +import pytest +import pyslurm +from util import create_simple_job_desc, create_job_script +from pyslurm import ( + Job, + Jobs, + JobSubmitDescription, + RPCError, +) + +def job_desc(**kwargs): + return JobSubmitDescription(script=create_job_script(), **kwargs) + + +def test_submit_example1(): + desc = job_desc() + desc.name = "test1" + desc.working_directory = "/tmp" + desc.qos = "normal" + desc.standard_output = "/tmp/test1.out" + desc.standard_error = "/tmp/test1.err" + desc.ntasks = 2 + desc.cpus_per_task = 2 + desc.resource_sharing = "yes" + desc.memory_per_cpu = "2G" + desc.time_limit = 10 + desc.nice = 500 + desc.distribution = "block:block:cyclic" + desc.is_requeueable = True + desc.kill_on_node_fail = True + desc.submit() diff --git a/tests/integration/test_node.py b/tests/integration/test_node.py new file mode 100644 index 00000000..94ede1e5 --- /dev/null +++ b/tests/integration/test_node.py @@ -0,0 +1,77 @@ +######################################################################### +# test_node.py - node api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_node.py - Test the node api functions.""" + +import pytest +import pyslurm +import json +from pyslurm import Node, Nodes, RPCError + + +def test_load(): + name, _ = Nodes.load().popitem() + + # Now load the node info + node = Node.load(name) + assert node.name == name + assert node.weight is not None + assert node.slurm_version is not None + + with pytest.raises(RPCError, + match=f"Node 'nonexistent' does not exist"): + Node.load("nonexistent") + + +def test_create(): + node = Node("testhostpyslurm") + node.create() + + with pytest.raises(RPCError, + match=f"Invalid node state specified"): + Node("testhostpyslurm2").create("idle") + + +def test_modify(): + _, node = Nodes.load().popitem() + + node.modify(Node(weight=10000)) + assert Node.load(node.name).weight == 10000 + + node.modify(Node(weight=20000)) + assert Node.load(node.name).weight == 20000 + + node.modify(Node(weight=5000)) + assert Node.load(node.name).weight == 5000 + + +def test_parse_all(): + _, node = Nodes.load().popitem() + assert node.to_dict() + + +def test_to_json(): + nodes = Nodes.load() + json_data = nodes.to_json() + dict_data = json.loads(json_data) + + assert dict_data + assert len(dict_data) >= 1 + assert json_data diff --git a/tests/integration/test_partition.py b/tests/integration/test_partition.py new file mode 100644 index 00000000..30c54f92 --- /dev/null +++ b/tests/integration/test_partition.py @@ -0,0 +1,101 @@ +######################################################################### +# test_partition.py - partition api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# Copyright (C) 2023 PySlurm Developers +# +# This file is part of PySlurm +# +# PySlurm is free software; you can 
redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_partition.py - Test the Partition api functions.""" + +import pytest +import pyslurm +import json +import util +from pyslurm import Partition, Partitions, RPCError + + +def test_load(): + name, part = Partitions.load().popitem() + + assert part.name + assert part.state + + with pytest.raises(RPCError, + match=f"Partition 'nonexistent' doesn't exist"): + Partition.load("nonexistent") + + +def test_create_delete(): + part = Partition( + name="testpart", + default_time="20-00:00:00", + default_memory_per_cpu=1024, + ) + part.create() + part.delete() + + +def test_modify(): + _, part = Partitions.load().popitem() + + part.modify(Partition(default_time=120)) + assert Partition.load(part.name).default_time == 120 + + part.modify(Partition(default_time="1-00:00:00")) + assert Partition.load(part.name).default_time == 24*60 + + part.modify(Partition(max_time="UNLIMITED")) + assert Partition.load(part.name).max_time == "UNLIMITED" + + part.modify(Partition(state="DRAIN")) + assert Partition.load(part.name).state == "DRAIN" + + part.modify(Partition(state="UP")) + assert Partition.load(part.name).state == "UP" + + +def test_parse_all(): + _, part = Partitions.load().popitem() + assert part.to_dict() + + +def test_to_json(): + parts = Partitions.load() + json_data = parts.to_json() + dict_data = json.loads(json_data) 
+ + assert dict_data + assert len(dict_data) >= 1 + assert json_data + + +def test_reload(): + _partnames = [util.randstr() for i in range(3)] + _tmp_parts = Partitions(_partnames) + for part in _tmp_parts.values(): + part.create() + + all_parts = Partitions.load() + assert len(all_parts) >= 3 + + my_parts = Partitions(_partnames[1:]).reload() + assert len(my_parts) == 2 + for part in my_parts.values(): + assert part.state != "UNKNOWN" + + for part in _tmp_parts.values(): + part.delete() diff --git a/tests/integration/util.py b/tests/integration/util.py new file mode 100644 index 00000000..05576052 --- /dev/null +++ b/tests/integration/util.py @@ -0,0 +1,71 @@ +######################################################################### +# util.py - utility functions for tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import pytest +from pyslurm import ( + Job, + JobSubmitDescription, +) +import time +import random, string + +# Horrendous, but works for now, because when testing against a real slurmctld +# we need to wait a bit for state changes (i.e. 
we cancel a job and +# immediately check after if the state is really "CANCELLED", but the state +# hasn't changed yet, so we need to wait a bit) +WAIT_SECS_SLURMCTLD = 3 + + +def wait(secs=WAIT_SECS_SLURMCTLD): + time.sleep(secs) + + +def randstr(strlen=10): + chars = string.ascii_lowercase + return ''.join(random.choice(chars) for n in range(strlen)) + + +def create_job_script(): + job_script = """\ +#!/bin/bash + +echo "Got args: $@" + +/usr/bin/env + +sleep 500\ + +""" + return job_script + + +def create_simple_job_desc(script=None, **kwargs): + job = JobSubmitDescription(**kwargs) + + job.name = "test_job" + job.standard_output = "/tmp/slurm-test-%j.out" + job.memory_per_cpu = "1G" + job.ntasks = 2 + job.cpus_per_task = 3 + job.script = create_job_script() if not script else script + job.time_limit = "1-00:00:00" + + return job diff --git a/tests/test_job.py b/tests/test_job.py index efb16c09..e11bb65c 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -110,3 +110,79 @@ def test_job_kill(): # time.sleep(3) # test_job_search_after = pyslurm.job().find_id(test_job_id)[0] # assert_equals(test_job_search_after.get("job_state"), "FAILED") + + +def test_job_wait_finished(): + """Job: Test job().wait_finished().""" + test_job = { + "wrap": "sleep 30", + "job_name": "pyslurm_test_job", + "ntasks": 1, + "cpus_per_task": 1, + } + test_job_id = pyslurm.job().submit_batch_job(test_job) + start_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + + # wait for the job to finish + exit_code = pyslurm.job().wait_finished(test_job_id) + + end_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + assert start_job_state != "COMPLETED" + assert end_job_state == "COMPLETED" + assert exit_code == 0 + + # test again with another wrap + test_job = { + "wrap": "sleep 300; exit 1", # "exit 1" should yield failure ending + "job_name": "pyslurm_test_job", + "ntasks": 1, + "cpus_per_task": 1, + } + test_job_id = pyslurm.job().submit_batch_job(test_job) + 
start_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + + # wait for the job to finish + exit_code = pyslurm.job().wait_finished(test_job_id) + + end_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + assert start_job_state != "COMPLETED" + assert end_job_state == "FAILED" + assert exit_code == 1 + + +def test_job_wait_finished_w_arrays(): + """Job: Test job().wait_finished() with job arrays.""" + test_job = { + "wrap": "sleep 30; exit 0", + "job_name": "pyslurm_array_test_job", + "ntasks": 1, + "cpus_per_task": 1, + "array_inx": "0,1,2", + } + test_job_id = pyslurm.job().submit_batch_job(test_job) + start_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + # wait for the job to finish + exit_code = pyslurm.job().wait_finished(test_job_id) + end_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + assert start_job_state != "COMPLETED" + assert end_job_state == "COMPLETED" + assert exit_code == 0 + + # test for exit codes: maximum exit code of all array jobs + test_job = { + # use array ID as exit code to yield different exit codes: 0, 1, 2 + "wrap": "sleep 30; exit $SLURM_ARRAY_TASK_ID", + "job_name": "pyslurm_array_test_job", + "ntasks": 1, + "cpus_per_task": 1, + "array_inx": "0,1,2", + } + test_job_id = pyslurm.job().submit_batch_job(test_job) + start_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + # wait for the job to finish + exit_code = pyslurm.job().wait_finished(test_job_id) + end_job_state = pyslurm.job().find_id(test_job_id)[0]["job_state"] + assert start_job_state != "COMPLETED" + # exit code 2 (the maximum of all) should yield FAILED for the entire job + assert end_job_state == "FAILED" + assert exit_code == 2 diff --git a/tests/unit/test_collection.py b/tests/unit/test_collection.py new file mode 100644 index 00000000..a29e4f86 --- /dev/null +++ b/tests/unit/test_collection.py @@ -0,0 +1,398 @@ +######################################################################### +# 
test_collection.py - custom collection unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_collection.py - Unit test custom collection functionality.""" + +import pytest +import pyslurm +from pyslurm.xcollections import sum_property + +LOCAL_CLUSTER = pyslurm.settings.LOCAL_CLUSTER +OTHER_CLUSTER = "other_cluster" + + +class TestMultiClusterMap: + + def _create_collection(self): + data = { + LOCAL_CLUSTER: { + 1: pyslurm.db.Job(1), + 2: pyslurm.db.Job(2), + }, + OTHER_CLUSTER: { + 1: pyslurm.db.Job(1, cluster=OTHER_CLUSTER), + 10: pyslurm.db.Job(10, cluster=OTHER_CLUSTER), + } + } + col = pyslurm.db.Jobs() + col.update(data) + return col + + def test_create(self): + jobs = pyslurm.db.Jobs("101,102") + assert len(jobs) == 2 + assert 101 in jobs + assert 102 in jobs + assert jobs[101].id == 101 + assert jobs[102].id == 102 + + jobs = pyslurm.db.Jobs([101, 102]) + assert len(jobs) == 2 + assert 101 in jobs + assert 102 in jobs + assert jobs[101].id == 101 + assert jobs[102].id == 102 + + jobs = pyslurm.db.Jobs( + { + 101: pyslurm.db.Job(101), + 102: pyslurm.db.Job(102), + } + ) + assert len(jobs) == 2 + assert 101 in jobs + assert 102 in jobs + assert jobs[101].id 
== 101 + assert jobs[102].id == 102 + assert True + + def test_add(self): + col = self._create_collection() + col_len = len(col) + + item = pyslurm.db.Job(20) + col.add(item) + + assert len(col[LOCAL_CLUSTER]) == 3 + assert len(col) == col_len+1 + + item = pyslurm.db.Job(20, cluster=OTHER_CLUSTER) + col.add(item) + + assert len(col[LOCAL_CLUSTER]) == 3 + assert len(col) == col_len+2 + + def test_get(self): + col = self._create_collection() + + item = col.get(1) + assert item is not None + assert isinstance(item, pyslurm.db.Job) + assert item.cluster == LOCAL_CLUSTER + + item = col.get((OTHER_CLUSTER, 1)) + assert item is not None + assert isinstance(item, pyslurm.db.Job) + assert item.cluster == OTHER_CLUSTER + + item = col.get(30) + assert item is None + + def test_keys(self): + col = self._create_collection() + + keys = col.keys() + keys_with_cluster = keys.with_cluster() + assert len(keys) == len(col) + + for k in keys: + assert k + + for cluster, k in keys_with_cluster: + assert cluster + assert cluster in col.data + assert k + + def test_values(self): + col = self._create_collection() + values = col.values() + + assert len(values) == len(col) + + for item in values: + assert item + print(item) + assert isinstance(item, pyslurm.db.Job) + assert item.cluster in col.data + + def test_getitem(self): + col = self._create_collection() + + item1 = col[LOCAL_CLUSTER][1] + item2 = col[1] + item3 = col[OTHER_CLUSTER][1] + + assert item1 + assert item2 + assert item3 + assert item1 == item2 + assert item1 != item3 + + with pytest.raises(KeyError): + item = col[30] + + with pytest.raises(KeyError): + item = col[OTHER_CLUSTER][30] + + def test_setitem(self): + col = self._create_collection() + col_len = len(col) + + item = pyslurm.db.Job(30) + col[item.id] = item + assert len(col[LOCAL_CLUSTER]) == 3 + assert len(col) == col_len+1 + + item = pyslurm.db.Job(50, cluster=OTHER_CLUSTER) + col[OTHER_CLUSTER][item.id] = item + assert len(col[OTHER_CLUSTER]) == 3 + assert 
len(col) == col_len+2 + + item = pyslurm.db.Job(100, cluster=OTHER_CLUSTER) + col[item] = item + assert len(col[OTHER_CLUSTER]) == 4 + assert len(col) == col_len+3 + + item = pyslurm.db.Job(101, cluster=OTHER_CLUSTER) + col[(item.cluster, item.id)] = item + assert len(col[OTHER_CLUSTER]) == 5 + assert len(col) == col_len+4 + + new_other_data = { + 1: pyslurm.db.Job(1), + 2: pyslurm.db.Job(2), + } + col[OTHER_CLUSTER] = new_other_data + assert len(col[OTHER_CLUSTER]) == 2 + assert len(col[LOCAL_CLUSTER]) == 3 + assert 1 in col[OTHER_CLUSTER] + assert 2 in col[OTHER_CLUSTER] + + def test_delitem(self): + col = self._create_collection() + col_len = len(col) + + del col[1] + assert len(col[LOCAL_CLUSTER]) == 1 + assert len(col) == col_len-1 + + del col[OTHER_CLUSTER][1] + assert len(col[OTHER_CLUSTER]) == 1 + assert len(col) == col_len-2 + + del col[OTHER_CLUSTER] + assert len(col) == 1 + assert OTHER_CLUSTER not in col.data + + def test_copy(self): + col = self._create_collection() + col_copy = col.copy() + assert col == col_copy + + def test_iter(self): + col = self._create_collection() + for k in col: + assert k + + def test_items(self): + col = self._create_collection() + for k, v in col.items(): + assert k + assert v + assert isinstance(v, pyslurm.db.Job) + + for c, k, v in col.items().with_cluster(): + assert c + assert k + assert v + assert isinstance(v, pyslurm.db.Job) + + def test_popitem(self): + col = self._create_collection() + col_len = len(col) + + key, item = col.popitem() + assert item + assert key + assert isinstance(item, pyslurm.db.Job) + assert len(col) == col_len-1 + + def test_update(self): + col = self._create_collection() + col_len = len(col) + + col_update = { + 30: pyslurm.db.Job(30), + 50: pyslurm.db.Job(50), + } + col.update(col_update) + assert len(col) == col_len+2 + assert len(col[LOCAL_CLUSTER]) == 4 + assert 30 in col + assert 50 in col + + col_update = { + "new_cluster": { + 80: pyslurm.db.Job(80, cluster="new_cluster"), + 50: 
pyslurm.db.Job(50, cluster="new_cluster"), + } + } + col.update(col_update) + assert len(col) == col_len+4 + assert len(col[LOCAL_CLUSTER]) == 4 + assert len(col["new_cluster"]) == 2 + assert 80 in col + assert 50 in col + + col_update = { + 200: pyslurm.db.Job(200, cluster=OTHER_CLUSTER), + 300: pyslurm.db.Job(300, cluster=OTHER_CLUSTER), + } + col.update({OTHER_CLUSTER: col_update}) + assert len(col) == col_len+6 + assert len(col[OTHER_CLUSTER]) == 4 + assert 200 in col + assert 300 in col + + empty_col = pyslurm.db.Jobs() + empty_col.update(col_update) + assert len(empty_col) == 2 + + def test_pop(self): + col = self._create_collection() + col_len = len(col) + + item = col.pop(1) + assert item + assert item.id == 1 + assert len(col) == col_len-1 + + item = col.pop(999, default="def") + assert item == "def" + + def test_contains(self): + col = self._create_collection() + item = pyslurm.db.Job(1) + assert item in col + + assert 10 in col + assert 20 not in col + + assert (OTHER_CLUSTER, 10) in col + assert (LOCAL_CLUSTER, 10) not in col + + def test_to_json(self): + col = self._create_collection() + data = col.to_json(multi_cluster=True) + assert data + + def test_cluster_view(self): + col = self._create_collection() + assert len(col.clusters()) == 2 + for c in col.clusters(): + assert c + + def test_sum_property(self): + class TestObject: + @property + def memory(self): + return 10240 + + @property + def cpus(self): + return None + + object_dict = {i: TestObject() for i in range(10)} + + expected = 10240 * 10 + assert sum_property(object_dict, TestObject.memory) == expected + + expected = 0 + assert sum_property(object_dict, TestObject.cpus) == expected + + def test_ior(self): + col = self._create_collection() + col_len = len(col) + + other_data = { + LOCAL_CLUSTER: { + 3: pyslurm.db.Job(3), + 2: pyslurm.db.Job(2), + }, + "test_cluster": { + 1000: pyslurm.db.Job(1000, cluster="test_cluster"), + 1001: pyslurm.db.Job(1001, cluster="test_cluster"), + } + } + 
other_col = pyslurm.db.Jobs() + other_col.update(other_data) + + col |= other_col + assert isinstance(col, pyslurm.xcollections.MultiClusterMap) + assert isinstance(col, pyslurm.db.Jobs) + assert len(col.clusters()) == 3 + assert len(col) == col_len+3 + + dict_data = { + 10: pyslurm.db.Job(10), + 11: pyslurm.db.Job(11), + } + + col |= dict_data + assert isinstance(col, pyslurm.xcollections.MultiClusterMap) + assert isinstance(col, pyslurm.db.Jobs) + assert len(col.clusters()) == 3 + assert len(col[LOCAL_CLUSTER]) == 5 + assert len(col) == col_len+5 + + def test_or(self): + col = self._create_collection() + col_len = len(col) + + other_data = { + LOCAL_CLUSTER: { + 3: pyslurm.db.Job(3), + 2: pyslurm.db.Job(2), + }, + "test_cluster": { + 1000: pyslurm.db.Job(1000, cluster="test_cluster"), + 1001: pyslurm.db.Job(1001, cluster="test_cluster"), + } + } + other_col = pyslurm.db.Jobs() + other_col.update(other_data) + + _col = col | other_col + assert isinstance(_col, pyslurm.xcollections.MultiClusterMap) + assert isinstance(_col, pyslurm.db.Jobs) + assert len(_col.clusters()) == 3 + assert len(_col) == col_len+3 + + dict_data = { + 10: pyslurm.db.Job(10), + 11: pyslurm.db.Job(11), + } + + _col = _col | dict_data + assert isinstance(_col, pyslurm.xcollections.MultiClusterMap) + assert isinstance(_col, pyslurm.db.Jobs) + assert len(_col.clusters()) == 3 + assert len(_col[LOCAL_CLUSTER]) == 5 + assert len(_col) == col_len+5 diff --git a/tests/unit/test_common.py b/tests/unit/test_common.py new file mode 100644 index 00000000..4706130f --- /dev/null +++ b/tests/unit/test_common.py @@ -0,0 +1,429 @@ +######################################################################### +# test_common.py - common utility tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public 
License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_common.py - Test the most commonly used helper functions.""" + +import pyslurm +import pytest +from datetime import datetime +from pyslurm import Job, JobSubmitDescription, Node, Partition +from pyslurm.utils.ctime import ( + timestr_to_mins, + timestr_to_secs, + mins_to_timestr, + secs_to_timestr, + date_to_timestamp, + timestamp_to_date, +) +from pyslurm.utils.uint import ( + u8, + u16, + u32, + u64, + u8_parse, + u16_parse, + u32_parse, + u64_parse, +) +from pyslurm.utils.helpers import ( + uid_to_name, + gid_to_name, + user_to_uid, + group_to_gid, + expand_range_str, + humanize, + dehumanize, + signal_to_num, + cpubind_to_num, + nodelist_from_range_str, + nodelist_to_range_str, + instance_to_dict, + gres_from_tres_dict, +) +from pyslurm.utils import cstr +from pyslurm.xcollections import ( + sum_property, +) + + +class TestStrings: + + def test_fmalloc(self): + n = Node() + + n.name = "Testing fmalloc string routines." + assert n.name == "Testing fmalloc string routines." 
+ + n.name = None + assert n.name is None + + # Everything after a \0 will be cut off + n.name = "test1\0test2" + assert n.name == "test1" + + n.name = "\0" + assert n.name is None + + def test_lists(self): + n = Node() + input_as_list = ["test1", "test2", "test3", "test4"] + input_as_str = ",".join(input_as_list) + + n.available_features = input_as_list + assert n.available_features == input_as_list + + n.available_features = input_as_str + assert n.available_features == input_as_list + + n.available_features = [] + assert n.available_features == [] + + n.available_features = "" + assert n.available_features == [] + + n.available_features = None + assert n.available_features == [] + + def test_str_to_dict(self): + expected_dict = {"cpu": 2, "mem": "11G", + "gres/gpu": 1, "gres/gpu:nvidia-a100": 1} + input_str = "cpu=2,mem=11G,gres/gpu=1,gres/gpu:nvidia-a100=1" + assert cstr.to_dict(input_str) == expected_dict + assert cstr.to_dict("") == {} + + def test_dict_to_str(self): + input_dict = {"key1": "value1", "key2": "value2"} + expected_str = "key1=value1,key2=value2" + assert cstr.dict_to_str(input_dict) == expected_str + + input_dict = {"key1": "value1", "key2": "value2"} + expected_str = "key1=value1,key2=value2" + assert cstr.dict_to_str(input_dict) == expected_str + + expected_str = "key1-value1:key2-value2" + assert cstr.dict_to_str(input_dict, delim1=":", delim2="-") == expected_str + + input_dict = {"key1=": "value1", "key2": "value2"} + expected_str = "key1=value1,key2=value2" + with pytest.raises(ValueError, + match=r"Key or Value cannot contain either*"): + assert cstr.dict_to_str(input_dict) == expected_str + + expected_str = "key1=value1,key2=value2" + assert cstr.dict_to_str(expected_str) == expected_str + + assert cstr.dict_to_str({}) == None + assert cstr.dict_to_str("") == None + + def test_dict_to_gres_str(self): + input_dict = {"gpu:tesla": 3} + expected_str = "gres:gpu:tesla:3" + assert cstr.from_gres_dict(input_dict) == expected_str + assert 
cstr.from_gres_dict(expected_str) == expected_str + assert cstr.from_gres_dict("gpu:tesla:3") == expected_str + + input_dict = {"gpu": 3} + expected_str = "gres:gpu:3" + assert cstr.from_gres_dict(input_dict) == expected_str + assert cstr.from_gres_dict(expected_str) == expected_str + assert cstr.from_gres_dict("gpu:3") == expected_str + + input_dict = {"tesla": 3, "a100": 5} + expected_str = "gres:gpu:tesla:3,gres:gpu:a100:5" + assert cstr.from_gres_dict(input_dict, "gpu") == expected_str + assert cstr.from_gres_dict(expected_str) == expected_str + assert cstr.from_gres_dict("tesla:3,a100:5", "gpu") == expected_str + + def test_str_to_gres_dict(self): + input_str = "gpu:nvidia-a100:1(IDX:0,1)" + expected = {"gpu:nvidia-a100":{"count": 1, "indexes": "0,1"}} + assert cstr.to_gres_dict(input_str) == expected + + input_str = "gpu:nvidia-a100:1" + expected = {"gpu:nvidia-a100": 1} + assert cstr.to_gres_dict(input_str) == expected + + def test_gres_from_tres_dict(self): + input_dict = {"cpu": 10, "mem": "5G", + "gres/gpu": 5, "gres/gpu:nvidia": 100} + expected = {"gpu": 5, "gpu:nvidia": 100} + assert gres_from_tres_dict(input_dict) == expected + + +class TestUint: + + def _uint_impl(self, func_set, func_get, typ): + val = func_set(2**typ-2) + assert func_get(val) == None + + val = func_set(None) + assert func_get(val) == None + + val = func_set(str(2**typ-2)) + assert func_get(val) == None + + val = func_set("UNLIMITED", inf=True) + assert func_get(val) == "UNLIMITED" + + val = func_set(0) + assert func_get(val) == None + + val = func_set(0, zero_is_noval=False) + assert func_get(val, zero_is_noval=False) == 0 + + with pytest.raises(TypeError, + match="an integer is required"): + val = func_set("UNLIMITED") + + with pytest.raises(OverflowError, + match=r"can't convert negative value to*"): + val = func_set(-1) + + with pytest.raises(OverflowError, + match=r"value too large to convert to*|" + "Python int too large*"): + val = func_set(2**typ) + + def test_u8(self): + 
self._uint_impl(u8, u8_parse, 8) + + def test_u16(self): + self._uint_impl(u16, u16_parse, 16) + + def test_u32(self): + self._uint_impl(u32, u32_parse, 32) + + def test_u64(self): + self._uint_impl(u64, u64_parse, 64) + + def test_set_parse_bool_flag(self): + part = pyslurm.Partition() + + assert not part.is_hidden + + part.is_hidden = True + assert part.is_hidden + + part.is_root_only = True + assert part.is_hidden + assert part.is_root_only + assert not part.is_default + assert not part.allow_root_jobs + + part.is_default = False + part.is_hidden = False + assert not part.is_hidden + assert part.is_root_only + assert not part.is_default + assert not part.allow_root_jobs + + +# def _uint_bool_impl(self, arg): +# js = JobSubmitDescription() + +# setattr(js, arg, True) +# assert getattr(js, arg) == True + +# setattr(js, arg, False) +# assert getattr(js, arg) == False + +# # Set to true again to make sure toggling actually works. +# setattr(js, arg, True) +# assert getattr(js, arg) == True + +# setattr(js, arg, None) +# assert getattr(js, arg) == False + +# def test_u8_bool(self): +# self._uint_bool_impl("overcommit") + +# def test_u16_bool(self): +# self._uint_bool_impl("requires_contiguous_nodes") + +# def test_u64_bool_flag(self): +# self._uint_bool_impl("kill_on_invalid_dependency") + + +class TestTime: + + def test_parse_minutes(self): + mins = 60 + mins_str = "01:00:00" + + assert timestr_to_mins(mins_str) == mins + assert timestr_to_mins("UNLIMITED") == 2**32-1 + assert timestr_to_mins(None) == 2**32-2 + + assert mins_to_timestr(mins) == mins_str + assert mins_to_timestr(2**32-1) == "UNLIMITED" + assert mins_to_timestr(2**32-2) == None + assert mins_to_timestr(0) == None + + with pytest.raises(ValueError, + match="Invalid Time Specification: invalid_val."): + timestr_to_mins("invalid_val") + + def test_parse_seconds(self): + secs = 3600 + secs_str = "01:00:00" + + assert timestr_to_secs(secs_str) == secs + assert timestr_to_secs("UNLIMITED") == 2**32-1 + 
assert timestr_to_secs(None) == 2**32-2 + + assert secs_to_timestr(secs) == secs_str + assert secs_to_timestr(2**32-1) == "UNLIMITED" + assert secs_to_timestr(2**32-2) == None + assert secs_to_timestr(0) == None + + with pytest.raises(ValueError, + match="Invalid Time Specification: invalid_val."): + timestr_to_secs("invalid_val") + + def test_parse_date(self): + datetime_date = datetime(2022, 11, 8, 21, 8, 17) + timestamp = int(datetime_date.timestamp()) + date = datetime_date.isoformat(timespec="seconds") + + assert date_to_timestamp(date) == timestamp + assert date_to_timestamp(timestamp) == timestamp + assert date_to_timestamp(datetime_date) == timestamp + + assert timestamp_to_date(timestamp) == date + assert timestamp_to_date(0) == None + assert timestamp_to_date(2**32-1) == None + assert timestamp_to_date(2**32-2) == None + + with pytest.raises(ValueError, + match="Invalid Time Specification: 2022-11-08T21"): + date_to_timestamp("2022-11-08T21") + + +class TestMiscUtil: + + def test_parse_uid(self): + name = uid_to_name(0) + assert name == "root" + + lookup = {0: "root"} + name = uid_to_name(0, lookup=lookup) + assert name == "root" + + assert user_to_uid("root") == 0 + assert user_to_uid(0) == 0 + assert user_to_uid("0") == 0 + + with pytest.raises(KeyError): + name = uid_to_name(2**32-5) + + with pytest.raises(KeyError): + name = user_to_uid("invalid_user") + + def test_parse_gid(self): + name = gid_to_name(0) + assert name == "root" + + lookup = {0: "root"} + name = gid_to_name(0, lookup=lookup) + assert name == "root" + + assert group_to_gid("root") == 0 + assert group_to_gid(0) == 0 + assert group_to_gid("0") == 0 + + with pytest.raises(KeyError): + name = gid_to_name(2**32-5) + + with pytest.raises(KeyError): + name = group_to_gid("invalid_group") + + def test_expand_range_str(self): + r = expand_range_str("1-5,6,7,10-11") + assert r == [1, 2, 3, 4, 5, 6, 7, 10, 11] + + def test_humanize(self): + val = humanize(1024) + assert val == "1.0G" + + val = 
humanize(2**20) + assert val == "1.0T" + + val = humanize(800) + assert val == "800.0M" + + val = humanize("UNLIMITED") + assert val == "UNLIMITED" + + val = humanize(None) + assert val == None + + with pytest.raises(ValueError): + val = humanize("invalid_val") + + def test_dehumanize(self): + # Note: default target unit for dehumanize is "M". + val = dehumanize(1024) + assert val == 1024 + + val = dehumanize("2M") + assert val == 2 + + val = dehumanize("10G") + assert val == 10240 + + val = dehumanize("9.6G") + assert val == round(1024*9.6) + + val = dehumanize("10T") + assert val == 10*(2**20) + + val = dehumanize("10T", target="G") + assert val == 10*(2**10) + + with pytest.raises(ValueError, + match="Invalid value specified: 10L"): + val = dehumanize("10L") + + with pytest.raises(ValueError, + match="could not convert string to float: 'invalid_val'"): + val = dehumanize("invalid_valM") + + def test_signal_to_num(self): + sig = signal_to_num("SIGKILL") + assert sig == 9 + + sig = signal_to_num(7) + assert sig == 7 + + with pytest.raises(ValueError): + sig = signal_to_num("invalid_sig") + + def test_nodelist_from_range_str(self): + nodelist = ["node001", "node007", "node008", "node009"] + nodelist_str = ",".join(nodelist) + assert nodelist == nodelist_from_range_str("node[001,007-009]") + assert nodelist_from_range_str("node[001,007:009]") == [] + + def test_nodelist_to_range_str(self): + nodelist = ["node001", "node007", "node008", "node009"] + nodelist_str = ",".join(nodelist) + assert "node[001,007-009]" == nodelist_to_range_str(nodelist) + assert "node[001,007-009]" == nodelist_to_range_str(nodelist_str) + diff --git a/tests/unit/test_db_job.py b/tests/unit/test_db_job.py new file mode 100644 index 00000000..c2ae8bb0 --- /dev/null +++ b/tests/unit/test_db_job.py @@ -0,0 +1,52 @@ +######################################################################### +# test_db_job.py - database job unit tests 
+######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_db_job.py - Unit test basic database job functionalities.""" + +import pytest +import pyslurm + + +def test_filter(): + job_filter = pyslurm.db.JobFilter() + + job_filter.clusters = ["test1"] + job_filter.partitions = ["partition1", "partition2"] + job_filter._create() + + job_filter.ids = [1000, 1001] + job_filter._create() + + job_filter.with_script = True + job_filter._create() + + job_filter.with_env = True + with pytest.raises(ValueError): + job_filter._create() + + +def test_create_instance(): + job = pyslurm.db.Job(9999) + assert job.id == 9999 + + +def test_parse_all(): + job = pyslurm.db.Job(9999) + assert job.to_dict() diff --git a/tests/unit/test_db_qos.py b/tests/unit/test_db_qos.py new file mode 100644 index 00000000..5ee2db76 --- /dev/null +++ b/tests/unit/test_db_qos.py @@ -0,0 +1,44 @@ +######################################################################### +# test_db_qos.py - database qos unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it 
and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_db_qos.py - Unit test basic database qos functionalities.""" + +import pytest +import pyslurm + + +def test_search_filter(): + qos_filter = pyslurm.db.QualityOfServiceFilter() + qos_filter._create() + + qos_filter.ids = [1, 2] + qos_filter._create() + + qos_filter.preempt_modes = ["cluster"] + qos_filter._create() + + with pytest.raises(ValueError): + qos_filter.preempt_modes = ["invalid_preempt_mode"] + qos_filter._create() + + +def test_create_instance(): + qos = pyslurm.db.QualityOfService("test") + assert qos.name == "test" diff --git a/tests/unit/test_db_slurm_list.py b/tests/unit/test_db_slurm_list.py new file mode 100644 index 00000000..6d770bcf --- /dev/null +++ b/tests/unit/test_db_slurm_list.py @@ -0,0 +1,134 @@ +######################################################################### +# test_db_slurm_list.py - Slurm list tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_db_slurm_List.py - Unit test basic Slurm list functionalities.""" + +import pytest +import pyslurm +from pyslurm.db.util import SlurmList + + +def test_create_and_destroy_list(): + slist = SlurmList() + assert not slist.is_null + + slist2 = SlurmList(["user1", "user2"]) + assert not slist.is_null + assert slist2.cnt == 2 + assert slist2.itr_cnt == 0 + assert slist2.is_itr_null + + slist2._dealloc_itr() + slist2._dealloc_list() + assert slist2.is_null + + +def test_append(): + slist = SlurmList() + input_list = ["user1", "user2", "user3"] + slist.append(input_list) + assert slist.cnt == len(input_list) + + input_str = "user4" + slist.append(input_str) + assert slist.cnt == 4 + + input_int = 10 + slist.append(input_int) + assert slist.cnt == 5 + + input_ignore_none = ["user6", None] + slist.append(input_ignore_none) + assert slist.cnt == 6 + + +def test_convert_to_pylist(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert slist.cnt == 3 + assert slist.to_pylist() == input_list + + +def test_iter(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert slist.itr_cnt == 0 + assert slist.is_itr_null + assert not slist.is_null + assert slist.cnt == 3 + + for idx, slurm_item in enumerate(slist): + assert not slist.is_itr_null + assert slurm_item.has_data + assert slist.itr_cnt == idx+1 + + assert slist.itr_cnt == 0 + assert slist.is_itr_null + + slist._dealloc_list() + assert slist.is_null + assert slist.cnt == 0 + + for item in slist: + # Should not be 
possible to get here + assert False + + +def test_iter_and_pop(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert slist.itr_cnt == 0 + assert slist.is_itr_null + assert slist.cnt == 3 + + for idx, slurm_item in enumerate(SlurmList.iter_and_pop(slist)): + assert slist.is_itr_null + assert slurm_item.has_data + + assert slist.cnt == 0 + assert slist.itr_cnt == 0 + assert slist.is_itr_null + + # Round 2 on existing object + slist.append(["user10", "user11"]) + assert slist.itr_cnt == 0 + assert slist.cnt == 2 + + for slurm_item in SlurmList.iter_and_pop(slist): + assert slurm_item.has_data + + assert slist.cnt == 0 + assert slist.itr_cnt == 0 + assert slist.is_itr_null + + +def test_iter_and_pop_on_null_list(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert not slist.is_null + assert slist.cnt == 3 + + slist._dealloc_list() + assert slist.is_null + assert slist.cnt == 0 + + for slurm_item in SlurmList.iter_and_pop(slist): + # Should not be possible to get here + assert False diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py new file mode 100644 index 00000000..863fcfab --- /dev/null +++ b/tests/unit/test_job.py @@ -0,0 +1,72 @@ +######################################################################### +# test_job.py - job unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_job.py - Unit test basic job functionalities.""" + +import pytest +import pyslurm +from pyslurm import Job +from pyslurm.core.job.util import * + +def test_create_instance(): + job = Job(9999) + assert job.id == 9999 + + +def test_parse_all(): + assert Job(9999).to_dict() + + +def test_parse_dependencies_to_dict(): + expected = None + assert dependency_str_to_dict("") == expected + + expected = { + "after": [1, 2], + "afterany": [], + "afterburstbuffer": [], + "aftercorr": [], + "afternotok": [], + "afterok": [3], + "singleton": False, + "satisfy": "all", + } + input_str = "after:1:2,afterok:3" + assert dependency_str_to_dict(input_str) == expected + + +def test_mail_types_int_to_list(): + expected = [] + assert mail_type_int_to_list(0) == expected + + +def test_acctg_profile_int_to_list(): + expected = [] + assert acctg_profile_int_to_list(0) == expected + + +def test_power_type_int_to_list(): + expected = [] + assert power_type_int_to_list(0) == expected + + +def test_cpu_freq_int_to_str(): + expected = None + assert cpu_freq_int_to_str(0) == expected diff --git a/tests/unit/test_job_steps.py b/tests/unit/test_job_steps.py new file mode 100644 index 00000000..c8c52352 --- /dev/null +++ b/tests/unit/test_job_steps.py @@ -0,0 +1,42 @@ +######################################################################### +# test_job_steps.py - job steps unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_job_steps.py - Unit test basic job step functionality.""" + +import pytest +from pyslurm import JobStep, Job +from pyslurm.utils.helpers import ( + humanize_step_id, + dehumanize_step_id, +) + +def test_create_instance(): + step = JobStep(9999, 1) + assert step.id == 1 + assert step.job_id == 9999 + + job = Job(10000) + step2 = JobStep(job, 2) + assert step2.id == 2 + assert step2.job_id == 10000 + + +def test_parse_all(): + assert JobStep(9999, 1).to_dict() diff --git a/tests/unit/test_job_submit.py b/tests/unit/test_job_submit.py new file mode 100644 index 00000000..5720f75f --- /dev/null +++ b/tests/unit/test_job_submit.py @@ -0,0 +1,379 @@ +######################################################################### +# test_job_submit.py - job submission unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_job_submit.py - Test the job submit api functions.""" + +import sys +import time +import pytest +import pyslurm +import tempfile +import os +from os import environ as pyenviron +from util import create_simple_job_desc, create_job_script +from pyslurm.utils.uint import u32 +from pyslurm import ( + Job, + Jobs, + JobSubmitDescription, + RPCError, +) +from pyslurm.core.job.submission import ( + _parse_cpu_freq_str_to_dict, + _validate_cpu_freq, + _parse_nodes, + _parse_dependencies, + _parse_signal_str_to_dict, + _validate_batch_script, +) +from pyslurm.core.job.util import ( + mail_type_list_to_int, + acctg_profile_list_to_int, + cpu_freq_str_to_int, + cpu_gov_str_to_int, + shared_type_str_to_int, + power_type_list_to_int, +) + + +def job_desc(**kwargs): + return JobSubmitDescription(script=create_job_script(), **kwargs) + + +def test_parse_environment(): + job = job_desc() + + # Everything in the current environment will be exported + job.environment = "ALL" + job._create_job_submit_desc() + + # Only SLURM_* Vars from the current env will be exported + job.environment = "NONE" + job._create_job_submit_desc() + + # TODO: more test cases + # Test explicitly set vars as dict +# job.environment = { +# "PYSLURM_TEST_VAR_1": 2, +# "PYSLURM_TEST_VAR_2": "test-value", +# } + + +def test_parse_cpu_frequency(): + freq = "Performance" + freq_dict = _parse_cpu_freq_str_to_dict(freq) + assert freq_dict["governor"] == "Performance" + assert len(freq_dict) == 1 + _validate_cpu_freq(freq_dict) + + freq = 1000000 + freq_dict = _parse_cpu_freq_str_to_dict(freq) + assert freq_dict["max"] == "1000000" + assert len(freq_dict) == 1 + _validate_cpu_freq(freq_dict) + + freq = "1000000-3700000" + freq_dict = _parse_cpu_freq_str_to_dict(freq) + assert freq_dict["min"] == 
"1000000" + assert freq_dict["max"] == "3700000" + assert len(freq_dict) == 2 + _validate_cpu_freq(freq_dict) + + freq = "1000000-3700000:Performance" + freq_dict = _parse_cpu_freq_str_to_dict(freq) + assert freq_dict["min"] == "1000000" + assert freq_dict["max"] == "3700000" + assert freq_dict["governor"] == "Performance" + _validate_cpu_freq(freq_dict) + + with pytest.raises(ValueError, + match=r"Invalid cpu_frequency format*"): + freq = "Performance:3700000" + freq_dict = _parse_cpu_freq_str_to_dict(freq) + + with pytest.raises(ValueError, + match=r"min cpu-freq*"): + freq = "4000000-3700000" + freq_dict = _parse_cpu_freq_str_to_dict(freq) + _validate_cpu_freq(freq_dict) + +# with pytest.raises(ValueError, +# match=r"Invalid cpu freq value*"): +# freq = "3700000:Performance" +# job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"Setting Governor when specifying*"): + freq = {"max": 3700000, "governor": "Performance"} + _validate_cpu_freq(freq) + + with pytest.raises(ValueError, + match=r"Setting Governor when specifying*"): + freq = {"min": 3700000, "governor": "Performance"} + _validate_cpu_freq(freq) + + +def test_parse_nodes(): + nodes = "5" + nmin, nmax = _parse_nodes(nodes) + assert nmin == 5 + assert nmax == 5 + + nodes = {"min": 5, "max": 5} + nmin, nmax = _parse_nodes(nodes) + assert nmin == 5 + assert nmax == 5 + + nodes = "5-10" + nmin, nmax = _parse_nodes(nodes) + assert nmin == 5 + assert nmax == 10 + + with pytest.raises(ValueError, + match=r"Max Nodecount cannot be less than*"): + nodes = {"min": 10, "max": 5} + nmin, nmax = _parse_nodes(nodes) + + +def test_parse_script(): + script = create_job_script() + + # Try passing in a path to a script. 
+ fd, path = tempfile.mkstemp() + try: + with os.fdopen(fd, 'w') as tmp: + tmp.write(script) + + _validate_batch_script(path, "-t 10 input.csv") + finally: + os.remove(path) + + with pytest.raises(ValueError, + match=r"Passing arguments to a script*"): + script = "#!/bin/bash\nsleep 10" + script_args = "-t 10" + _validate_batch_script(script, script_args) + + with pytest.raises(ValueError, + match=r"The Slurm Controller does not allow*"): + script = "#!/bin/bash\nsleep 10" + "\0" + script_args = None + _validate_batch_script(script, script_args) + + with pytest.raises(ValueError, + match="Batch script is empty or none was provided."): + script = "" + script_args = None + _validate_batch_script(script, script_args) + + with pytest.raises(ValueError, + match=r"Batch script contains DOS line breaks*"): + script = "#!/bin/bash\nsleep 10" + "\r\n" + script_args = None + _validate_batch_script(script, script_args) + + +def test_parse_dependencies(): + dep = { + "afterany": [40, 30, 20], + "afternotok": [100], + "satisfy": "any", + "singleton": True, + } + dep_str = _parse_dependencies(dep) + assert dep_str == "afterany:40:30:20?afternotok:100?singleton" + + dep = { + "after": [100, "200+30"], + "afterok": [300], + } + dep_str = _parse_dependencies(dep) + assert dep_str == "after:100:200+30,afterok:300" + + dep = { + "after": 200, + "afterok": 300, + } + dep_str = _parse_dependencies(dep) + assert dep_str == "after:200,afterok:300" + + +def test_validate_cpus(): + job = job_desc() + job.cpus_per_task = 5 + job._validate_options() + + with pytest.raises(ValueError, + match="cpus_per_task and cpus_per_gpu are mutually exclusive."): + job.cpus_per_gpu = 5 + job._validate_options() + + job.cpus_per_task = None + job.cpus_per_gpu = 5 + job._validate_options() + + with pytest.raises(ValueError, + match="cpus_per_task and cpus_per_gpu are mutually exclusive."): + job.cpus_per_task = 5 + job._validate_options() + + +def test_parse_signal(): + signal = 7 + signal_dict = 
_parse_signal_str_to_dict(signal) + assert signal_dict["signal"] == "7" + assert len(signal_dict) == 1 + + signal = "7@120" + signal_dict = _parse_signal_str_to_dict(signal) + assert signal_dict["signal"] == "7" + assert signal_dict["time"] == "120" + assert len(signal_dict) == 2 + + signal = "RB:8@180" + signal_dict = _parse_signal_str_to_dict(signal) + assert signal_dict["signal"] == "8" + assert signal_dict["time"] == "180" + assert signal_dict["batch_only"] + assert signal_dict["allow_reservation_overlap"] + assert len(signal_dict) == 4 + + +def test_mail_type_list_to_int(): + typ = "ARRAY_TASKS,BEGIN" + assert mail_type_list_to_int(typ) > 0 + + with pytest.raises(ValueError, match=r"Invalid *"): + typ = "BEGIN,END,INVALID_TYPE" + mail_type_list_to_int(typ) + + +def test_acctg_profile_list_to_int(): + typ = "energy,task" + assert acctg_profile_list_to_int(typ) > 0 + + with pytest.raises(ValueError, match=r"Invalid *"): + typ = "energy,invalid_type" + acctg_profile_list_to_int(typ) + + +def test_power_type_list_to_int(): + typ = "level" + assert power_type_list_to_int(typ) > 0 + + with pytest.raises(ValueError, match=r"Invalid *"): + typ = "invalid_type" + power_type_list_to_int(typ) + + +def test_cpu_gov_str_to_int(): + typ = "PERFORMANCE" + assert cpu_gov_str_to_int(typ) > 0 + + with pytest.raises(ValueError, match=r"Invalid *"): + typ = "INVALID_GOVERNOR" + cpu_gov_str_to_int(typ) + + +def test_cpu_freq_str_to_int(): + typ = "HIGH" + assert cpu_freq_str_to_int(typ) > 0 + + with pytest.raises(ValueError, match=r"Invalid *"): + typ = "INVALID_FREQ_STR" + cpu_freq_str_to_int(typ) + + with pytest.raises(OverflowError): + typ = 2**32 + cpu_freq_str_to_int(typ) + + +def test_setting_attrs_with_env_vars(): + pyenviron["PYSLURM_JOBDESC_ACCOUNT"] = "account1" + pyenviron["PYSLURM_JOBDESC_NAME"] = "jobname" + pyenviron["PYSLURM_JOBDESC_WCKEY"] = "wckey" + pyenviron["PYSLURM_JOBDESC_CLUSTERS"] = "cluster1,cluster2" + pyenviron["PYSLURM_JOBDESC_COMMENT"] = "A simple job 
comment" + pyenviron["PYSLURM_JOBDESC_REQUIRES_CONTIGUOUS_NODES"] = "True" + pyenviron["PYSLURM_JOBDESC_WORKING_DIRECTORY"] = "/work/user1" + + job = job_desc(working_directory="/work/user2") + job.load_environment() + + assert job.account == "account1" + assert job.name == "jobname" + assert job.wckey == "wckey" + assert job.clusters == "cluster1,cluster2" + assert job.comment == "A simple job comment" + assert job.requires_contiguous_nodes == True + assert job.working_directory == "/work/user2" + + job = job_desc(working_directory="/work/user2", account="account2") + job.load_environment(overwrite=True) + + assert job.account == "account1" + assert job.name == "jobname" + assert job.wckey == "wckey" + assert job.clusters == "cluster1,cluster2" + assert job.comment == "A simple job comment" + assert job.requires_contiguous_nodes == True + assert job.working_directory == "/work/user1" + + +def test_parsing_sbatch_options_from_script(): + fd, path = tempfile.mkstemp() + try: + with os.fdopen(fd, 'w') as tmp: + tmp.write( + """#!/bin/bash + + #SBATCH --time 20 + #SBATCH --mem-per-cpu =1G + #SBATCH -G 1 + #SBATCH --exclusive + #SBATCH --ntasks = 2 + #SBATCH -c=3 # inline-comments should be ignored + + sleep 1000 + """ + ) + + job = job_desc(ntasks=5) + job.script = path + job.load_sbatch_options() + assert job.time_limit == "20" + assert job.memory_per_cpu == "1G" + assert job.gpus == "1" + assert job.resource_sharing == "no" + assert job.ntasks == 5 + assert job.cpus_per_task == "3" + + job = job_desc(ntasks=5) + job.script = path + job.load_sbatch_options(overwrite=True) + assert job.time_limit == "20" + assert job.memory_per_cpu == "1G" + assert job.gpus == "1" + assert job.resource_sharing == "no" + assert job.ntasks == "2" + assert job.cpus_per_task == "3" + finally: + os.remove(path) + diff --git a/tests/unit/test_node.py b/tests/unit/test_node.py new file mode 100644 index 00000000..c4dba73e --- /dev/null +++ b/tests/unit/test_node.py @@ -0,0 +1,46 @@ 
+######################################################################### +# test_node.py - node unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_node.py - Unit Test basic functionality of the Node class.""" + +import pytest +import pyslurm +from pyslurm import Node, Nodes +from pyslurm.core.node import _node_state_from_str + + +def test_create_instance(): + node = Node("localhost") + assert node.name == "localhost" + + +def test_parse_all(): + assert Node("localhost").to_dict() + + +def test_set_node_state(): + assert _node_state_from_str("RESUME") + assert _node_state_from_str("undrain") + assert _node_state_from_str("POWER_DOWN") + + +def test_setting_attributes(): + # TODO + assert True diff --git a/tests/unit/test_partition.py b/tests/unit/test_partition.py new file mode 100644 index 00000000..b699893c --- /dev/null +++ b/tests/unit/test_partition.py @@ -0,0 +1,70 @@ +######################################################################### +# test_partition.py - partition unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# Copyright (C) 2023 PySlurm Developers +# +# This file is 
part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_partition.py - Unit Test basic functionality of the Partition class.""" + +import pytest +import pyslurm +from pyslurm import Partition, Partitions + + +def test_create_instance(): + part = Partition("normal") + assert part.name == "normal" + + +def test_parse_all(): + assert Partition("normal").to_dict() + + +def test_parse_memory(): + part = Partition() + + assert part.default_memory_per_cpu is None + assert part.default_memory_per_node is None + + part.default_memory_per_cpu = "2G" + assert part.default_memory_per_cpu == 2048 + assert part.default_memory_per_node is None + + part.default_memory_per_node = "2G" + assert part.default_memory_per_cpu is None + assert part.default_memory_per_node == 2048 + + +def test_parse_job_defaults(): + part = Partition() + + assert part.default_cpus_per_gpu is None + assert part.default_memory_per_gpu is None + + part.default_cpus_per_gpu = 10 + assert part.default_cpus_per_gpu == 10 + assert part.default_memory_per_gpu is None + + part.default_memory_per_gpu = "10G" + assert part.default_cpus_per_gpu == 10 + assert part.default_memory_per_gpu == 10240 + + part.default_cpus_per_gpu = None + part.default_memory_per_gpu = None + assert part.default_cpus_per_gpu is None + assert part.default_memory_per_gpu is None 
diff --git a/tests/unit/test_task_dist.py b/tests/unit/test_task_dist.py new file mode 100644 index 00000000..52a3e07c --- /dev/null +++ b/tests/unit/test_task_dist.py @@ -0,0 +1,52 @@ +######################################################################### +# test_task_dist.py - task distribution unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+"""test_task_dist.py - Test task distribution functions.""" + +import pyslurm +from pyslurm.core.job.task_dist import TaskDistribution + + +def test_from_int(): + expected = None + assert TaskDistribution.from_int(0) == expected + + +def test_from_str(): + + input_str = "cyclic:cyclic:cyclic" + expected = TaskDistribution("cyclic", "cyclic", "cyclic") + parsed = TaskDistribution.from_str(input_str) + assert parsed == expected + assert parsed.to_str() == input_str + + input_str = "*:*:fcyclic,NoPack" + expected = TaskDistribution("*", "*", "fcyclic", False) + parsed = TaskDistribution.from_str(input_str) + assert parsed == expected + assert parsed.to_str() == "block:cyclic:fcyclic,NoPack" + + input_plane_size = 10 + expected = TaskDistribution(plane_size=input_plane_size) + parsed = TaskDistribution.from_str(f"plane={input_plane_size}") + assert parsed == expected + assert parsed.to_str() == "plane" + assert parsed.plane == 10 +# assert parsed.as_int() == pyslurm.SLURM_DIST_PLANE diff --git a/tests/unit/util.py b/tests/unit/util.py new file mode 100644 index 00000000..d142a3a4 --- /dev/null +++ b/tests/unit/util.py @@ -0,0 +1,56 @@ +######################################################################### +# util.py - utility functions for tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import pytest +from pyslurm import ( + Job, + JobSubmitDescription, +) + +# TODO: Figure out how to share this properly between the unit and integration +# folders + +def create_job_script(): + job_script = """\ +#!/bin/bash + +echo "Got args: $@" + +/usr/bin/env + +sleep 500\ + +""" + return job_script + + +def create_simple_job_desc(script=None, **kwargs): + job = JobSubmitDescription(**kwargs) + + job.name = "test_job" + job.standard_output = "/tmp/slurm-test-%j.out" + job.memory_per_cpu = "1G" + job.ntasks = 2 + job.cpus_per_task = 3 + job.script = create_job_script() if not script else script + job.time_limit = "1-00:00:00" + + return job diff --git a/valgrind-pyslurm.supp b/valgrind-pyslurm.supp new file mode 100644 index 00000000..d7243f44 --- /dev/null +++ b/valgrind-pyslurm.supp @@ -0,0 +1,544 @@ +# Initial suppression file taken from here: +# https://github.com/python/cpython/blob/77a3196b7cc17d90a8aae5629aa71ff183b9266a/Misc/valgrind-python.supp +# Extended with Slurm specific suppressions + +{ + Python _PyFunction_Vectorcall + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:_PyObject_GC_NewVar + obj:/usr/bin/python3.10 + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall +} + +### +### IGNORE POSSIBLE LEAKS CAUSED BY SOME INIT FUNCTIONS IN libslurm +### + +{ + Slurm select_g_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:select_g_init + ... +} + +{ + Slurm slurm_auth_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:slurm_auth_init + ... 
+} + +{ + Slurm slurm_conf_init/slurm_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:slurm_conf_init + fun:slurm_init + ... +} + +{ + Slurm hash_g_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:hash_g_init + ... +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Addr4 + fun:address_in_range +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Value4 + fun:address_in_range +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 8 (x86_64 aka amd64) + Memcheck:Value8 + fun:address_in_range +} + +{ + ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value + Memcheck:Cond + fun:address_in_range +} + +# +# Leaks (including possible leaks) +# Hmmm, I wonder if this masks some real leaks. I think it does. +# Will need to fix that. +# + +{ + Suppress leaking the GIL. Happens once per process, see comment in ceval.c. + Memcheck:Leak + fun:malloc + fun:PyThread_allocate_lock + fun:PyEval_InitThreads +} + +{ + Suppress leaking the GIL after a fork. + Memcheck:Leak + fun:malloc + fun:PyThread_allocate_lock + fun:PyEval_ReInitThreads +} + +{ + Suppress leaking the autoTLSkey. This looks like it shouldn't leak though. + Memcheck:Leak + fun:malloc + fun:PyThread_create_key + fun:_PyGILState_Init + fun:Py_InitializeEx + fun:Py_Main +} + +{ + Hmmm, is this a real leak or like the GIL? 
+ Memcheck:Leak + fun:malloc + fun:PyThread_ReInitTLS +} + +{ + Handle PyMalloc confusing valgrind (possibly leaked) + Memcheck:Leak + fun:realloc + fun:_PyObject_GC_Resize + fun:COMMENT_THIS_LINE_TO_DISABLE_LEAK_WARNING +} + +{ + Handle PyMalloc confusing valgrind (possibly leaked) + Memcheck:Leak + fun:malloc + fun:_PyObject_GC_New + fun:COMMENT_THIS_LINE_TO_DISABLE_LEAK_WARNING +} + +{ + Handle PyMalloc confusing valgrind (possibly leaked) + Memcheck:Leak + fun:malloc + fun:_PyObject_GC_NewVar + fun:COMMENT_THIS_LINE_TO_DISABLE_LEAK_WARNING +} + +# +# Non-python specific leaks +# + +{ + Handle pthread issue (possibly leaked) + Memcheck:Leak + fun:calloc + fun:allocate_dtv + fun:_dl_allocate_tls_storage + fun:_dl_allocate_tls +} + +{ + Handle pthread issue (possibly leaked) + Memcheck:Leak + fun:memalign + fun:_dl_allocate_tls_storage + fun:_dl_allocate_tls +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Addr4 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Value4 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Addr8 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Value8 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value + Memcheck:Cond + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Addr4 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Value4 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Addr8 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Value8 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value + Memcheck:Cond + fun:_PyObject_Realloc +} + +### +### All the suppressions below are for errors that occur within libraries +### that Python uses. 
The problems do not appear to be related to Python's +### use of the libraries. +### + +{ + Generic ubuntu ld problems + Memcheck:Addr8 + obj:/lib/ld-2.4.so + obj:/lib/ld-2.4.so + obj:/lib/ld-2.4.so + obj:/lib/ld-2.4.so +} + +{ + Generic gentoo ld problems + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so +} + +{ + DBM problems, see test_dbm + Memcheck:Param + write(buf) + fun:write + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_close +} + +{ + DBM problems, see test_dbm + Memcheck:Value8 + fun:memmove + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_store + fun:dbm_ass_sub +} + +{ + DBM problems, see test_dbm + Memcheck:Cond + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_store + fun:dbm_ass_sub +} + +{ + DBM problems, see test_dbm + Memcheck:Cond + fun:memmove + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_store + fun:dbm_ass_sub +} + +{ + GDBM problems, see test_gdbm + Memcheck:Param + write(buf) + fun:write + fun:gdbm_open + +} + +{ + Uninitialised byte(s) false alarm, see bpo-35561 + Memcheck:Param + epoll_ctl(event) + fun:epoll_ctl + fun:pyepoll_internal_ctl +} + +{ + ZLIB problems, see test_gzip + Memcheck:Cond + obj:/lib/libz.so.1.2.3 + obj:/lib/libz.so.1.2.3 + fun:deflate +} + +{ + Avoid problems w/readline doing a putenv and leaking on exit + Memcheck:Leak + fun:malloc + fun:xmalloc + fun:sh_set_lines_and_columns + fun:_rl_get_screen_size + fun:_rl_init_terminal_io + obj:/lib/libreadline.so.4.3 + fun:rl_initialize +} + +# Valgrind emits "Conditional jump or move depends on uninitialised value(s)" +# false alarms on GCC builtin strcmp() function. The GCC code is correct. 
+# +# Valgrind bug: https://bugs.kde.org/show_bug.cgi?id=264936 +{ + bpo-38118: Valgrind emits false alarm on GCC builtin strcmp() + Memcheck:Cond + fun:PyUnicode_Decode +} + + +### +### These occur from somewhere within the SSL, when running +### test_socket_ssl. They are too general to leave on by default. +### +###{ +### somewhere in SSL stuff +### Memcheck:Cond +### fun:memset +###} +###{ +### somewhere in SSL stuff +### Memcheck:Value4 +### fun:memset +###} +### +###{ +### somewhere in SSL stuff +### Memcheck:Cond +### fun:MD5_Update +###} +### +###{ +### somewhere in SSL stuff +### Memcheck:Value4 +### fun:MD5_Update +###} + +# Fedora's package "openssl-1.0.1-0.1.beta2.fc17.x86_64" on x86_64 +# See http://bugs.python.org/issue14171 +{ + openssl 1.0.1 prng 1 + Memcheck:Cond + fun:bcmp + fun:fips_get_entropy + fun:FIPS_drbg_instantiate + fun:RAND_init_fips + fun:OPENSSL_init_library + fun:SSL_library_init + fun:init_hashlib +} + +{ + openssl 1.0.1 prng 2 + Memcheck:Cond + fun:fips_get_entropy + fun:FIPS_drbg_instantiate + fun:RAND_init_fips + fun:OPENSSL_init_library + fun:SSL_library_init + fun:init_hashlib +} + +{ + openssl 1.0.1 prng 3 + Memcheck:Value8 + fun:_x86_64_AES_encrypt_compact + fun:AES_encrypt +} + +# +# All of these problems come from using test_socket_ssl +# +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_bin2bn +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_num_bits_word +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:BN_num_bits_word +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_mod_exp_mont_word +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_mod_exp_mont +} + +{ + from test_socket_ssl + Memcheck:Param + write(buf) + fun:write + obj:/usr/lib/libcrypto.so.0.9.7 +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:RSA_verify +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:RSA_verify +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:DES_set_key_unchecked +} + +{ + from test_socket_ssl + 
Memcheck:Value4 + fun:DES_encrypt2 +} + +{ + from test_socket_ssl + Memcheck:Cond + obj:/usr/lib/libssl.so.0.9.7 +} + +{ + from test_socket_ssl + Memcheck:Value4 + obj:/usr/lib/libssl.so.0.9.7 +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BUF_MEM_grow_clean +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:memcpy + fun:ssl3_read_bytes +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:SHA1_Update +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:SHA1_Update +} + +{ + test_buffer_non_debug + Memcheck:Addr4 + fun:PyUnicodeUCS2_FSConverter +} + +{ + test_buffer_non_debug + Memcheck:Addr4 + fun:PyUnicode_FSConverter +} + +{ + wcscmp_false_positive + Memcheck:Addr8 + fun:wcscmp + fun:_PyOS_GetOpt + fun:Py_Main + fun:main +} + +# Additional suppressions for the unified decimal tests: +{ + test_decimal + Memcheck:Addr4 + fun:PyUnicodeUCS2_FSConverter +} + +{ + test_decimal2 + Memcheck:Addr4 + fun:PyUnicode_FSConverter +} +