diff --git a/.ansible-lint b/.ansible-lint new file mode 100644 index 000000000..973ff29f3 --- /dev/null +++ b/.ansible-lint @@ -0,0 +1,5 @@ +skip_list: + - 'fqcn-builtins' + - 'fqcn' + - 'name[missing]' + - 'name[template]' diff --git a/.gitignore b/.gitignore index 53354f0e6..bc44901af 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,173 @@ -target/ -out/ -dependency-reduced-pom.xml -*.ipr -*.iws -*.iml -.idea/ -*.retry +# complete idea +.idea/ + +# variable resources +resources/playbook/site.yml +resources/playbook/ansible_hosts +resources/playbook/vars/instances.yml +resources/playbook/vars/login.yml +resources/playbook/vars/worker_specification.yml +resources/playbook/vars/common_configuration.yml + +# any log files +*.log + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 000000000..d3d3a2306 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,619 @@ +[MAIN] + +# Analyse import fallback blocks. 
This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=.git + +# Add files or directories matching the regex patterns to the ignore-list. The +# regex matches against paths and can be in Posix or Windows format. +ignore-paths= + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. The default value ignores Emacs file +# locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=0 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=no + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.10 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. 
+suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + missing-function-docstring, + import-error, + logging-fstring-interpolation, + too-many-arguments, + fixme + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. 
+callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata, + test, + bla, + tmp + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. 
If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + f, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. 
+preferred-modules= + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=6 + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. 
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[DESIGN]
+
+# List of regular expressions of class ancestor names to ignore when counting
+# public methods (see R0903)
+exclude-too-few-public-methods=
+
+# List of qualified class names to ignore when counting class parents (see
+# R0901)
+ignored-parents=
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branches for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=BaseException,
+                       Exception
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no
+
+# This flag controls whether the implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps=no
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=120
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
diff --git a/README.md b/README.md
index e2d10c8ec..f48ce822d 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,116 @@
-# BiBiGrid2
+# BiBiGrid
+BiBiGrid is a cloud cluster creation and management framework for OpenStack (and more providers in the future).
 
-BiBiGrid is a tool for an easy cluster setup inside a cloud environment.
+> **Note**
+> The latest version is currently work in progress. Future changes are likely.
+> Not all features of the previous version are available, but they will come soon.
+> The [previous version](https://github.com/BiBiServ/bibigrid/tree/bibigrid-2.3.1) is still available,
+> but not maintained anymore.
+
+## Getting Started
+For most users the [Hands-On BiBiGrid Tutorial](https://github.com/deNBI/bibigrid_clum2022)
+is the best entry point.
+
+However, if you are already quite experienced with *OpenStack* and the previous *BiBiGrid*, the following
+brief explanation might be just what you need.
+
+<details>
+<summary>Brief, technical BiBiGrid2 overview</summary>
+
+### How to configure a cluster?
+#### Configuration File: bibigrid.yml
+A template file, [bibigrid.yml](bibigrid.yml), is included in the repository.
+
+The cluster configuration consists of a list of configurations; each entry describes the configuration of one provider.
+The first configuration additionally contains all the keys that apply to the entire cluster (roles, for example).
+Currently, only clusters with a single provider are possible, so focus only on the first configuration in the list.
+
+The configuration template [bibigrid.yml](bibigrid.yml) contains many helpful comments that make filling it in easier.
+
+[Need more details?](documentation/markdown/features/configuration.md)
+
+#### Cloud Specification Data: clouds.yaml
+To access the cloud, authentication information is required.
+You can download your `clouds.yaml` from OpenStack.
+
+Place your `clouds.yaml` in `~/.config/bibigrid/`; it will be loaded by BiBiGrid2 on execution.
+
+[Need more details?](documentation/markdown/features/cloud_specification_data.md)
+
+### Quick First Time Usage
+If you haven't used BiBiGrid1 in the past or are unfamiliar with OpenStack, we strongly recommend following the
+[tutorial](https://github.com/deNBI/bibigrid_clum2022) instead.
+
+#### Preparation
+1. Download (or create) the `clouds.yaml` (and optionally `clouds-public.yaml`) file as described [above](#cloud-specification-data-cloudsyaml).
+2. Place the `clouds.yaml` into `~/.config/bibigrid`.
+3. Fill in the configuration file `bibigrid.yml` with your specifics. At a minimum you need: a master instance with a valid type and image,
+a region, an availability zone, an sshUser (most likely `ubuntu`) and a subnet.
+You probably also want at least one worker with a valid type, image and count.
+4. If your cloud provider runs post-launch services, you need to set the `waitForServices`
+key, which expects a list of services to wait for.
+5. Create a virtual environment from `bibigrid2/requirements.txt`.
+See [here](https://www.akamai.com/blog/developers/how-building-virtual-python-environment) for more detailed info.
+6. Take a look at [First execution](#first-execution).
+
+#### First execution
+Before your first execution, follow the steps described under [Preparation](#preparation).
+
+After cloning the repository, navigate to `bibigrid2`.
+In order to execute BiBiGrid2, source the virtual environment created during [preparation](#preparation).
+Take a look at BiBiGrid2's [Command Line Interface](documentation/markdown/features/CLI.md)
+if you want to explore for yourself.
+
+A first execution run-through could be:
+1. `./bibigrid.sh -i [path-to-bibigrid.yml] -ch`: checks the configuration
+2. `./bibigrid.sh -i [path-to-bibigrid.yml] -c`: creates the cluster (execute only if the check was successful)
+3. Use **BiBiGrid2's create output** to investigate the created cluster further. Connecting to the IDE can be especially helpful.
+Otherwise, connect using SSH.
+4. While connected via SSH, run `sinfo` to print node info.
+5. Run `srun -x $(hostname) hostname` to power up a worker and get its hostname.
+6. Run `sinfo` again to see the node powering up. After a while it will be terminated again.
+7. Use the terminate command from **BiBiGrid2's create output** to shut down the cluster again.
+All floating-ips used will be released.
+
+Great! You've just started and terminated your first cluster using BiBiGrid2!
+
+</details>
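The overview above places `clouds.yaml` under `~/.config/bibigrid/`. As a quick illustration of that lookup — a minimal sketch only; the actual loader lives in BiBiGrid2's handler modules and is not part of this diff:

```python
import os

import yaml

# Assumed location; matches the path named in the overview above.
CLOUDS_YAML_PATH = os.path.expanduser("~/.config/bibigrid/clouds.yaml")

with open(CLOUDS_YAML_PATH, mode="r", encoding="UTF-8") as clouds_file:
    clouds = yaml.safe_load(clouds_file)["clouds"]

# Each key here must match a 'cloud' value used in bibigrid.yml, e.g. 'openstack'.
print(list(clouds.keys()))
```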
+
+### Troubleshooting
+If your cluster doesn't start up, first make sure your configuration file is valid (`-ch`).
+If it is not, modify the configuration file until it is. Use `-v` or `-vv` to get more verbose output,
+so you can find the issue faster. Also double-check that you have sufficient permissions to access the project.
+If you can't make your configuration file valid, please contact a developer.
+If the configuration is valid but the cluster still doesn't start, please contact a developer and/or manually check
+whether your quotas are exceeded. Some quotas can currently not be checked by BiBiGrid.
+
+**Whenever you contact a developer, please send your logfile along.**
+
+# Documentation
+If you would like to learn more about BiBiGrid2, please follow a fitting link:
+- [BiBiGrid2 Features](documentation/markdown/bibigrid_feature_list.md)
+- [Software used by BiBiGrid2](documentation/markdown/bibigrid_software_list.md)
+
+<details>
+<summary>Differences to BiBiGrid1</summary>
+
+* BiBiGrid2 no longer uses RC files, but `clouds.yaml` files for cloud specification data. Environment variables
+are no longer used (or supported).
+See [Cloud Specification Data](documentation/markdown/features/cloud_specification_data.md).
+* BiBiGrid2 has a largely reworked configuration file. This was necessary because the BiBiGrid2 core supports
+multiple providers.
+See [Configuration](documentation/markdown/features/configuration.md).
+* BiBiGrid2 currently only implements the provider OpenStack.
+* BiBiGrid2 only starts the master and dynamically starts workers using Slurm when they are needed.
+Workers are powered down once they have not been used for a longer period.
+* BiBiGrid2 lays the foundation for clusters that are spread over multiple providers, but hybrid clouds aren't
+fully implemented yet.
+</details>
+
+# Development
+## Development-Guidelines
+
+[https://github.com/BiBiServ/Development-Guidelines](https://github.com/BiBiServ/Development-Guidelines)
+
+## On implementing concrete providers
+New concrete providers can be implemented easily. Copy the `provider.py` file, inherit from the `provider` class and
+implement all methods for your cloud provider. After that, add your provider to the providerHandler lists, giving it an
+associated name for the configuration files. By that, your provider is automatically added to BiBiGrid2's tests and
+regular execution. By testing your provider first, you will see whether all provider methods are implemented as
+expected. A sketch of such a provider follows below.
\ No newline at end of file
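A hedged sketch of what such a concrete provider could look like. The module path, base-class name and method set below are illustrative assumptions, not taken from this diff; copy `provider.py` for the authoritative interface:

```python
# Illustrative sketch only: module path, base-class name and method names are assumptions.
from bibigrid2.core import provider


class ExampleProvider(provider.Provider):
    NAME = "example"  # the associated name used in configuration files

    def create_server(self, name, flavor, image, network, key_name=None, volumes=None):
        raise NotImplementedError  # call your cloud provider's SDK here

    def delete_server(self, server_id):
        raise NotImplementedError

    def list_servers(self):
        raise NotImplementedError
```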
diff --git a/bibigrid.sh b/bibigrid.sh
new file mode 100755
index 000000000..7739c57ad
--- /dev/null
+++ b/bibigrid.sh
@@ -0,0 +1 @@
+python3 -m bibigrid2.core.startup "$@"
\ No newline at end of file
diff --git a/bibigrid.yml b/bibigrid.yml
new file mode 100644
index 000000000..69f589079
--- /dev/null
+++ b/bibigrid.yml
@@ -0,0 +1,93 @@
+  # See https://cloud.denbi.de/wiki/Tutorials/BiBiGrid/ (after update)
+  # First configuration will be used for general cluster information and must include the master.
+  # All other configurations must not include another master, but exactly one vpnWorker instead (same keys as master).
+
+- infrastructure: openstack # former mode. Describes which cloud provider is used (others are not implemented yet)
+  cloud: openstack # name of the clouds.yaml cloud-specification key (which is a value of the top level key clouds)
+
+  # -- BEGIN: GENERAL CLUSTER INFORMATION --
+  ## sshPublicKeyFiles listed here will be added to access the cluster. A temporary key is created by bibigrid itself.
+  #sshPublicKeyFiles:
+  #  - [key one]
+
+  ## Volumes and snapshots that will be mounted to the master
+  #masterMounts: # KEY NOT FULLY IMPLEMENTED YET
+  #  - [mount one]
+
+  #nfsShares: # KEY NOT FULLY IMPLEMENTED YET; /vol/spool/ is automatically created as an nfs share
+  #  - [nfsShare one]
+
+  ## Ansible (Galaxy) roles can be added for execution # KEY NOT IMPLEMENTED YET
+  #ansibleRoles:
+  #  - file: SomeFile
+  #    hosts: SomeHosts
+  #    name: SomeName
+  #    vars: SomeVars
+  #    vars_file: SomeVarsFile
+
+  #ansibleGalaxyRoles: # KEY NOT IMPLEMENTED YET
+  #  - hosts: SomeHost
+  #    name: SomeName
+  #    galaxy: SomeGalaxy
+  #    git: SomeGit
+  #    url: SomeURL
+  #    vars: SomeVars
+  #    vars_file: SomeVarsFile
+
+  ## Uncomment if you don't want to assign a public ip to the master; for internal clusters (Tuebingen).
+  #useMasterWithPublicIp: False # defaults to True; if False, no public ip (floating ip) will be allocated
+
+  # Other keys - default False
+  #localFS: True
+  #localDNSlookup: True
+  #zabbix: True
+  #nfs: True
+  #ide: True # Very useful to set to True. Use `./bibigrid.sh -i [path-to-bibigrid.yml] -ide -cid [cluster-id]` to start port forwarding to access the ide.
+
+  useMasterAsCompute: True # Currently ignored by slurm
+
+  #waitForServices: # existing service name that runs after an instance is launched. BiBiGrid's playbook will wait until the service is "stopped" to avoid issues
+  #  - de.NBI_Bielefeld_environment.service # uncomment for cloud site Bielefeld
+
+  # master configuration
+  masterInstance:
+    type: # existing type/flavor on your cloud. See launch instance>flavor for options
+    image: # existing image on your cloud. See https://openstack.cebitec.uni-bielefeld.de/project/images and pick an active one. Currently only ubuntu22.04 is supported
+
+  # -- END: GENERAL CLUSTER INFORMATION --
+
+  # worker configuration
+  #workerInstances:
+  #  - type: # existing type/flavor on your cloud. See launch instance>flavor for options
+  #    image: # same as master
+  #    count: # any number of workers you would like to create with the set type/image combination
+
+  # Depends on cloud image
+  sshUser: # for example ubuntu
+
+  # Depends on cloud site:
+  # Berlin     : regionOne
+  # Bielefeld  : bielefeld
+  # DKFZ       : regionOne
+  # Giessen    : RegionOne
+  # Heidelberg : RegionOne
+  # Tuebingen  : RegionOne
+  region: Bielefeld
+
+  # Depends on cloud site:
+  # Berlin     : nova
+  # Bielefeld  : default
+  # DKFZ       : nova
+  # Giessen    : nova
+  # Heidelberg : nova
+  # Tuebingen  : nova
+  availabilityZone: default
+
+  # Depends on cloud site and project
+  subnet: # existing subnet on your cloud. See https://openstack.cebitec.uni-bielefeld.de/project/networks/
+
+  # Uncomment if no full DNS service for started instances is available.
+  # Currently the case in Berlin, DKFZ, Heidelberg and Tuebingen.
+  #localDNSLookup: True
+
+  #- [next configurations] # KEY NOT IMPLEMENTED YET
diff --git a/bibigrid2/core/actions/check.py b/bibigrid2/core/actions/check.py
new file mode 100644
index 000000000..41797ec18
--- /dev/null
+++ b/bibigrid2/core/actions/check.py
@@ -0,0 +1,20 @@
+"""
+Module that acts as a wrapper and uses validateConfiguration to validate the given configuration
+"""
+import logging
+from bibigrid2.core.utility import validate_configuration
+
+LOG = logging.getLogger("bibigrid")
+
+def check(configurations, providers):
+    """
+    Uses validateConfiguration to validate the given configuration.
+    :param configurations: list of configurations (dicts)
+    :param providers: list of providers
+    :return:
+    """
+    success = validate_configuration.ValidateConfiguration(configurations, providers).validate()
+    check_result = "succeeded! Cluster is ready to start." if success else "failed!"
+    print(f"Total check {check_result}")
+    LOG.info("Total check returned %s.", success)
+    return 0
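`check.check` is dispatched from `bibigrid2.core.startup` (see `bibigrid.sh` above), which is not part of this diff. A minimal sketch of such a dispatch, with hypothetical flag wiring and the assumption that configurations and providers are loaded beforehand:

```python
import argparse

from bibigrid2.core.actions import check


def run(configurations, providers):
    # configurations/providers are assumed to be loaded from the -i bibigrid.yml
    # file and ~/.config/bibigrid/clouds.yaml before this dispatch runs
    parser = argparse.ArgumentParser()
    parser.add_argument("-ch", "--check", action="store_true", help="only validate the configuration")
    args, _ = parser.parse_known_args()
    if args.check:
        return check.check(configurations, providers)
    return 0
```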
diff --git a/bibigrid2/core/actions/create.py b/bibigrid2/core/actions/create.py
new file mode 100644
index 000000000..bd24ac3fe
--- /dev/null
+++ b/bibigrid2/core/actions/create.py
@@ -0,0 +1,362 @@
+"""
+The cluster creation (master's creation, key creation, ansible setup and execution, ...) is done here
+"""
+
+import logging
+import os
+import subprocess
+import threading
+import traceback
+from functools import partial
+
+import paramiko
+import yaml
+
+from bibigrid2.core.actions import terminate_cluster
+from bibigrid2.core.utility import ansible_configurator
+from bibigrid2.core.utility import id_generation
+from bibigrid2.core.utility.handler import ssh_handler
+from bibigrid2.core.utility.paths import ansible_resources_path as aRP
+from bibigrid2.core.utility.paths import bin_path as biRP
+from bibigrid2.models import exceptions
+from bibigrid2.models import return_threading
+from bibigrid2.models.exceptions import ExecutionException
+
+PREFIX = "bibigrid"
+SEPARATOR = "-"
+PREFIX_WITH_SEP = PREFIX + SEPARATOR
+LOG = logging.getLogger("bibigrid")
+
+
+def get_identifier(identifier, cluster_id, worker_group="", additional=""):
+    """
+    This method does more advanced string formatting to generate master, vpnwkr and worker names
+    @param identifier: master|vpnwkr|worker
+    @param cluster_id: id of cluster
+    @param worker_group: group of worker (every member of a group has the same flavor/type and image)
+    @param additional: an additional string to be added at the end
+    @return: the generated string
+    """
+    general = PREFIX_WITH_SEP + identifier + str(worker_group) + SEPARATOR + cluster_id
+    if additional:
+        return general + SEPARATOR + str(additional)
+    return general
+
+
+MASTER_IDENTIFIER = partial(get_identifier, identifier="master", additional="")
+WORKER_IDENTIFIER = partial(get_identifier, identifier="worker")
+VPN_WORKER_IDENTIFIER = partial(get_identifier, identifier="vpnwkr")
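The partials fix the `identifier` argument, so callers only supply the varying parts. For a hypothetical cluster id `abc123` the naming scheme works out as follows:

```python
from bibigrid2.core.actions.create import (MASTER_IDENTIFIER, WORKER_IDENTIFIER,
                                           VPN_WORKER_IDENTIFIER)

MASTER_IDENTIFIER(cluster_id="abc123")
# -> 'bibigrid-master-abc123'
WORKER_IDENTIFIER(cluster_id="abc123", worker_group=1, additional=2)
# -> 'bibigrid-worker1-abc123-2'
VPN_WORKER_IDENTIFIER(cluster_id="abc123", worker_group=0, additional=1)
# -> 'bibigrid-vpnwkr0-abc123-1'
```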
+
+KEY_PREFIX = "tempKey_bibi"
+KEY_FOLDER = os.path.expanduser("~/.config/bibigrid/keys/")
+AC_NAME = "ac" + SEPARATOR + "{cluster_id}"
+KEY_NAME = KEY_PREFIX + SEPARATOR + "{cluster_id}"
+CLUSTER_MEMORY_FOLDER = KEY_FOLDER
+CLUSTER_MEMORY_FILE = ".bibigrid.mem"
+CLUSTER_MEMORY_PATH = os.path.join(CLUSTER_MEMORY_FOLDER, CLUSTER_MEMORY_FILE)
+
+
+class Create:  # pylint: disable=too-many-instance-attributes,too-many-arguments
+    """
+    The class Create holds necessary methods to execute the Create-Action
+    """
+
+    def __init__(self, providers, configurations, config_path, debug=False):
+        """
+        Additionally sets (unique) cluster_id, public_key_commands (to copy public keys to master) and key_name.
+        Call create() to actually start the servers.
+        :param providers: List of providers (provider)
+        :param configurations: List of configurations (dict)
+        :param config_path: string that is the path to the config-file
+        :param debug: Bool. If True, the cluster offers shut-down after create and asks before shutting down on errors
+        """
+        self.providers = providers
+        self.configurations = configurations
+        self.debug = debug
+        self.cluster_id = id_generation.generate_safe_cluster_id(providers)
+        self.ssh_user = configurations[0].get("sshUser") or "ubuntu"
+        self.ssh_add_public_key_commands = ssh_handler.get_add_ssh_public_key_commands(
+            configurations[0].get("sshPublicKeyFiles"))
+        self.config_path = config_path
+        self.master_ip = None
+        LOG.debug("Cluster-ID: %s", self.cluster_id)
+        self.name = AC_NAME.format(cluster_id=self.cluster_id)
+        self.key_name = KEY_NAME.format(cluster_id=self.cluster_id)
+        self.instance_counter = 0
+        self.thread_lock = threading.Lock()
+        self.use_master_with_public_ip = configurations[0].get("useMasterWithPublicIp", True)
+        LOG.debug("Keyname: %s", self.key_name)
+
+    def generate_keypair(self):
+        """
+        Generates an ECDSA keypair using the system function ssh-keygen and uploads the generated public key
+        to the providers.
+        generate_keypair makes use of the fact that files in tmp are automatically deleted
+        ToDo find a more pythonic way to create an ECDSA keypair
+        See here for why using the python module ecdsa wasn't successful
+        https://stackoverflow.com/questions/71194770/why-does-creating-ecdsa-keypairs-via-python-differ-from-ssh-keygen-t-ecdsa-and
+        :return:
+        """
+        # create KEY_FOLDER if it doesn't exist
+        if not os.path.isdir(KEY_FOLDER):
+            LOG.info("%s not found. Creating folder.", KEY_FOLDER)
+            os.mkdir(KEY_FOLDER)
+        # generate keyfile
+        res = subprocess.check_output(f'ssh-keygen -t ecdsa -f {KEY_FOLDER}{self.key_name} -P ""', shell=True).decode()
+        LOG.debug(res)
+        # read public keyfile
+        with open(f"{os.path.join(KEY_FOLDER, self.key_name)}.pub", mode="r", encoding="UTF-8") as key_file:
+            public_key = key_file.read()
+        # upload keyfiles
+        for provider in self.providers:
+            provider.create_keypair(name=self.key_name, public_key=public_key)
+
+        # write cluster_id to automatically read it on following calls if no cid is given
+        with open(CLUSTER_MEMORY_PATH, mode="w+", encoding="UTF-8") as cluster_memory_file:
+            yaml.safe_dump(data={"cluster_id": self.cluster_id}, stream=cluster_memory_file)
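Regarding the ToDo in `generate_keypair`: a pure-Python variant is possible with the `cryptography` package (an assumed extra dependency, not part of this diff). Per the linked Stack Overflow question, its output needed massaging to match ssh-keygen, which is presumably why the subprocess call is used; a minimal sketch anyway:

```python
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ec

key = ec.generate_private_key(ec.SECP256R1())  # ECDSA with the NIST P-256 curve
private_openssh = key.private_bytes(
    encoding=serialization.Encoding.PEM,
    format=serialization.PrivateFormat.OpenSSH,
    encryption_algorithm=serialization.NoEncryption())
public_openssh = key.public_key().public_bytes(
    encoding=serialization.Encoding.OpenSSH,
    format=serialization.PublicFormat.OpenSSH)
```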
+
+    def start_instance(self, provider, identifier, instance_type, network, volumes=None,
+                       external_network=None):
+        """
+        Starts any (master, worker, vpn) single server/instance in the given network on the given provider,
+        with floating-ip if master or vpn and with volumes if master.
+        :param provider: provider the server will be started on
+        :param identifier: string MASTER/WORKER/VPN_IDENTIFIER
+        :param instance_type: dict from configuration containing server type, image and count (but count is not needed)
+        :param network: string network where the server will be started in.
+        All servers of a provider are started in the same network
+        :param volumes: list of volumes that are to be attached to the server. Currently only relevant for master
+        :param external_network: string only needed if worker=False to create a floating_ip
+        :return:
+        """
+        # potentially weird counting due to master
+        with self.thread_lock:
+            if identifier == MASTER_IDENTIFIER:  # pylint: disable=comparison-with-callable
+                name = identifier(cluster_id=self.cluster_id)
+            elif identifier == WORKER_IDENTIFIER:  # pylint: disable=comparison-with-callable
+                name = identifier(number=self.instance_counter, cluster_id=self.cluster_id)
+            # else:
+            #     name = identifier(number=self.instance_counter, cluster_id=self.cluster_id)
+            self.instance_counter += 1
+        LOG.info("Starting instance/server %s", name)
+        flavor = instance_type["type"]
+        image = instance_type["image"]
+        server = provider.create_server(name=name, flavor=flavor, key_name=self.key_name,
+                                        image=image, network=network, volumes=volumes)
+        floating_ip = None
+        # pylint: disable=comparison-with-callable
+        if identifier == VPN_WORKER_IDENTIFIER or (
+                identifier == MASTER_IDENTIFIER and self.use_master_with_public_ip):
+            # wait seems to be included. Not in documentation
+            floating_ip = provider.attach_available_floating_ip(network=external_network,
+                                                                server=server)["floating_ip_address"]
+        elif identifier == MASTER_IDENTIFIER:
+            floating_ip = provider.conn.get_server(server["id"])["private_v4"]
+        # pylint: enable=comparison-with-callable
+        return floating_ip
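`start_instances` below relies on `ReturnThread` from `bibigrid2.models.return_threading`, which is not part of this diff. Presumably it is a small `Thread` subclass whose `join` hands back the target's return value — roughly along these lines (note that reusing `Thread`'s private `_target`/`_args` attributes is a common but CPython-specific shortcut):

```python
import threading


class ReturnThread(threading.Thread):
    """threading.Thread variant whose join() returns the target's result."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._return = None

    def run(self):
        if self._target:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        super().join(*args, **kwargs)
        return self._return
```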
+
+    def start_instances(self, configuration, provider):
+        """
+        Starts all instances of a provider using multithreading
+        :param configuration: dict configuration of said provider
+        :param provider: provider
+        :return:
+        """
+        LOG.info("Starting instances on %s", provider.NAME)
+        # threads = []
+        identifier, instance_type, volumes = self.prepare_vpn_or_master_args(configuration, provider)
+        external_network = provider.get_external_network(configuration["network"])
+
+        # Starts master/vpn. Uses return threading to get the floating_ip of master/vpn
+        vpn_or_master_thread = return_threading.ReturnThread(target=self.start_instance,
+                                                             args=[provider,
+                                                                   identifier,
+                                                                   instance_type,
+                                                                   configuration["network"],
+                                                                   volumes,
+                                                                   external_network])
+        vpn_or_master_thread.start()
+
+        # Starts all workers
+        # for worker_instance_type in configuration.get("workerInstances") or []:
+        #     for worker in range(worker_instance_type["count"]):
+        #         worker_thread = threading.Thread(target=self.start_instance,
+        #                                          args=[provider,
+        #                                                WORKER_IDENTIFIER,
+        #                                                worker_instance_type,
+        #                                                configuration["network"],
+        #                                                True])
+        #         worker_thread.start()
+        #         threads.append(worker_thread)
+        LOG.info("Waiting for servers to start up on cloud %s", provider.cloud_specification['identifier'])
+        vpn_or_m_floating_ip_address = vpn_or_master_thread.join()
+        self.setup_reachable_servers(configuration, vpn_or_m_floating_ip_address)
+        # for thread in threads:
+        #     thread.join()
+
+    def prepare_vpn_or_master_args(self, configuration, provider):
+        """
+        Prepares start_instance arguments for master/vpn
+        :param configuration: configuration (dict) of said master/vpn
+        :param provider: provider
+        :return: arguments needed by start_instance
+        """
+        if configuration.get("masterInstance"):
+            instance_type = configuration["masterInstance"]
+            identifier = MASTER_IDENTIFIER
+            master_mounts = configuration.get("masterMounts", [])
+            volumes = self.prepare_volumes(provider, master_mounts)
+        elif configuration.get("vpnInstance"):
+            instance_type = configuration["vpnInstance"]
+            identifier = VPN_WORKER_IDENTIFIER
+            volumes = []  # only the master has volumes
+        else:
+            LOG.warning("Configuration %s has no vpnwkr or master and is therefore unreachable.", configuration)
+            raise KeyError
+        return identifier, instance_type, volumes
+
+    def setup_reachable_servers(self, configuration, vpn_or_m_floating_ip_address):
+        """
+        Executes necessary commands on master or vpnwkr
+        :param configuration: said configuration
+        :param vpn_or_m_floating_ip_address: floating_ip to master or vpnwkr
+        """
+        if configuration.get("masterInstance"):
+            self.master_ip = vpn_or_m_floating_ip_address
+            ssh_handler.ansible_preparation(floating_ip=vpn_or_m_floating_ip_address,
+                                            private_key=KEY_FOLDER + self.key_name,
+                                            username=self.ssh_user,
+                                            commands=self.ssh_add_public_key_commands)
+        elif configuration.get("vpnInstance"):
+            ssh_handler.execute_ssh(floating_ip=self.master_ip,
+                                    private_key=KEY_FOLDER + self.key_name,
+                                    username=self.ssh_user,
+                                    commands=ssh_handler.VPN_SETUP)
+
+    def prepare_volumes(self, provider, mounts):
+        """
+        Creates volumes from snapshots and returns all volumes (pre-existing and newly created)
+        :param provider: provider on which the volumes and snapshots exist
+        :param mounts: volumes or snapshots
+        :return: set of pre-existing and newly created volumes
+        """
+        LOG.info("Preparing volumes")
+        volumes = []
+        for mount in mounts:
+            volume_id = provider.get_volume_by_id_or_name(mount)["id"]
+            if volume_id:
+                volumes.append(volume_id)
+            else:
+                LOG.debug("Volume %s does not exist. Checking for snapshot.", mount)
+                volume_id = provider.create_volume_from_snapshot(mount)
+                if volume_id:
+                    volumes.append(volume_id)
+                else:
+                    LOG.warning("Mount %s is neither a snapshot nor a volume.", mount)
+        ret_volumes = set(volumes)
+        if len(ret_volumes) < len(volumes):
+            LOG.warning("Identical mounts found in masterMounts list. "
+                        "Trying to set() to save the run. Check configurations!")
+        return ret_volumes
+
+    def prepare_configurations(self):
+        """
+        Makes sure that subnet and network keys are set for each configuration.
+        If none is set, a KeyError will be raised and caught in create.
+        :return:
+        """
+        for configuration, provider in zip(self.configurations, self.providers):
+            if not configuration.get("network"):
+                configuration["network"] = provider.get_network_id_by_subnet(configuration["subnet"])
+            elif not configuration.get("subnet"):
+                configuration["subnet"] = provider.get_subnet_ids_by_network(configuration["network"])
+            configuration["sshUser"] = self.ssh_user  # is used in ansibleConfigurator
+
+    def upload_data(self):
+        """
+        Configures ansible and then uploads the modified files and all necessary data to the master
+        :return:
+        """
+        if not os.path.isdir(aRP.VARS_FOLDER):
+            LOG.info("%s not found. Creating folder.", aRP.VARS_FOLDER)
+            os.mkdir(aRP.VARS_FOLDER)
+        ansible_configurator.configure_ansible_yaml(providers=self.providers,
+                                                    configurations=self.configurations,
+                                                    cluster_id=self.cluster_id)
+        ssh_handler.execute_ssh(floating_ip=self.master_ip, private_key=KEY_FOLDER + self.key_name,
+                                username=self.ssh_user,
+                                filepaths=[(aRP.PLAYBOOK_PATH, aRP.PLAYBOOK_PATH_REMOTE),
+                                           (biRP.BIN_PATH, biRP.BIN_PATH_REMOTE)],
+                                commands=ssh_handler.ANSIBLE_START +
+                                [ssh_handler.get_ac_command(self.providers[0], AC_NAME.format(
+                                    cluster_id=self.cluster_id))])
+
+    def start_start_instances_threads(self):
+        """
+        Starts a start_instances thread for each provider and joins them.
+        :return:
+        """
+        start_instances_threads = []
+        for configuration, provider in zip(self.configurations, self.providers):
+            start_instances_thread = return_threading.ReturnThread(target=self.start_instances,
+                                                                   args=[configuration, provider])
+            start_instances_thread.start()
+            start_instances_threads.append(start_instances_thread)
+        for start_instance_thread in start_instances_threads:
+            start_instance_thread.join()
+
+    def create(self):
+        """
+        Creates the cluster and prints helpful cluster-info afterwards.
+        If debug is set to True, it offers termination after starting the cluster.
+        :return: exit_state
+        """
+        self.generate_keypair()
+        try:
+            self.prepare_configurations()
+            self.start_start_instances_threads()
+            self.upload_data()
+            self.print_cluster_start_info()
+            if self.debug:
+                LOG.info("DEBUG MODE: Entering termination...")
+                terminate_cluster.terminate_cluster(cluster_id=self.cluster_id, providers=self.providers,
+                                                    debug=self.debug)
+        except exceptions.ConnectionException:
+            LOG.error("Connection couldn't be established. Check the provider connection.")
+        except paramiko.ssh_exception.NoValidConnectionsError:
+            LOG.error("SSH connection couldn't be established. Check the keypair.")
+        except KeyError as exc:
+            LOG.error(f"Tried to access dictionary key {str(exc)}, but couldn't. Please check your configurations.")
+        except FileNotFoundError as exc:
+            LOG.error(f"Tried to access resource files but couldn't. No such file or directory: {str(exc)}")
+        except TimeoutError as exc:
+            LOG.error(f"Timeout while connecting to master. Maybe you are trying to create a master without "
+                      f"public ip while not being in the same network: {str(exc)}")
+        except ExecutionException as exc:
+            if self.debug:
+                LOG.error(traceback.format_exc())
+            LOG.error(f"Execution of cmd on remote host fails: {str(exc)}")
+        except Exception as exc:  # pylint: disable=broad-except
+            if self.debug:
+                LOG.error(traceback.format_exc())
+            LOG.error(f"Unexpected error: '{str(exc)}' ({type(exc)}). Contact a developer!")
+        else:
+            return 0  # will be returned if no exception occurred
+        terminate_cluster.terminate_cluster(cluster_id=self.cluster_id, providers=self.providers, debug=self.debug)
+        return 1
+
+    def print_cluster_start_info(self):
+        """
+        Prints helpful cluster-info:
+        SSH: How to connect to the master via SSH
+        Terminate: Which bibigrid2 command is needed to terminate the created cluster
+        Detailed cluster info: How to print detailed info about the created cluster
+        :return:
+        """
+        print(f"Cluster {self.cluster_id} with master {self.master_ip} up and running!")
+        print(f"SSH: ssh -i '{KEY_FOLDER}{self.key_name}' {self.ssh_user}@{self.master_ip}")
+        print(f"Terminate cluster: ./bibigrid.sh -i '{self.config_path}' -t -cid {self.cluster_id}")
+        print(f"Detailed cluster info: ./bibigrid.sh -i '{self.config_path}' -l -cid {self.cluster_id}")
+        if self.configurations[0].get("ide"):
+            print(f"IDE Port Forwarding: ./bibigrid.sh -i '{self.config_path}' -ide -cid {self.cluster_id}")
diff --git a/bibigrid2/core/actions/ide.py b/bibigrid2/core/actions/ide.py
new file mode 100644
index 000000000..d1877a826
--- /dev/null
+++ b/bibigrid2/core/actions/ide.py
@@ -0,0 +1,95 @@
+"""
+This module contains methods to establish port forwarding in order to access an ide (theia).
+"""
+
+import logging
+import random
+import re
+import signal
+import subprocess
+import sys
+import time
+import webbrowser
+import sshtunnel
+
+from bibigrid2.core.utility.handler import cluster_ssh_handler
+
+DEFAULT_IDE_WORKSPACE = "${HOME}"
+REMOTE_BIND_ADDRESS = 8181
+DEFAULT_IDE_PORT_END = 8383
+LOCAL_BIND_ADDRESS = 9191
+MAX_JUMP = 100
+LOCALHOST = "127.0.0.1"
+LOG = logging.getLogger("bibigrid")
+
+def sigint_handler(caught_signal, frame):  # pylint: disable=unused-argument
+    """
+    Is called when SIGINT is thrown and terminates the program
+    @param caught_signal:
+    @param frame:
+    @return: 0
+    """
+    print("Exiting...")
+    sys.exit(0)
+signal.signal(signal.SIGINT, sigint_handler)
+
+
+def is_used(ip_address):
+    """
+    Returns the local ports that hold an ESTABLISHED connection to ip_address.
+    https://stackoverflow.com/questions/62000168/how-to-check-if-ssh-tunnel-is-being-used
+    :return: list of used ports
+    """
+    ports_used = []
+    with subprocess.Popen(["netstat", "-na"], stdout=subprocess.PIPE) as process:
+        out = process.stdout.read()
+        lines = out.decode('utf-8').split('\n')
+        for line in lines:
+            is_open = re.match(rf'tcp.*{ip_address}:([0-9][0-9]*).*ESTABLISHED\s*$', line)
+            if is_open is not None:
+                print(line)
+                ports_used.append(is_open[1])
+    return ports_used
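`is_used` above shells out to netstat and parses its text output. A portable alternative would be `psutil` (an assumed extra dependency; the hypothetical helper below is not part of this diff):

```python
import psutil  # assumed dependency, not in the requirements shown here


def used_ports(ip_address):
    """Ports of ESTABLISHED TCP connections whose local address is ip_address."""
    return [connection.laddr.port
            for connection in psutil.net_connections(kind="tcp")
            if connection.status == psutil.CONN_ESTABLISHED and connection.laddr.ip == ip_address]
```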
+
+
+def ide(cluster_id, master_provider, master_configuration):
+    """
+    Creates a port forwarding from LOCAL_BIND_ADDRESS to REMOTE_BIND_ADDRESS from localhost to the master of the
+    specified cluster
+    @param cluster_id: cluster_id or ip
+    @param master_provider: master's provider
+    @param master_configuration: master's configuration
+    @return:
+    """
+    LOG.info("Starting port forwarding for ide")
+    master_ip, ssh_user, used_private_key = cluster_ssh_handler.get_ssh_connection_info(cluster_id, master_provider,
                                                                                        master_configuration)
+    used_local_bind_address = LOCAL_BIND_ADDRESS
+    if master_ip and ssh_user and used_private_key:
+        attempts = 0
+        while attempts < 16:
+            attempts += 1
+            try:
+                with sshtunnel.SSHTunnelForwarder(
+                        ssh_address_or_host=master_ip,  # the cluster's master
+                        ssh_username=ssh_user,
+                        ssh_pkey=used_private_key,
+                        local_bind_address=(LOCALHOST, used_local_bind_address),
+                        remote_bind_address=(LOCALHOST, REMOTE_BIND_ADDRESS)
+                ) as server:
+                    print("CTRL+C to close port forwarding when you are done.")
+                    with server:
+                        # opens in an existing window if a default program exists
+                        webbrowser.open(f"http://localhost:{used_local_bind_address}", new=2)
+                        while True:
+                            time.sleep(5)
+            except sshtunnel.HandlerSSHTunnelForwarderError:
+                used_local_bind_address += random.randint(1, MAX_JUMP)
+                LOG.info("Attempt: %s. Port in use... Trying new port %s", attempts, used_local_bind_address)
+    if not master_ip:
+        LOG.warning("Cluster id %s doesn't match an existing cluster with a master.", cluster_id)
+    if not ssh_user:
+        LOG.warning("No ssh user has been specified in the first configuration.")
+    if not used_private_key:
+        LOG.warning("No matching sshPublicKeyFiles can be found in the first configuration or in .bibigrid")
+    return 1
diff --git a/bibigrid2/core/actions/list_clusters.py b/bibigrid2/core/actions/list_clusters.py
new file mode 100644
index 000000000..58f9924ae
--- /dev/null
+++ b/bibigrid2/core/actions/list_clusters.py
@@ -0,0 +1,152 @@
+"""
+This module contains methods to list all clusters or a specific cluster in a formatted, readable output.
+This includes a method to create a dictionary containing all running clusters and their servers.
+"""
+
+import logging
+import pprint
+import re
+
+from bibigrid2.core.actions import create
+
+SERVER_REGEX = re.compile(r"^bibigrid-((master)-([a-zA-Z0-9]+)|(worker|vpnwkr)\d+-([a-zA-Z0-9]+)-\d+)$")
+LOG = logging.getLogger("bibigrid")
+
+def dict_clusters(providers):
+    """
+    Creates a dictionary containing all servers by type and provider information
+    :param providers: list of all providers
+    :return: dictionary of all clusters
+    """
+    LOG.info("Creating cluster dictionary...")
+    cluster_dict = {}
+    for provider in providers:
+        servers = provider.list_servers()
+        for server in servers:
+            result = SERVER_REGEX.match(server["name"])
+            if result:
+                identifier = result.group(4) or result.group(2)
+                cluster_id = result.group(5) or result.group(3)
+                setup(cluster_dict, cluster_id, server, provider)
+                if identifier == "master":
+                    cluster_dict[cluster_id][identifier] = server
+                else:
+                    cluster_dict[cluster_id][identifier + "s"].append(server)
+    return cluster_dict
+
+
+def setup(cluster_dict, cluster_id, server, provider):
+    """
+    Generates an empty entry for cluster_id in cluster_dict and adds provider information to the server.
+    :param server: found server (dict)
+    :param cluster_id: id of said cluster
+    :param cluster_dict: dict containing all found servers by their cluster_id
+    :param provider: server's provider
+    :return: cluster_id
+    """
+    if not cluster_dict.get(cluster_id):
+        cluster_dict[cluster_id] = {}
+        cluster_dict[cluster_id]["workers"] = []
+        cluster_dict[cluster_id]["vpnwkrs"] = []
+    server["provider"] = provider.NAME
+    server["cloud_specification"] = provider.cloud_specification["identifier"]
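`dict_clusters` keys everything on the groups of `SERVER_REGEX` above; a quick check of what the pattern extracts (hypothetical cluster id `abc123`):

```python
from bibigrid2.core.actions.list_clusters import SERVER_REGEX

match = SERVER_REGEX.match("bibigrid-worker1-abc123-0")
assert match.group(4) == "worker"  # identifier used by dict_clusters
assert match.group(5) == "abc123"  # cluster_id
master_match = SERVER_REGEX.match("bibigrid-master-abc123")
assert master_match.group(2) == "master" and master_match.group(3) == "abc123"
```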
+
+
+def print_list_clusters(cluster_id, providers):
+    """
+    Calls dict_clusters and gives a visual representation of the found clusters.
+    Detail depends on whether a cluster_id is given or not.
+    :param cluster_id:
+    :param providers:
+    :return:
+    """
+    cluster_dict = dict_clusters(providers=providers)
+    if cluster_id:  # pylint: disable=too-many-nested-blocks
+        if cluster_dict.get(cluster_id):
+            LOG.info("Printing specific cluster dictionary")
+            master_count, worker_count, vpn_count = get_size_overview(cluster_dict[cluster_id])
+            print(f"\tCluster has {master_count} master, {vpn_count} vpnwkr and {worker_count} regular workers. "
+                  f"The cluster is spread over {vpn_count + master_count} reachable provider(s).")
+            pprint.pprint(cluster_dict[cluster_id])
+        else:
+            LOG.info("Cluster with cluster-id %s not found.", cluster_id)
+            print(f"Cluster with cluster-id {cluster_id} not found.")
+    else:
+        LOG.info("Printing overview of all clusters")
+        if cluster_dict:
+            for cluster_key_id, cluster_node_dict in cluster_dict.items():
+                print(f"Cluster-ID: {cluster_key_id}")
+                master = cluster_node_dict.get('master')
+                if master:
+                    for key in ["name", "user_id", "launched_at", "key_name", "public_v4", "public_v6", "provider"]:
+                        value = cluster_node_dict['master'].get(key)
+                        if value:
+                            print(f"\t{key}: {value}")
+                    security_groups = get_security_groups(cluster_node_dict)
+                    print(f"\tsecurity_groups: {security_groups}")
+                    networks = get_networks(cluster_node_dict)
+                    print(f"\tnetwork: {pprint.pformat(networks)}")
+                else:
+                    LOG.warning("No master for cluster: %s.", cluster_key_id)
+                master_count, worker_count, vpn_count = get_size_overview(cluster_node_dict)
+                print(f"\tCluster has {master_count} master, {vpn_count} vpnwkr and {worker_count} regular workers. "
+                      f"The cluster is spread over {vpn_count + master_count} reachable provider(s).")
+        else:
+            print("No cluster found.")
+    return 0
+
+
+def get_size_overview(cluster_dict):
+    """
+    :param cluster_dict: dictionary of cluster to size_overview
+    :return: number of masters, number of workers, number of vpns
+    """
+    LOG.info("Printing size overview")
+    master_count = int(bool(cluster_dict.get("master")))
+    worker_count = len(cluster_dict.get("workers") or "")
+    vpn_count = len(cluster_dict.get("vpnwkrs") or "")
+    return master_count, worker_count, vpn_count
+
+
+def get_networks(cluster_dict):
+    """
+    Gets all addresses of servers
+    :param cluster_dict: dictionary of clusters to find addresses
+    :return: list of dicts containing addresses
+    """
+    master = cluster_dict["master"]
+    addresses = [{master["provider"]: list(master["addresses"].keys())}]
+    for server in (cluster_dict.get("vpnwkrs") or []):
+        addresses.append({server["provider"]: list(server["addresses"].keys())})
+    return addresses
+
+
+def get_security_groups(cluster_dict):
+    """
+    Gets all security groups of servers
+    :param cluster_dict: dictionary of clusters to find security_groups
+    :return: list of dicts containing security_groups
+    """
+    master = cluster_dict["master"]
+    security_groups = [{master["provider"]: master["security_groups"]}]
+    for server in (cluster_dict.get("vpnwkrs") or []):
+        security_groups.append({server["provider"]: server["security_groups"]})
+    return security_groups
diff --git a/bibigrid2/core/actions/terminate_cluster.py b/bibigrid2/core/actions/terminate_cluster.py
new file mode 100644
index 000000000..67f744dc8
--- /dev/null
+++ b/bibigrid2/core/actions/terminate_cluster.py
@@ -0,0 +1,173 @@
+"""
+This module contains methods to terminate a cluster, i.e. to delete all servers, keypairs (local and remote)
+and application credentials used by it.
+"""
+
+import logging
+import os
+import re
+
+from bibigrid2.core.actions import create
+
+LOG = logging.getLogger("bibigrid")
+
+
+def terminate_cluster(cluster_id, providers, debug=False):
+    """
+    Goes through all providers and collects all servers whose name contains the cluster id.
+    It then terminates those servers and frees the resources (keypairs, application credentials)
+    that were held by the cluster but are no longer in use.
+    :param debug: if set, the user is asked before termination is executed
+    :param providers: providers
+    :param cluster_id: id of cluster to terminate
+    :return: VOID
+    """
+    if debug:
+        if not input(f"DEBUG MODE: Any non-empty input to shutdown cluster {cluster_id}. "
+                     "Empty input to exit with cluster still alive:"):
+            return 0
+    cluster_server_state = []
+    cluster_keypair_state = []
+    tmp_keyname = create.KEY_NAME.format(cluster_id=cluster_id)
+    local_keypairs_deleted = delete_local_keypairs(tmp_keyname)
+    if local_keypairs_deleted or input(f"WARNING: No local temporary keyfiles found for cluster {cluster_id}. "
+                                       f"This might not be your cluster. Are you sure you want to terminate it?\n"
+                                       f"Any non-empty input to shutdown cluster {cluster_id}. "
+                                       f"Empty input to exit with cluster still alive:"):
+        for provider in providers:
+            LOG.info("Terminating cluster %s on cloud %s",
+                     cluster_id, provider.cloud_specification['identifier'])
+            server_list = provider.list_servers()
+            cluster_server_state += terminate_servers(server_list, cluster_id, provider)
+            cluster_keypair_state.append(delete_keypairs(provider, tmp_keyname))
+        ac_state = delete_application_credentials(providers[0], cluster_id)
+        terminate_output(cluster_server_state, cluster_keypair_state, ac_state, cluster_id)
+    return 0
+
+
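# To illustrate the name filter compiled in terminate_servers below, a few
# hypothetical server names checked against the regex:
#     bibigrid-master-abc123       -> matches
#     bibigrid-worker0-abc123-3    -> matches
#     bibigrid-vpnwkr1-abc123-0    -> matches
#     some-unrelated-server        -> no match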
+def terminate_servers(server_list, cluster_id, provider):
+    """
+    Terminates all servers in server_list that match the bibigrid regex.
+    @param server_list: list of server dicts; all servers are from the given provider
+    @param cluster_id: id of cluster to terminate
+    @param provider: provider that holds all servers in server_list
+    @return: a list of the termination states of the servers that were to be terminated
+    """
+    LOG.info("Deleting servers on provider %s...", provider.cloud_specification['identifier'])
+    cluster_server_state = []
+    # matches e.g. bibigrid-master-<cluster_id>, bibigrid-worker<group>-<cluster_id>-<index>
+    server_regex = re.compile(fr"^bibigrid-(master-{cluster_id}|(worker|vpnwkr)\d+-{cluster_id}-\d+)$")
+    for server in server_list:
+        if server_regex.match(server["name"]):
+            LOG.info("Trying to terminate Server %s on cloud %s.",
+                     server['name'], provider.cloud_specification['identifier'])
+            cluster_server_state.append(terminate_server(provider, server))
+    return cluster_server_state
+
+
+def terminate_server(provider, server):
+    """
+    Terminates a single server and stores the termination state
+    @param provider: the provider that holds the server
+    @param server: the server that is to be terminated
+    @return: True if the server has been terminated, False otherwise
+    """
+    terminated = provider.delete_server(server["id"])
+    if not terminated:
+        LOG.warning("Unable to terminate server %s on provider %s.",
+                    server['name'], provider.cloud_specification['identifier'])
+    else:
+        LOG.info("Server %s terminated on provider %s.",
+                 server['name'], provider.cloud_specification['identifier'])
+    return terminated
+
+
+def delete_keypairs(provider, tmp_keyname):
+    """
+    Deletes the cluster's keypair from the given provider (called once per provider)
+    @param provider: provider to delete keypair from
+    @param tmp_keyname: BiBiGrid2 keyname
+    @return: True if the keypair was deleted
+    """
+    LOG.info("Deleting Keypair on provider %s...", provider.cloud_specification['identifier'])
+    deleted = provider.delete_keypair(tmp_keyname)
+    if deleted:
+        LOG.info("Keypair %s deleted on provider %s.", tmp_keyname, provider.cloud_specification['identifier'])
+    else:
+        LOG.warning("Unable to delete %s on provider %s.", tmp_keyname, provider.cloud_specification['identifier'])
+    return deleted
+
+
+def delete_local_keypairs(tmp_keyname):
+    """
+    Deletes local keyfiles of a cluster
+    @param tmp_keyname: BiBiGrid2 keyname
+    @return: True if at least one local keyfile (public or private) was found and removed
+    """
+    success = False
+    LOG.info("Deleting Keypair locally...")
+    tmp_keypath = os.path.join(create.KEY_FOLDER, tmp_keyname)
+    pub_tmp_keypath = tmp_keypath + ".pub"
+    if os.path.isfile(tmp_keypath):
+        os.remove(tmp_keypath)
+        success = True
+    else:
+        LOG.warning("Unable to find private keyfile '%s' locally. No local private keyfile deleted.", tmp_keypath)
+    if os.path.isfile(pub_tmp_keypath):
+        os.remove(pub_tmp_keypath)
+        success = True
+    else:
+        LOG.warning("Unable to find public keyfile '%s' locally. No local public keyfile deleted.", pub_tmp_keypath)
+    return success
+
+
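# The keyfiles removed above live under create.KEY_FOLDER and are named via
# create.KEY_NAME; assuming KEY_NAME = "tempKey_bibi-{cluster_id}" (an assumption,
# the real template lives in create.py), a hypothetical cluster would use:
#     <KEY_FOLDER>/tempKey_bibi-abc123        (private key)
#     <KEY_FOLDER>/tempKey_bibi-abc123.pub    (public key)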
+ """ + # implement deletion + auth = master_provider.cloud_specification["auth"] + if not auth.get("application_credential_id") or not auth.get("application_credential_secret"): + return master_provider.delete_application_credential_by_id_or_name(create.AC_NAME.format(cluster_id=cluster_id)) + LOG.info("Because you used application credentials to authenticate, " + "no created application credentials need deletion.") + return True + + +def terminate_output(cluster_server_state, cluster_keypair_state, ac_state, cluster_id): + """ + Logs the termination result in detail + @param cluster_server_state: list of bools. Each bool stands for a server termination + @param cluster_keypair_state: list of bools. Each bool stands for a keypair deletion + @param ac_state: bool that stands for the deletion of the credentials on the master + @param cluster_id: + @return: + """ + cluster_existed = bool(cluster_server_state) + cluster_server_terminated = all(cluster_server_state) + cluster_keypair_deleted = all(cluster_keypair_state) + if cluster_existed: + if cluster_server_terminated: + LOG.info("Terminated all servers of cluster %s.", cluster_id) + else: + LOG.warning("Unable to terminate all servers of cluster %s.", cluster_id) + if cluster_keypair_deleted: + LOG.info("Deleted all keypairs of cluster %s.", cluster_id) + else: + LOG.warning("Unable to delete all keypairs of cluster %s.", cluster_id) + if cluster_server_terminated and cluster_keypair_deleted: + out = f"Successfully terminated cluster {cluster_id}." + LOG.info(out) + print(out) + else: + LOG.warning("Unable to terminate cluster %s properly." + "\nAll servers terminated: %s\nAll keys deleted: %s", + cluster_id, cluster_server_terminated, cluster_keypair_deleted) + if ac_state: + LOG.info("Successfully handled application credential of cluster %s.", cluster_id) + else: + LOG.warning("Unable to delete application credential of cluster %s", cluster_id) + else: + LOG.warning("Unable to find any servers for cluster-id %s. 
" + "Check cluster-id and configuration.\nAll keys deleted: %s", + cluster_id, cluster_keypair_deleted) diff --git a/bibigrid2/core/actions/update.py b/bibigrid2/core/actions/update.py new file mode 100644 index 000000000..091e39300 --- /dev/null +++ b/bibigrid2/core/actions/update.py @@ -0,0 +1,27 @@ +""" +Module that contains methods to update the master playbook +""" + +import logging + +from bibigrid2.core.utility import ansible_commands as aC +from bibigrid2.core.utility.handler import ssh_handler +from bibigrid2.core.utility.paths import ansible_resources_path as aRP +from bibigrid2.core.utility.paths import bin_path as biRP +from bibigrid2.core.utility.handler import cluster_ssh_handler + +LOG = logging.getLogger("bibigrid") + +def update(cluster_id, master_provider, master_configuration): + LOG.info("Starting update...") + master_ip, ssh_user, used_private_key = cluster_ssh_handler.get_ssh_connection_info(cluster_id, master_provider, + master_configuration) + if master_ip and ssh_user and used_private_key: + LOG.info("Trying to update %s@%s", master_ip, ssh_user) + ssh_handler.execute_ssh(floating_ip=master_ip, private_key=used_private_key, username=ssh_user, + commands=[aC.EXECUTE], + filepaths=[(aRP.PLAYBOOK_PATH, aRP.PLAYBOOK_PATH_REMOTE), + (biRP.BIN_PATH, biRP.BIN_PATH_REMOTE)]) + return 0 + + return 1 diff --git a/bibigrid2/core/actions/version.py b/bibigrid2/core/actions/version.py new file mode 100644 index 000000000..0ddbdb45d --- /dev/null +++ b/bibigrid2/core/actions/version.py @@ -0,0 +1,6 @@ +""" +Contains the static variable __version__ which holds the current version number. +https://www.akeeba.com/how-do-version-numbers-work.html +""" + +__version__ = "0.2.0" diff --git a/bibigrid2/core/provider.py b/bibigrid2/core/provider.py new file mode 100644 index 000000000..1c50c8bb6 --- /dev/null +++ b/bibigrid2/core/provider.py @@ -0,0 +1,210 @@ +""" +Holds the abstract class Provider +""" + + +class Provider: # pylint: disable=too-many-public-methods + """ + See in detailed return value information in tests>provider>test_Provider. + Make sure to register your newly implemented provider in provider_handler: name:class + This will automatically register it for testing when startupTests main is called. + """ + NAME = "Provider" + + class QuotaExceededException(Exception): + """ + Just a renamed Exception. + """ + + def __init__(self, cloud_specification): + """ + Call necessary methods to create a connection and save cloud_specification data as needed. + """ + self.cloud_specification = cloud_specification # contains sensitive information! + self.cloud_specification["identifier"] = self.cloud_specification.get('profile') or self.cloud_specification[ + 'auth'].get('project_id') or self.cloud_specification["auth"].get('application_credential_id') or "Unknown" + + def create_application_credential(self, name=None): + """ + Creates an application credential with name name + :param name: Name of new application credential + :return: the application credential dictionary + """ + + def delete_application_credential_by_id_or_name(self, ac_id_or_name): + """ + Deletes existing application credential by id or name and returns true. + If application credential not found it returns false. 
+ :param ac_id_or_name: application credential id or name + :return: True if deleted else false + """ + + def get_image_by_id_or_name(self, image_id_or_name): + """ + Returns image that has id or name image_id_or_name + :param image_id_or_name: identifier + :return: said image (dict) or none if not found + """ + + def get_flavor(self, instance_type): + """ + Returns flavor that has id or name flavor_id_or_name + :param instance_type: identifier + :return: said flavor (dict) or none if not found + """ + + def get_volume_snapshot_by_id_or_name(self, snapshot_id_or_name): + """ + Returns snapshot that has id or name snapshot_id_or_name + :param snapshot_id_or_name: identifier + :return: said snapshot (dict) or none if not found + """ + + def get_network_by_id_or_name(self, network_id_or_name): + """ + Returns network that has id or name network_id_or_name + :param network_id_or_name: identifier + :return: said network (dict) or none if not found + """ + + def get_subnet_by_id_or_name(self, subnet_id_or_name): + """ + Returns subnet that has id or name subnet_id_or_name + :param subnet_id_or_name: identifier + :return: said subnet (dict) or none if not found + """ + + def list_servers(self): + """ + Returns a list of all servers on logged in provider + :return: said list of servers or empty list if none found + """ + + def create_server(self, name, flavor, image, network, key_name=None, wait=True, volumes=None): # pylint: disable=too-many-arguments + """ + Creates a new server and waits for it to be accessible if wait=True. If volumes are given, they are attached. + Returns said server (dict) + :param name: name (str) + :param flavor: flavor/type (str) + :param image: image/bootable-medium (str) + :param network: network (str) + :param key_name: (str) + :param wait: (bool) + :param volumes: List of volumes (list (str)) + :return: server (dict) + """ + + def delete_server(self, name_or_id, delete_ips=True): + """ + Deletes server and floating_ip as well if delete_ips is true. The resource is then free again + :param name_or_id: + :param delete_ips: + :return: True if delete succeeded, False otherwise + """ + + def delete_keypair(self, key_name): + """ + Deletes keypair with key_name + :param key_name: (str) + :return: True if delete succeeded, False otherwise + """ + + def get_server_group_by_id_or_name(self, server_group_id_or_name): + """ + Returns server_group that has id or name server_group_id_or_name + :param server_group_id_or_name: identifier + :return: said server_group (dict) or none if not found + """ + + def close(self): + """ + Closes connection + :return: + """ + + def create_keypair(self, name, public_key): + """ + Creates a new keypair with name name and public_key public_key + :param name: name of new keypair + :param public_key: public_key of new keypair + :return: + """ + + def get_network_id_by_subnet(self, subnet): + """ + Gets network_id by subnet + :param subnet: id (str) + :return: (str) + """ + + def get_subnet_ids_by_network(self, network): + """ + Gets subnet_ids (list (str)) by network_id + :param network: id (str) + :return: subnet_ids (list (str)) + """ + + def get_free_resources(self): + """ + Gets free resources. If a resource cannot be determined, assume maximum is free. 
+ :return: Dictionary containing the free resources + """ + + def get_volume_by_id_or_name(self, name_or_id): + """ + Returns volume that has id or name name_or_id + :param name_or_id: identifier + :return: said volume (dict) or none if not found + """ + + def create_volume_from_snapshot(self, snapshot_name_or_id): + """ + Creates a volume from snapshot. + :param snapshot_name_or_id: name or id of snapshot + :return: id of created volume or none if failed + """ + + def get_external_network(self, network_name_or_id): + """ + Finds router interface with network id equal to given network and by that the external network. + :param network_name_or_id: Name or id of network + :return: Corresponding external network + """ + + def add_auto_ip(self, server, wait=False, timeout=60, reuse=True): + """ + Add a floating IP to a server. + Will reuse floating ips or create a new one if no floating-ip is down. + :param server: the server that said floating ip will be attached to + :param wait: wait for floating-ip to be assigned + :param timeout: when to accept failing + :param reuse: if False will just create a new floating-ip and not reuse an existing down one + :return: the floating-ip + """ + + def attach_available_floating_ip(self, network=None, server=None): + """ + Get a floating IP from a network or a pool and attach it to the server + :param network: + :param server: + :return: + """ + + def get_images(self): + """ + Get a generator able ot generate all images + @return: A generator able ot generate all images + """ + + def get_flavors(self): + """ + Get a generator able ot generate all flavors + @return: A generator able ot generate all flavors + """ + + def get_active_images(self): + return [image["name"] for image in self.get_images() if image["status"].lower() == "active"] + + def get_active_flavors(self): + return [flavor["name"] for flavor in self.get_flavors() + if "legacy" not in flavor["name"].lower() and "deprecated" not in flavor["name"].lower()] diff --git a/bibigrid2/core/startup.py b/bibigrid2/core/startup.py new file mode 100755 index 000000000..7973d2ca5 --- /dev/null +++ b/bibigrid2/core/startup.py @@ -0,0 +1,139 @@ +""" +Contains main method. Interprets command line, sets logging and starts corresponding action. +""" +import logging +import math +import os +import sys +import time +import traceback + +import yaml + +from bibigrid2.core.actions import check, create, ide, list_clusters, terminate_cluster, update, version +from bibigrid2.core.utility import command_line_interpreter +from bibigrid2.core.utility.handler import configuration_handler, provider_handler + +LOGGING_HANDLER_LIST = [logging.StreamHandler(), logging.FileHandler("bibigrid2.log")] # stdout and to file +VERBOSITY_LIST = [logging.WARNING, logging.INFO, logging.DEBUG] +LOGGER_FORMAT = "%(asctime)s [%(levelname)s] %(message)s" + +LOG = logging.getLogger("bibigrid") + + +def get_cluster_id_from_mem(): + """ + Reads the cluster_id of the last created cluster and returns it. Used if no cluster_id is given. + + @return: cluster_id. If no mem file can be found, the file is not a valid yaml file or doesn't contain a cluster_id, + it returns none. 
+ """ + if os.path.isfile(create.CLUSTER_MEMORY_PATH): + try: + with open(create.CLUSTER_MEMORY_PATH, mode="r", encoding="UTF-8") as cluster_memory_file: + mem_dict = yaml.safe_load(stream=cluster_memory_file) + return mem_dict.get("cluster_id") + except yaml.YAMLError as exc: + LOG.warning("Couldn't read configuration %s: %s", create.CLUSTER_MEMORY_PATH, exc) + return None + + +def set_logger(verbosity): + """ + Sets verbosity, format and handler. + :param verbosity: level of verbosity + :return: + """ + + capped_verbosity = min(verbosity, len(VERBOSITY_LIST) - 1) + # LOG.basicConfig(format=LOGGER_FORMAT, level=VERBOSITY_LIST[capped_verbosity], + # handlers=LOGGING_HANDLER_LIST) + logging.basicConfig(format=LOGGER_FORMAT, handlers=LOGGING_HANDLER_LIST) + + log = logging.getLogger("bibigrid") + log.setLevel(VERBOSITY_LIST[capped_verbosity]) + + log.debug(f"Logging verbosity set to {capped_verbosity}") + + +def run_action(args, configurations, config_path): # pylint: disable=too-many-nested-blocks,too-many-branches + """ + Uses args to decide which action will be executed and executes said action. + :param args: command line arguments + :param configurations: list of configurations (dicts) + :param config_path: path to configurations-file + :return: + """ + if args.version: + LOG.info("Action version selected") + print(version.__version__) + return 0 + + start_time = time.time() + exit_state = 0 + try: + providers = provider_handler.get_providers(configurations) + if providers: + if args.list_clusters: + LOG.info("Action list_clusters selected") + exit_state = list_clusters.print_list_clusters(args.cluster_id, providers) + elif args.check: + LOG.info("Action check selected") + exit_state = check.check(configurations, providers) + elif args.create: + LOG.info("Action create selected") + creator = create.Create(providers=providers, + configurations=configurations, + debug=args.debug, + config_path=config_path) + print("Creating a new cluster takes about 10 or more minutes depending on your cloud provider " + "and your configuration. Be patient.") + exit_state = creator.create() + else: + if not args.cluster_id: + args.cluster_id = get_cluster_id_from_mem() + LOG.info("No cid (cluster_id) specified. Defaulting to last created cluster: %s", + args.cluster_id or 'None found') + if args.cluster_id: + if args.terminate_cluster: + LOG.info("Action terminate_cluster selected") + exit_state = terminate_cluster.terminate_cluster(args.cluster_id, providers, args.debug) + elif args.ide: + LOG.info("Action ide selected") + exit_state = ide.ide(args.cluster_id, providers[0], configurations[0]) + elif args.update: + LOG.info("Action update selected") + exit_state = update.update(args.cluster_id, providers[0], configurations[0]) + else: + LOG.warning("Please make use of -cid .") + for provider in providers: + provider.close() + else: + exit_state = 1 + except Exception as err: # pylint: disable=broad-except + if args.debug: + traceback.print_exc() + else: + LOG.error(err) + exit_state = 2 + time_in_s = time.time() - start_time + print(f"--- {math.floor(time_in_s / 60)} minutes and {time_in_s % 60} seconds ---") + return exit_state + + +def main(): + """ + Interprets command line, sets logger, reads configuration and runs selected action. Then exits. 
+ :return: + """ + + args = command_line_interpreter.interpret_command_line() + set_logger(args.verbose) + configurations = configuration_handler.read_configuration(args.config_input) + if configurations: + sys.exit(run_action(args, configurations, args.config_input)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/bibigrid2/core/utility/ansible_commands.py b/bibigrid2/core/utility/ansible_commands.py new file mode 100644 index 000000000..c84030d87 --- /dev/null +++ b/bibigrid2/core/utility/ansible_commands.py @@ -0,0 +1,58 @@ +""" +Module containing a bunch of useful commands to be used by sshHandler.py for cluster setup +""" + +import os +import bibigrid2.core.utility.paths.ansible_resources_path as aRP + +#TO_LOG = "| sudo tee -a /var/log/ansible.log" +#AIY = "apt-get -y install" +#SAU = "sudo apt-get update" +#NO_KEY_CHECK = "export ANSIBLE_HOST_KEY_CHECKING=False" +NO_UPDATE = ("""sudo sed -i 's/APT::Periodic::Unattended-Upgrade "1";/APT::Periodic::Unattended-Upgrade "0";/g' """ + """/etc/apt/apt.conf.d/20auto-upgrades""", "Disable apt auto update.") +# Setup (Python for everyone) +# UPDATE = f"sudo {AU} {TO_LOG}" +# PIP = f"sudo pip3 install --upgrade pip {TO_LOG}" +# SETUPTOOLS = "sudo pip3 install setuptools" +# LOG = "export ANSIBLE_LOG_PATH=~/ansible.log" +WAIT_READY = ('while sudo lsof /var/lib/dpkg/lock 2> null; do echo "/var/lib/dpkg/lock locked - wait for 10 seconds"; ' + 'sleep 10; done', "Wait for dpkg lock removed.") +# SLEEP_10 = "sleep 10s" +# RANDOM = "sudo DEBIAN_FRONTEND=noninteractive apt-get --yes install apt-transport-https ca-certificates " \ +# "software-properties-common python3 python3-pip libffi-dev libssl-dev" +# PYTHON_WORKERS = f'ansible workers -i "{aRP.HOSTS_CONFIG_FILE_REMOTE}" --become -m raw -a "{SAU} && {AIY} python3' \ +# f'"' + +# Test Ansible +# PING = (f'ansible -i "{aRP.HOSTS_CONFIG_FILE_REMOTE}" all -m ping',"Ping all hosts using ansible.") +# OK = ('if [ $? -eq 0 ]; then echo "Ansible configuration seems to work properly."; ' +# 'else echo"Ansible hosts not reachable. There seems to be a misconfiguration."; fi',"Check for ") + +# Run ansible-galaxy to install ansible-galaxy roles from galaxy, git or url (.tar.gz) +# GALAXY = f"ansible-galaxy install --roles-path {aRP.ADDITIONAL_ROLES_ROOT_PATH_REMOTE} -r {aRP.REQUIREMENTS_YML}" + +# Extract ansible roles from files (.tar.gz, .tgz) +# EXTRACT = f"for f in $(find /tmp/roles -type f -regex '.*\\.t\\(ar\\.\\)?gz'); " \ +# f"do tar -xzf $f -C {aRP.ADDITIONAL_ROLES_ROOT_PATH_REMOTE}; done" + +# Fix line endings for all text based ansible file to ensure windows files being used correctly +# GET_ASCII_FILES = "files=$(for f in $( find ~/playbook -type f); do file ${f} | grep ASCII | cut -f 1 -d ':'; done;)" +# REPLACE_ENDINGS = "for file in ${file}; do sed -i 's/\\r$//' \"${file}\"; done" + +# Utility +ADD_PLAYBOOK_TO_LINUX_HOME = ("ln -s /opt/playbook ~/playbook", "Link /opt/playbook to ~/playbook.") + +# Execute +PLAYBOOK_HOME = ("sudo mkdir -p /opt/playbook", "Create playbook home.") +PLAYBOOK_HOME_RIGHTS = ("sudo chown ubuntu:ubuntu /opt/playbook", "Adjust playbook home permission.") +MV_ANSIBLE_CONFIG = ( + "sudo install -D /opt/playbook/ansible.cfg /etc/ansible/ansible.cfg", "Move ansible configuration.") +EXECUTE = (f"ansible-playbook {os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.SITE_YML)} -i " + f"{os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.ANSIBLE_HOSTS)} -l master", + "Execute ansible playbook. 
Be patient.") + +# ansible setup +UPDATE = ("sudo apt-get update", "Update apt repository lists.") +PYTHON3_PIP = "sudo apt-get install -y python3-pip", "Install python3 pip using apt." +ANSIBLE_PASSLIB = ("sudo pip install ansible==6.6 passlib", "Install Ansible and Passlib using pip.") diff --git a/bibigrid2/core/utility/ansible_configurator.py b/bibigrid2/core/utility/ansible_configurator.py new file mode 100644 index 000000000..0bce57c1e --- /dev/null +++ b/bibigrid2/core/utility/ansible_configurator.py @@ -0,0 +1,305 @@ +""" +Prepares ansible files (vars, common_configuration, ...) +""" + +import logging + +import mergedeep +import yaml + +from bibigrid2.core.actions import create +from bibigrid2.core.actions import ide +from bibigrid2.core.actions import list_clusters +from bibigrid2.core.utility.handler import configuration_handler +from bibigrid2.core.utility import id_generation +from bibigrid2.core.utility.paths import ansible_resources_path as aRP +from bibigrid2.core.utility import yaml_dumper + +DEFAULT_NFS_SHARES = ["/vol/spool"] +ADDITIONAL_PATH = "additional/" +PYTHON_INTERPRETER = "/usr/bin/python3" +MASTER_ROLES = [{"role": "bibigrid", "tags": ["bibigrid", "bibigrid-master"]}] +WORKER_ROLES = [{"role": "bibigrid", "tags": ["bibigrid", "bibigrid-worker"]}] +VARS_FILES = [aRP.INSTANCES_YML, aRP.CONFIG_YML] +IDE_CONF = {"ide": False, "workspace": ide.DEFAULT_IDE_WORKSPACE, "port_start": ide.REMOTE_BIND_ADDRESS, + "port_end": ide.DEFAULT_IDE_PORT_END, "build": False} +ZABBIX_CONF = {"db": "zabbix", "db_user": "zabbix", "db_password": "zabbix", "timezone": "Europe/Berlin", + "server_name": "bibigrid", "admin_password": "bibigrid"} +SLURM_CONF = {"db": "slurm", "db_user": "slurm", "db_password": "changeme", + "munge_key": id_generation.generate_munge_key(), + "elastic_scheduling": {"SuspendTime": 3600, "ResumeTimeout": 900, "TreeWidth": 128}} +LOG = logging.getLogger("bibigrid") + +def generate_site_file_yaml(custom_roles): + """ + Generates site_yaml (dict). + Deepcopy is used in case roles might differ between servers in the future. + :param custom_roles: ansibleRoles given by the config + :return: site_yaml (dict) + """ + site_yaml = [{'hosts': 'master', "become": "yes", + "vars_files": VARS_FILES, "roles": MASTER_ROLES}, + {"hosts": "workers", "become": "yes", "vars_files": VARS_FILES, + "roles": WORKER_ROLES}] # , + # {"hosts": "vpnwkr", "become": "yes", "vars_files": copy.deepcopy(VARS_FILES), + # "roles": ["common", "vpnwkr"]}] + # add custom roles and vars + for custom_role in custom_roles: + VARS_FILES.append(custom_role["vars_file"]) + MASTER_ROLES.append(ADDITIONAL_PATH + custom_role["name"]) + WORKER_ROLES.append(ADDITIONAL_PATH + custom_role["name"]) + return site_yaml + + +def generate_instances_yaml(cluster_dict, configuration, provider, cluster_id): # pylint: disable=too-many-locals + """ + ToDo filter what information really is necessary. 
Determined by further development + Filters unnecessary information + :param cluster_dict: cluster_dict to get the information from + :param configuration: configuration of master cloud ToDo needs to be list in the future + :param provider: provider of master cloud ToDo needs to be list in the future + :param cluster_id: To get proper naming + :return: filtered information (dict) + """ + LOG.info("Generating instances file...") + workers = [] + flavor_keys = ["name", "ram", "vcpus", "disk", "ephemeral"] + for index, worker in enumerate(configuration.get("workerInstances", [])): + flavor = provider.get_flavor(worker["type"]) + flavor_dict = {key: flavor[key] for key in flavor_keys} + image = worker["image"] + network = configuration["network"] + worker_range = "[0-{}]" + name = create.WORKER_IDENTIFIER(worker_group=index, cluster_id=cluster_id, + additional=worker_range.format(worker.get('count', 1) - 1)) + regexp = create.WORKER_IDENTIFIER(worker_group=index, cluster_id=cluster_id, + additional=r"\d+") + workers.append({"name": name, "regexp": regexp, "image": image, "network": network, "flavor": flavor_dict}) + master = {key: cluster_dict["master"][key] for key in + ["name", "private_v4", "public_v4", "public_v6", "cloud_specification"]} + master["flavor"] = {key: cluster_dict["master"]["flavor"][key] for key in flavor_keys} + return {"master": master, "workers": workers} + + +def pass_through(dict_from, dict_to, key_from, key_to=None): + """ + If key is defined in dict_from, set key of dict_to to value of corresponding value of dict_from. Happens in place. + @param key_from: + @param key_to: + @param dict_from: + @param dict_to: + @return: + """ + if not key_to: + key_to = key_from + if dict_from.get(key_from): + dict_to[key_to] = dict_from[key_from] + + +def generate_common_configuration_yaml(cidrs, configuration, cluster_id, ssh_user, default_user): + """ + Generates common_configuration yaml (dict) + :param cidrs: str subnet cidrs (provider generated) + :param configuration: master configuration (first in file) + :param cluster_id: Id of cluster + :param ssh_user: user for ssh connections + :param default_user: Given default user + :return: common_configuration_yaml (dict) + """ + LOG.info("Generating common configuration file...") + # print(configuration.get("slurmConf", {})) + common_configuration_yaml = {"cluster_id": cluster_id, "cluster_cidrs": cidrs, + "default_user": default_user, + "local_fs": configuration.get("localFS", False), + "local_dns_lookup": configuration.get("localDNSlookup", False), + "use_master_as_compute": configuration.get("useMasterAsCompute", True), + "enable_slurm": configuration.get("slurm", False), + "enable_zabbix": configuration.get("zabbix", False), + "enable_nfs": configuration.get("nfs", False), + "enable_ide": configuration.get("ide", False), + "slurm": configuration.get("slurm", True), "ssh_user": ssh_user, + "slurm_conf": mergedeep.merge({}, SLURM_CONF, configuration.get("slurmConf", {}), + strategy=mergedeep.Strategy.TYPESAFE_REPLACE) + } + if configuration.get("nfs"): + nfs_shares = configuration.get("nfsShares", []) + nfs_shares = nfs_shares + DEFAULT_NFS_SHARES + common_configuration_yaml["nfs_mounts"] = [{"src": "/" + nfs_share, "dst": "/" + nfs_share} + for nfs_share in nfs_shares] + common_configuration_yaml["ext_nfs_mounts"] = [{"src": ext_nfs_share, "dst": ext_nfs_share} for + ext_nfs_share in (configuration.get("extNfsShares", []))] + + if configuration.get("ide"): + common_configuration_yaml["ide_conf"] = mergedeep.merge({}, IDE_CONF, 
configuration.get("ideConf", {}), + strategy=mergedeep.Strategy.TYPESAFE_REPLACE) + if configuration.get("zabbix"): + common_configuration_yaml["zabbix_conf"] = mergedeep.merge({}, ZABBIX_CONF, configuration.get("zabbixConf", {}), + strategy=mergedeep.Strategy.TYPESAFE_REPLACE) + + for from_key, to_key in [("waitForServices", "wait_for_services"), ("ansibleRoles", "ansible_roles"), + ("ansibleGalaxyRoles", "ansible_galaxy_roles")]: + pass_through(configuration, common_configuration_yaml, from_key, to_key) + return common_configuration_yaml + + +def generate_ansible_hosts_yaml(ssh_user, configuration, cluster_id): + """ + Generates ansible_hosts_yaml (inventory file). + :param ssh_user: str global SSH-username + :param configuration: dict + :param cluster_id: id of cluster + :return: ansible_hosts yaml (dict) + """ + LOG.info("Generating ansible hosts file...") + ansible_hosts_yaml = {"master": {"hosts": {"localhost": to_instance_host_dict(ssh_user)}}, + "workers": {"hosts": {}, "children": {"ephemeral": {"hosts": {}}}} + } + # vpnwkr are handled like workers on this level + workers = ansible_hosts_yaml["workers"] + for index, worker in enumerate(configuration.get("workerInstances", [])): + name = create.WORKER_IDENTIFIER(worker_group=index, cluster_id=cluster_id, + additional=f"[0:{worker.get('count', 1) - 1}]") + worker_dict = to_instance_host_dict(ssh_user, ip="", local=False) + if "ephemeral" in worker["type"]: + workers["children"]["ephemeral"]["hosts"][name] = worker_dict + else: + workers["hosts"][name] = worker_dict + return ansible_hosts_yaml + + +def to_instance_host_dict(ssh_user, ip="localhost", local=True): # pylint: disable=invalid-name + """ + Generates host entry + :param ssh_user: str global SSH-username + :param ip: str ip + :param local: bool + :return: host entry (dict) + """ + host_yaml = {"ansible_connection": "local" if local else "ssh", + "ansible_python_interpreter": PYTHON_INTERPRETER, + "ansible_user": ssh_user} + if ip: + host_yaml["ip"] = ip + return host_yaml + + +def get_cidrs(configurations, providers): + """ + Gets cidrs of all subnets in all providers + :param configurations: list of configurations (dict) + :param providers: list of providers + :return: + """ + all_cidrs = [] + for provider, configuration in zip(providers, configurations): + provider_cidrs = {"provider": type(provider).__name__, "provider_cidrs": []} + if isinstance(configuration["subnet"], list): + for subnet_id_or_name in configuration["subnet"]: + subnet = provider.get_subnet_by_id_or_name(subnet_id_or_name) + provider_cidrs["provider_cidrs"].append(subnet["cidr"]) # check key again + else: + subnet = provider.get_subnet_by_id_or_name(configuration["subnet"]) + provider_cidrs["provider_cidrs"].append(subnet["cidr"]) + all_cidrs.append(provider_cidrs) + return all_cidrs + + +def get_ansible_roles(ansible_roles): + """ + Checks if ansible_roles have all necessary values and returns True if so. + :param ansible_roles: ansible_roles from master configuration (first configuration) + :return: list of valid ansible_roles + """ + ansible_roles_yaml = [] + for ansible_role in (ansible_roles or []): + if ansible_role.get("file") and ansible_role.get("hosts"): + ansible_role_dict = {"file": ansible_role["file"], "hosts": ansible_role["hosts"]} + for key in ["name", "vars", "vars_file"]: + if ansible_role.get(key): + ansible_role_dict[key] = ansible_role[key] + ansible_roles_yaml.append(ansible_role_dict) + else: + LOG.warning("Ansible role %s had neither galaxy,git nor url. 
Not added.", ansible_role) + return ansible_roles_yaml + + +def get_ansible_galaxy_roles(ansible_galaxy_roles): + """ + Checks if ansible_galaxy_role have all necessary values and adds it to the return list if so. + :param ansible_galaxy_roles: + :return: list of valid ansible_galaxy_roles + """ + ansible_galaxy_roles_yaml = [] + for ansible_galaxy_role in (ansible_galaxy_roles or []): + if ansible_galaxy_role.get("galaxy") or ansible_galaxy_role.get("git") or ansible_galaxy_role.get("url"): + ansible_galaxy_role_dict = {"hosts": ansible_galaxy_role["hosts"]} + for key in ["name", "galaxy", "git", "url", "vars", "vars_file"]: + if ansible_galaxy_role.get(key): + ansible_galaxy_role_dict[key] = ansible_galaxy_role[key] + ansible_galaxy_roles_yaml.append(ansible_galaxy_role_dict) + else: + LOG.warning("Galaxy role %s had neither galaxy,git nor url. Not added.", ansible_galaxy_role) + return ansible_galaxy_roles_yaml + + +def generate_worker_specification_file_yaml(configurations): + """ + Generates worker_specification_file_yaml + :param configurations: list of configurations (dict) + :return: worker_specification_yaml + """ + LOG.info("Generating worker specification file...") + worker_groups_list = configuration_handler.get_list_by_key(configurations, "workerInstances", False) + # create.prepare_configuration guarantees that key is set + network_list = configuration_handler.get_list_by_key(configurations, "network", False) + worker_specification_yaml = [] + for worker_groups_provider_list, network in zip(worker_groups_list, network_list): + for worker_group in worker_groups_provider_list: + worker_specification_yaml.append({"TYPE": worker_group["type"], + "IMAGE": worker_group["image"], + "NETWORK": network}) + return worker_specification_yaml + + +def write_yaml(path, generated_yaml, alias=False): + """ + Writes generated_yaml to file path with or without alias + @param path: + @param generated_yaml: + @param alias: + @return: + """ + LOG.debug("Writing yaml %s", path) + with open(path, mode="w+", encoding="UTF-8") as file: + if alias: + yaml.safe_dump(data=generated_yaml, stream=file) + else: + yaml.dump(data=generated_yaml, stream=file, Dumper=yaml_dumper.NoAliasSafeDumper) + + +def configure_ansible_yaml(providers, configurations, cluster_id): + """ + Generates and writes all ansible-configuration-yaml files. 
+    :param providers: list of providers
+    :param configurations: list of configurations (dict)
+    :param cluster_id: id of cluster to create
+    :return:
+    """
+    LOG.info("Writing ansible files...")
+    alias = configurations[0].get("aliasDumper", False)
+    cluster_dict = list_clusters.dict_clusters(providers)[cluster_id]
+    ansible_roles = get_ansible_roles(configurations[0].get("ansibleRoles"))
+    default_user = providers[0].cloud_specification["auth"].get("username", configurations[0].get("sshUser", "Ubuntu"))
+    for path, generated_yaml in [
+        (aRP.WORKER_SPECIFICATION_FILE, generate_worker_specification_file_yaml(configurations)),
+        (aRP.COMMONS_CONFIG_FILE, generate_common_configuration_yaml(cidrs=get_cidrs(configurations, providers),
+                                                                     configuration=configurations[0],
+                                                                     cluster_id=cluster_id,
+                                                                     ssh_user=configurations[0]["sshUser"],
+                                                                     default_user=default_user)),
+        (aRP.COMMONS_INSTANCES_FILE, generate_instances_yaml(cluster_dict, configurations[0],
+                                                             providers[0], cluster_id)),
+        (aRP.HOSTS_CONFIG_FILE, generate_ansible_hosts_yaml(configurations[0]["sshUser"], configurations[0],
+                                                            cluster_id)),
+        (aRP.SITE_CONFIG_FILE, generate_site_file_yaml(ansible_roles))]:
+        write_yaml(path, generated_yaml, alias)
diff --git a/bibigrid2/core/utility/command_line_interpreter.py b/bibigrid2/core/utility/command_line_interpreter.py
new file mode 100644
index 000000000..b057bb82b
--- /dev/null
+++ b/bibigrid2/core/utility/command_line_interpreter.py
@@ -0,0 +1,44 @@
+"""
+Has necessary methods and variables to interpret the command line
+"""
+
+import argparse
+import os
+
+STANDARD_CONFIG_INPUT_PATH = os.path.expanduser("~/.config/bibigrid")
+FOLDER_START = ("~/", "/")
+
+
+def interpret_command_line():
+    """
+    Interprets the command line. Used in startup.py
+    :return: the parsed arguments
+    """
+    parser = argparse.ArgumentParser(description='BiBiGrid2 sets up clusters easily inside a cloud environment')
+    parser.add_argument("-v", "--verbose", action="count", default=0,
+                        help="Increases logging verbosity. `-v` adds more info to the logfile, "
+                             "`-vv` adds debug information to the logfile.")
+    parser.add_argument("-d", "--debug", action='store_true', help="Keeps cluster active. Asks before shutdown. "
+                                                                   "Offers termination after create")
+    parser.add_argument("-i", "--config_input", metavar="", help="Path to YAML configurations file. "
+                                                                 "Relative paths can be used and start "
+                                                                 "at ~/.config/bibigrid", required=True,
+                        type=lambda s: s if s.startswith(FOLDER_START) else os.path.join(STANDARD_CONFIG_INPUT_PATH, s))
+    parser.add_argument("-cid", "--cluster_id", metavar="", type=str, default="",
+                        help="Cluster id is needed for ide and termination")
+
+    actions = parser.add_mutually_exclusive_group(required=True)
+    actions.add_argument("-V", "--version", action='store_true', help="Displays version")
+    actions.add_argument("-t", "--terminate_cluster", action='store_true',
+                         help="Terminates cluster. Needs cluster-id set.")
+    actions.add_argument("-c", "--create", action='store_true', help="Creates cluster")
+    actions.add_argument("-l", "--list_clusters", action='store_true',
+                         help="Lists all running clusters. If cluster-id is set, will list this cluster in detail only")
+    actions.add_argument("-ch", "--check", action='store_true', help="Validates cluster configuration")
+    actions.add_argument("-ide", "--ide", action='store_true',
+                         help="Establishes a secured connection to ide. Needs cluster-id set")
+    actions.add_argument("-u", "--update", action='store_true', help="Updates master's playbook. "
+                                                                     "Needs cluster-id set, no job running "
+                                                                     "and no workers up")
+    args = parser.parse_args()
+    return args
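Illustrative invocations of the flags defined above (assuming the tool is started via its startup module; the cluster id is hypothetical):

    python3 -m bibigrid2.core.startup -i bibigrid.yml -ch              # validate configuration
    python3 -m bibigrid2.core.startup -i bibigrid.yml -c -v            # create a cluster, verbose log
    python3 -m bibigrid2.core.startup -i bibigrid.yml -t -cid abc123   # terminate cluster abc123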
" + "Needs cluster-id set, no job running " + "and no workers up") + args = parser.parse_args() + return args diff --git a/bibigrid2/core/utility/handler/cluster_ssh_handler.py b/bibigrid2/core/utility/handler/cluster_ssh_handler.py new file mode 100644 index 000000000..78500ade0 --- /dev/null +++ b/bibigrid2/core/utility/handler/cluster_ssh_handler.py @@ -0,0 +1,40 @@ +""" +This module gets information about ssh connection. +""" + +import logging +import os + +from bibigrid2.core.actions import create, list_clusters + +LOG = logging.getLogger("bibigrid") +def get_ssh_connection_info(cluster_id, master_provider, master_configuration): + """ + Gets master_ip, ssh_user and private key to enable other modules to create an ssh connection to a clusters master + @param cluster_id: id of cluster to connect to + @param master_provider: master's provider + @param master_configuration: master's configuration + @return: triple (master_ip, ssh_user, private_key) + """ + # If cluster_id is an ip, cluster_id will be used for master_ip + if "." in cluster_id: + LOG.info("Interpreting %s as ip since it doesn't match cluster_id", cluster_id) + master_ip = cluster_id + else: + master_ip = list_clusters.get_master_access_ip(cluster_id, master_provider) + ssh_user = master_configuration.get("sshUser") + public_keys = master_configuration.get("sshPublicKeyFiles") + used_private_key = None + + # first check configuration then if not found take the temporary key + if public_keys: + public_key = public_keys[0] + if isinstance(public_key, str): + private_key = public_key.strip(".pub") + if os.path.isfile(private_key): + used_private_key = private_key + if not used_private_key: + private_key = os.path.join(create.KEY_FOLDER, create.KEY_NAME.format(cluster_id=cluster_id)) + if os.path.isfile(private_key): + used_private_key = private_key + return master_ip, ssh_user, used_private_key diff --git a/bibigrid2/core/utility/handler/configuration_handler.py b/bibigrid2/core/utility/handler/configuration_handler.py new file mode 100644 index 000000000..51e555e3c --- /dev/null +++ b/bibigrid2/core/utility/handler/configuration_handler.py @@ -0,0 +1,142 @@ +""" +This module contains methods to read the configuration and cloud specification. +""" + +import logging +import os + +import mergedeep +import yaml + +CLOUDS_YAML_PATHS = ["~/.config/bibigrid", "/etc/bibigrid", ""] +CLOUDS_YAML = "clouds.yaml" +CLOUDS_PUBLIC_YAML = "clouds-public.yaml" +CLOUD_ROOT_KEY = "clouds" +CLOUD_PUBLIC_ROOT_KEY = "public-clouds" +CLOUDS_PUBLIC_NAME_KEY = "profile" +CLOUD_CONFIGURATION_KEY = "cloud" + +LOG = logging.getLogger("bibigrid") + +def read_configuration(path="bibigrid.yml"): + """ + Reads yaml from file and returns the list of all configurations + :param path: Path to yaml file + :return: configurations (dict) + """ + configuration = None + if os.path.isfile(path): + with open(path, mode="r", encoding="UTF-8") as stream: + try: + configuration = yaml.safe_load(stream) + except yaml.YAMLError as exc: + LOG.warning("Couldn't read configuration %s: %s", path, exc) + else: + LOG.warning("No such configuration file %s.", path) + return configuration + + +def get_list_by_key(configurations, key, get_empty=True): + """ + Returns a list of objects which are value to the key. 
+def get_list_by_key(configurations, key, get_empty=True):
+    """
+    Returns a list of the values stored under key in each configuration.
+    :param get_empty: if True, a None entry is kept for configurations that lack the key; if False, they are skipped
+    :param configurations: YAML of configuration file containing the configuration data for each provider
+    :param key: key to look up
+    :return: list of the key's values across all configurations
+    """
+    return [configuration.get(key) for configuration in configurations if configuration.get(key) or get_empty]
+
+
+# def get_dict_list_by_key_list(configurations, keys, get_empty=True):
+#     return [{key: configuration.get(key) for key in keys if configuration.get(key) or get_empty}
+#             for configuration in configurations]
+
+
+def find_file_in_folders(file_name, folders):
+    """
+    Searches all folders for a file with name file_name, loads the first match (expects yaml) and returns the dict
+    @param file_name: name of the file to look for
+    @param folders: folders to search for a file named file_name
+    @return: dict of match content or None if not found
+    """
+    for folder_path in folders:
+        file_path = os.path.expanduser(os.path.join(folder_path, file_name))
+        if os.path.isfile(file_path):
+            LOG.debug("File %s found in folder %s.", file_name, folder_path)
+            return read_configuration(file_path)
+        LOG.debug("File %s in folder %s not found.", file_name, folder_path)
+    return None
+
+
+def get_clouds_files():
+    """
+    Wrapper to call find_file_in_folders with the right arguments to find the clouds.yaml and clouds-public.yaml
+    @return: tuple of dicts containing the clouds.yaml and clouds-public.yaml data or None if not found.
+    """
+    clouds_yaml = find_file_in_folders(CLOUDS_YAML, CLOUDS_YAML_PATHS)
+    clouds_public_yaml = find_file_in_folders(CLOUDS_PUBLIC_YAML, CLOUDS_YAML_PATHS)
+    clouds = None
+    clouds_public = None
+    if clouds_yaml:
+        clouds = clouds_yaml.get(CLOUD_ROOT_KEY)
+        if not clouds:
+            LOG.warning("%s is not valid. Must contain key '%s:'", CLOUDS_YAML, CLOUD_ROOT_KEY)
+    else:
+        LOG.warning("No %s at %s! Please copy your %s to one of those listed folders. Aborting...",
+                    CLOUDS_YAML, CLOUDS_YAML_PATHS, CLOUDS_YAML)
+    if clouds_public_yaml:
+        clouds_public = clouds_public_yaml.get(CLOUD_PUBLIC_ROOT_KEY)
+        if not clouds_public:
+            LOG.warning("%s is not valid. Must contain key '%s'", CLOUDS_PUBLIC_YAML, CLOUD_PUBLIC_ROOT_KEY)
+    return clouds, clouds_public
+
+
+def get_cloud_specification(cloud_name, clouds, clouds_public):
+    """
+    As in openstack, the public cloud specification is overwritten by the private cloud specification
+    :param cloud_name: name of the cloud to look for in clouds.yaml
+    :param clouds: dict containing the data loaded from clouds.yaml
+    :param clouds_public: dict containing the data loaded from clouds-public.yaml
+    :return:
+    """
+    cloud_full_specification = {}
+    cloud_private_specification = clouds.get(cloud_name)
+    if cloud_private_specification:
+        cloud_full_specification = cloud_private_specification
+        public_cloud_name = cloud_private_specification.get(CLOUDS_PUBLIC_NAME_KEY)
+        if public_cloud_name and clouds_public:
+            LOG.debug("Trying to find profile...")
+            cloud_public_specification = clouds_public.get(public_cloud_name)
+            if not cloud_public_specification:
+                LOG.warning("%s is not a valid profile name. "
+                            "Must be contained under key '%s'", public_cloud_name, CLOUD_PUBLIC_ROOT_KEY)
+            else:
+                LOG.debug("Profile found. 
Merging begins...") + try: + mergedeep.merge(cloud_full_specification, cloud_public_specification, + strategy=mergedeep.Strategy.TYPESAFE_REPLACE) + except TypeError as exc: + LOG.warning("Existing %s and %s configuration keys don't match in type: %s", + CLOUDS_YAML, CLOUDS_PUBLIC_YAML, exc) + return {} + else: + LOG.debug("Using only clouds.yaml since no clouds-public profile is set.") + else: + LOG.warning("%s is not a valid cloud name. Must be contained under key '%s'", cloud_name, CLOUD_ROOT_KEY) + return cloud_full_specification + + +def get_cloud_specifications(configurations): + """ + Calls get_cloud_specification to get the cloud_specification for every configuration + @param configurations: + @return: list of dicts: cloud_specifications of every configuration + """ + clouds, clouds_public = get_clouds_files() + cloud_specifications = [] + if isinstance(clouds, dict): + for configuration in configurations: + cloud = configuration.get(CLOUD_CONFIGURATION_KEY) + if cloud: + cloud_specifications.append(get_cloud_specification(cloud, clouds, clouds_public)) # might be None + return cloud_specifications diff --git a/bibigrid2/core/utility/handler/logging_path_handler.py b/bibigrid2/core/utility/handler/logging_path_handler.py new file mode 100644 index 000000000..420314520 --- /dev/null +++ b/bibigrid2/core/utility/handler/logging_path_handler.py @@ -0,0 +1,18 @@ +""" +This module holds methods to return the logfile's path. +""" + +import logging + +LOG = logging.getLogger("bibigrid") + +def get_logging_path(): + """ + Returns the path were the logfile is stored + @return: the path were the logfile is stored + """ + for handler in LOG.getLoggerClass().root.handlers: + if hasattr(handler, 'baseFilename'): + log_path = handler.baseFilename + return log_path + return None diff --git a/bibigrid2/core/utility/handler/provider_handler.py b/bibigrid2/core/utility/handler/provider_handler.py new file mode 100644 index 000000000..45434505e --- /dev/null +++ b/bibigrid2/core/utility/handler/provider_handler.py @@ -0,0 +1,64 @@ +""" +This module contains different selectors to pick and create a connection to the right provider. +""" + +import logging + +from bibigrid2.core.utility.handler import configuration_handler +from bibigrid2.openstack import openstack_provider + +PROVIDER_NAME_DICT = {"openstack": openstack_provider.OpenstackProvider} +PROVIDER_CLASS_DICT = {provider.__name__: provider for provider in PROVIDER_NAME_DICT.values()} +LOG = logging.getLogger("bibigrid") + +def get_provider_by_class_name(provider_name, provider_dict=PROVIDER_CLASS_DICT): # pylint: disable=dangerous-default-value + """ + Returns provider that is associated with the key provider_name in provider_dict. + Otherwise a KeyError is thrown. + :param provider_name: key of provider_dict + :return: provider + """ + return provider_dict[provider_name] + + +def get_provider_by_name(provider_name, provider_dict=PROVIDER_NAME_DICT): # pylint: disable=dangerous-default-value + """ + Returns provider that is associated with the key provider_name in provider_dict. + Otherwise a KeyError is thrown. + :param provider_name: key of provider_dict + :return: provider + """ + return provider_dict.get(provider_name) + + +def get_provider_list_by_name_list(provider_name_list, cloud_specifications): + """ + Returns provider list for given provider_name_list + If name is not found in PROVIDER_NAME_DICT, PROVIDER_CLASS_DICT is tried instead. + If not found in both a key error is thrown. 
+ :param provider_name_list: list of provider names + :param cloud_specifications: list of cloud specifications + :return: list of providers + """ + provider_list = [ + (get_provider_by_name(provider_name) or get_provider_by_class_name(provider_name))(cloud_specification) + for provider_name, cloud_specification in zip(provider_name_list, cloud_specifications)] + return provider_list + + +def get_providers(configurations): + """ + Reads list of provider_names from configurations. + Determines list of providers by provider_names and returns it. + If providers don't match a key error is thrown and the program exits with failure state 1. + :param configurations: + :return: + """ + cloud_specifications = configuration_handler.get_cloud_specifications(configurations) + if cloud_specifications: + try: + provider_names = configuration_handler.get_list_by_key(configurations, "infrastructure") + return get_provider_list_by_name_list(provider_names, cloud_specifications) + except KeyError as exc: + LOG.warning("Check infrastructure in configurations! Key: %s", str(exc)) + return None diff --git a/bibigrid2/core/utility/handler/ssh_handler.py b/bibigrid2/core/utility/handler/ssh_handler.py new file mode 100644 index 000000000..c0c6f152d --- /dev/null +++ b/bibigrid2/core/utility/handler/ssh_handler.py @@ -0,0 +1,229 @@ +""" +This module handles ssh and sftp connections to master and vpnwkrs. It also holds general execution routines used to +setup the Cluster. +""" + +import logging +import os +import time +import socket +import paramiko +import yaml + +from bibigrid2.models.exceptions import ConnectionException, ExecutionException +from bibigrid2.core.utility import ansible_commands as aC + +PRIVATE_KEY_FILE = ".ssh/id_ecdsa" # to name bibigrid-temp keys identically on remote +ANSIBLE_SETUP = [aC.NO_UPDATE, aC.UPDATE, + aC.PYTHON3_PIP, aC.ANSIBLE_PASSLIB, + (f"chmod 600 {PRIVATE_KEY_FILE}","Adjust private key permissions."), + aC.PLAYBOOK_HOME, + aC.PLAYBOOK_HOME_RIGHTS, + aC.ADD_PLAYBOOK_TO_LINUX_HOME] +# ANSIBLE_START = [aC.WAIT_READY, aC.UPDATE, aC.MV_ANSIBLE_CONFIG, aC.EXECUTE] # another UPDATE seems to not necessary. 
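# Every entry handed to execute_ssh below is a (shell_command, description) tuple;
# a hypothetical additional step would look like:
#     ("sudo systemctl restart munge", "Restart munge.")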
+ANSIBLE_START = [aC.WAIT_READY, aC.MV_ANSIBLE_CONFIG, aC.EXECUTE] +VPN_SETUP = ["echo Example"] +LOG = logging.getLogger("bibigrid") + + +def get_ac_command(master_provider, name): + """ + Get command to write application credentials to remote ( + @param master_provider: provider that holds the master + @param name: how the application credential shall be called + @return: command to execute on remote to create application credential + """ + master_cloud_specification = master_provider.cloud_specification + auth = master_cloud_specification["auth"] + ac_clouds_yaml = {"clouds": {"master": None}} + if auth.get("application_credential_id") and auth.get("application_credential_secret"): + wanted_keys = ["auth", "region_name", "interface", "identity_api_version", "auth_type"] + ac_cloud_specification = {k: master_cloud_specification[k] for k in wanted_keys if k in + master_cloud_specification} + else: + wanted_keys = ["region_name", "interface", "identity_api_version"] + ac = master_provider.create_application_credential(name=name) # pylint: disable=invalid-name + ac_dict = {"application_credential_id": ac["id"], "application_credential_secret": ac["secret"], + "auth_type": "v3applicationcredential", "auth_url": auth["auth_url"]} + ac_cloud_specification = {k: master_cloud_specification[k] for k in wanted_keys if k in + master_cloud_specification} + ac_cloud_specification.update(ac_dict) + ac_clouds_yaml["clouds"]["master"] = ac_cloud_specification + return (f"echo '{yaml.safe_dump(ac_clouds_yaml)}' | sudo install -D /dev/stdin /etc/openstack/clouds.yaml", + "Copy application credentials.") + + +def get_add_ssh_public_key_commands(ssh_public_key_files): + """ + Builds and returns the necessary commands to add given public keys to remote for additional access. + :param ssh_public_key_files: public keys to add + :return: list of public key add commands + """ + commands = [] + if ssh_public_key_files: + for ssh_public_key_file in ssh_public_key_files: + with open(ssh_public_key_file, mode="r", encoding="UTF-8") as ssh_public_key: + commands.append((f"echo {ssh_public_key.readline().strip()} >> .ssh/authorized_keys", + f"Add SSH Key {ssh_public_key_file}.")) + return commands + + +def copy_to_server(sftp, localpath, remotepath): + """ + Recursively copies files and folders to server. + If a folder is given as localpath, the structure within will be kept. + :param sftp: sftp connection + :param localpath: file or folder locally + :param remotepath: file or folder locally + :return: + """ + LOG.debug("Copy %s to %s...", localpath, remotepath) + if os.path.isfile(localpath): + sftp.put(localpath, remotepath) + else: + try: + sftp.mkdir(remotepath) + except OSError: + pass + for filename in os.listdir(localpath): + copy_to_server(sftp, localpath + "/" + filename, remotepath + "/" + filename) + + +def is_active(client, floating_ip_address, private_key, username, timeout=5): + """ + Checks if connection is possible and therefore if server is active. 
+ Raises paramiko.ssh_exception.NoValidConnectionsError if timeout is reached + :param client: created client + :param floating_ip_address: ip to connect to + :param private_key: SSH-private_key + :param username: SSH-username + :param timeout: how long to wait between ping + (waiting grows quadratically till 2**timeout before accepting failure) + """ + attempts = 0 + establishing_connection = True + while establishing_connection: + try: + client.connect(hostname=floating_ip_address, username=username, pkey=private_key, timeout=5, auth_timeout=5) + establishing_connection = False + except paramiko.ssh_exception.NoValidConnectionsError as exc: + LOG.info(f"Attempting to connect to {floating_ip_address}... This might take a while", ) + if attempts < timeout: + time.sleep(2 ** attempts) + attempts += 1 + else: + LOG.error(f"Attempt to connect to {floating_ip_address} failed.") + raise ConnectionException(exc) from exc + except socket.timeout as exc: + LOG.warning("Socket timeout exception occurred. Try again ...") + if attempts < timeout: + attempts += 1 + else: + LOG.error(f"Attempt to connect to {floating_ip_address} failed, due to a socket timeout.") + raise ConnectionException(exc) from exc + except TimeoutError as exc: # pylint: disable=duplicate-except + LOG.error("The attempt to connect to %s failed. Possible known reasons:" + "\n\t-Your network's security group doesn't allow SSH.", floating_ip_address) + raise ConnectionException(exc) from exc + + +def line_buffered(f): + """ + https://stackoverflow.com/questions/25260088/paramiko-with-continuous-stdout + temporary hangs? + :param f: + :return: + """ + line_buf = b"" + while not f.channel.exit_status_ready(): + + line_buf += f.read(1024) + if line_buf.endswith(b'\n'): + yield line_buf + line_buf = b'' + + +def execute_ssh_cml_commands(client, commands): + """ + Executes commands and logs exit_status accordingly. + :param client: Client with connection to remote + :param commands: Commands to execute on remote + """ + for command in commands: + ssh_stdin, ssh_stdout, ssh_stderr = client.exec_command(command[0]) # pylint: disable=unused-variable + ssh_stdout.channel.set_combine_stderr(True) + LOG.info(f"REMOTE: {command[1]}") + + while True: + line = ssh_stdout.readline() + if len(line) == 0: + break + if "[BIBIGRID]" in line: + LOG.info(f"REMOTE: {line.strip()}") + else: + LOG.debug(f"REMOTE: {line.strip()}") + + # get exit status + exit_status = ssh_stdout.channel.recv_exit_status() + # close handler + ssh_stdout.close() + + if exit_status: + msg = f"{command[1]} ... Exit status: {exit_status}" + LOG.warning(msg) + raise ExecutionException(msg) + + +def ansible_preparation(floating_ip, private_key, username, commands=None, filepaths=None): + """ + Installs python and pip. Then installs ansible over pip. + Copies private key to instance so cluster-nodes are reachable and sets permission as necessary. + Copies additional files and executes additional commands if given. + The playbook is copied later, because it needs all servers setup and is not time intensive. 
+
+
+def ansible_preparation(floating_ip, private_key, username, commands=None, filepaths=None):
+    """
+    Installs python and pip. Then installs ansible over pip.
+    Copies private key to instance so cluster-nodes are reachable and sets permission as necessary.
+    Copies additional files and executes additional commands if given.
+    The playbook is copied later, because it needs all servers setup and is not time intensive.
+    See: create.update_playbooks
+    :param floating_ip: public ip of server to ansible-prepare
+    :param private_key: generated private key of all cluster servers
+    :param username: username of all servers
+    :param commands: additional commands to execute
+    :param filepaths: additional files to copy: (localpath, remotepath)
+    """
+    if filepaths is None:
+        filepaths = []
+    if commands is None:
+        commands = []
+    LOG.info("Ansible preparation...")
+    commands = ANSIBLE_SETUP + commands
+    filepaths.append((private_key, PRIVATE_KEY_FILE))
+    execute_ssh(floating_ip, private_key, username, commands, filepaths)
+
+
+def execute_ssh(floating_ip, private_key, username, commands=None, filepaths=None):
+    """
+    Executes commands on remote and copies files given in filepaths
+    :param floating_ip: public ip of remote
+    :param private_key: key of remote
+    :param username: username of remote
+    :param commands: commands
+    :param filepaths: filepaths (localpath, remotepath)
+    """
+    if commands is None:
+        commands = []
+    paramiko_key = paramiko.ECDSAKey.from_private_key_file(private_key)
+    with paramiko.SSHClient() as client:
+        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        try:
+            is_active(client=client,
+                      floating_ip_address=floating_ip,
+                      username=username,
+                      private_key=paramiko_key)
+        except ConnectionException as exc:
+            LOG.error(f"Couldn't connect to floating ip {floating_ip} using private key {private_key}.")
+            raise exc
+        else:
+            if filepaths:
+                sftp = client.open_sftp()
+                for localpath, remotepath in filepaths:
+                    copy_to_server(sftp=sftp, localpath=localpath, remotepath=remotepath)
+                LOG.debug("SFTP: Files %s copied.", filepaths)
+            if commands:
+                execute_ssh_cml_commands(client, commands)
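Taken together, a hedged usage sketch of this module; the IP address, key path and username are placeholder values, not taken from the source:

```python
# Hypothetical call: prepare a freshly started master for Ansible and add an
# extra public key for a colleague. All concrete values are made up.
extra_commands = get_add_ssh_public_key_commands(["/home/user/.ssh/colleague.pub"])
ansible_preparation(
    floating_ip="203.0.113.10",
    private_key="/home/user/.config/bibigrid/keys/tempKey",
    username="ubuntu",
    commands=extra_commands,
)
```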
diff --git a/bibigrid2/core/utility/id_generation.py b/bibigrid2/core/utility/id_generation.py
new file mode 100644
index 000000000..24d6718e5
--- /dev/null
+++ b/bibigrid2/core/utility/id_generation.py
@@ -0,0 +1,59 @@
+"""
+Generates ids and munge keys
+"""
+
+import shortuuid
+
+from bibigrid2.core.actions import create
+
+MAX_ID_LENGTH = 15
+CLUSTER_UUID_ALPHABET = '0123456789abcdefghijkmnopqrstuvwxyz'
+
+
+def generate_cluster_id():
+    """
+    Generates a random shortUUID with length MAX_ID_LENGTH
+    :return: the generated cluster id
+    """
+    uuid = shortuuid.ShortUUID()
+    uuid.set_alphabet(CLUSTER_UUID_ALPHABET)
+    return uuid.random(MAX_ID_LENGTH)
+
+
+def generate_safe_cluster_id(providers):
+    """
+    Generates a cluster_id and checks if cluster_id is not in use. When a unique id is found it is returned
+    :param providers: providers to check whether they use said cluster_id
+    :return: cluster_id
+    """
+    id_is_unique = False
+    cluster_id = None
+    while not id_is_unique:
+        cluster_id = generate_cluster_id()
+        id_is_unique = is_unique_cluster_id(cluster_id, providers)
+    return cluster_id
+
+
+def is_unique_cluster_id(cluster_id, providers):
+    """
+    Checks if cluster_id is not in use on any provider
+    :param cluster_id: generated cluster_id
+    :param providers: providers to check
+    :return: True if cluster_id is unique. False else.
+    """
+    # the identifiers only depend on cluster_id, so compute them once instead of per server
+    master = create.MASTER_IDENTIFIER(cluster_id=cluster_id)
+    vpnwkr = create.VPN_WORKER_IDENTIFIER(cluster_id=cluster_id)
+    worker = create.WORKER_IDENTIFIER(cluster_id=cluster_id)
+    for provider in providers:
+        for server in provider.list_servers():
+            if server["name"] in [master, vpnwkr, worker]:
+                return False
+    return True
+
+
+def generate_munge_key():
+    """
+    Generates a munge key (UUID) for slurm
+    :return: the generated munge key
+    """
+    return shortuuid.ShortUUID().random(32)
diff --git a/bibigrid2/core/utility/paths/ansible_resources_path.py b/bibigrid2/core/utility/paths/ansible_resources_path.py
new file mode 100644
index 000000000..d48336568
--- /dev/null
+++ b/bibigrid2/core/utility/paths/ansible_resources_path.py
@@ -0,0 +1,54 @@
+"""
+Paths that are used by Ansible. Especially playbook, vars files and Co.
+"""
+
+import os
+
+import bibigrid2.core.utility.paths.basic_path as bP
+
+# UNIVERSAL
+ANSIBLE_HOSTS: str = "ansible_hosts"
+COMMON_YML: str = "common.yml"
+SITE_YML: str = "site.yml"
+REQUIREMENTS_YML: str = "requirements.yml"
+UPLOAD_PATH: str = "/tmp/roles/"
+VARS_PATH: str = "vars/"
+ROLES_PATH: str = "roles/"
+LOGIN_YML: str = VARS_PATH + "login.yml"
+INSTANCES_YML: str = VARS_PATH + "instances.yml"
+CONFIG_YML: str = VARS_PATH + "common_configuration.yml"
+WORKER_SPECIFICATION_YML: str = VARS_PATH + "worker_specification.yml"
+ADDITIONAL_ROLES_PATH: str = ROLES_PATH + "additional/"
+DEFAULT_IP_FILE = VARS_PATH + "{{ ansible_default_ipv4.address }}.yml"
+# ANSIBLE_CFG = "ansible.cfg"
+
+# LOCAL
+# ANSIBLE_CFG_PATH = os.path.join(bP.RESOURCES_PATH, ANSIBLE_CFG)
+PLAYBOOK = "playbook/"
+PLAYBOOK_PATH: str = os.path.join(bP.RESOURCES_PATH, PLAYBOOK)
+HOSTS_CONFIG_FILE: str = PLAYBOOK_PATH + ANSIBLE_HOSTS
+CONFIG_ROOT_PATH: str = PLAYBOOK_PATH + VARS_PATH
+ROLES_ROOT_PATH: str = PLAYBOOK_PATH + ROLES_PATH
+COMMONS_LOGIN_FILE: str = PLAYBOOK_PATH + LOGIN_YML
+COMMONS_INSTANCES_FILE: str = PLAYBOOK_PATH + INSTANCES_YML
+COMMONS_CONFIG_FILE: str = PLAYBOOK_PATH + CONFIG_YML
+SITE_CONFIG_FILE: str = PLAYBOOK_PATH + SITE_YML
+WORKER_SPECIFICATION_FILE: str = PLAYBOOK_PATH + WORKER_SPECIFICATION_YML
+# ADDITIONAL_ROLES_PATH already contains ROLES_PATH, so it is joined onto the playbook path
+ADDITIONAL_ROLES_ROOT_PATH: str = PLAYBOOK_PATH + ADDITIONAL_ROLES_PATH
+VARS_FOLDER = os.path.join(PLAYBOOK_PATH, VARS_PATH)
+
+# REMOTE
+ROOT_PATH_REMOTE = "~"
+PLAYBOOK_PATH_REMOTE: str = os.path.join("/opt/", PLAYBOOK)
+# PLAYBOOK_PATH_REMOTE: str = os.path.join(ROOT_PATH_REMOTE, PLAYBOOK)
+# PLAYBOOK_PATH_REMOTE_SLURM: str = os.path.join("/opt/slurm/", PLAYBOOK)
+HOSTS_CONFIG_FILE_REMOTE: str = PLAYBOOK_PATH_REMOTE + ANSIBLE_HOSTS
+CONFIG_ROOT_PATH_REMOTE: str = PLAYBOOK_PATH_REMOTE + VARS_PATH
+ROLES_ROOT_PATH_REMOTE: str = PLAYBOOK_PATH_REMOTE + ROLES_PATH
+COMMONS_LOGIN_FILE_REMOTE: str = PLAYBOOK_PATH_REMOTE + LOGIN_YML
+COMMONS_INSTANCES_FILE_REMOTE: str = PLAYBOOK_PATH_REMOTE + INSTANCES_YML
+COMMONS_CONFIG_FILE_REMOTE: str = PLAYBOOK_PATH_REMOTE + CONFIG_YML
+SITE_CONFIG_FILE_REMOTE: str = PLAYBOOK_PATH_REMOTE + SITE_YML
+WORKER_SPECIFICATION_FILE_REMOTE: str = PLAYBOOK_PATH_REMOTE + WORKER_SPECIFICATION_YML
+ADDITIONAL_ROLES_ROOT_PATH_REMOTE: str = PLAYBOOK_PATH_REMOTE + ADDITIONAL_ROLES_PATH
+REQUIREMENTS_CONFIG_FILE_REMOTE: str = ADDITIONAL_ROLES_ROOT_PATH_REMOTE + REQUIREMENTS_YML
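As a quick sanity check of how these constants compose, a hedged sketch; the absolute prefix depends on where the repository is checked out, and `/home/user/bibigrid2` is an assumption, not part of the source:

```python
import os

# Mirrors the composition above with assumed values.
RESOURCES_PATH = "/home/user/bibigrid2/resources"  # what bP.RESOURCES_PATH might resolve to
PLAYBOOK_PATH = os.path.join(RESOURCES_PATH, "playbook/")
print(PLAYBOOK_PATH + "ansible_hosts")                        # local inventory file
print(os.path.join("/opt/", "playbook/") + "vars/login.yml")  # remote login vars file
```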
diff --git a/bibigrid2/core/utility/paths/basic_path.py b/bibigrid2/core/utility/paths/basic_path.py
new file mode 100644
index 000000000..742fc0ed4
--- /dev/null
+++ b/bibigrid2/core/utility/paths/basic_path.py
@@ -0,0 +1,11 @@
+"""
+Module containing the most basic paths. Must stay at the same place relative to root.
+"""
+
+import os
+from pathlib import Path
+
+RESOURCES = "resources"
+# if the relative path from this file to resources is altered, the next line must be adapted or files will not be found
+ROOT_PATH = Path(__file__).absolute().parents[4]
+RESOURCES_PATH = os.path.join(ROOT_PATH, RESOURCES)
diff --git a/bibigrid2/core/utility/paths/bin_path.py b/bibigrid2/core/utility/paths/bin_path.py
new file mode 100644
index 000000000..a99058dcb
--- /dev/null
+++ b/bibigrid2/core/utility/paths/bin_path.py
@@ -0,0 +1,13 @@
+"""
+Paths that are used by bin script copying
+"""
+
+
+import os
+
+import bibigrid2.core.utility.paths.basic_path as bP
+
+BIN: str = "bin/"
+BIN_PATH: str = os.path.join(bP.RESOURCES_PATH, BIN)
+
+BIN_PATH_REMOTE: str = BIN
diff --git a/bibigrid2/core/utility/validate_configuration.py b/bibigrid2/core/utility/validate_configuration.py
new file mode 100644
index 000000000..93662d1fe
--- /dev/null
+++ b/bibigrid2/core/utility/validate_configuration.py
@@ -0,0 +1,439 @@
+"""
+Validates configuration and cloud_specification
+"""
+
+import logging
+import os
+
+from bibigrid2.core.utility.handler import configuration_handler
+
+ACCEPTED_KEY_IDENTIFIERS = {"RSA": 4096, "ECDSA": 521, "ED25519": 256}
+LOG = logging.getLogger("bibigrid")
+
+
+def evaluate(check_name, check_result):
+    """
+    Logs check_result as a warning if the check failed and as info if it succeeded.
+    :param check_name: name of the check (for logging)
+    :param check_result: result of the check
+    :return: check_result
+    """
+    if check_result:
+        LOG.info("Checking %s: Success", check_name)
+    else:
+        LOG.warning("Checking %s: Failure", check_name)
+    return check_result
+
+
+def check_provider_data(provider_data_list, provider_count):
+    """
+    Checks if all provider data entries are unique and if enough providers are given
+    #ToDo for multiple cloud locations additional provider data needs to be added
+    :param provider_data_list: list of all provider data
+    :param provider_count: number of providers
+    :return: True if enough providers are given and all providers are unique
+    """
+    LOG.info("Checking provider names")
+    success = True
+    duplicates = []
+    seen = []
+    for elem in provider_data_list:
+        if elem in seen:
+            duplicates.append(elem)
+        else:
+            seen.append(elem)
+    if duplicates:
+        LOG.warning("Duplicate provider(s) %s. For each provider you can only create one configuration. "
+                    "Please check your configurations.", duplicates)
+        success = False
+    else:
+        LOG.info("All providers are unique.")
+    if not len(provider_data_list) == provider_count:
+        LOG.warning("Not enough providers given. %s/%s", len(provider_data_list), provider_count)
+        success = False
+    else:
+        LOG.info("Enough providers given. %s/%s", len(provider_data_list), provider_count)
+    return success
+
+
+def evaluate_ssh_public_key_file_security(ssh_public_key_file):
+    """
+    Checks if key encryption is sufficiently strong. Uses empiric values and therefore will fail if key type is unknown
+    @param ssh_public_key_file: public key file to check
+    @return: True if the key type is known and the key is long enough. False else.
+    """
+    success = True
+    # length, key, comment list, identifier_dirty
+    key_info = os.popen(f'ssh-keygen -l -f {ssh_public_key_file}').read().split()
+    length = key_info[0]
+    identifier_clean = key_info[-1].strip("()\n")
+    minimum_size = ACCEPTED_KEY_IDENTIFIERS.get(identifier_clean)
+
+    if not minimum_size:
+        LOG.warning("sshPublicKey '%s' is of type %s, whose secure length is unknown to bibigrid2.\n"
+                    "Known encryptions are (with minimum size): %s",
+                    ssh_public_key_file, identifier_clean, ACCEPTED_KEY_IDENTIFIERS)
+        success = False
+    else:
+        LOG.info("sshPublicKey '%s' is a known encryption.", ssh_public_key_file)
+        if minimum_size > int(length):
+            LOG.warning("sshPublicKey '%s' is not long enough! %s should be >= %s, but is %s",
+                        ssh_public_key_file, identifier_clean, minimum_size, int(length))
+            success = False
+        else:
+            LOG.info("sshPublicKey '%s' is long enough (%s/%s)!", ssh_public_key_file, int(length), minimum_size)
+    return success
+
+
+def has_enough(maximum, needed, keeper, thing):
+    """
+    Method logs and compares whether enough free things are available
+    :param maximum: maximum (available) resources of thing
+    :param needed: minimum needed to run
+    :param keeper: description of the object having the thing that is checked (for logging)
+    :param thing: description of what resource is checked (RAM for example) (for logging)
+    :return: True if maximum is larger or equal to the needed
+    """
+    success = True
+    if maximum >= needed:
+        LOG.info("%s has enough %s: %s/%s", keeper, thing, needed, maximum)
+    elif maximum < 0:
+        LOG.warning("%s returns no valid value for %s: %s/%s -- Ignored.", keeper, thing, needed, maximum)
+    else:
+        LOG.warning("%s has not enough %s: %s/%s", keeper, thing, needed, maximum)
+        success = False
+    return success
+
+
+def check_clouds_yaml_security():
+    """
+    Checks security of all clouds in clouds.yaml i.e. whether sensitive information is stored in clouds-public.yaml
+    @return: True if no sensitive information is stored in clouds-public.yaml. False else.
+    """
+    success = True
+    LOG.info("Checking validity of entire clouds.yaml and clouds-public.yaml")
+    clouds, clouds_public = configuration_handler.get_clouds_files()  # pylint: disable=unused-variable
+    if clouds_public:
+        for cloud in clouds_public:
+            if clouds_public[cloud].get("profile"):
+                LOG.warning(f"{cloud}: Profiles should be placed in clouds.yaml not clouds-public.yaml! "
+                            f"Key ignored.")
+                success = False
+            if clouds_public[cloud].get("auth"):
+                for key in ["password", "username", "application_credential_id", "application_credential_secret"]:
+                    if clouds_public[cloud]["auth"].get(key):
+                        LOG.warning(f"{cloud}: {key} shouldn't be shared. Move {key} to clouds.yaml!")
+                        success = False
+    return success
+
+
+def check_cloud_yaml(cloud_specification):
+    """
+    Check if cloud_specification is valid i.e. contains the necessary authentication data.
+    @param cloud_specification: dict to check whether it is a valid cloud_specification
+    @return: True if cloud_specification is valid. False else.
+    """
+    success = True
+    if cloud_specification:
+        keys = cloud_specification.keys()
+        auth = cloud_specification.get("auth")
+        if auth:
+            auth_keys = auth.keys()
+            if not ("password" in auth_keys and "username" in auth_keys) \
+                    and not ("auth_type" in keys and "application_credential_id" in auth_keys and
+                             "application_credential_secret" in auth_keys):
+                LOG.warning("Insufficient authentication information. Needs either password and username or, "
+                            "if using application credentials: "
+                            "auth_type, application_credential_id and application_credential_secret.")
+                success = False
+            if "auth_url" not in auth_keys:
+                LOG.warning("Authentication URL auth_url is missing.")
+                success = False
+        else:
+            LOG.warning("Missing all auth information!")
+            success = False
+        if "region_name" not in keys:
+            LOG.warning("region_name is missing.")
+            success = False
+    else:
+        LOG.warning("Missing all cloud_specification information!")
+    return success
+
+
+class ValidateConfiguration:
+    """
+    This class contains necessary algorithms to validate configuration files
+    """
+
+    def __init__(self, configurations, providers):
+        """
+        Sets configurations, providers and prepares the required_resources_dict.
+        While executing the checks, needed resources are counted.
+        In the end check_quotas will decide whether enough resources are available.
+        :param configurations: List of configurations (dicts)
+        :param providers: List of providers
+        """
+        self.configurations = configurations
+        self.providers = providers
+        self.required_resources_dict = {'total_cores': 0, 'floating_ips': 0, 'instances': 0, 'total_ram': 0,
+                                        'Volumes': 0, 'VolumeGigabytes': 0, 'Snapshots': 0, 'Backups': 0,
+                                        'BackupGigabytes': 0}
+
+    def validate(self):
+        """
+        Validation of the configuration file with the selected cloud provider.
+        The validation steps are as follows:
+        Check connection can be established
+        Check provider uniqueness
+        Check servergroup
+        Check instances are available
+        Check images and volumes are available
+        Check network and subnet are available
+        Check quotas
+        :return: True if all checks succeeded. False else.
+        """
+        success = bool(self.providers)
+        LOG.info("Validating config file...")
+        success = check_provider_data(
+            configuration_handler.get_list_by_key(self.configurations, "infrastructure"),
+            len(self.configurations)) and success
+        if not success:
+            LOG.warning("Providers not set correctly in configuration file. Check log for more detail.")
+            return success
+        checks = [("master/vpn", self.check_master_vpn_worker), ("servergroup", self.check_server_group),
+                  ("instances", self.check_instances), ("volumes", self.check_volumes),
+                  ("network", self.check_network), ("quotas", self.check_quotas),
+                  ("sshPublicKeyFiles", self.check_ssh_public_key_files), ("cloudYamls", self.check_clouds_yamls)]
+        if success:
+            for check_name, check_function in checks:
+                success = evaluate(check_name, check_function()) and success
+        return success
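One detail worth noting in `validate`: the checks are chained as `success = evaluate(...) and success` rather than `success = success and evaluate(...)`. Because `and` short-circuits left to right, the chosen order guarantees every check still runs (and logs its result) even after an earlier failure. A minimal sketch of that pattern:

```python
# Minimal sketch of the non-short-circuiting check chain used in validate().
def failing_check():
    return False

def passing_check():
    return True

success = True
for name, check in [("first", failing_check), ("second", passing_check)]:
    # check() is evaluated before the "and", so "second" still runs
    # even though "first" already failed.
    success = check() and success
print(success)  # False, but both checks executed
```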
+
+    def check_master_vpn_worker(self):
+        """
+        Checks if first configuration has a masterInstance defined
+        and every other configuration has a vpnInstance defined.
+        If either is missing, that provider would be unreachable within the cluster,
+        because it would have no floating IP.
+        :return: True if first configuration has a masterInstance and every other a vpnInstance
+        """
+        LOG.info("Checking master/vpn")
+        success = True
+        if not self.configurations[0].get("masterInstance") or self.configurations[0].get("vpnInstance"):
+            success = False
+        for configuration in self.configurations[1:]:
+            if not configuration.get("vpnInstance") or configuration.get("masterInstance"):
+                success = False
+        return success
+
+    def check_provider_connections(self):
+        """
+        Checks if all providers are reachable
+        :return: True if all providers are reachable
+        """
+        success = True
+        providers_unconnectable = []
+        for provider in self.providers:
+            if not provider.conn:
+                providers_unconnectable.append(provider.name)
+        if providers_unconnectable:
+            LOG.warning("API connection to %s not successful. Please check your configuration.",
+                        providers_unconnectable)
+            success = False
+        return success
+
+    def check_instances(self):
+        """
+        Checks if all instances exist and image and instance-type are compatible
+        :return: True if image and instance-type (flavor) exist for all instances and are compatible
+        """
+        LOG.info("Checking instance images and type")
+        success = True
+        configuration = None
+        try:
+            for configuration, provider in zip(self.configurations, self.providers):
+                self.required_resources_dict["floating_ips"] += 1
+                if configuration.get("masterInstance"):
+                    success = self.check_instance("masterInstance", configuration["masterInstance"], provider) \
+                              and success
+                else:
+                    success = self.check_instance("vpnInstance", configuration["vpnInstance"], provider) and success
+                for worker in configuration.get("workerInstances", []):
+                    success = self.check_instance("workerInstance", worker, provider) and success
+        except KeyError as exc:
+            LOG.warning("Not found %s, but required in configuration %s.", str(exc), configuration)
+            success = False
+        return success
+
+    def check_instance(self, instance_name, instance, provider):
+        """
+        Checks if instance image exists and whether it is compatible with the defined instance/server type (flavor).
+        :param instance_name: containing name for logging purposes
+        :param instance: dict containing image, type and count (count is not used)
+        :param provider: provider
+        :return: True if type and image are compatible and existing
+        """
+        self.required_resources_dict["instances"] += instance.get("count") or 1
+        instance_image_id_or_name = instance["image"]
+        instance_image = provider.get_image_by_id_or_name(image_id_or_name=instance_image_id_or_name)
+        if not instance_image:
+            LOG.warning("Instance %s image: %s not found", instance_name, instance_image_id_or_name)
+            print("Available active images:")
+            print("\n".join(provider.get_active_images()))
+            return False
+        if instance_image["status"] != "active":
+            LOG.warning("Instance %s image: %s not active", instance_name, instance_image_id_or_name)
+            print("Available active images:")
+            print("\n".join(provider.get_active_images()))
+            return False
+        LOG.info("Instance %s image: %s found", instance_name, instance_image_id_or_name)
+        instance_type = instance["type"]
+        return self.check_instance_type_image_combination(instance_type, instance_image, provider)
+
+    def check_instance_type_image_combination(self, instance_type, instance_image, provider):
+        """
+        Checks if enough ram and disk space for instance_image are provided by instance_type on provider.
+        :param instance_type: instance type (flavor) to check
+        :param instance_image: image (dict) whose requirements are checked
+        :param provider: provider to ask for the flavor
+        :return: True if enough resources are available
+        """
+        success = True
+        # check
+        flavor = provider.get_flavor(instance_type)
+        if not flavor:
+            LOG.warning("Flavor %s does not exist.", instance_type)
+            print("Available flavors:")
+            print("\n".join(provider.get_active_flavors()))
+            return False
+        type_max_disk_space = flavor["disk"]
+        type_max_ram = flavor["ram"]
+        # instance_image is already the fetched image dict, so read the requirements directly
+        image_min_disk_space = instance_image["min_disk"]
+        image_min_ram = instance_image["min_ram"]
+        for maximum, needed, thing in [(type_max_disk_space, image_min_disk_space, "disk space"),
+                                       (type_max_ram, image_min_ram, "ram")]:
+            success = has_enough(maximum, needed, f"Type {instance_type}", thing) and success
+        # prepare check quotas
+        self.required_resources_dict["total_ram"] += type_max_ram
+        self.required_resources_dict["total_cores"] += flavor["vcpus"]
+        return success
+
+    def check_volumes(self):
+        """
+        Checking if volume or snapshot exists for all volumes
+        :return: True if all snapshots and volumes are found. Else False.
+        """
+        LOG.info("Checking volumes...")
+        success = True
+        for configuration, provider in zip(self.configurations, self.providers):
+            volume_identifiers = configuration.get("masterMounts")
+            if volume_identifiers:
+                # check individually if volumes exist
+                for volume_identifier in volume_identifiers:
+                    if ":" in volume_identifier:
+                        volume_name_or_id = volume_identifier[:volume_identifier.index(":")]
+                    else:
+                        volume_name_or_id = volume_identifier
+                    volume = provider.get_volume_by_id_or_name(volume_name_or_id)
+                    if not volume:
+                        snapshot = provider.get_volume_snapshot_by_id_or_name(volume_name_or_id)
+                        if not snapshot:
+                            LOG.warning("Neither Volume nor Snapshot '%s' found", volume_name_or_id)
+                            success = False
+                        else:
+                            LOG.info("Snapshot '%s' found", volume_name_or_id)
+                            self.required_resources_dict["Volumes"] += 1
+                            self.required_resources_dict["VolumeGigabytes"] += snapshot["size"]
+                    else:
+                        LOG.info("Volume '%s' found", volume_name_or_id)
+        return success
+
+    def check_network(self):
+        """
+        Checks if the network (or subnet) is accessible
+        :return: True if any given network or subnet is accessible by the provider
+        """
+        LOG.info("Checking network...")
+        success = True
+        network_name_or_id = None
+        subnet_name_or_id = None
+        for configuration, provider in zip(self.configurations, self.providers):
+            network_name_or_id = configuration.get("network")
+            if network_name_or_id:
+                network = provider.get_network_by_id_or_name(network_name_or_id)
+                if not network:
+                    LOG.warning("Network '%s' not found", network_name_or_id)
+                    success = False
+                else:
+                    LOG.info("Network '%s' found", network_name_or_id)
+            subnet_name_or_id = configuration.get("subnet")
+            if subnet_name_or_id:
+                subnet = provider.get_subnet_by_id_or_name(subnet_name_or_id)
+                if not subnet:
+                    LOG.warning("Subnet '%s' not found", subnet_name_or_id)
+                    success = False
+                else:
+                    LOG.info("Subnet '%s' found", subnet_name_or_id)
+        return bool(success and (network_name_or_id or subnet_name_or_id))
LOG.info("ServerGroup '%s' found", server_group_name_or_id) + return success + + def check_quotas(self): + """ + Gets remaining resources from the provider and compares them to the needed resources. + Needed resources are set during the other checks. + Covered resources are: cores, floating_ips, instances, ram, volumes, volumeGigabytes, snapshots, backups and + backupGigabytes. If a concrete provider implementation is unable to return remaining resources a maximum number + is returned to make the check not fail because of the missing API implementation. + :return: True if check succeeded. Else false. + """ + LOG.info("Checking quotas") + success = True + LOG.info("required/available") + for provider in self.providers: + free_resources_dict = provider.get_free_resources() + for key, value in self.required_resources_dict.items(): + success = has_enough(free_resources_dict[key], + value, + f"Project {self.providers[0].cloud_specification['identifier']}", + key) and success + return success + + def check_ssh_public_key_files(self): + """ + Checks if keys listed in the config exist + :return: True if check succeeded. Else false. + """ + success = True + for configuration in self.configurations: + for ssh_public_key_file in configuration.get("sshPublicKeyFiles") or []: + if not os.path.isfile(ssh_public_key_file): + LOG.warning("sshPublicKeyFile '%s' not found", ssh_public_key_file) + success = False + else: + LOG.info("sshPublicKeyFile '%s' found", ssh_public_key_file) + success = evaluate_ssh_public_key_file_security(ssh_public_key_file) and success + return success + + def check_clouds_yamls(self): + """ + Checks if every cloud in clouds_yaml is valid + @return: True if all clouds are valid + """ + LOG.info("Checking cloud specifications...") + success = True + cloud_specifications = configuration_handler.get_cloud_specifications(self.configurations) + for index, cloud_specification in enumerate(cloud_specifications): + if not check_cloud_yaml(cloud_specification): + success = False + LOG.warning("Cloud specification %s is faulty. BiBiGrid understood %s.", index, cloud_specification) + success = check_clouds_yaml_security() and success + return success diff --git a/bibigrid2/core/utility/yaml_dumper.py b/bibigrid2/core/utility/yaml_dumper.py new file mode 100644 index 000000000..b301f045a --- /dev/null +++ b/bibigrid2/core/utility/yaml_dumper.py @@ -0,0 +1,15 @@ +""" +Alternative version of yaml.SafeDumper that ignores aliases. +""" + +import yaml + + +class NoAliasSafeDumper(yaml.SafeDumper): + """ + Only difference to the regular yaml.SafeDumper class is that ignore_aliases is true + and therefore aliases are ignored. + """ + + def ignore_aliases(self, data): + return True diff --git a/bibigrid2/models/exceptions.py b/bibigrid2/models/exceptions.py new file mode 100644 index 000000000..9691e4728 --- /dev/null +++ b/bibigrid2/models/exceptions.py @@ -0,0 +1,9 @@ +""" module for additional exceptions """ + + +class ConnectionException(Exception): + """ Connection exception. """ + + +class ExecutionException(Exception): + """ Execution exception. """ diff --git a/bibigrid2/models/return_threading.py b/bibigrid2/models/return_threading.py new file mode 100644 index 000000000..a7c7a1b43 --- /dev/null +++ b/bibigrid2/models/return_threading.py @@ -0,0 +1,31 @@ +""" +Expands threading. 
+""" + +import threading + + +class ReturnThread(threading.Thread): + """ + Extends the Thread functionality: + - Return value of called function is returned by join() + - An exception occurred within the called function is raised by join() + """ + + def __init__(self, group=None, target=None, name=None, args=(), kwargs={}): # pylint: disable=dangerous-default-value + threading.Thread.__init__(self, group, target, name, args, kwargs) + self._return = None + self._exc = None + + def run(self): + if self._target is not None: + try: + self._return = self._target(*self._args, **self._kwargs) + except Exception as exc: # pylint: disable=broad-except + self._exc = exc + + def join(self, *args): + threading.Thread.join(self, *args) + if self._exc: + raise self._exc + return self._return diff --git a/bibigrid2/openstack/openstack_provider.py b/bibigrid2/openstack/openstack_provider.py new file mode 100644 index 000000000..c41fce210 --- /dev/null +++ b/bibigrid2/openstack/openstack_provider.py @@ -0,0 +1,260 @@ +""" +Concrete implementation of provider.py for openstack +""" + +import logging + +import keystoneclient +import openstack +from cinderclient import client +from keystoneauth1 import session +from keystoneauth1.exceptions.http import NotFound +from keystoneauth1.identity import v3 + +from bibigrid2.core import provider +from bibigrid2.core.actions import create +from bibigrid2.core.actions import version +from bibigrid2.models.exceptions import ExecutionException + +LOG = logging.getLogger("bibigrid") + + +class OpenstackProvider(provider.Provider): # pylint: disable=too-many-public-methods + """ + Specific implementation of the Provider class for openstack + """ + NAME = "OpenstackProvider" + + # to be read from clouds.yaml file. + + def __init__(self, cloud_specification): + super().__init__(cloud_specification) + self.conn = self.create_connection() + sess = self.create_session() + self.keystone_client = keystoneclient.client.Client(session=sess, interface='public') + self.cinder = client.Client(3, session=sess) + + def create_session(self, app_name="openstack_scripts", app_version="1.0"): + """ + Creates and returns a session that can be used to create a connection to different openstack services + @param app_name: + @param app_version: + @return: session + """ + # print(v3) + auth = self.cloud_specification["auth"] + if all(key in auth for key in ["auth_url", "application_credential_id", "application_credential_secret"]): + auth_session = v3.ApplicationCredential( + auth_url=auth["auth_url"], + application_credential_id=auth["application_credential_id"], + application_credential_secret=auth["application_credential_secret"] + ) + elif all(key in auth for key in ["auth_url", "username", "password", "project_id", "user_domain_name"]): + auth_session = v3.Password(auth_url=auth["auth_url"], + username=auth["username"], + password=auth["password"], + project_id=auth["project_id"], + user_domain_name=auth["user_domain_name"]) + else: + raise KeyError("Not enough authentication information in clouds.yaml/clouds-public.yaml " + "to create a session. 
diff --git a/bibigrid2/openstack/openstack_provider.py b/bibigrid2/openstack/openstack_provider.py
new file mode 100644
index 000000000..c41fce210
--- /dev/null
+++ b/bibigrid2/openstack/openstack_provider.py
@@ -0,0 +1,260 @@
+"""
+Concrete implementation of provider.py for openstack
+"""
+
+import logging
+
+import keystoneclient
+import openstack
+from cinderclient import client
+from keystoneauth1 import session
+from keystoneauth1.exceptions.http import NotFound
+from keystoneauth1.identity import v3
+
+from bibigrid2.core import provider
+from bibigrid2.core.actions import create
+from bibigrid2.core.actions import version
+from bibigrid2.models.exceptions import ExecutionException
+
+LOG = logging.getLogger("bibigrid")
+
+
+class OpenstackProvider(provider.Provider):  # pylint: disable=too-many-public-methods
+    """
+    Specific implementation of the Provider class for openstack
+    """
+    NAME = "OpenstackProvider"
+
+    # to be read from clouds.yaml file.
+
+    def __init__(self, cloud_specification):
+        super().__init__(cloud_specification)
+        self.conn = self.create_connection()
+        sess = self.create_session()
+        self.keystone_client = keystoneclient.client.Client(session=sess, interface='public')
+        self.cinder = client.Client(3, session=sess)
+
+    def create_session(self, app_name="openstack_scripts", app_version="1.0"):
+        """
+        Creates and returns a session that can be used to create a connection to different openstack services
+        @param app_name: app name passed to the session
+        @param app_version: app version passed to the session
+        @return: session
+        """
+        auth = self.cloud_specification["auth"]
+        if all(key in auth for key in ["auth_url", "application_credential_id", "application_credential_secret"]):
+            auth_session = v3.ApplicationCredential(
+                auth_url=auth["auth_url"],
+                application_credential_id=auth["application_credential_id"],
+                application_credential_secret=auth["application_credential_secret"]
+            )
+        elif all(key in auth for key in ["auth_url", "username", "password", "project_id", "user_domain_name"]):
+            auth_session = v3.Password(auth_url=auth["auth_url"],
+                                       username=auth["username"],
+                                       password=auth["password"],
+                                       project_id=auth["project_id"],
+                                       user_domain_name=auth["user_domain_name"])
+        else:
+            raise KeyError("Not enough authentication information in clouds.yaml/clouds-public.yaml "
+                           "to create a session. Use one of:\n"
+                           "Application Credentials: auth_url, application_credential_id and "
+                           "application_credential_secret\n"
+                           "Password: auth_url, username, password, project_id and user_domain_name")
+        return session.Session(auth=auth_session,
+                               app_name=app_name, app_version=app_version)
+
+    def create_connection(self, app_name="openstack_bibigrid", app_version=version.__version__):
+        auth = self.cloud_specification["auth"]
+        return openstack.connect(
+            load_yaml_config=False,
+            load_envvars=False,
+            auth_url=auth["auth_url"],
+            project_name=auth.get("project_name"),
+            username=auth.get("username"),
+            password=auth.get("password"),
+            region_name=self.cloud_specification["region_name"],
+            user_domain_name=auth.get("user_domain_name"),
+            project_domain_name=auth.get("user_domain_name"),
+            app_name=app_name,
+            app_version=app_version,
+            application_credential_id=auth.get("application_credential_id"),
+            application_credential_secret=auth.get("application_credential_secret"),
+            interface=self.cloud_specification.get("interface"),
+            identity_api_version=self.cloud_specification.get("identity_api_version"),
+            auth_type=self.cloud_specification.get("auth_type")
+        )
+
+    def create_application_credential(self, name=None):
+        return self.keystone_client.application_credentials.create(name=name).to_dict()
+
+    def delete_application_credential_by_id_or_name(self, ac_id_or_name):
+        """
+        Deletes existing application credential by id or name and returns True.
+        If the application credential is not found, it returns False.
+        :param ac_id_or_name: application credential id or name
+        :return: True if deleted else False
+        """
+        try:
+            self.keystone_client.application_credentials.delete(ac_id_or_name)  # id
+            return True
+        except NotFound:
+            try:
+                self.keystone_client.application_credentials.delete(
+                    self.keystone_client.application_credentials.find(name=ac_id_or_name))  # name
+                return True
+            except NotFound:
+                return False
+
+    def get_image_by_id_or_name(self, image_id_or_name):
+        return self.conn.get_image(name_or_id=image_id_or_name)
+
+    def get_flavor(self, instance_type):
+        return self.conn.get_flavor(instance_type)
+
+    def get_volume_snapshot_by_id_or_name(self, snapshot_id_or_name):
+        return self.conn.get_volume_snapshot(name_or_id=snapshot_id_or_name)
+
+    def get_network_by_id_or_name(self, network_id_or_name):
+        return self.conn.get_network(name_or_id=network_id_or_name)
+
+    def get_subnet_by_id_or_name(self, subnet_id_or_name):
+        return self.conn.get_subnet(name_or_id=subnet_id_or_name)
+
+    def list_servers(self):
+        return [elem.toDict() for elem in self.conn.list_servers()]
+
+    def create_server(self, name, flavor, image,
+                      network, key_name=None, wait=True, volumes=None):
+        try:
+            server = self.conn.create_server(name=name, flavor=flavor, image=image,
+                                             network=network, key_name=key_name, volumes=volumes)
+        except openstack.exceptions.BadRequestException as exc:
+            raise ConnectionError() from exc
+        except openstack.exceptions.SDKException as exc:
+            raise ExecutionException() from exc
+        except AttributeError as exc:
+            raise ExecutionException("Unable to create server due to faulty configuration.") from exc
+        if wait:
+            self.conn.wait_for_server(server=server, auto_ip=False, timeout=600)
+            server = self.conn.get_server(server["id"])
+        return server
+
+    def delete_server(self, name_or_id, delete_ips=True):
+        """
+        Deletes the server and, if delete_ips is True, its floating IP as well. The resources are then free again.
+        :param name_or_id: name or id of the server
+        :param delete_ips: whether to delete the server's floating IP as well
+        :return: True if deleted
+        """
+        return self.conn.delete_server(name_or_id=name_or_id, wait=False,
+                                       timeout=180, delete_ips=delete_ips,
+                                       delete_ip_retry=1)
+
+    def delete_keypair(self, key_name):
+        return self.conn.delete_keypair(key_name)
+
+    def get_server_group_by_id_or_name(self, server_group_id_or_name):
+        return self.conn.get_server_group(name_or_id=server_group_id_or_name)
+
+    def close(self):
+        return self.conn.close()
+
+    def create_keypair(self, name, public_key):
+        return self.conn.create_keypair(name=name, public_key=public_key)
+
+    def get_network_id_by_subnet(self, subnet):
+        subnet = self.conn.get_subnet(subnet)
+        return subnet["network_id"] if subnet else subnet
+
+    def get_subnet_ids_by_network(self, network):
+        network = self.conn.get_network(network)
+        return network["subnets"] if network else network
+
+    def get_free_resources(self):
+        """
+        Uses the cinder API to get all relevant volume resources.
+        https://github.com/openstack/python-cinderclient/blob/master/cinderclient/v3/limits.py
+        Uses the nova API to get all relevant compute resources. Floating-IP is not returned correctly by openstack.
+        :return: Dictionary containing the free resources
+        """
+        compute_limits = dict(self.conn.compute.get_limits()["absolute"])
+        # maybe needs limits.get(os.environ["OS_PROJECT_NAME"]) in the future
+        volume_limits_generator = self.cinder.limits.get().absolute
+        volume_limits = {absolut_limit.name: absolut_limit.value for absolut_limit in
+                         volume_limits_generator}
+        # ToDo TotalVolumeGigabytes needs totalVolumeGigabytesUsed, but is not given
+        volume_limits["totalVolumeGigabytesUsed"] = 0
+        free_resources = {}
+        for key in ["total_cores", "floating_ips", "instances", "total_ram"]:
+            free_resources[key] = compute_limits[key] - compute_limits[key + "_used"]
+        for key in ["Volumes", "VolumeGigabytes", "Snapshots", "Backups", "BackupGigabytes"]:
+            free_resources[key] = volume_limits["maxTotal" + key] - volume_limits[
+                "total" + key + "Used"]
+        return free_resources
+
+    def get_volume_by_id_or_name(self, name_or_id):
+        return self.conn.get_volume(name_or_id)
+
+    def create_volume_from_snapshot(self, snapshot_name_or_id):
+        """
+        Uses the cinder API to create a volume from snapshot:
+        https://github.com/openstack/python-cinderclient/blob/master/cinderclient/v3/volumes.py
+        :param snapshot_name_or_id: name or id of snapshot
+        :return: id of created volume
+        """
+        LOG.debug("Trying to create volume from snapshot")
+        snapshot = self.conn.get_volume_snapshot(snapshot_name_or_id)
+        if snapshot:
+            LOG.debug(f"Snapshot {snapshot_name_or_id} found.")
+            if snapshot["status"] == "available":
+                LOG.debug("Snapshot %s is available.", snapshot_name_or_id)
+                size = snapshot["size"]
+                name = create.PREFIX_WITH_SEP + snapshot["name"]
+                description = f"Created from snapshot {snapshot_name_or_id} by BiBiGrid"
+                volume = self.cinder.volumes.create(size=size, snapshot_id=snapshot["id"], name=name,
+                                                    description=description)
+                return volume.to_dict()["id"]
+            LOG.warning("Snapshot %s is %s; must be available.", snapshot_name_or_id, snapshot['status'])
+        else:
+            LOG.warning("Snapshot %s not found.", snapshot_name_or_id)
+        return None
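For reference, a sketch of the mapping shape `get_free_resources` returns; the keys deliberately mirror `ValidateConfiguration.required_resources_dict` so `check_quotas` can compare the two dictionaries key by key (all values below are made up):

```python
# Made-up example of the mapping returned by get_free_resources():
free_resources = {
    "total_cores": 120, "floating_ips": 4, "instances": 30, "total_ram": 480000,
    "Volumes": 18, "VolumeGigabytes": 900, "Snapshots": 42,
    "Backups": 10, "BackupGigabytes": 500,
}
```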
+
+    def get_external_network(self, network_name_or_id):
+        """
+        Finds the router interface with network id equal to the given network and, by that, the external network.
+        :param network_name_or_id: name or id of network
+        :return: corresponding external network
+        """
+        network_id = self.conn.get_network(network_name_or_id)["id"]
+        for router in self.conn.list_routers():
+            for interface in self.conn.list_router_interfaces(router):
+                if interface.network_id == network_id:
+                    return router.external_gateway_info["network_id"]
+        return None
+
+    def attach_available_floating_ip(self, network=None, server=None):
+        """
+        Gets a floating IP from a network or a pool and attaches it to the server
+        :param network: network to get the floating IP from
+        :param server: server to attach the floating IP to
+        :return: the floating IP
+        """
+        floating_ip = self.conn.available_floating_ip(network=network)
+        if server:
+            self.conn.compute.add_floating_ip_to_server(server, floating_ip["floating_ip_address"])
+        return floating_ip
+
+    def get_images(self):
+        """
+        Get a generator able to generate all images
+        @return: A generator able to generate all images
+        """
+        return self.conn.compute.images()
+
+    def get_flavors(self):
+        """
+        Get a generator able to generate all flavors
+        @return: A generator able to generate all flavors
+        """
+        return self.conn.compute.flavors()
diff --git a/documentation/images/actions.jpg b/documentation/images/actions.jpg
new file mode 100644
index 000000000..d09040272
Binary files /dev/null and b/documentation/images/actions.jpg differ
diff --git a/documentation/images/features/cloud_specification_data/ac_screen1.png b/documentation/images/features/cloud_specification_data/ac_screen1.png
new file mode 100644
index 000000000..4c9ab50fa
Binary files /dev/null and b/documentation/images/features/cloud_specification_data/ac_screen1.png differ
diff --git a/documentation/images/features/cloud_specification_data/ac_screen2.png b/documentation/images/features/cloud_specification_data/ac_screen2.png
new file mode 100644
index 000000000..1f4b25613
Binary files /dev/null and b/documentation/images/features/cloud_specification_data/ac_screen2.png differ
diff --git a/documentation/images/features/cloud_specification_data/ac_screen3.png b/documentation/images/features/cloud_specification_data/ac_screen3.png
new file mode 100644
index 000000000..f42a05f26
Binary files /dev/null and b/documentation/images/features/cloud_specification_data/ac_screen3.png differ
diff --git a/documentation/images/features/cloud_specification_data/pw_screen1.png b/documentation/images/features/cloud_specification_data/pw_screen1.png
new file mode 100644
index 000000000..7fb53987f
Binary files /dev/null and b/documentation/images/features/cloud_specification_data/pw_screen1.png differ
diff --git a/documentation/images/software/theia/theia.png b/documentation/images/software/theia/theia.png
new file mode 100644
index 000000000..419f27658
Binary files /dev/null and b/documentation/images/software/theia/theia.png differ
diff --git a/documentation/markdown/bibigrid_feature_list.md b/documentation/markdown/bibigrid_feature_list.md
new file mode 100644
index 000000000..c5cae99ea
--- /dev/null
+++ b/documentation/markdown/bibigrid_feature_list.md
@@ -0,0 +1,16 @@
+# BiBiGrid Features
+
+| Name | Purpose |
+|:---:|:---:|
+| [Version](features/version.md) | Returns BiBiGrid's version for opening issues and the like. |
+| [Terminate Cluster](features/terminate_cluster.md) | Terminates the cluster specified by cluster-id i.e. removes key, application credentials, servers and floating-ips. |
+| [Create](features/create.md) | Creates the cluster specified by the configuration. |
+| [List Clusters](features/list_clusters.md) | Shows info of all clusters if no cluster-id is specified. Otherwise the cluster-id's cluster will be shown in great detail. |
+| [Check](features/check.md) | Checks if the given configuration is valid and necessary security measures are taken. |
+| [Web IDE](features/ide.md) | Connects to the running IDE of cluster-id's cluster. Requires that the given cluster was set up with an ide. |
+| [Update](features/update.md) | Updates the master's playbook and runs that playbook for the master. Requires that no job is running and no workers are up. |
+| [Cloud Specification Data](features/cloud_specification_data.md) | Contains necessary data to establish a general connection to the provider. |
+| [Configuration](features/configuration.md) | Contains all data regarding cluster setup for all providers. |
+| [Command Line Interface](features/CLI.md) | What command line arguments can be passed into BiBiGrid. |
+
+![](../images/actions.jpg)
\ No newline at end of file
diff --git a/documentation/markdown/bibigrid_software_list.md b/documentation/markdown/bibigrid_software_list.md
new file mode 100644
index 000000000..d3c2d4d13
--- /dev/null
+++ b/documentation/markdown/bibigrid_software_list.md
@@ -0,0 +1,8 @@
+# BiBiGrid Used Software
+
+| Name | Purpose | Official Link |
+|:---:|:---:|:---:|
+| [Ansible](software/ansible.md) | Ansible, an open source community project by Red Hat, enables the idempotent setup of servers. Ansible is used to **prepare** all cluster nodes. | [Getting started with Ansible](https://docs.ansible.com/ansible/latest/getting_started/index.html) |
+| [Slurm](software/slurm.md) | Slurm is an open source cluster management and job scheduling system. Slurm is used to **schedule** cluster nodes i.e. Slurm will start and shut down nodes as needed. | [Quick Start User Guide](https://slurm.schedmd.com/quickstart.html) |
+| [Theia IDE](software/theia_ide.md) | Theia IDE is a Web IDE, built using the Theia Framework, that allows easy, intuitive and abstract **web access** to cluster nodes. Theia IDE is optional. | [Using "Theia" as an End User](https://theia-ide.org/docs/user_getting_started/) |
+| [Zabbix](software/zabbix.md) | Zabbix is an open source **monitoring** solution for networks, servers, clouds, applications and services. Zabbix is optional. | [What is Zabbix](https://www.zabbix.com/documentation/current/en/manual/introduction/about) |
\ No newline at end of file
diff --git a/documentation/markdown/features/CLI.md b/documentation/markdown/features/CLI.md
new file mode 100644
index 000000000..baca937db
--- /dev/null
+++ b/documentation/markdown/features/CLI.md
@@ -0,0 +1,17 @@
+# CLI
+Available command line parameters:
+- `-h, --help` show help message and exit
+- `-v, --verbose` Increases output verbosity (can be of great use when the cluster fails to start). `-v` adds more detailed info to the logfile, `-vv` adds debug information to the logfile.
+- `-d, --debug` Keeps cluster active in case of an error. Offers termination after a successful create.
+- `-i , --config_input (required)` Path to the YAML configurations file. Relative paths can be used and start at `~/.config/bibigrid`.
+- `-cid , --cluster_id ` Cluster id is needed for ide and termination. If no cluster id is set, the last started cluster's id will be used (except for `list_clusters`).
+## Mutually exclusive actions: choose exactly one
+- `-V, --version` Displays version.
+- `-t, --terminate_cluster` Terminates cluster. Needs cluster-id set.
+- `-c, --create` Creates cluster.
+- `-l, --list_clusters` Lists all running clusters. If cluster-id is set, will list this cluster in detail only.
+- `-ch, --check` Validates cluster configuration.
+- `-ide, --ide` Establishes a secured connection to ide. Needs cluster-id set.
+- `-u, --update` Updates master's playbook. Needs cluster-id set, no job running and no workers powered up.
\ No newline at end of file
diff --git a/documentation/markdown/features/check.md b/documentation/markdown/features/check.md
new file mode 100644
index 000000000..c92c8a814
--- /dev/null
+++ b/documentation/markdown/features/check.md
@@ -0,0 +1 @@
+# Check
\ No newline at end of file
diff --git a/documentation/markdown/features/cloud_specification_data.md b/documentation/markdown/features/cloud_specification_data.md
new file mode 100644
index 000000000..c70a776db
--- /dev/null
+++ b/documentation/markdown/features/cloud_specification_data.md
@@ -0,0 +1,76 @@
+# Cloud Specification Data
+To access the cloud, authentication information is required. BiBiGrid2 no longer uses environment variables but a two-file system instead.
+`clouds.yaml` and `clouds-public.yaml` can be placed in `~/.config/bibigrid/` or `/etc/bibigrid/` and will be loaded by BiBiGrid2 on execution.
+While you store your password and username in `clouds.yaml` (private), you can store all other information ready to share in `clouds-public.yaml` (shareable).
+However, all information can also simply be stored in `clouds.yaml`.
+
+Keys set in `clouds.yaml` will overwrite keys from `clouds-public.yaml`.
+
+## Openstack
+Be aware that the downloaded `clouds.yaml` file contains all information.
+OpenStack does not split information into `clouds.yaml` and `clouds-public.yaml` on its own.
+The example files show an example split.
+
+### Password Example
+Using the password `clouds.yaml` is easy. However, since passwords - unlike [Application Credentials](#application-credentials-example) -
+don't have an expiration date, caution is advised.
+
+![Download](../../images/features/cloud_specification_data/pw_screen1.png)
+
+Move the downloaded file to `~/.config/bibigrid/` or `/etc/bibigrid/`.
+
+##### Password clouds.yaml
+```yaml
+clouds:
+  openstack:
+    profile: nameOfCloudsPublicYamlEntry
+    auth:
+      username: SamSampleman
+      password: SecurePassword
+```
+
+##### Password clouds-public.yaml
+```yaml
+public-clouds:
+  nameOfCloudsPublicYamlEntry:
+    auth:
+      auth_url: https://somelink:someport
+      project_id: someProjectId
+      project_name: someProjectName
+      user_domain_name: someDomainName
+    region_name: someRegionName
+    interface: "public"
+    identity_api_version: 3
+```
+### Application Credentials Example
+The following shows how an Application Credential can be created and the related `clouds.yaml` downloaded.
+Application Credentials are the preferred way of authentication since they do have an expiration date and
+their access can be limited.
+
+![Navigation](../../images/features/cloud_specification_data/ac_screen1.png)
+![Creation](../../images/features/cloud_specification_data/ac_screen2.png)
+![Download](../../images/features/cloud_specification_data/ac_screen3.png)
+
+Move the downloaded file to `~/.config/bibigrid/` or `/etc/bibigrid/`.
+
+#### Application Credential clouds.yaml
+```yaml
+clouds:
+  openstack:
+    profile: nameOfCloudsPublicYamlEntry
+    auth:
+      application_credential_id: SomeID
+      application_credential_secret: SecureSecret
+```
+
+#### Application Credential clouds-public.yaml
+```yaml
+public-clouds:
+  nameOfCloudsPublicYamlEntry:
+    auth:
+      auth_url: https://somelink:someport
+    region_name: SomeRegion
+    interface: "public"
+    identity_api_version: 3
+    auth_type: "v3applicationcredential"
+```
\ No newline at end of file
diff --git a/documentation/markdown/features/configuration.md b/documentation/markdown/features/configuration.md
new file mode 100644
index 000000000..3e0309323
--- /dev/null
+++ b/documentation/markdown/features/configuration.md
@@ -0,0 +1,200 @@
+# Configuration
+
+The configuration file (often called `bibigrid.yml`) contains important information about cluster creation.
+The cluster configuration holds a list of configurations where each configuration is assigned to a specific provider
+(location). That allows a cluster to stretch over multiple providers. The configuration file is best stored in
+`~/.config/bibigrid/` since BiBiGrid starts its relative search there.
+
+## Configuration List
+The first configuration is always the master's provider configuration.
+Only the first configuration is allowed to have a master key.
+Every following configuration describes a provider that is not the master's provider, containing a number of workers and a
+vpnwkr (vpn worker). The vpnwkr is a worker with a floating IP. That allows the master - which knows all vpnwkrs - to access
+all workers, using the floating IP as an entry point into the other local networks. However, all that will be covered by
+an abstraction layer using a virtual network. Therefore, end users can work on a spread cluster without noticing it.
+
+### Master Provider Configuration
+As mentioned before, the first configuration has a master key. Apart from that, it also holds all information that is -
+simply put - true over the entire cluster. We also call those keys global.
+Keys that belong only to a single provider configuration are called local.
+For example, whether the master works alongside the workers is a general fact.
+Therefore, it is stored within the first configuration: the master provider configuration.
+
+## Keys
+
+### Global
+
+#### sshPublicKeyFiles (optional)
+`sshPublicKeyFiles` expects a list of public keyfiles to be registered on every node. That allows you to grant access to
+created clusters to the owners of the private keyfile. For example, you can add a colleague's public key to the list and allow
+them to access your started cluster later on to debug it.
+
+#### masterMounts (optional)
+`masterMounts` expects a list of volumes or snapshots that will then be mounted to the master. If any snapshots are
+given, the related volumes are first created and then those volumes are used by BiBiGrid. Those volumes are not deleted
+after cluster termination.
+
+<details>
+<summary>What is mounting?</summary>
+
+[Mounting](https://man7.org/linux/man-pages/man8/mount.8.html) adds a new filesystem to the file tree allowing access.
+</details>
+
+#### nfsShares (optional)
+`nfsShares` expects a list of folder paths to share using nfs. In every case, `/vol/spool/` is always an nfsShare.
+This key only makes sense if the [nfs key](#nfs) is set `True`.
+
+<details>
+<summary>What is NFS?</summary>
+
+NFS (Network File System) is a stable and well-functioning network protocol for exchanging files over the local network.
+</details>
+
+#### ansibleRoles (optional)
+Yet to be explained.
+```
+  - file: SomeFile
+    hosts: SomeHosts
+    name: SomeName
+    vars: SomeVars
+    vars_file: SomeVarsFile
+```
+#### ansibleGalaxyRoles (optional)
+Yet to be explained.
+```
+  - hosts: SomeHost
+    name: SomeName
+    galaxy: SomeGalaxy
+    git: SomeGit
+    url: SomeURL
+    vars: SomeVars
+    vars_file: SomeVarsFile
+```
+
+#### localFS (optional)
+This key helps some users to create a filesystem to their liking. It is not used in general.
+
+#### localDNSlookup (optional)
+If `True`, the master will store the links to its workers. This is called
+[Local DNS Lookup](https://helpdeskgeek.com/networking/edit-hosts-file/).
+
+#### zabbix (optional)
+If `True`, the monitoring solution [zabbix](https://www.zabbix.com/) will be installed on the master.
+
+#### nfs (optional)
+If `True`, nfs is created.
+
+<details>
+<summary>What is NFS?</summary>
+
+NFS (Network File System) is a stable and well-functioning network protocol for exchanging files over the local network.
+</details>
+
+#### useMasterAsCompute (optional)
+By default, the master works alongside the workers on submitted jobs. If you set `useMasterAsCompute`
+to `False`, the master will instead no longer support the workers.
+
+#### waitForServices (optional):
+Expects a list of services to wait for. This is required if your provider has any post-launch services. If not set,
+seemingly random errors can occur when the service interrupts the ansible execution. Providers and their services are
+listed on [de.NBI Wiki](https://cloud.denbi.de/wiki/) at `Computer Center Specific`.
+
+### Local
+
+#### infrastructure (required)
+`infrastructure` sets the used provider implementation for this configuration. Currently only `openstack` is available.
+Other infrastructures would be AWS and so on.
+
+#### cloud
+`cloud` decides which entry in the `clouds.yaml` is used.
+When using OpenStack, the downloaded `clouds.yaml` is named `openstack`.
+
+`cloud: openstack`
+
+#### workerInstances (optional)
+`workerInstances` expects a list of workers to be used on the specific provider this configuration is for.
+`Instances` are also called `servers`.
+
+```
+workerInstances:
+  - type: de.NBI tiny
+    image: Ubuntu 22.04 LTS (2022-10-14)
+    count: 2
+```
+- `type` sets the instance's hardware configuration. Also called `flavor` sometimes.
+- `image` sets the bootable operating system to be installed on the instance.
+- `count` sets how many workers of that `type`-`image` combination are to be used by the cluster.
+
+Find your active `images`:
+
+```
+openstack image list --os-cloud=openstack | grep active
+```
+
+Find your active `flavors`:
+
+```
+openstack flavor list --os-cloud=openstack
+```
+
+#### Master or vpnWorker?
+
+##### Master
+Only in the first configuration and only one:
+```
+  masterInstance:
+    type: de.NBI tiny
+    image: Ubuntu 22.04 LTS (2022-10-14)
+```
+
+##### vpnWorker:
+Exactly once in every configuration but the first:
+```
+  vpnWorker:
+    type: de.NBI tiny
+    image: Ubuntu 22.04 LTS (2022-10-14)
+```
+
+#### sshUser (required)
+`sshUser` is the standard user of the installed images. For `Ubuntu 22.04` this would be `ubuntu`.
+
+#### region (required)
+Every [region](https://docs.openstack.org/python-openstackclient/rocky/cli/command-objects/region.html) has its own
+openstack deployment. Every [availability zone](#availabilityzone-required) belongs to a region.
+
+Find your `regions`:
+```
+openstack region list --os-cloud=openstack
+```
+
+
+#### availabilityZone (required)
+[Availability zones](https://docs.openstack.org/nova/latest/admin/availability-zones.html) allow you to logically group
+nodes.
+
+Find your `availabilityZones`:
+```
+openstack availability zone list --os-cloud=openstack
+```
+
+#### subnet (required)
+`subnet` is a block of ip addresses.
+
+Find available `subnets`:
+
+```
+openstack subnet list --os-cloud=openstack
+```
+
+#### localDNSLookup (optional)
+If no full DNS service for started instances is available, set `localDNSLookup: True`.
+Currently the case in Berlin, DKFZ, Heidelberg and Tuebingen.
\ No newline at end of file
diff --git a/documentation/markdown/features/create.md b/documentation/markdown/features/create.md
new file mode 100644
index 000000000..6efe52f02
--- /dev/null
+++ b/documentation/markdown/features/create.md
@@ -0,0 +1,2 @@
+# Create
+Temporary cluster keys will be stored in `~/.config/bibigrid/keys`.
\ No newline at end of file
diff --git a/documentation/markdown/features/ide.md b/documentation/markdown/features/ide.md
new file mode 100644
index 000000000..6093e7468
--- /dev/null
+++ b/documentation/markdown/features/ide.md
@@ -0,0 +1,2 @@
+# Web IDE
+
diff --git a/documentation/markdown/features/list_clusters.md b/documentation/markdown/features/list_clusters.md
new file mode 100644
index 000000000..0f8321173
--- /dev/null
+++ b/documentation/markdown/features/list_clusters.md
@@ -0,0 +1 @@
+# List Clusters
\ No newline at end of file
diff --git a/documentation/markdown/features/terminate_cluster.md b/documentation/markdown/features/terminate_cluster.md
new file mode 100644
index 000000000..a47eb2894
--- /dev/null
+++ b/documentation/markdown/features/terminate_cluster.md
@@ -0,0 +1 @@
+# Terminate Cluster
\ No newline at end of file
diff --git a/documentation/markdown/features/update.md b/documentation/markdown/features/update.md
new file mode 100644
index 000000000..3e9ff9ecf
--- /dev/null
+++ b/documentation/markdown/features/update.md
@@ -0,0 +1 @@
+# Update
\ No newline at end of file
diff --git a/documentation/markdown/features/version.md b/documentation/markdown/features/version.md
new file mode 100644
index 000000000..e04a043bb
--- /dev/null
+++ b/documentation/markdown/features/version.md
@@ -0,0 +1 @@
+# Version
\ No newline at end of file
diff --git a/documentation/markdown/software/ansible.md b/documentation/markdown/software/ansible.md
new file mode 100644
index 000000000..f7e02ac8a
--- /dev/null
+++ b/documentation/markdown/software/ansible.md
@@ -0,0 +1,39 @@
+# Ansible
+
+## Ansible Tutorial
+- [Ansible Workshop Presentation](https://docs.google.com/presentation/d/1W4jVHLT8dB1VsdtxXqtKlMqGbeyEWTQvSHh0WMfWo2c/edit#slide=id.p10)
+- [de.NBI Cloud's Ansible Course](https://gitlab.ub.uni-bielefeld.de/denbi/ansible-course)
+
+## Executing BiBiGrid's Playbook Manually
+Only execute BiBiGrid's playbook manually when no worker is up. The playbook is executed automatically for workers powering up.
+
+If you've implemented changes to BiBiGrid's playbook, you might want to execute BiBiGrid's playbook manually to see how
+those changes play out. For this we need the preinstalled `ansible-playbook` command. However, BiBiGrid has a handy
+shortcut for that called `bibiplay`.
+
+### bibiplay
+To make things easier we wrote the [bibiplay](..%2F..%2F..%2Fresources%2Fbin%2Fbibiplay) wrapper. It's used like this:
+```sh
+bibiplay
+```
+is the same as:
+```sh
+ansible-playbook /opt/playbook/site.yml -i /opt/playbook/ansible_hosts
+```
+any additional arguments are passed to `ansible-playbook`:
+```sh
+bibiplay -l master
+```
+is the same as:
+```sh
+ansible-playbook /opt/playbook/site.yml -i /opt/playbook/ansible_hosts -l master
+```
+
+### Useful commands
+For more options see [ansible-playbook's manpage](https://linux.die.net/man/1/ansible-playbook).
+
+
+| Summary | Command |
+|:---:|:---:|
+| Prepare master manually | `bibiplay -l master` |
+| Prepare only slurm on master manually | `bibiplay -l master -t slurm` |
diff --git a/documentation/markdown/software/slurm.md b/documentation/markdown/software/slurm.md
new file mode 100644
index 000000000..a97a2b3f6
--- /dev/null
+++ b/documentation/markdown/software/slurm.md
@@ -0,0 +1,21 @@
+# Slurm
+Be aware that due to BiBiGrid's slurm configuration the default behavior of commands might differ slightly from slurm's defaults.
+Everything described below explains how slurm behaves in BiBiGrid's context.
+
+## Slurm Client
+### Useful commands
+For more options see [slurm client's manpage](https://manpages.debian.org/testing/slurm-client/slurm-wlm.1).
+
+| Summary                | Command                                                                       | Explanation & Comment                                                                                                                                                        |
+|:----------------------:|:-----------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| List all present nodes | `sinfo`                                                                       | Cloud nodes that are powered down are marked `~`. Knowing [Node State Codes](https://manpages.debian.org/testing/slurm-client/sinfo.1.en.html#NODE_STATE_CODES) helps a lot. |
+| Shut down an instance  | `sudo scontrol update NodeName=[node-name] state=POWER_DOWN reason=[reason]` | Powers down the node. The instance will be deleted.                                                                                                                          |
+| Power up an instance   | `sudo scontrol update NodeName=[node-name] state=POWER_UP reason=[reason]`   | Powers up the node. An instance will be created.                                                                                                                             |
+| List all running jobs  | `squeue`                                                                      | Allows you to see whether everything runs as expected.                                                                                                                       |
+
+### Read more
+
+| Summary                                                                            | Explanation                                  |
+|:----------------------------------------------------------------------------------:|:--------------------------------------------:|
+| [NODE STATE CODES](https://slurm.schedmd.com/sinfo.html#SECTION_NODE-STATE-CODES)  | Very helpful to interpret `sinfo` correctly. |
+
diff --git a/documentation/markdown/software/theia_ide.md b/documentation/markdown/software/theia_ide.md new file mode 100644 index 000000000..92e9123b6 --- /dev/null +++ b/documentation/markdown/software/theia_ide.md @@ -0,0 +1,6 @@
+# Theia IDE
+[Theia Web IDE's](https://www.theia-ide.org/) many features make it easier to work on your cloud instances. 
+
+![Theia](../../images/software/theia/theia.png)
+## Installing Python Syntax Highlighter
+
diff --git a/documentation/markdown/software/zabbix.md b/documentation/markdown/software/zabbix.md new file mode 100644 index 000000000..23df10393 --- /dev/null +++ b/documentation/markdown/software/zabbix.md @@ -0,0 +1,3 @@
+# Zabbix
+
+
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..2cbce46da --- /dev/null +++ b/requirements.txt @@ -0,0 +1,58 @@
+appdirs==1.4.4
+attrs==22.1.0
+autopage==0.5.1
+bcrypt==4.0.1
+certifi==2022.9.24
+cffi==1.15.1
+charset-normalizer==2.1.1
+cliff==4.1.0
+cmd2==2.4.2
+cryptography==38.0.4
+debtcollector==2.5.0
+decorator==5.1.1
+dogpile.cache==1.1.8
+idna==3.4
+importlib-metadata==5.1.0
+iso8601==1.1.0
+jmespath==1.0.1
+jsonpatch==1.32
+jsonpointer==2.3
+keystoneauth1==5.1.0
+mergedeep==1.3.4
+msgpack==1.0.4
+munch==2.5.0
+netaddr==0.8.0
+netifaces==0.11.0
+openstacksdk==0.103.0
+os-service-types==1.7.0
+osc-lib==2.6.2
+oslo.config==9.0.0
+oslo.i18n==5.1.0
+oslo.serialization==5.0.0
+oslo.utils==6.1.0
+packaging==21.3
+paramiko==2.12.0
+pbr==5.11.0
+prettytable==3.5.0
+pycparser==2.21
+PyNaCl==1.5.0
+pyparsing==3.0.9
+pyperclip==1.8.2
+python-cinderclient==9.1.0
+python-keystoneclient==5.0.1
+python-novaclient==18.2.0
+python-openstackclient==6.0.0
+pytz==2022.6
+PyYAML==6.0
+requests==2.28.1
+requestsexceptions==1.4.0
+rfc3986==2.0.0
+shortuuid==1.0.11
+simplejson==3.18.0
+six==1.16.0
+sshtunnel==0.4.0
+stevedore==4.1.1
+urllib3==1.26.13
+wcwidth==0.2.5
+wrapt==1.14.1
+zipp==3.11.0
diff --git a/resources/bin/bibigrid-hello-world.sh b/resources/bin/bibigrid-hello-world.sh new file mode 100755 index 000000000..4cd7c728e --- /dev/null +++ b/resources/bin/bibigrid-hello-world.sh @@ -0,0 +1,30 @@
+#!/bin/bash
+exe() { echo "\$" "$@" ; "$@" ; }
+
+echo "Hello, World! This program will show very basic slurm scheduling."
+echo "I) Only execute this just after logging in and without any prior changes."
+echo "II) You need to have at least one worker in your configuration or this program will hang at some point."
+echo "III) The master should be configured to work as well or this program will hang at some point."
+read -n 1 -r -s -p $'Press enter to continue...\n'
+echo "Let's see which servers are up using sinfo (slurm info)!"
+exe sinfo
+echo -e "\nOnly the master is up, since all other workers are configured but not powered up ('~' is used for nodes that are powered down)."
+echo "See here for more info about node states: https://slurm.schedmd.com/sinfo.html#SECTION_NODE-STATE-CODES"
+read -n 1 -r -s -p $'Press enter to continue...\n'
+echo -e "\nLet's execute the 'hostname' command:"
+exe srun hostname
+echo -e "\nAnd see if a server started:"
+exe sinfo
+echo -e "\nSince the master is a worker, too, there was no need to start new workers."
+read -n 1 -r -s -p $'Press enter to continue...\n'
+echo -e "\nWhat if we need another server? Let's exclude $(hostname) for now using (-x node-name-to-exclude), so slurm has to power up a worker node."
+echo "While it starts, open another terminal and execute 'squeue'. That will show you the running job."
+echo "Also execute 'sinfo', which will show you the node is powering up ('#' is used for nodes that are powering up). But now let's start another node:"
+start_time=$(date +%T)
+exe srun -x "$(hostname)" hostname
+echo "We triggered the power up at $start_time. Now it's $(date +%T)."
+echo -e "\nLet's see what changed." 
+exe sinfo
+echo "Now a worker has powered up, as we can see by looking at 'sinfo'."
+read -n 1 -r -s -p $'Press enter to continue...\n'
+echo -e "\nWorkers that are not used will be shut down after a while."
diff --git a/resources/bin/bibiplay b/resources/bin/bibiplay new file mode 100644 index 000000000..6b4d6148f --- /dev/null +++ b/resources/bin/bibiplay @@ -0,0 +1,3 @@
+#!/bin/bash
+# allows for an easier execution of the ansible playbook no matter where you are
+ansible-playbook /opt/playbook/site.yml -i /opt/playbook/ansible_hosts "$@" \ No newline at end of file
diff --git a/resources/playbook/ansible.cfg b/resources/playbook/ansible.cfg new file mode 100644 index 000000000..ee5363109 --- /dev/null +++ b/resources/playbook/ansible.cfg @@ -0,0 +1,10 @@
+# This file is moved programmatically to /etc/ansible/ansible.cfg on the master, so it shouldn't be moved manually
+[defaults]
+inventory = ./ansible_hosts
+host_key_checking = False
+forks=50
+pipelining = True
+log_path=~/ansible.log
+timeout = 60
+[ssh_connection]
+ssh_args = -o ControlMaster=auto -o ControlPersist=60s \ No newline at end of file
diff --git a/resources/playbook/roles/additional/example/meta/main.yml b/resources/playbook/roles/additional/example/meta/main.yml new file mode 100644 index 000000000..8ff216df2 --- /dev/null +++ b/resources/playbook/roles/additional/example/meta/main.yml @@ -0,0 +1,28 @@
+galaxy_info:
+  role_name: Hello-World Example
+  author: Tim Dilger
+  description: Shows a working example of installing an Ansible role.
+  company: Bielefeld University, CeBiTec, BiBiServ
+
+  license: BSD
+
+  min_ansible_version: 2.7
+
+  platforms:
+    - name: EL
+      versions:
+        - 7
+    - name: Debian
+      versions:
+        - stretch
+    - name: Ubuntu
+      versions:
+        - xenial
+        - bionic
+
+  galaxy_tags:
+    - hello-world
+
+dependencies: []
+  # List your role dependencies here, one per line. Be sure to remove the '[]' above,
+  # if you add dependencies to this list.
diff --git a/resources/playbook/roles/additional/example/tasks/main.yml b/resources/playbook/roles/additional/example/tasks/main.yml new file mode 100644 index 000000000..63ea8e434 --- /dev/null +++ b/resources/playbook/roles/additional/example/tasks/main.yml @@ -0,0 +1,3 @@
+- debug:
+    msg:
+      - "Hello {{ ansible_user }}!"
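+
+# The task above is the whole example: it just greets the connecting user.
+# A further task could be sketched like this (hypothetical extension, kept
+# commented out so the shipped example stays a pure hello-world):
+# - debug:
+#     msg: "Running on {{ ansible_hostname }}"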
diff --git a/resources/playbook/roles/bibigrid/defaults/main.yml b/resources/playbook/roles/bibigrid/defaults/main.yml new file mode 100644 index 000000000..2702e6fbb --- /dev/null +++ b/resources/playbook/roles/bibigrid/defaults/main.yml @@ -0,0 +1,6 @@
+nvm_install_dir: /opt/nvm
+
+theia_version: "next"
+theia_ide_install_dir: /opt/theia-ide
+theia_ide_bind_address: localhost
+theia_ide_bind_port: 8181
diff --git a/resources/playbook/roles/bibigrid/files/disable-auto-upgrades.conf b/resources/playbook/roles/bibigrid/files/disable-auto-upgrades.conf new file mode 100644 index 000000000..8717231ce --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/disable-auto-upgrades.conf @@ -0,0 +1,4 @@
+APT::Periodic::Update-Package-Lists "0";
+APT::Periodic::Download-Upgradeable-Packages "0";
+APT::Periodic::AutocleanInterval "0";
+APT::Periodic::Unattended-Upgrade "0"; \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/slurm/cgroup.conf b/resources/playbook/roles/bibigrid/files/slurm/cgroup.conf new file mode 100644 index 000000000..2b8ba9c6a --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/cgroup.conf @@ -0,0 +1,14 @@
+CgroupMountpoint="/sys/fs/cgroup"
+CgroupAutomount=yes
+CgroupReleaseAgentDir="/etc/slurm/cgroup"
+AllowedDevicesFile="/etc/slurm/cgroup_allowed_devices_file.conf"
+ConstrainCores=no
+TaskAffinity=no
+ConstrainRAMSpace=yes
+ConstrainSwapSpace=no
+ConstrainDevices=no
+AllowedRamSpace=100
+AllowedSwapSpace=0
+MaxRAMPercent=100
+MaxSwapPercent=100
+MinRAMSpace=30 \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/slurm/cgroup_allowed_devices_file.conf b/resources/playbook/roles/bibigrid/files/slurm/cgroup_allowed_devices_file.conf new file mode 100644 index 000000000..471ad8cfd --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/cgroup_allowed_devices_file.conf @@ -0,0 +1,7 @@
+/dev/null
+/dev/urandom
+/dev/zero
+/dev/sd*
+/dev/vd*
+/dev/cpu/*/*
+/dev/pts/* \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/slurm/create.sh b/resources/playbook/roles/bibigrid/files/slurm/create.sh new file mode 100644 index 000000000..98df3be7f --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/create.sh @@ -0,0 +1,13 @@
+#!/bin/bash
+
+# redirect stderr and stdout
+exec >> /var/log/slurm/create.out.log
+exec 2>> /var/log/slurm/create.err.log
+
+
+hosts=$(scontrol show hostnames "$1")
+
+
+# create and configure requested instances
+python3 /usr/local/bin/create_server.py "${hosts}"
+exit $? \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/slurm/create_server.py b/resources/playbook/roles/bibigrid/files/slurm/create_server.py new file mode 100644 index 000000000..53e79005a --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/create_server.py @@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+"""
+Creates one or more instances from a newline-separated name list (as produced by 'scontrol show hostnames').
+Called automatically by create.sh, which slurm runs as the slurm user. 
+"""
+import logging
+import math
+import re
+import sys
+import time
+
+import ansible_runner
+import os_client_config
+import paramiko
+import yaml
+from openstack.exceptions import OpenStackCloudException
+
+LOGGER_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
+logging.basicConfig(format=LOGGER_FORMAT, filename="/var/log/slurm/create_server.log", level=logging.INFO)
+
+logging.info("create_server.py started")
+start_time = time.time()
+
+
+def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", username="ubuntu", timeout=5):
+    """
+    Waits until an SSH connection succeeds. This guarantees that the node can be reached via Ansible.
+    @param private_ip: ip of node
+    @param private_key: private ssh key
+    @param username: username of node
+    @param timeout: how many (exponentially backed-off) connection attempts to make
+    @return:
+    """
+    # Wait for SSH connection to become available
+    paramiko_key = paramiko.ECDSAKey.from_private_key_file(private_key)
+    with paramiko.SSHClient() as client:
+        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        attempts = 0
+        establishing_connection = True
+        while establishing_connection:
+            try:
+                client.connect(hostname=private_ip, username=username, pkey=paramiko_key)
+                establishing_connection = False
+            except paramiko.ssh_exception.NoValidConnectionsError as exc:
+                logging.info("Attempting to connect to %s... This might take a while", private_ip)
+                if attempts < timeout:
+                    time.sleep(2 ** attempts)
+                    attempts += 1
+                else:
+                    logging.warning("Attempt to connect to %s failed.", private_ip)
+                    raise ConnectionError from exc
+
+
+def run_playbook(run_instances):
+    """
+    Runs the BiBiGrid playbook for run_instances
+    @param run_instances: comma-separated string of instances to run the playbook for
+    @return:
+    """
+    logging.info("run_playbook with \ninstances: %s", run_instances)
+
+    # cmdline_args = ["/opt/playbook/site.yml", '-i', '/opt/playbook/ansible_hosts', '-vvvv', '-l', run_instances]
+    cmdline_args = ["/opt/playbook/site.yml", '-i', '/opt/playbook/ansible_hosts', '-l', run_instances]
+    executable_cmd = '/usr/local/bin/ansible-playbook'
+    logging.info("run_command...\nexecutable_cmd: %s\ncmdline_args: %s", executable_cmd, cmdline_args)
+
+    runner = ansible_runner.interface.init_command_config(
+        executable_cmd=executable_cmd,
+        cmdline_args=cmdline_args)
+
+    runner.run()
+    runner_response = runner.stdout.read()
+    runner_error = runner.stderr.read()
+    return runner, runner_response, runner_error, runner.rc
+
+
+if len(sys.argv) < 2:
+    logging.warning("usage: %s instance1_name[,instance2_name,...]", sys.argv[0])
+    logging.info("Your input %s with length %s", sys.argv, len(sys.argv))
+    sys.exit(1)
+
+sdk = os_client_config.make_sdk(cloud="master")
+
+# read instances configuration
+with open("/opt/playbook/vars/instances.yml", mode="r") as f:
+    worker_types = yaml.safe_load(f)
+
+# read common configuration
+with open("/opt/playbook/vars/common_configuration.yml", mode="r") as f:
+    common_config = yaml.safe_load(f)
+
+instances = sys.argv[1].split("\n")
+logging.info("Instances: %s", instances)
+
+server_list = []
+openstack_exception_list = []
+# Iterate over all names and search for a fitting ...
+for worker in instances:
+    # ... worker_type
+    for worker_type in worker_types["workers"]:
+        if re.match(worker_type["regexp"], worker):
+            try:
+                logging.info("Create server %s.", worker)
+                # create server and ... 
+                server = sdk.create_server(
+                    name=worker,
+                    flavor=worker_type["flavor"]["name"],
+                    image=worker_type["image"],
+                    network=worker_type["network"],
+                    key_name=f"tempKey_bibi-{common_config['cluster_id']}",
+                    wait=False)
+                # ... add it to the server list
+                server_list.append(server)
+                # ToDo Better handling, Check edge cases, ...
+            except OpenStackCloudException as exc:
+                logging.warning("While creating %s the OpenStackCloudException %s occurred. Worker ignored.",
+                                worker, exc)
+                openstack_exception_list.append(worker)
+
+# ToDo implement better error handling
+no_ssh_list = []
+return_list = []
+openstack_wait_exception_list = []
+for server in server_list:
+    try:
+        sdk.wait_for_server(server, auto_ip=False, timeout=600)
+        server = sdk.get_server(server["id"])
+    except OpenStackCloudException as exc:
+        logging.warning("While waiting for %s the OpenStackCloudException %s occurred.", server.name, exc)
+        openstack_wait_exception_list.append(server.name)
+        continue
+    logging.info("%s is active. Checking ssh", server.name)
+    try:
+        check_ssh_active(server.private_v4)
+        logging.info("Server %s is %s.", server.name, server.status)
+        return_list.append(server.name)
+    except ConnectionError as exc:
+        logging.warning("%s: Couldn't connect to %s.", exc, server.name)
+        no_ssh_list.append(server.name)
+
+# If no suitable server can be started: abort
+if len(return_list) == 0:
+    logging.warning("No suitable server found! Abort!")
+    sys.exit(1)
+
+logging.info("Call Ansible to configure instances.")
+# run ansible via ansible_runner (see run_playbook above)
+runnable_instances = ",".join(return_list)
+
+r, response, error, rc = run_playbook(runnable_instances)
+logging.info("Ansible executed!")
+unreachable_list = list(r.stats["dark"].keys())
+failed_list = list(r.stats["failures"].keys())
+overall_failed_list = unreachable_list + failed_list + no_ssh_list + openstack_wait_exception_list
+if overall_failed_list or openstack_exception_list:
+    logging.warning("Openstack exception list: %s", openstack_exception_list)
+    logging.warning("Unable to connect via ssh list: %s", no_ssh_list)
+    logging.warning("Unreachable list: %s", unreachable_list)
+    logging.warning("Failed list: %s", failed_list)
+    logging.warning("Return code: %s", rc)
+    for server_name in overall_failed_list:
+        logging.warning("Deleting server %s: %s", server_name, sdk.delete_server(server_name))
+    logging.warning("Exit Code 1")
+    sys.exit(1)
+logging.info("Successful create_server.py execution!")
+time_in_s = time.time() - start_time
+logging.info("--- %s minutes and %s seconds ---", math.floor(time_in_s / 60), time_in_s % 60)
+logging.info("Exit Code 0")
+sys.exit(0)
diff --git a/resources/playbook/roles/bibigrid/files/slurm/fail.sh b/resources/playbook/roles/bibigrid/files/slurm/fail.sh new file mode 100644 index 000000000..b6fa68398 --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/fail.sh @@ -0,0 +1,10 @@
+#!/bin/bash
+
+# redirect stderr and stdout
+exec >> /var/log/slurm/fail.out.log
+exec 2>> /var/log/slurm/fail.err.log
+
+# $1 is in slurm node format, for example: bibigrid-worker0-cid-[0-1],bibigrid-worker1-cid-0, and needs no converting
+scontrol update NodeName="$1" state=RESUME reason=FailedStartup # no sudo needed because this runs as the slurm user
+
+exit $?
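+
+# Note: slurm invokes this script as its ResumeFailProgram (wired up in the
+# slurm.conf template later in this change), so it runs whenever a cloud node
+# fails to power up; its output lands in /var/log/slurm/fail.{out,err}.log via
+# the exec redirects above.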
diff --git a/resources/playbook/roles/bibigrid/files/slurm/requirements.txt b/resources/playbook/roles/bibigrid/files/slurm/requirements.txt new file mode 100644 index 000000000..a0a23f56f --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/requirements.txt @@ -0,0 +1,5 @@
+python-openstackclient==6.0.0
+ipython
+os_client_config
+paramiko
+ansible-runner \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/slurm/slurmrestd_default b/resources/playbook/roles/bibigrid/files/slurm/slurmrestd_default new file mode 100644 index 000000000..b6d2fd860 --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/slurmrestd_default @@ -0,0 +1,9 @@
+# /etc/default/slurmrestd
+# Additional options that are passed to the slurmrestd daemon
+#SLURMRESTD_OPTIONS=""
+SLURM_CONF="/etc/slurm/slurm.conf"
+#SLURMRESTD_DEBUG="8"
+SLURM_JWT=""
+SLURMRESTD_LISTEN=":6820"
+SLURMRESTD_AUTH_TYPES="rest_auth/jwt"
+SLURMRESTD_OPENAPI_PLUGINS="openapi/v0.0.36" \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/slurm/slurmrestd_override.conf b/resources/playbook/roles/bibigrid/files/slurm/slurmrestd_override.conf new file mode 100644 index 000000000..eebbe66f7 --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/slurmrestd_override.conf @@ -0,0 +1,6 @@
+# Override systemd service ExecStart command to disable unixSocket of slurmrestd
+[Unit]
+After=slurmdbd.service
+[Service]
+ExecStart=
+ExecStart=/usr/sbin/slurmrestd $SLURMRESTD_OPTIONS \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/slurm/terminate.sh b/resources/playbook/roles/bibigrid/files/slurm/terminate.sh new file mode 100644 index 000000000..ba6e6232a --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/slurm/terminate.sh @@ -0,0 +1,21 @@
+#!/bin/bash
+# redirect stderr and stdout
+exec >> /var/log/slurm/terminate.out.log
+exec 2>> /var/log/slurm/terminate.err.log
+
+function log {
+  echo "$(date) $*"
+}
+
+log "Terminate invoked $0 $*"
+# extract all hosts from the argument list
+hosts=$(scontrol show hostnames "$1")
+for host in $hosts
+do
+  # ToDo: Implement better logging in case of an error
+  log "Delete instance ${host} from Zabbix host list."
+  python3 /usr/local/bin/zabbix_host_delete.py --pwd bibigrid "${host}"
+  log "Terminate instance ${host}"
+  openstack --os-cloud master server delete "${host}"
+  log "done"
+done
diff --git a/resources/playbook/roles/bibigrid/files/zabbix/index.html b/resources/playbook/roles/bibigrid/files/zabbix/index.html new file mode 100644 index 000000000..076bd06a9 --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/zabbix/index.html @@ -0,0 +1,40 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <title>BiBiGrid Overview</title>
+</head>
+<body>
+<div>
+    <h1>BiBiGrid Overview</h1>
+    <p>All you need on one page!</p>
+</div>
+
+<div>
+    <h2>ZABBIX</h2>
+    <p><a href="https://www.zabbix.com/">What is ZABBIX?</a></p>
+    <p>Get to ZABBIX by clicking <a href="/zabbix">here</a></p>
+    <p>Username: Admin</p>
+    <p>Password: bibigrid</p>
+</div>
+
+<div>
+    <h2>BiBiGrid</h2>
+    <p><a href="https://github.com/BiBiServ/bibigrid">What is BiBiGrid?</a> [old link]</p>
+    <p>You are already on a BiBiGrid-Cluster</p>
+</div>
+
+<div>
+    <h2>Slurm</h2>
+    <p><a href="https://slurm.schedmd.com/">What is Slurm?</a></p>
+</div>
+
+</body>
+</html> \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/zabbix/zabbix.conf b/resources/playbook/roles/bibigrid/files/zabbix/zabbix.conf new file mode 100644 index 000000000..17dac269b --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/zabbix/zabbix.conf @@ -0,0 +1,68 @@
+# Define /zabbix alias, this is the default
+<IfModule mod_alias.c>
+    Alias /zabbix /usr/share/zabbix
+</IfModule>
+
+<Directory "/usr/share/zabbix">
+    Options FollowSymLinks
+    AllowOverride None
+    Order allow,deny
+    Allow from all
+
+    <IfModule mod_php5.c>
+        php_value max_execution_time 300
+        php_value memory_limit 128M
+        php_value post_max_size 16M
+        php_value upload_max_filesize 2M
+        php_value max_input_time 300
+        php_value max_input_vars 10000
+        php_value always_populate_raw_post_data -1
+        php_value date.timezone Europe/Berlin
+    </IfModule>
+    <IfModule mod_php7.c>
+        php_value max_execution_time 300
+        php_value memory_limit 128M
+        php_value post_max_size 16M
+        php_value upload_max_filesize 2M
+        php_value max_input_time 300
+        php_value max_input_vars 10000
+        php_value always_populate_raw_post_data -1
+        php_value date.timezone Europe/Berlin
+    </IfModule>
+</Directory>
+
+<Directory "/usr/share/zabbix/conf">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory>
+
+<Directory "/usr/share/zabbix/app">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory>
+
+<Directory "/usr/share/zabbix/include">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory>
+
+<Directory "/usr/share/zabbix/local">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory> \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/files/zabbix/zabbix_host_delete.py b/resources/playbook/roles/bibigrid/files/zabbix/zabbix_host_delete.py new file mode 100644 index 000000000..765355849 --- /dev/null +++ b/resources/playbook/roles/bibigrid/files/zabbix/zabbix_host_delete.py @@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+import argparse
+
+from zabbix_api import ZabbixAPI, ZabbixAPIException
+
+parser = argparse.ArgumentParser(description='Delete hosts from Zabbix host database.')
+parser.add_argument("--url", help="Zabbix server url (default: http://127.0.0.1/zabbix )", type=str, default="http://127.0.0.1/zabbix")
+parser.add_argument("--user", help="Zabbix user (default: Admin)", type=str, default="Admin")
+parser.add_argument("--pwd", help="Zabbix user password", type=str, required=True)
+parser.add_argument("hosts", help="List of Zabbix host names", metavar='host', type=str, nargs="+")
+
+args = parser.parse_args()
+
+try:
+    zapi = ZabbixAPI(server=args.url)
+    zapi.login(args.user, args.pwd)
+    hosts = zapi.host.get({
+        "filter": {
+            "host": args.hosts
+        },
+        "output": "hostid"
+    })
+
+    for host in hosts:
+        result = zapi.host.delete([host["hostid"]])
+        print(f"Host(s) {','.join(result['hostids'])} removed from Zabbix database.")
+
+except ZabbixAPIException as e:
+    print(e)
+    exit(1) \ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/handlers/main.yml b/resources/playbook/roles/bibigrid/handlers/main.yml new file mode 100644 index 000000000..fa4d54312 --- /dev/null +++ b/resources/playbook/roles/bibigrid/handlers/main.yml @@ -0,0 +1,50 @@
+- name: nfs-server
+  systemd:
+    name: nfs-kernel-server
+    state: restarted
+
+- name: docker
+  systemd:
+    name: docker
+    state: restarted
+
+- name: munge
+  systemd:
+    name: munge
+    state: restarted
+
+- name: slurmdbd
+  systemd:
+    name: slurmdbd
+    state: restarted
+
+- name: slurmrestd
+  systemd:
+    name: slurmrestd
+    state: restarted
+    daemon_reload: true
+
+- name: slurmctld
+  systemd:
+    name: slurmctld
+    state: restarted
+
+- name: slurmd
+  systemd:
+    name: slurmd
+    state: restarted
+
+- name: zabbix-agent
+  systemd:
+    name: zabbix-agent
+    state: 
restarted
+
+- name: zabbix-server
+  systemd:
+    name: zabbix-server
+    state: restarted
+
+- name: apache2
+  systemd:
+    name: apache2
+    state: restarted
diff --git a/resources/playbook/roles/bibigrid/tasks/000-playbook-rights-server.yml b/resources/playbook/roles/bibigrid/tasks/000-playbook-rights-server.yml new file mode 100644 index 000000000..b57c5a54c --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/000-playbook-rights-server.yml @@ -0,0 +1,13 @@
+- name: Ensure existence of ansible group
+  group:
+    name: ansible
+    state: present
+
+- name: Change mode of /opt/playbook directory
+  file:
+    owner: ubuntu
+    group: ansible
+    path: /opt/playbook/
+    state: directory
+    recurse: true
+    mode: "0770"
diff --git a/resources/playbook/roles/bibigrid/tasks/001-apt.yml b/resources/playbook/roles/bibigrid/tasks/001-apt.yml new file mode 100644 index 000000000..5d3b58f74 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/001-apt.yml @@ -0,0 +1,53 @@
+- name: Debian based system
+  debug:
+    msg: Using apt to install packages
+
+- name: Disable auto-update/upgrade during ansible-run
+  copy:
+    src: disable-auto-upgrades.conf
+    dest: /etc/apt/apt.conf.d/20auto-upgrades
+    owner: root
+    group: root
+    mode: 0644
+
+- name: Wait for post-launch services to stop
+  service_facts:
+  register: result
+  until: result.ansible_facts.services['{{ item }}'].state == 'stopped'
+  retries: 12
+  delay: 30
+  with_items:
+    - "{{ wait_for_services }}"
+  when: wait_for_services is defined
+
+- name: Update
+  apt:
+    update_cache: "yes"
+    upgrade: "yes"
+
+- name: Install common packages
+  apt:
+    name:
+      - "apt-transport-https"
+      - "ca-certificates"
+      - "curl"
+      - "software-properties-common"
+      - "python3-pip"
+      - "python3-setuptools"
+    state: "present"
+
+- name: Add zabbix repositories
+  apt:
+    deb: "https://repo.zabbix.com/zabbix/6.2/{{ ansible_distribution | lower }}\
+      /pool/main/z/zabbix-release/zabbix-release_6.2-1%2B{{ ansible_distribution | lower }}\
+      {{ ansible_distribution_version }}_all.deb"
+    state: present
+    force: true
+  tags:
+    - skip_ansible_lint
+
+- name: Update apt cache
+  apt:
+    upgrade: 'yes'
+    update_cache: 'yes'
+    cache_valid_time: 86400 # One day
diff --git a/resources/playbook/roles/bibigrid/tasks/001-yum.yml b/resources/playbook/roles/bibigrid/tasks/001-yum.yml new file mode 100644 index 000000000..5287f29ca --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/001-yum.yml @@ -0,0 +1,3 @@
+- name: RHEL based system
+  debug:
+    msg: Using yum to install packages
diff --git a/resources/playbook/roles/bibigrid/tasks/004-hosts.yml b/resources/playbook/roles/bibigrid/tasks/004-hosts.yml new file mode 100644 index 000000000..041d32df7 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/004-hosts.yml @@ -0,0 +1,12 @@
+- name: Update the /etc/hosts file with master/worker information
+  lineinfile:
+    path: "/etc/hosts"
+    regexp: "{{ item.private_v4 }}\t{{ item.name }}"
+    line: "{{ item.private_v4 }}\t{{ item.name }}"
+    state: present
+    backup: true
+  with_items:
+    - "{{ master }}"
+    - "{{ workers }}"
+  tags:
+    - skip_ansible_lint
diff --git a/resources/playbook/roles/bibigrid/tasks/006-database.yml b/resources/playbook/roles/bibigrid/tasks/006-database.yml new file mode 100644 index 000000000..56155e2f9 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/006-database.yml @@ -0,0 +1,7 @@
+- name: Install mariadb-server
+  apt:
+    name: "mariadb-server"
+
+- name: Install PyMySQL via pip
+  pip:
+    name: pymysql
diff --git
a/resources/playbook/roles/bibigrid/tasks/010-bin-server.yml b/resources/playbook/roles/bibigrid/tasks/010-bin-server.yml new file mode 100644 index 000000000..f8635ef1f --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/010-bin-server.yml @@ -0,0 +1,23 @@ +# - name: Does folder exist +# local_action: stat path=~{{ ansible_facts.env.SUDO_USER }}/bin +# register: folder + +- name: Does folder exist + delegate_to: localhost + stat: + path: ~{{ ansible_facts.env.SUDO_USER }}/bin + register: folder + +- when: folder.stat.exists + block: + - name: Copy bin folder with owner and permission + copy: + src: ~/bin + dest: /usr/local + owner: ubuntu + group: ubuntu + mode: '0774' + - name: Delete origin folder + file: + path: ~{{ ansible_facts.env.SUDO_USER }}/bin + state: absent diff --git a/resources/playbook/roles/bibigrid/tasks/011-zabbix-agent.yml b/resources/playbook/roles/bibigrid/tasks/011-zabbix-agent.yml new file mode 100644 index 000000000..890aaf339 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/011-zabbix-agent.yml @@ -0,0 +1,103 @@ +- name: Install zabbix python-api + pip: + name: zabbix-api + +- name: Install zabbix agent + apt: + name: zabbix-agent + state: present + when: "ansible_distribution_file_variety == 'Debian'" + +- name: Install zabbix agent + dnf: + name: zabbix-agent + state: present + when: "ansible_distribution_file_variety == 'RedHat'" + +- name: Create zabbix_agent dropin directory + file: + path: /etc/zabbix/zabbix_agentd.d/ + state: directory + mode: 0755 + +- name: Create zabbix_agent log directory + file: + path: /var/log/zabbix + state: directory + owner: zabbix + group: zabbix + mode: 0755 + +- name: Adjust zabbix agent configuration + template: + src: zabbix/zabbix_agentd.conf.j2 + dest: /etc/zabbix/zabbix_agentd.conf + mode: 0644 + notify: zabbix-agent + +- name: Start and Enable zabbix-agent + systemd: + name: zabbix-agent + state: started + enabled: true + +- name: Install zabbix python-api + pip: + name: zabbix-api + +- name: Copy Zabbix Host delete script + copy: + src: zabbix/zabbix_host_delete.py + dest: /usr/local/bin/zabbix_host_delete.py + mode: 0755 + +# -------------------------------------- +# -- Add worker node as zabbix hosts -- +# -------------------------------------- + +- name: Add worker node to zabbix hosts + zabbix_host: + server_url: "http://127.0.0.1/zabbix/" + login_user: Admin + login_password: "{{ zabbix_conf.admin_password }}" + host_name: "{{ ansible_hostname }}" + visible_name: "{{ ansible_hostname }}" + description: add by ansible + status: enabled + state: present + host_groups: + - 'Linux servers' + link_templates: + - 'Linux by Zabbix Agent' + interfaces: + - type: 1 # agent + main: 1 # default + ip: "{{ ansible_default_ipv4.address }}" + useip: 1 # connect using host IP address + port: 10050 + become: false + when: "'master' not in group_names" + delegate_to: localhost + +- name: Add master node to zabbix hosts + zabbix_host: + server_url: "http://127.0.0.1/zabbix/" + login_user: Admin + login_password: "{{ zabbix_conf.admin_password }}" + host_name: "{{ ansible_hostname }}" + visible_name: "{{ ansible_hostname }}" + description: add by ansible + status: enabled + state: present + host_groups: + - 'Linux servers' + link_templates: + - 'Linux by Zabbix Agent' + interfaces: + - type: 1 # agent + main: 1 # default + ip: "127.0.0.1" + useip: 1 # connect using host IP address + port: 10050 + become: false + when: "'master' in group_names" diff --git a/resources/playbook/roles/bibigrid/tasks/011-zabbix-server.yml 
b/resources/playbook/roles/bibigrid/tasks/011-zabbix-server.yml new file mode 100644 index 000000000..ea830b4d1 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/011-zabbix-server.yml @@ -0,0 +1,151 @@ +# ------------------- +# -- Zabbix Server -- +# ------------------- + +- name: Debian based OS (Install database & Zabbix server) + apt: + name: + - mariadb-server + - zabbix-server-mysql + - zabbix-sql-scripts + state: present + update_cache: true + when: "ansible_distribution_file_variety == 'Debian' or ansible_distribution == 'Ubuntu'" + +- name: REHL based OS (Install database & Zabbix server) + dnf: + name: + - mariadb + - zabbix-server-mysql + - zabbix-sql-scripts + state: present + when: "ansible_distribution_file_variety == 'RedHat' " + +- name: Install python modul 'PyMySQL' + pip: + name: PyMySQL + +- name: Create zabbix database + mysql_db: + name: "{{ zabbix_conf.db }}" + encoding: 'utf8' + state: present + login_user: root + login_unix_socket: /run/mysqld/mysqld.sock + +- name: Create zabbix database user + mysql_user: + name: "{{ zabbix_conf.db_user }}" + password: "{{ zabbix_conf.db_password }}" + priv: '{{ zabbix_conf.db }}.*:ALL' + login_user: root + login_unix_socket: /run/mysqld/mysqld.sock + +- name: Check if zabbix schema exists + shell: "echo describe users | mysql --user={{ zabbix_conf.db_user }} --password={{ zabbix_conf.db_password }} zabbix" + ignore_errors: true + changed_when: false + register: zabbix_schema_exists + +- name: Import initial db schema + shell: "set -o pipefail && zcat /usr/share/zabbix-sql-scripts/mysql/server.sql.gz | \ + mysql --user={{ zabbix_conf.db_user }} --password={{ zabbix_conf.db_password }} zabbix" + when: zabbix_schema_exists.rc == 1 + args: + executable: bash + tags: + - skip_ansible_lint + +- name: Update Admin password + mysql_query: + login_db: "{{ zabbix_conf.db }}" + login_user: "{{ zabbix_conf.db_user }}" + login_password: "{{ zabbix_conf.db_password }}" + query: update users set passwd='{{ zabbix_conf.admin_password | password_hash("bcrypt") }}' where username='Admin' + +- name: Adjust zabbix server configuration + template: + src: zabbix/zabbix_server.conf.j2 + dest: /etc/zabbix/zabbix_server.conf + mode: 0644 + notify: zabbix-server + +- name: Start and Enable zabbix-server + systemd: + name: zabbix-server + state: started + enabled: true + +# ------------------------- +# -- Zabbix Web Frontend -- +# ------------------------- + +- name: Zabbix Web Frontend related packages for Debian based OS + when: "ansible_distribution_file_variety == 'Debian'" + block: + - name: Install apache web server and php + apt: + name: + - apache2 + - php + state: present + + - name: Install zabbix web frontend + apt: + name: + - zabbix-frontend-php + - zabbix-apache-conf + state: present + +- name: Zabbix Web Frontend related packages for REHL based OS + when: "ansible_distribution_file_variety == 'RedHat'" + block: + - name: Install apache web server + dnf: + name: apache2 + state: present + + - name: Install zabbix web frontend + dnf: + name: + - zabbix-frontend-php + - zabbix-apache-conf + state: present + + +- name: Create Zabbix web dir (if not exists) + file: + path: /etc/zabbix/web + state: directory + owner: root + group: root + mode: '0755' + +- name: Adjust zabbix web frontend configuration + notify: apache2 + block: + - name: Adjust apache.conf + template: + src: zabbix/apache.conf.j2 + dest: /etc/zabbix/apache.conf + mode: 0644 + - name: Adjust zabbix.conf + template: + src: zabbix/zabbix.conf.php.j2 + dest: 
/etc/zabbix/web/zabbix.conf.php + mode: 0644 + +- name: Start and enable apache web server + systemd: + name: apache2 + state: started + enabled: true + +- name: Change default page of apache + copy: + src: zabbix/index.html + dest: /var/www/html/index.html + mode: 0644 + +- name: Force all notified handlers to run at this point + meta: flush_handlers diff --git a/resources/playbook/roles/bibigrid/tasks/020-disk-server.yml b/resources/playbook/roles/bibigrid/tasks/020-disk-server.yml new file mode 100644 index 000000000..e267dc684 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/020-disk-server.yml @@ -0,0 +1,18 @@ +- name: Create /vol bind mount from /mnt ephemeral + mount: + path: /vol + src: /mnt + fstype: none + opts: bind,auto + state: mounted + when: master.flavor.ephemeral > 0 + +- name: Mount disks + mount: + path: "{{ item.dst }}" + src: "{{ item.src }}" + fstype: ext4 + state: mounted + with_items: + - "{{ master.disks }}" + when: master.disks is defined diff --git a/resources/playbook/roles/bibigrid/tasks/020-disk-worker.yml b/resources/playbook/roles/bibigrid/tasks/020-disk-worker.yml new file mode 100644 index 000000000..77f31b16f --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/020-disk-worker.yml @@ -0,0 +1,14 @@ +- when: "'ephemeral' in group_names" + block: + - name: Mount ephemeral + mount: + path: /vol/scratch + src: /mnt + fstype: none + opts: bind,auto + state: mounted + - name: Set 0777 rights for ephemeral mount + file: + path: /vol/scratch + state: directory + mode: 0777 diff --git a/resources/playbook/roles/bibigrid/tasks/020-disk.yml b/resources/playbook/roles/bibigrid/tasks/020-disk.yml new file mode 100644 index 000000000..08b4b9802 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/020-disk.yml @@ -0,0 +1,35 @@ +- name: Generate directory structure available on all hosts + file: + path: /etc/bibigrid + state: directory + owner: root + group: '{{ ansible_distribution | lower }}' + mode: 0775 + +- name: Create /vol/ directory with rights 0775 owned by root + file: + path: /vol/ + state: directory + owner: root + group: '{{ ansible_distribution | lower }}' + mode: 0775 + +- name: Create /vol/spool/ directory with rights 0777 + file: + path: /vol/spool/ + state: directory + mode: 0777 + +- name: Change rights of /opt directory to 0775 and set group to ansible_distribution + file: + path: /opt + state: directory + owner: root + group: '{{ ansible_distribution | lower }}' + mode: 0775 + +- name: Create link in '{{ ansible_distribution | lower }}' home + file: + src: /vol/ + dest: '/home/{{ ansible_distribution | lower }}/vol' + state: link diff --git a/resources/playbook/roles/bibigrid/tasks/025-nfs-server.yml b/resources/playbook/roles/bibigrid/tasks/025-nfs-server.yml new file mode 100644 index 000000000..78cb082ee --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/025-nfs-server.yml @@ -0,0 +1,25 @@ +- name: Install NFS server package + apt: + name: "nfs-kernel-server" + state: present + +- name: Create export directories + file: + path: "{{ item.src }}" + state: directory + owner: root + group: root + mode: 0777 + with_items: + - "{{ nfs_mounts }}" + +- name: Configure nfs exports + lineinfile: + path: /etc/exports + state: present + regexp: '^{{ item.src }}' + line: '{{ item.src }} {{ cluster_cidrs[0].provider_cidrs[0] }}(rw,nohide,insecure,no_subtree_check,async)' + with_items: + - "{{ nfs_mounts }}" + notify: + - nfs-server diff --git a/resources/playbook/roles/bibigrid/tasks/025-nfs-worker.yml 
b/resources/playbook/roles/bibigrid/tasks/025-nfs-worker.yml new file mode 100644 index 000000000..539d7f01e --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/025-nfs-worker.yml @@ -0,0 +1,30 @@ +- name: Install NFS common package + apt: + name: "nfs-common" + state: present + +- name: Wait for nfs-server available + wait_for: + host: "{{ master.private_v4 }}" + port: 2049 + delay: 2 + state: started + +- name: Create mount points + file: + path: "{{ item.dst }}" + state: directory + owner: root + group: root + mode: 0777 + with_items: + - "{{ nfs_mounts }}" + +- name: Mount shares + mount: + path: "{{ item.dst }}" + src: "{{ master.private_v4 }}:{{ item.src }}" + fstype: nfs4 + state: mounted + with_items: + - "{{ nfs_mounts }}" diff --git a/resources/playbook/roles/bibigrid/tasks/030-docker.yml b/resources/playbook/roles/bibigrid/tasks/030-docker.yml new file mode 100644 index 000000000..07830b2b4 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/030-docker.yml @@ -0,0 +1,40 @@ +- name: Install Docker + apt: + name: docker.io + state: present + tags: install + +- name: Configure Docker + copy: + content: |- + { + "mtu":{{ ansible_default_ipv4.mtu }} + } + dest: /etc/docker/daemon.json + owner: root + group: root + mode: 0644 + notify: + docker + +- name: Create docker group and change GID + group: + name: docker + gid: 1234 + state: present + +- name: Append ubuntu user to group docker + user: + name: ubuntu + append: true + groups: docker + # Add when condition in case of debian + +- name: Install docker-api via PIP + pip: + name: ['docker', 'docker-compose'] # reevaluate + +- name: (Re-)start docker socket + systemd: + name: docker.socket + state: restarted diff --git a/resources/playbook/roles/bibigrid/tasks/042-slurm-server.yml b/resources/playbook/roles/bibigrid/tasks/042-slurm-server.yml new file mode 100644 index 000000000..bae35efaf --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/042-slurm-server.yml @@ -0,0 +1,214 @@ +- name: Create slurm db + mysql_db: + name: "{{ slurm_conf.db }}" + state: present + login_unix_socket: /var/run/mysqld/mysqld.sock + +- name: Create slurm db user + mysql_user: + name: "{{ slurm_conf.db_user }}" + password: "{{ slurm_conf.db_password }}" + priv: '*.*:ALL' + state: present + login_unix_socket: /var/run/mysqld/mysqld.sock + +- name: Install Slurm database and RestAPI packages + apt: + name: + - slurmdbd + - slurmrestd + +- name: Create slurmdb configuration file + template: + src: slurm/slurmdbd.conf + dest: /etc/slurm/slurmdbd.conf + owner: slurm + group: root + mode: "0600" + notify: + - slurmdbd + - slurmctld + +- name: Generate random JWT Secret + command: + cmd: "dd if=/dev/random of=/etc/slurm/jwt-secret.key bs=32 count=1" + creates: "/etc/slurm/jwt-secret.key" # only run the command when file is not present + +- name: Change file Properties of JWT Secret file + file: + path: /etc/slurm/jwt-secret.key + owner: slurm + group: slurm + mode: "0600" + +- name: Copy env file for configuration of slurmrestd + copy: + src: slurm/slurmrestd_default + dest: /etc/default/slurmrestd + owner: root + group: root + mode: "0644" + notify: + - slurmdbd + - slurmrestd + +- name: Create Service Directory + file: + path: /etc/systemd/system/slurmrestd.service.d + group: root + owner: root + mode: "0755" + state: directory + +- name: Copy systemd Service override file + copy: + src: slurm/slurmrestd_override.conf + dest: /etc/systemd/system/slurmrestd.service.d/override.conf + mode: "0644" + owner: root + group: root + notify: + - 
slurmrestd + +- name: Start slurm explicit after all dependencies are configured + systemd: + name: slurmctld + state: started + +- name: Register Slurm users home dir + shell: "set -o pipefail && grep slurm /etc/passwd | cut -d ':' -f 6" + register: slurm_home + changed_when: false + args: + executable: bash + +# - name: Install python3-virtualenv +# apt: +# name: python3-virtualenv +# state: present + +- name: Change mode of /opt/slurm directory + file: + owner: slurm + group: ansible + path: /opt/slurm/ + state: directory + mode: "0770" + +# - name: Create slurm venv in opt +# command: +# cmd: virtualenv /opt/slurm/venv -p python3.10 +# creates: "/opt/slurm/venv" + +- name: Ensures /etc/slurm dir exists + file: + path: /etc/slurm/ + state: directory + mode: 0755 + +- name: Ensures /opt/slurm/.ssh/ dir exists + file: + path: /opt/slurm/.ssh/ + group: slurm + owner: slurm + state: directory + mode: 0700 + +- name: Copy private key (openstack keypair) + copy: + src: ~/.ssh/id_ecdsa + dest: /opt/slurm/.ssh/id_ecdsa + owner: slurm + group: slurm + mode: "0600" + +- name: Copy create program script (power) + copy: + src: slurm/create.sh + dest: /opt/slurm/create.sh + owner: slurm + group: ansible + mode: "0550" + +- name: Copy terminate program script (power) + copy: + src: slurm/terminate.sh + dest: /opt/slurm/terminate.sh + owner: slurm + group: ansible + mode: "0550" + +- name: Copy fail program script (power) + copy: + src: slurm/fail.sh + dest: /opt/slurm/fail.sh + owner: slurm + group: ansible + mode: "0550" + +- name: Copy "create_server.py" script + copy: + src: slurm/create_server.py + dest: /usr/local/bin/create_server.py + owner: slurm + group: ansible + mode: "0750" + + +# - name: Copy requirements to correct location +# copy: +# src: slurm/requirements.txt +# dest: /opt/slurm/requirements.txt +# #owner: slurm +# group: ansible +# mode: "0550" + +# - name: Install requirements +# pip: requirements=/opt/slurm/requirements.txt +# virtualenv=/opt/slurm/venv + +- name: Install python dependencies + pip: + name: + - python-openstackclient==6.0.0 + - os_client_config + - paramiko + - ansible-runner + +- name: 'Add default user to ansible group' + user: + name: '{{ ssh_user }}' + append: true + groups: + - ansible + + +- when: slurm_home.stdout != '/opt/slurm' + block: + - name: Stop Slurm Services + systemd: + name: "{{ item }}" + state: stopped + with_items: + - slurmctld + - slurmd + - slurmdbd + - slurmrestd + - name: Add slurm user to ansible and give slurm user a home + user: + name: slurm + append: true + create_home: true + groups: + - ansible + home: /opt/slurm + shell: /bin/false + - name: Start Slurm Services + systemd: + name: "{{ item }}" + state: started + with_items: + - slurmctld + - slurmd + - slurmdbd + - slurmrestd diff --git a/resources/playbook/roles/bibigrid/tasks/042-slurm.yml b/resources/playbook/roles/bibigrid/tasks/042-slurm.yml new file mode 100644 index 000000000..5d6b3cf2c --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/042-slurm.yml @@ -0,0 +1,54 @@ +- name: Install Slurm base packages and dependencies + apt: + name: + - slurm-wlm + - munge + +- name: Create new secret (Munge) + copy: + content: '{{ slurm_conf.munge_key }}' + dest: /etc/munge/munge.key + owner: munge + group: munge + mode: 0600 + notify: + - munge + +- name: Disable slurmctld (on worker) + systemd: + name: slurmctld + enabled: false + when: "'master' not in group_names" + +- name: SLURM configuration + template: + src: slurm/slurm.conf + dest: /etc/slurm/slurm.conf + owner: slurm 
+    group: root
+    mode: 0444
+  notify:
+    - slurmctld
+    - slurmd
+
+- name: SLURM cgroup configuration
+  copy:
+    src: slurm/cgroup.conf
+    dest: /etc/slurm/cgroup.conf
+    owner: slurm
+    group: root
+    mode: 0444
+  notify:
+    - slurmctld
+    - slurmd
+
+- name: SLURM cgroup allowed devices conf
+  copy:
+    src: slurm/cgroup_allowed_devices_file.conf
+    dest: /etc/slurm/cgroup_allowed_devices_file.conf
+    owner: root
+    group: root
+    mode: 0444
+  notify:
+    - slurmctld
+    - slurmd
diff --git a/resources/playbook/roles/bibigrid/tasks/999-theia.yml b/resources/playbook/roles/bibigrid/tasks/999-theia.yml new file mode 100644 index 000000000..578c41294 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/999-theia.yml @@ -0,0 +1,110 @@
+- name: Install packages
+  apt:
+    name: ["python3", "make", "g++", "curl", "git", "pkg-config", "libsecret-1-dev"]
+  when: ansible_distribution_file_variety == 'Debian'
+
+- name: Install packages
+  yum:
+    name: ["python3", "curl", "git", "make", "gcc", "gcc-c++"]
+  when: ansible_distribution_file_variety == 'RedHat'
+
+- name: Create NVM install dir
+  file:
+    path: "{{ nvm_install_dir }}"
+    state: directory
+    mode: 0755
+
+- name: Set fact 'theia_ide_user' when not defined
+  set_fact:
+    theia_ide_user: "{{ ansible_env.SUDO_USER }}"
+  when: theia_ide_user is not defined
+
+- when: ide_conf.workspace is defined and ide_conf.workspace != "${HOME}"
+  block:
+    - name: Check if given workspace exists
+      stat:
+        path: "{{ ide_conf.workspace }}"
+      register: workspace_path
+
+    - name: Set fact 'theia_ide_workspace' when defined workspace exists
+      set_fact:
+        theia_ide_workspace: "{{ ide_conf.workspace }}"
+      when: workspace_path.stat.exists and workspace_path.stat.isdir
+
+
+- name: Set fact 'theia_ide_workspace' to '${HOME}' when not defined
+  set_fact:
+    theia_ide_workspace: "${HOME}"
+  when: ide_conf.workspace is not defined
+
+- name: Install nvm
+  shell: |
+    set -o pipefail && curl -o- https://raw.githubusercontent.com/creationix/nvm/v0.39.1/install.sh | NVM_DIR={{ nvm_install_dir }} /bin/bash
+  args:
+    creates: "{{ nvm_install_dir }}/nvm.sh"
+    executable: bash
+    warn: false
+
+- name: Install node 16 and yarn
+  shell: |
+    source {{ nvm_install_dir }}/nvm.sh
+    nvm install 16
+    npm install -g yarn
+  args:
+    executable: bash
+  tags:
+    - skip_ansible_lint
+
+- when: not ide_conf.build
+  block:
+    - name: Install prebuilt theia
+      shell: |
+        curl -o- https://bibiserv.cebitec.uni-bielefeld.de/resources/bibigrid/plugins/theia-ide_node16.tar.gz | tar -xzf -
+      args:
+        chdir: "/opt"
+        creates: "{{ theia_ide_install_dir }}"
+      tags:
+        - skip_ansible_lint
+
+- when: ide_conf.build|default(false)|bool
+  block:
+    - name: Create IDE build dir
+      file:
+        path: "{{ theia_ide_install_dir }}"
+        state: directory
+        mode: 0755
+
+    - name: Copy IDE configuration to IDE build dir
+      template:
+        src: theia/package.json.j2
+        dest: "{{ theia_ide_install_dir }}/package.json"
+        mode: 0644
+
+    - name: Build ide
+      shell: |
+        source {{ nvm_install_dir }}/nvm.sh
+        yarn
+        yarn theia build
+      args:
+        chdir: "{{ theia_ide_install_dir }}"
+        executable: bash
+      tags:
+        - skip_ansible_lint
+
+- name: Generate IDE start script
+  template:
+    src: theia/theia-ide.sh.j2
+    dest: "{{ theia_ide_install_dir }}/theia-ide.sh"
+    mode: 0755
+
+- name: Generate systemd service
+  template:
+    src: theia/theia-ide.service.j2
+    dest: /etc/systemd/system/theia-ide.service
+    mode: 0644
+
+- name: Enable and Start service
+  systemd:
+    name: theia-ide.service
+    enabled: true
+    state: started
diff --git
a/resources/playbook/roles/bibigrid/tasks/main.yml b/resources/playbook/roles/bibigrid/tasks/main.yml new file mode 100644 index 000000000..01a949e10 --- /dev/null +++ b/resources/playbook/roles/bibigrid/tasks/main.yml @@ -0,0 +1,124 @@
+# - name: 'YUM - async task'
+#   yum:
+#     name: docker-io
+#     state: installed
+#   async: 1000
+#   poll: 0
+#   register: yum_sleeper
+#
+# - name: 'YUM - check on async task'
+#   async_status:
+#     jid: "{{ yum_sleeper.ansible_job_id }}"
+#   register: job_result
+#   until: job_result.finished
+#   retries: 30
+
+- name: Setup common software and dependencies - server rights
+  when: "'master' in group_names"
+  block:
+    - name: Running 000-playbook-rights-server.yml
+      debug:
+        msg: "[BIBIGRID] Update permissions"
+    - import_tasks: 000-playbook-rights-server.yml
+
+- name: Setup common software and dependencies for Debian
+  when: "ansible_distribution_file_variety == 'Debian'"
+  tags: ["pkg"]
+  block:
+    - name: Running 001-apt.yml
+      debug:
+        msg: "[BIBIGRID] Setup common software and dependencies"
+    - import_tasks: 001-apt.yml
+
+- name: Setup common software and dependencies for RedHat
+  when: "ansible_distribution_file_variety == 'RedHat'"
+  tags: ["pkg"]
+  block:
+    - debug:
+        msg: "[BIBIGRID] Setup common software and dependencies"
+    - import_tasks: 001-yum.yml
+
+- name: Add all hosts (IP/AC_NAME) to /etc/hosts for local DNS lookup.
+  when: local_dns_lookup|default(false)|bool
+  tags: ['hosts']
+  block:
+    - debug:
+        msg: "[BIBIGRID] Add all hosts (IP/AC_NAME) to /etc/hosts for local DNS lookup."
+    - import_tasks: 004-hosts.yml
+
+- name: Configure database
+  when: "'master' in group_names"
+  tags: ["database", "slurm"]
+  block:
+    - debug:
+        msg: "[BIBIGRID] Configure database"
+    - import_tasks: 006-database.yml
+
+- name: Setup additional binary executables /usr/local/bin/
+  when:
+    - "'master' in group_names"
+  tags: ["bin"]
+  block:
+    - debug:
+        msg: "[BIBIGRID] Setup additional binary executables /usr/local/bin/"
+    - import_tasks: 010-bin-server.yml
+
+- name: Setup Zabbix Agent
+  when:
+    - enable_zabbix|default(false)|bool
+  tags: ["zabbix"]
+  block:
+    - debug:
+        msg: "[BIBIGRID] Setup Zabbix Agent"
+    - import_tasks: 011-zabbix-server.yml
+      when: "'master' in group_names"
+    - import_tasks: 011-zabbix-agent.yml
+
+- debug:
+    msg: "[BIBIGRID] Generate directory structure available on all hosts"
+- name: Generate general directory structure available on all hosts
+  import_tasks: 020-disk.yml
+- name: Generate server directory structure available on all hosts
+  import_tasks: 020-disk-server.yml
+  when: "'master' in group_names"
+- name: Generate worker directory structure available on all hosts
+  import_tasks: 020-disk-worker.yml
+  when: "'master' not in group_names"
+  tags: ["disk"]
+
+- name: Setup NFS
+  when:
+    - enable_nfs|default(false)|bool
+  tags: ["nfs"]
+  block:
+    - debug:
+        msg: "[BIBIGRID] Setup NFS"
+    - import_tasks: 025-nfs-server.yml
+      when: "'master' in group_names"
+    - import_tasks: 025-nfs-worker.yml
+      when: "'master' not in group_names"
+
+- name: Setup Docker
+  debug:
+    msg: "[BIBIGRID] Setup Docker"
+- import_tasks: 030-docker.yml
+  tags: ["docker"]
+
+- name: Setup Slurm
+  tags: ["slurm"]
+  block:
+    - debug:
+        msg: "[BIBIGRID] Setup Slurm"
+    - import_tasks: 042-slurm.yml
+    - import_tasks: 042-slurm-server.yml
+      when: "'master' in group_names"
+
+- name: Setup Theia
+  when:
+    - enable_ide|default(false)|bool
+    - "'master' in group_names"
+  tags: ["theia"]
+  block:
+    - debug:
+        msg: "[BIBIGRID] Setup Theia"
+    - import_tasks: 999-theia.yml
diff --git
a/resources/playbook/roles/bibigrid/templates/slurm/slurm.conf b/resources/playbook/roles/bibigrid/templates/slurm/slurm.conf new file mode 100644 index 000000000..c448386aa --- /dev/null +++ b/resources/playbook/roles/bibigrid/templates/slurm/slurm.conf @@ -0,0 +1,99 @@
+# MANAGEMENT POLICIES
+SlurmctldHost={{ master.name | lower }}
+AuthType=auth/munge
+CryptoType=crypto/munge
+SlurmUser=slurm
+AuthAltTypes=auth/jwt
+AuthAltParameters=jwt_key=/etc/slurm/jwt-secret.key
+
+ClusterName=bibigrid
+
+MpiDefault=none
+ProctrackType=proctrack/linuxproc
+ReturnToService=2
+SwitchType=switch/none
+TaskPlugin=task/none
+#TaskPlugin=task/cgroup
+JobAcctGatherType=jobacct_gather/linux
+
+# see https://slurm.schedmd.com/slurm.conf.html#OPT_cloud_dns:~:text=for%20additional%20details.-,cloud_dns,-By%20default%2C%20Slurm
+# SlurmctldParameters=cloud_dns
+# Does not work as assumed: with this parameter slurmctld already tries to resolve all clients at startup,
+# which of course fails.
+
+# PRIORITY
+PriorityType=priority/multifactor
+PriorityFavorSmall=NO
+PriorityWeightJobSize=100000
+AccountingStorageTRES=cpu,mem,gres/gpu
+PriorityWeightTRES=cpu=1000,mem=2000,gres/gpu=3000
+
+# STATE INFO
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+
+# CONNECTION
+SlurmctldPort=6817
+SlurmdPort=6818
+
+# DIRECTORIES
+#JobCheckpointDir=/var/lib/slurm/job_checkpoint
+SlurmdSpoolDir=/var/lib/slurm/slurmd
+StateSaveLocation=/var/lib/slurm/state_checkpoint
+
+# TIMERS
+InactiveLimit=0
+KillWait=30
+MinJobAge=300
+SlurmctldTimeout=120
+SlurmdTimeout=300
+Waittime=0
+
+# SCHEDULING
+SchedulerType=sched/backfill
+SelectType=select/cons_tres
+SelectTypeParameters=CR_Core
+
+# ACCOUNTING
+AccountingStorageType=accounting_storage/slurmdbd
+AccountingStoreFlags=job_comment
+AccountingStorageHost={{ master.name | lower }}
+AccountingStorageUser={{ slurm_conf.db_user }}
+
+# LOGGING
+SlurmctldDebug=info
+SlurmctldLogFile=/var/log/slurm/slurmctld.log
+SlurmdDebug=info
+SlurmdLogFile=/var/log/slurm/slurmd.log
+
+# COMPUTE NODES
+{% set mem = master.flavor.ram // 1024 * 1000 %}
+NodeName={{ master.name }} SocketsPerBoard={{ master.flavor.vcpus }} CoresPerSocket=1 RealMemory={{ mem - [mem // 2, 16000] | min }} State=UNKNOWN
+{% set sl = [] %}
+{% for type in workers %}
+{% set mem = type.flavor.ram // 1024 * 1000 %}
+NodeName={{ type.name }} SocketsPerBoard={{ type.flavor.vcpus }} CoresPerSocket=1 RealMemory={% if mem < 16001 %}{{ mem - [ mem // 16, 512] | max }}{% endif %}{% if mem > 16000 %}{{ mem - [mem // 16, 4000] | min }}{% endif %} State=CLOUD {{ sl.append(type.name)}}
+{% endfor %}
+
+PartitionName=bibigrid Nodes={% if use_master_as_compute %}{{master.name | lower}},{%endif%}{{sl|join(",")}} default=yes
+
+
+# POWER / ELASTIC SCHEDULING
+ResumeProgram=/opt/slurm/create.sh
+# Resume time is 15 minutes (900 seconds)
+ResumeTimeout= {{ slurm_conf.elastic_scheduling.ResumeTimeout }}
+SuspendProgram=/opt/slurm/terminate.sh
+# Suspend time is 10 minutes (600 seconds)
+SuspendTime= {{ slurm_conf.elastic_scheduling.SuspendTime }}
+# Excludes {{ master.name }} from suspend
+SuspendExcNodes={{ master.name }}
+# Maximum number of nodes
+TreeWidth= {{ slurm_conf.elastic_scheduling.TreeWidth }}
+# Do not cache dns names
+CommunicationParameters=NoAddrCache
+# Mark node status idle on suspend so DOWN is removed
+SlurmctldParameters=idle_on_node_suspend
+# Show slurm nodes all the time
+PrivateData=cloud
+# return node to idle when startup fails
+ResumeFailProgram=/opt/slurm/fail.sh
diff
--git a/resources/playbook/roles/bibigrid/templates/slurm/slurmdbd.conf b/resources/playbook/roles/bibigrid/templates/slurm/slurmdbd.conf new file mode 100644 index 000000000..b663a3fba --- /dev/null +++ b/resources/playbook/roles/bibigrid/templates/slurm/slurmdbd.conf @@ -0,0 +1,28 @@ +ArchiveEvents=yes +ArchiveJobs=yes +ArchiveResvs=yes +ArchiveSteps=no +ArchiveSuspend=no +ArchiveTXN=no +ArchiveUsage=no +#ArchiveScript=/usr/sbin/slurm.dbd.archive +AuthInfo=/var/run/munge/munge.socket.2 +AuthType=auth/munge +DbdHost={{ master.name | lower }} +DebugLevel=debug +PurgeEventAfter=1month +PurgeJobAfter=1month +PurgeResvAfter=1month +PurgeStepAfter=1month +PurgeSuspendAfter=1month +PurgeTXNAfter=1month +PurgeUsageAfter=1month +LogFile=/var/log/slurmdbd.log +PidFile=/var/run/slurmdbd.pid +SlurmUser=slurm +StorageLoc={{ slurm_conf.db }} +StoragePass={{ slurm_conf.db_password }} +StorageType=accounting_storage/mysql +StorageUser={{ slurm_conf.db_user }} +StoragePort=3306 +StorageHost=127.0.0.1 \ No newline at end of file diff --git a/resources/playbook/roles/bibigrid/templates/theia/package.json.j2 b/resources/playbook/roles/bibigrid/templates/theia/package.json.j2 new file mode 100644 index 000000000..02037c347 --- /dev/null +++ b/resources/playbook/roles/bibigrid/templates/theia/package.json.j2 @@ -0,0 +1,41 @@ +{ + "private": true, + "dependencies": { + "@theia/callhierarchy": "{{ theia_version }}", + "@theia/file-search": "{{ theia_version }}", + "@theia/git": "{{ theia_version }}", + "@theia/markers": "{{ theia_version }}", + "@theia/messages": "{{ theia_version }}", + "@theia/navigator": "{{ theia_version }}", + "@theia/outline-view": "{{ theia_version }}", + "@theia/plugin-ext-vscode": "{{ theia_version }}", + "@theia/preferences": "{{ theia_version }}", + "@theia/preview": "{{ theia_version }}", + "@theia/search-in-workspace": "{{ theia_version }}", + "@theia/terminal": "{{ theia_version }}", + "@theia/vsx-registry": "{{ theia_version }}" + }, + "devDependencies": { + "@theia/cli": "{{ theia_version }}" + }, + "scripts": { + "prepare": "yarn run clean && yarn build && yarn run download:plugins", + "clean": "theia clean", + "build": "theia build ", + "start": "theia start --plugins=local-dir:plugins", + "download:plugins": "theia download:plugins" + }, + "theiaPluginsDir": "plugins", + "theiaPlugins": { + "vscode-builtin-extensions-pack": "https://open-vsx.org/api/eclipse-theia/builtin-extension-pack/1.50.1/file/eclipse-theia.builtin-extension-pack-1.50.1.vsix" + }, + "theiaPluginsExcludeIds": [ + "ms-vscode.js-debug-companion", + "vscode.extension-editing", + "vscode.git", + "vscode.git-ui", + "vscode.github", + "vscode.github-authentication", + "vscode.microsoft-authentication" + ] +} \ No newline at end of file diff --git a/resources/playbook/roles/bibigrid/templates/theia/theia-ide.service.j2 b/resources/playbook/roles/bibigrid/templates/theia/theia-ide.service.j2 new file mode 100644 index 000000000..2381198b6 --- /dev/null +++ b/resources/playbook/roles/bibigrid/templates/theia/theia-ide.service.j2 @@ -0,0 +1,14 @@ +[Unit] +Description=Theia-IDE service for user {{ theia_ide_user }} +After=network.target +StartLimitIntervalSec=0 + +[Service] +Type=simple +Restart=always +RestartSec=1 +User={{ theia_ide_user }} +ExecStart={{ theia_ide_install_dir }}/theia-ide.sh {{ theia_ide_workspace }} {{ theia_ide_bind_address }} {{ theia_ide_bind_port }} + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git 
a/resources/playbook/roles/bibigrid/templates/theia/theia-ide.sh.j2 b/resources/playbook/roles/bibigrid/templates/theia/theia-ide.sh.j2
new file mode 100644
index 000000000..ac74caaa3
--- /dev/null
+++ b/resources/playbook/roles/bibigrid/templates/theia/theia-ide.sh.j2
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+source {{ nvm_install_dir }}/nvm.sh
+cd "$(dirname "${0}")"
+yarn theia start "${1}" --hostname "${2}" --port "${3}"
\ No newline at end of file
diff --git a/resources/playbook/roles/bibigrid/templates/zabbix/apache.conf.j2 b/resources/playbook/roles/bibigrid/templates/zabbix/apache.conf.j2
new file mode 100644
index 000000000..567bb70cb
--- /dev/null
+++ b/resources/playbook/roles/bibigrid/templates/zabbix/apache.conf.j2
@@ -0,0 +1,68 @@
+# Define /zabbix alias, this is the default
+<IfModule mod_alias.c>
+    Alias /zabbix /usr/share/zabbix
+</IfModule>
+
+<Directory "/usr/share/zabbix">
+    Options FollowSymLinks
+    AllowOverride None
+    Order allow,deny
+    Allow from all
+
+    <IfModule mod_php5.c>
+        php_value max_execution_time 300
+        php_value memory_limit 128M
+        php_value post_max_size 16M
+        php_value upload_max_filesize 2M
+        php_value max_input_time 300
+        php_value max_input_vars 10000
+        php_value always_populate_raw_post_data -1
+        php_value date.timezone {{ zabbix_conf.timezone }}
+    </IfModule>
+    <IfModule mod_php7.c>
+        php_value max_execution_time 300
+        php_value memory_limit 128M
+        php_value post_max_size 16M
+        php_value upload_max_filesize 2M
+        php_value max_input_time 300
+        php_value max_input_vars 10000
+        php_value always_populate_raw_post_data -1
+        php_value date.timezone {{ zabbix_conf.timezone }}
+    </IfModule>
+</Directory>
+
+<Directory "/usr/share/zabbix/conf">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory>
+
+<Directory "/usr/share/zabbix/app">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory>
+
+<Directory "/usr/share/zabbix/include">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory>
+
+<Directory "/usr/share/zabbix/local">
+    Order deny,allow
+    Deny from all
+    <files *.php>
+        Order deny,allow
+        Deny from all
+    </files>
+</Directory>
diff --git a/resources/playbook/roles/bibigrid/templates/zabbix/zabbix.conf.php.j2 b/resources/playbook/roles/bibigrid/templates/zabbix/zabbix.conf.php.j2
new file mode 100644
index 000000000..18720d99a
--- /dev/null
+++ b/resources/playbook/roles/bibigrid/templates/zabbix/zabbix.conf.php.j2
@@ -0,0 +1,19 @@
+# \ ' " ` * ? [ ] { } ~ $ ! & ; ( ) < > | # @
+# Additionally, newline characters are not allowed.
+# 0 - do not allow
+# 1 - allow
+#
+# Mandatory: no
+# Range: 0-1
+# Default:
+# UnsafeUserParameters=0
+
+### Option: UserParameter
+# User-defined parameter to monitor. There can be several user-defined parameters.
+# Format: UserParameter=<key>,<shell command>
+# See 'zabbix_agentd' directory for examples.
+#
+# Mandatory: no
+# Default:
+# UserParameter=
+
+####### LOADABLE MODULES #######
+
+### Option: LoadModulePath
+# Full path to location of agent modules.
+# Default depends on compilation options.
+# To see the default path run command "zabbix_agentd --help".
+#
+# Mandatory: no
+# Default:
+# LoadModulePath=${libdir}/modules
+
+### Option: LoadModule
+# Module to load at agent startup. Modules are used to extend functionality of the agent.
+# Format: LoadModule=<module.so>
+# The modules must be located in directory specified by LoadModulePath.
+# It is allowed to include multiple LoadModule parameters.
+#
+# Mandatory: no
+# Default:
+# LoadModule=
+
+####### TLS-RELATED PARAMETERS #######
+
+### Option: TLSConnect
+# How the agent should connect to server or proxy. Used for active checks.
+# Only one value can be specified: +# unencrypted - connect without encryption +# psk - connect using TLS and a pre-shared key +# cert - connect using TLS and a certificate +# +# Mandatory: yes, if TLS certificate or PSK parameters are defined (even for 'unencrypted' connection) +# Default: +# TLSConnect=unencrypted + +### Option: TLSAccept +# What incoming connections to accept. +# Multiple values can be specified, separated by comma: +# unencrypted - accept connections without encryption +# psk - accept connections secured with TLS and a pre-shared key +# cert - accept connections secured with TLS and a certificate +# +# Mandatory: yes, if TLS certificate or PSK parameters are defined (even for 'unencrypted' connection) +# Default: +# TLSAccept=unencrypted + +### Option: TLSCAFile +# Full pathname of a file containing the top-level CA(s) certificates for +# peer certificate verification. +# +# Mandatory: no +# Default: +# TLSCAFile= + +### Option: TLSCRLFile +# Full pathname of a file containing revoked certificates. +# +# Mandatory: no +# Default: +# TLSCRLFile= + +### Option: TLSServerCertIssuer +# Allowed server certificate issuer. +# +# Mandatory: no +# Default: +# TLSServerCertIssuer= + +### Option: TLSServerCertSubject +# Allowed server certificate subject. +# +# Mandatory: no +# Default: +# TLSServerCertSubject= + +### Option: TLSCertFile +# Full pathname of a file containing the agent certificate or certificate chain. +# +# Mandatory: no +# Default: +# TLSCertFile= + +### Option: TLSKeyFile +# Full pathname of a file containing the agent private key. +# +# Mandatory: no +# Default: +# TLSKeyFile= + +### Option: TLSPSKIdentity +# Unique, case sensitive string used to identify the pre-shared key. +# +# Mandatory: no +# Default: +# TLSPSKIdentity= + +### Option: TLSPSKFile +# Full pathname of a file containing the pre-shared key. +# +# Mandatory: no +# Default: +# TLSPSKFile= diff --git a/resources/playbook/roles/bibigrid/templates/zabbix/zabbix_server.conf.j2 b/resources/playbook/roles/bibigrid/templates/zabbix/zabbix_server.conf.j2 new file mode 100644 index 000000000..3969bb3ee --- /dev/null +++ b/resources/playbook/roles/bibigrid/templates/zabbix/zabbix_server.conf.j2 @@ -0,0 +1,727 @@ +# This is a configuration file for Zabbix server daemon +# To get more information about Zabbix, visit http://www.zabbix.com + +############ GENERAL PARAMETERS ################# + +### Option: ListenPort +# Listen port for trapper. +# +# Mandatory: no +# Range: 1024-32767 +# Default: +# ListenPort=10051 + +### Option: SourceIP +# Source IP address for outgoing connections. +# +# Mandatory: no +# Default: +# SourceIP= + +### Option: LogType +# Specifies where log messages are written to: +# system - syslog +# file - file specified with LogFile parameter +# console - standard output +# +# Mandatory: no +# Default: +# LogType=file + +### Option: LogFile +# Log file name for LogType 'file' parameter. +# +# Mandatory: yes, if LogType is set to file, otherwise no +# Default: +# LogFile= + +LogFile=/var/log/zabbix/zabbix_server.log + +### Option: LogFileSize +# Maximum size of log file in MB. +# 0 - disable automatic log rotation. 
+# +# Mandatory: no +# Range: 0-1024 +# Default: +# LogFileSize=1 + +LogFileSize=0 + +### Option: DebugLevel +# Specifies debug level: +# 0 - basic information about starting and stopping of Zabbix processes +# 1 - critical information +# 2 - error information +# 3 - warnings +# 4 - for debugging (produces lots of information) +# 5 - extended debugging (produces even more information) +# +# Mandatory: no +# Range: 0-5 +# Default: +# DebugLevel=3 + +### Option: PidFile +# Name of PID file. +# +# Mandatory: no +# Default: +# PidFile=/tmp/zabbix_server.pid + +PidFile=/var/run/zabbix/zabbix_server.pid + +### Option: SocketDir +# IPC socket directory. +# Directory to store IPC sockets used by internal Zabbix services. +# +# Mandatory: no +# Default: +# SocketDir=/tmp + +SocketDir=/var/run/zabbix + +### Option: DBHost +# Database host name. +# If set to localhost, socket is used for MySQL. +# If set to empty string, socket is used for PostgreSQL. +# +# Mandatory: no +# Default: +# DBHost=localhost + +### Option: DBName +# Database name. +# +# Mandatory: yes +# Default: +# DBName= + +DBName={{ zabbix_conf.db }} + +### Option: DBSchema +# Schema name. Used for IBM DB2 and PostgreSQL. +# +# Mandatory: no +# Default: +# DBSchema= + +### Option: DBUser +# Database user. +# +# Mandatory: no +# Default: +# DBUser= + +DBUser={{ zabbix_conf.db_user }} + +### Option: DBPassword +# Database password. +# Comment this line if no password is used. +# +# Mandatory: no +# Default: +# DBPassword= + +DBPassword={{ zabbix_conf.db_password }} + +### Option: DBSocket +# Path to MySQL socket. +# +# Mandatory: no +# Default: +# DBSocket= + +### Option: DBPort +# Database port when not using local socket. +# +# Mandatory: no +# Range: 1024-65535 +# Default: +# DBPort= + +### Option: HistoryStorageURL +# History storage HTTP[S] URL. +# +# Mandatory: no +# Default: +# HistoryStorageURL= + +### Option: HistoryStorageTypes +# Comma separated list of value types to be sent to the history storage. +# +# Mandatory: no +# Default: +# HistoryStorageTypes=uint,dbl,str,log,text + +### Option: HistoryStorageDateIndex +# Enable preprocessing of history values in history storage to store values in different indices based on date. +# 0 - disable +# 1 - enable +# +# Mandatory: no +# Default: +# HistoryStorageDateIndex=0 + +### Option: ExportDir +# Directory for real time export of events, history and trends in newline delimited JSON format. +# If set, enables real time export. +# +# Mandatory: no +# Default: +# ExportDir= + +### Option: ExportFileSize +# Maximum size per export file in bytes. +# Only used for rotation if ExportDir is set. +# +# Mandatory: no +# Range: 1M-1G +# Default: +# ExportFileSize=1G + +############ ADVANCED PARAMETERS ################ + +### Option: StartPollers +# Number of pre-forked instances of pollers. +# +# Mandatory: no +# Range: 0-1000 +# Default: +# StartPollers=5 + +### Option: StartIPMIPollers +# Number of pre-forked instances of IPMI pollers. +# The IPMI manager process is automatically started when at least one IPMI poller is started. +# +# Mandatory: no +# Range: 0-1000 +# Default: +# StartIPMIPollers=0 + +### Option: StartPreprocessors +# Number of pre-forked instances of preprocessing workers. +# The preprocessing manager process is automatically started when preprocessor worker is started. +# +# Mandatory: no +# Range: 1-1000 +# Default: +# StartPreprocessors=3 + +### Option: StartPollersUnreachable +# Number of pre-forked instances of pollers for unreachable hosts (including IPMI and Java). 
+# At least one poller for unreachable hosts must be running if regular, IPMI or Java pollers +# are started. +# +# Mandatory: no +# Range: 0-1000 +# Default: +# StartPollersUnreachable=1 + +### Option: StartTrappers +# Number of pre-forked instances of trappers. +# Trappers accept incoming connections from Zabbix sender, active agents and active proxies. +# At least one trapper process must be running to display server availability and view queue +# in the frontend. +# +# Mandatory: no +# Range: 0-1000 +# Default: +# StartTrappers=5 + +### Option: StartPingers +# Number of pre-forked instances of ICMP pingers. +# +# Mandatory: no +# Range: 0-1000 +# Default: +# StartPingers=1 + +### Option: StartDiscoverers +# Number of pre-forked instances of discoverers. +# +# Mandatory: no +# Range: 0-250 +# Default: +# StartDiscoverers=1 + +### Option: StartHTTPPollers +# Number of pre-forked instances of HTTP pollers. +# +# Mandatory: no +# Range: 0-1000 +# Default: +# StartHTTPPollers=1 + +### Option: StartTimers +# Number of pre-forked instances of timers. +# Timers process maintenance periods. +# Only the first timer process handles host maintenance updates. Problem suppression updates are shared +# between all timers. +# +# Mandatory: no +# Range: 1-1000 +# Default: +# StartTimers=1 + +### Option: StartEscalators +# Number of pre-forked instances of escalators. +# +# Mandatory: no +# Range: 0-100 +# Default: +# StartEscalators=1 + +### Option: StartAlerters +# Number of pre-forked instances of alerters. +# Alerters send the notifications created by action operations. +# +# Mandatory: no +# Range: 0-100 +# Default: +# StartAlerters=3 + +### Option: JavaGateway +# IP address (or hostname) of Zabbix Java gateway. +# Only required if Java pollers are started. +# +# Mandatory: no +# Default: +# JavaGateway= + +### Option: JavaGatewayPort +# Port that Zabbix Java gateway listens on. +# +# Mandatory: no +# Range: 1024-32767 +# Default: +# JavaGatewayPort=10052 + +### Option: StartJavaPollers +# Number of pre-forked instances of Java pollers. +# +# Mandatory: no +# Range: 0-1000 +# Default: +# StartJavaPollers=0 + +### Option: StartVMwareCollectors +# Number of pre-forked vmware collector instances. +# +# Mandatory: no +# Range: 0-250 +# Default: +# StartVMwareCollectors=0 + +### Option: VMwareFrequency +# How often Zabbix will connect to VMware service to obtain a new data. +# +# Mandatory: no +# Range: 10-86400 +# Default: +# VMwareFrequency=60 + +### Option: VMwarePerfFrequency +# How often Zabbix will connect to VMware service to obtain performance data. +# +# Mandatory: no +# Range: 10-86400 +# Default: +# VMwarePerfFrequency=60 + +### Option: VMwareCacheSize +# Size of VMware cache, in bytes. +# Shared memory size for storing VMware data. +# Only used if VMware collectors are started. +# +# Mandatory: no +# Range: 256K-2G +# Default: +# VMwareCacheSize=8M + +### Option: VMwareTimeout +# Specifies how many seconds vmware collector waits for response from VMware service. +# +# Mandatory: no +# Range: 1-300 +# Default: +# VMwareTimeout=10 + +### Option: SNMPTrapperFile +# Temporary file used for passing data from SNMP trap daemon to the server. +# Must be the same as in zabbix_trap_receiver.pl or SNMPTT configuration file. +# +# Mandatory: no +# Default: +# SNMPTrapperFile=/tmp/zabbix_traps.tmp + +SNMPTrapperFile=/var/log/snmptrap/snmptrap.log + +### Option: StartSNMPTrapper +# If 1, SNMP trapper process is started. 
+# +# Mandatory: no +# Range: 0-1 +# Default: +# StartSNMPTrapper=0 + +### Option: ListenIP +# List of comma delimited IP addresses that the trapper should listen on. +# Trapper will listen on all network interfaces if this parameter is missing. +# +# Mandatory: no +# Default: +# ListenIP=0.0.0.0 + +# ListenIP=127.0.0.1 + +### Option: HousekeepingFrequency +# How often Zabbix will perform housekeeping procedure (in hours). +# Housekeeping is removing outdated information from the database. +# To prevent Housekeeper from being overloaded, no more than 4 times HousekeepingFrequency +# hours of outdated information are deleted in one housekeeping cycle, for each item. +# To lower load on server startup housekeeping is postponed for 30 minutes after server start. +# With HousekeepingFrequency=0 the housekeeper can be only executed using the runtime control option. +# In this case the period of outdated information deleted in one housekeeping cycle is 4 times the +# period since the last housekeeping cycle, but not less than 4 hours and not greater than 4 days. +# +# Mandatory: no +# Range: 0-24 +# Default: +# HousekeepingFrequency=1 + +### Option: MaxHousekeeperDelete +# The table "housekeeper" contains "tasks" for housekeeping procedure in the format: +# [housekeeperid], [tablename], [field], [value]. +# No more than 'MaxHousekeeperDelete' rows (corresponding to [tablename], [field], [value]) +# will be deleted per one task in one housekeeping cycle. +# If set to 0 then no limit is used at all. In this case you must know what you are doing! +# +# Mandatory: no +# Range: 0-1000000 +# Default: +# MaxHousekeeperDelete=5000 + +### Option: CacheSize +# Size of configuration cache, in bytes. +# Shared memory size for storing host, item and trigger data. +# +# Mandatory: no +# Range: 128K-8G +# Default: +# CacheSize=8M + +### Option: CacheUpdateFrequency +# How often Zabbix will perform update of configuration cache, in seconds. +# +# Mandatory: no +# Range: 1-3600 +# Default: +# CacheUpdateFrequency=60 + +### Option: StartDBSyncers +# Number of pre-forked instances of DB Syncers. +# +# Mandatory: no +# Range: 1-100 +# Default: +# StartDBSyncers=4 + +### Option: HistoryCacheSize +# Size of history cache, in bytes. +# Shared memory size for storing history data. +# +# Mandatory: no +# Range: 128K-2G +# Default: +# HistoryCacheSize=16M + +### Option: HistoryIndexCacheSize +# Size of history index cache, in bytes. +# Shared memory size for indexing history cache. +# +# Mandatory: no +# Range: 128K-2G +# Default: +# HistoryIndexCacheSize=4M + +### Option: TrendCacheSize +# Size of trend cache, in bytes. +# Shared memory size for storing trends data. +# +# Mandatory: no +# Range: 128K-2G +# Default: +# TrendCacheSize=4M + +### Option: ValueCacheSize +# Size of history value cache, in bytes. +# Shared memory size for caching item history data requests. +# Setting to 0 disables value cache. +# +# Mandatory: no +# Range: 0,128K-64G +# Default: +# ValueCacheSize=8M + +### Option: Timeout +# Specifies how long we wait for agent, SNMP device or external check (in seconds). +# +# Mandatory: no +# Range: 1-30 +# Default: +# Timeout=3 + +Timeout=4 + +### Option: TrapperTimeout +# Specifies how many seconds trapper may spend processing new data. +# +# Mandatory: no +# Range: 1-300 +# Default: +# TrapperTimeout=300 + +### Option: UnreachablePeriod +# After how many seconds of unreachability treat a host as unavailable. 
+# +# Mandatory: no +# Range: 1-3600 +# Default: +# UnreachablePeriod=45 + +### Option: UnavailableDelay +# How often host is checked for availability during the unavailability period, in seconds. +# +# Mandatory: no +# Range: 1-3600 +# Default: +# UnavailableDelay=60 + +### Option: UnreachableDelay +# How often host is checked for availability during the unreachability period, in seconds. +# +# Mandatory: no +# Range: 1-3600 +# Default: +# UnreachableDelay=15 + +### Option: AlertScriptsPath +# Full path to location of custom alert scripts. +# Default depends on compilation options. +# To see the default path run command "zabbix_server --help". +# +# Mandatory: no +# Default: +# AlertScriptsPath=${datadir}/zabbix/alertscripts + +AlertScriptsPath=/usr/lib/zabbix/alertscripts + +### Option: ExternalScripts +# Full path to location of external scripts. +# Default depends on compilation options. +# To see the default path run command "zabbix_server --help". +# +# Mandatory: no +# Default: +# ExternalScripts=${datadir}/zabbix/externalscripts + +ExternalScripts=/usr/lib/zabbix/externalscripts + +### Option: FpingLocation +# Location of fping. +# Make sure that fping binary has root ownership and SUID flag set. +# +# Mandatory: no +# Default: +# FpingLocation=/usr/sbin/fping + +FpingLocation=/usr/bin/fping + +### Option: Fping6Location +# Location of fping6. +# Make sure that fping6 binary has root ownership and SUID flag set. +# Make empty if your fping utility is capable to process IPv6 addresses. +# +# Mandatory: no +# Default: +# Fping6Location=/usr/sbin/fping6 + +Fping6Location=/usr/bin/fping6 + +### Option: SSHKeyLocation +# Location of public and private keys for SSH checks and actions. +# +# Mandatory: no +# Default: +# SSHKeyLocation= + +### Option: LogSlowQueries +# How long a database query may take before being logged (in milliseconds). +# Only works if DebugLevel set to 3, 4 or 5. +# 0 - don't log slow queries. +# +# Mandatory: no +# Range: 1-3600000 +# Default: +# LogSlowQueries=0 + +LogSlowQueries=3000 + +### Option: TmpDir +# Temporary directory. +# +# Mandatory: no +# Default: +# TmpDir=/tmp + +### Option: StartProxyPollers +# Number of pre-forked instances of pollers for passive proxies. +# +# Mandatory: no +# Range: 0-250 +# Default: +# StartProxyPollers=1 + +### Option: ProxyConfigFrequency +# How often Zabbix Server sends configuration data to a Zabbix Proxy in seconds. +# This parameter is used only for proxies in the passive mode. +# +# Mandatory: no +# Range: 1-3600*24*7 +# Default: +# ProxyConfigFrequency=3600 + +### Option: ProxyDataFrequency +# How often Zabbix Server requests history data from a Zabbix Proxy in seconds. +# This parameter is used only for proxies in the passive mode. +# +# Mandatory: no +# Range: 1-3600 +# Default: +# ProxyDataFrequency=1 + +### Option: AllowRoot +# Allow the server to run as 'root'. If disabled and the server is started by 'root', the server +# will try to switch to the user specified by the User configuration option instead. +# Has no effect if started under a regular user. +# 0 - do not allow +# 1 - allow +# +# Mandatory: no +# Default: +# AllowRoot=0 + +### Option: User +# Drop privileges to a specific, existing user on the system. +# Only has effect if run as 'root' and AllowRoot is disabled. +# +# Mandatory: no +# Default: +# User=zabbix + +### Option: Include +# You may include individual files or all files in a directory in the configuration file. 
+# Installing Zabbix will create include directory in /usr/local/etc, unless modified during the compile time. +# +# Mandatory: no +# Default: +# Include= + +# Include=/usr/local/etc/zabbix_server.general.conf +# Include=/usr/local/etc/zabbix_server.conf.d/ +# Include=/usr/local/etc/zabbix_server.conf.d/*.conf + +### Option: SSLCertLocation +# Location of SSL client certificates. +# This parameter is used only in web monitoring. +# Default depends on compilation options. +# To see the default path run command "zabbix_server --help". +# +# Mandatory: no +# Default: +# SSLCertLocation=${datadir}/zabbix/ssl/certs + +### Option: SSLKeyLocation +# Location of private keys for SSL client certificates. +# This parameter is used only in web monitoring. +# Default depends on compilation options. +# To see the default path run command "zabbix_server --help". +# +# Mandatory: no +# Default: +# SSLKeyLocation=${datadir}/zabbix/ssl/keys + +### Option: SSLCALocation +# Override the location of certificate authority (CA) files for SSL server certificate verification. +# If not set, system-wide directory will be used. +# This parameter is used only in web monitoring and SMTP authentication. +# +# Mandatory: no +# Default: +# SSLCALocation= + +### Option: StatsAllowedIP +# List of comma delimited IP addresses, optionally in CIDR notation, or DNS names of external Zabbix instances. +# Stats request will be accepted only from the addresses listed here. If this parameter is not set no stats requests +# will be accepted. +# If IPv6 support is enabled then '127.0.0.1', '::127.0.0.1', '::ffff:127.0.0.1' are treated equally +# and '::/0' will allow any IPv4 or IPv6 address. +# '0.0.0.0/0' can be used to allow any IPv4 address. +# Example: StatsAllowedIP=127.0.0.1,192.168.1.0/24,::1,2001:db8::/32,zabbix.example.com +# +# Mandatory: no +# Default: +# StatsAllowedIP= + +####### LOADABLE MODULES ####### + +### Option: LoadModulePath +# Full path to location of server modules. +# Default depends on compilation options. +# To see the default path run command "zabbix_server --help". +# +# Mandatory: no +# Default: +# LoadModulePath=${libdir}/modules + +### Option: LoadModule +# Module to load at server startup. Modules are used to extend functionality of the server. +# Format: LoadModule= +# The modules must be located in directory specified by LoadModulePath. +# It is allowed to include multiple LoadModule parameters. +# +# Mandatory: no +# Default: +# LoadModule= + +####### TLS-RELATED PARAMETERS ####### + +### Option: TLSCAFile +# Full pathname of a file containing the top-level CA(s) certificates for +# peer certificate verification. +# +# Mandatory: no +# Default: +# TLSCAFile= + +### Option: TLSCRLFile +# Full pathname of a file containing revoked certificates. +# +# Mandatory: no +# Default: +# TLSCRLFile= + +### Option: TLSCertFile +# Full pathname of a file containing the server certificate or certificate chain. +# +# Mandatory: no +# Default: +# TLSCertFile= + +### Option: TLSKeyFile +# Full pathname of a file containing the server private key. 
+#
+# Mandatory: no
+# Default:
+# TLSKeyFile=
diff --git a/resources/playbook/tools/tee.py b/resources/playbook/tools/tee.py
new file mode 100644
index 000000000..75c95f90d
--- /dev/null
+++ b/resources/playbook/tools/tee.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+from argparse import ArgumentParser
+from subprocess import Popen, PIPE
+import sys
+from queue import Queue
+from threading import Thread
+from pathlib import Path
+
+parser = ArgumentParser(
+    description='Tee-like program that executes the given cmd and forwards its stdout and stderr. '
+                'Exits with the exit code of the called program.')
+parser.add_argument("--cmd", help="A cmd executed by tee.py, e.g. --cmd \"ansible-playbook -i hosts site.yml\"", required=True)
+parser.add_argument("--outfile", help="Path to the file that stdout/stderr is copied to", required=True)
+args = parser.parse_args()
+
+cmds = args.cmd.split(" ")
+cmd = Path(cmds[0])
+if not cmd.is_file():
+    sys.exit("{} is not a file".format(cmd))
+
+
+def reader(pipe, queue):
+    """Forward lines from pipe to queue and signal end-of-stream with None."""
+    try:
+        with pipe:
+            for line in iter(pipe.readline, b''):
+                queue.put(line.decode('utf-8'))
+    finally:
+        queue.put(None)
+
+
+p = Popen(cmds, shell=False, stdout=PIPE, stderr=PIPE)
+
+q = Queue()
+Thread(target=reader, args=[p.stdout, q]).start()
+Thread(target=reader, args=[p.stderr, q]).start()
+
+# Both reader threads put a None marker when their stream is exhausted; keep
+# consuming until both markers have arrived so the tail of one stream is not
+# lost when the other one finishes first.
+with open(args.outfile, "w") as f:
+    open_streams = 2
+    while open_streams:
+        line = q.get()
+        if line is None:
+            open_streams -= 1
+            continue
+        # print to stdout
+        sys.stdout.write(line)
+        sys.stdout.flush()
+        # print to file
+        f.write(line)
+        f.flush()
+
+# wait until process is finished ...
+rt = p.wait()
+# ... and return its returncode
+sys.exit(rt)
diff --git a/tests/provider/test_Provider.py b/tests/provider/test_Provider.py
new file mode 100644
index 000000000..197a86d93
--- /dev/null
+++ b/tests/provider/test_Provider.py
@@ -0,0 +1,215 @@
+import os
+import unittest
+
+import bibigrid2.core.utility.handler.provider_handler as providerHandler
+import bibigrid2.core.utility.handler.configuration_handler as configurationHandler
+import bibigrid2.core.utility.paths.basic_path as bP
+
+SERVER_KEYS = {'id', 'name', 'flavor', 'image', 'block_device_mapping', 'location', 'volumes',
+               'has_config_drive', 'host_id', 'progress', 'disk_config', 'power_state', 'task_state',
+               'vm_state', 'launched_at', 'terminated_at', 'hypervisor_hostname', 'instance_name',
+               'user_data', 'host', 'hostname', 'kernel_id', 'launch_index', 'ramdisk_id',
+               'reservation_id', 'root_device_name', 'scheduler_hints', 'security_groups',
+               'created_at', 'accessIPv4', 'accessIPv6', 'addresses', 'adminPass', 'created',
+               'description', 'key_name', 'metadata', 'networks', 'personality', 'private_v4',
+               'public_v4', 'public_v6', 'server_groups', 'status', 'updated', 'user_id', 'tags',
+               'interface_ip', 'properties', 'hostId', 'config_drive', 'project_id', 'tenant_id',
+               'region', 'cloud', 'az', 'OS-DCF:diskConfig', 'OS-EXT-AZ:availability_zone',
+               'OS-SRV-USG:launched_at', 'OS-SRV-USG:terminated_at', 'OS-EXT-STS:task_state',
+               'OS-EXT-STS:vm_state', 'OS-EXT-STS:power_state',
+               'os-extended-volumes:volumes_attached'}
+FLOATING_IP_KEYS = {'attached', 'fixed_ip_address', 'floating_ip_address', 'id', 'location', 'network',
+                    'port', 'router', 'status', 'created_at', 'updated_at', 'description',
+                    'revision_number', 'properties', 'port_id', 'router_id', 'project_id', 'tenant_id',
+                    'floating_network_id', 'port_details', 'dns_domain', 'dns_name', 'port_forwardings',
+                    'tags'}
+SUBNET_KEYS = {'id', 'name', 'tenant_id', 'network_id', 'ip_version', 'subnetpool_id', 'enable_dhcp',
+               'ipv6_ra_mode', 'ipv6_address_mode', 'gateway_ip',
'cidr', 'allocation_pools', + 'host_routes', 'dns_nameservers', 'description', 'service_types', 'tags', + 'created_at', 'updated_at', 'revision_number', 'project_id'} +NETWORK_KEYS = {'id', 'name', 'tenant_id', 'admin_state_up', 'mtu', 'status', 'subnets', 'shared', + 'availability_zone_hints', 'availability_zones', 'ipv4_address_scope', + 'ipv6_address_scope', 'router:external', 'description', 'port_security_enabled', + 'dns_domain', 'tags', 'created_at', 'updated_at', 'revision_number', 'project_id'} + +FLAVOR_KEYS = {'links', 'name', 'description', 'disk', 'is_public', 'ram', 'vcpus', 'swap', 'ephemeral', 'is_disabled', + 'rxtx_factor', 'extra_specs', 'id', 'location'} + +IMAGE_KEYS = {'location', 'created_at', 'updated_at', 'checksum', 'container_format', 'direct_url', 'disk_format', + 'file', 'id', 'name', 'owner', 'tags', 'status', 'min_ram', 'min_disk', 'size', 'virtual_size', + 'is_protected', 'locations', 'properties', 'is_public', 'visibility', 'description', + 'owner_specified.openstack.md5', 'owner_specified.openstack.object', 'owner_specified.openstack.sha256', + 'os_hidden', 'os_hash_algo', 'os_hash_value', 'os_distro', 'os_version', 'schema', 'protected', + 'metadata', 'created', 'updated', 'minDisk', 'minRam'} + +SNAPSHOT_KEYS = {'id', 'created_at', 'updated_at', 'name', 'description', 'volume_id', 'status', 'size', 'metadata', + 'os-extended-snapshot-attributes:project_id', 'os-extended-snapshot-attributes:progress'} + +VOLUME_KEYS = {'location', 'id', 'name', 'description', 'size', 'attachments', 'status', 'migration_status', 'host', + 'replication_driver', 'replication_status', 'replication_extended_status', 'snapshot_id', 'created_at', + 'updated_at', 'source_volume_id', 'consistencygroup_id', 'volume_type', 'metadata', 'is_bootable', + 'is_encrypted', 'can_multiattach', 'properties', 'display_name', 'display_description', 'bootable', + 'encrypted', 'multiattach', 'availability_zone', 'source_volid', 'user_id', + 'os-vol-tenant-attr:tenant_id'} + +FREE_RESOURCES_KEYS = {'total_cores', 'floating_ips', 'instances', 'total_ram', 'Volumes', 'VolumeGigabytes', + 'Snapshots', 'Backups', 'BackupGigabytes'} + +KEYPAIR = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDORPauyW3O7M4Uk8/Qo557h2zxd9fwByljG9S1/zHKIEzOMcOBb7WUSmyNa5XHh5IB0/BTsQvSag/O9IAhax2wlp9A2za6EkALYiRdEXeGOMNORw8yylRBqzLluKTErZ5sKYxENf1WGHsE3ifzct0G/moEPmIkixTHR9fZrZgOzQwj4bgJXhgQT8wxpc8FwWncvDSazZ/OAefXKh16Dz8dVz2VbMbYEUMY+XXqZxcnHwJABIpU1mrJV7h1F4DW+E8eUF1b6UNQRibX8VJ11V1mq39zMV9Az6W2ZOR6OXjDXK2r6P8y07+9Lh0rrwzeeZMYF17ACZbxIu8crTCZF0Lr6NtX+KWfdT6usUyFcNwuktIvUYv3ylP/7wcQlaPl0g1FMFbUTTukAiDf4jAgvJkg7ayE0MPapGpI/OhSK2gyN45VAzs2m7uykun87B491JagZ57qr16vt8vxGYpFCEe8QqAcrUszUPqyPrb0auA8bzjO8S41Kx8FfG+7eTu4dQ0= user" + +CONFIGURATIONS = configurationHandler.read_configuration(os.path.join(bP.ROOT_PATH, + "tests/resources/infrastructure_cloud.yml")) +PROVIDERS = providerHandler.get_providers(CONFIGURATIONS) + + +class ProviderServer(object): + def __init__(self, provider, name, configuration, key_name=None): + self.provider = provider + self.name = name + self.server_dict = provider.create_server(name=self.name, flavor=configuration["flavor"], + image=configuration["image"], + network=configuration["network"], key_name=key_name) + + def __enter__(self): + return self.server_dict + + def __exit__(self, type, value, traceback): + self.provider.delete_server(name_or_id=self.name) + + +class TestProvider(unittest.TestCase): + def test_get_free_resources(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + 
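# get_free_resources() should return exactly the FREE_RESOURCES_KEYS quota keys, all with non-negative values
+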
free_dict = provider.get_free_resources() + self.assertEqual(FREE_RESOURCES_KEYS, set(free_dict.keys())) + for value in free_dict.values(): + self.assertLessEqual(0, value) + + def test_server_start_type_error(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + with self.assertRaises(TypeError): + provider.create_server(name="name", flavor=configuration["flavor"], + network=configuration["network"]) + with self.assertRaises(TypeError): + provider.create_server(name="name", image=configuration["image"], + network=configuration["network"]) + with self.assertRaises(TypeError): + provider.create_server(flavor=configuration["flavor"], image=configuration["image"], + network=configuration["network"]) + with self.assertRaises(TypeError): + provider.create_server(name="name", flavor=configuration["flavor"], image=configuration["image"]) + + def test_server_start_attribute_error(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + with self.assertRaises(AttributeError): + provider.create_server(name="name", image="ERROR", flavor=configuration["flavor"], + network=configuration["network"]) + with self.assertRaises(AttributeError): + provider.create_server(name="name", flavor="ERROR", image=configuration["image"], + network=configuration["network"]) + with self.assertRaises(AttributeError): + provider.create_server(name="name", flavor=configuration["flavor"], image=configuration["image"], + network="ERROR") + with self.assertRaises(AttributeError): + provider.create_server(name="name", flavor=configuration["flavor"], image=configuration["image"], + network=configuration["network"], key_name="ERROR") + + def test_create_keypair_create_delete_false_delete(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + provider.create_keypair("bibigrid2_test_keypair", KEYPAIR) + self.assertTrue(provider.delete_keypair("bibigrid2_test_keypair")) + self.assertFalse(provider.delete_keypair("bibigrid2_test_keypair")) + + def test_active_server_methods(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + provider.create_keypair("bibigrid2_test_keypair", KEYPAIR) + with self.subTest(provider.NAME): + with ProviderServer(provider, "bibigrid2_test_server", configuration, "bibigrid2_test_keypair") as ps: + floating_ip = provider.create_floating_ip(provider.get_external_network(configuration["network"]), + ps) + server_list = provider.list_servers() + self.assertEqual(SERVER_KEYS, + set(ps.keys())) + self.assertEqual("bibigrid2_test_keypair", ps["key_name"]) + self.assertEqual(FLOATING_IP_KEYS, + set(floating_ip.keys())) + self.assertTrue([server for server in server_list if server["name"] == "bibigrid2_test_server" and + server["public_v4"] == floating_ip.floating_ip_address]) + provider.delete_keypair("bibigrid2_test_keypair") + + def test_get_external_network(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + self.assertTrue(provider.get_external_network(configuration["network"])) + with self.assertRaises(TypeError): + provider.get_external_network("ERROR") + + def test_get_network_get_subnet(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + network = provider.get_network_by_id_or_name(configuration["network"]) + self.assertEqual(NETWORK_KEYS, + set(network.keys())) + subnet_id = provider.get_subnet_ids_by_network(network["id"])[0] + 
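# the subnet's keys must match the fixture; mapping the subnet back must return the original network id
+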
self.assertEqual(SUBNET_KEYS, + set(provider.get_subnet_by_id_or_name(subnet_id).keys())) + network2 = provider.get_network_id_by_subnet(subnet_id) + self.assertEqual(network2, network["id"]) + + def test_get_network_get_subnet_mismatch(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + self.assertIsNone(provider.get_network_by_id_or_name("NONE")) + + def test_get_subnet_by_name_or_id_mismatch(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + self.assertIsNone(provider.get_subnet_by_id_or_name("NONE")) + + def test_get_subnet_by_network_mismatch(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + self.assertIsNone(provider.get_subnet_ids_by_network("NONE")) + + def test_get_server_group_mismatch(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + self.assertIsNone(provider.get_server_group_by_id_or_name("NONE")) + + def test_get_flavor_detail_mismatch(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + self.assertIsNone(provider.get_flavor("NONE")) + + def test_get_flavor_detail(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + self.assertEqual(FLAVOR_KEYS, set(provider.get_flavor(configuration["flavor"]).keys())) + + def test_get_image(self): + for provider,configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + self.assertEqual(IMAGE_KEYS, set(provider.get_image_by_id_or_name(configuration["image"]).keys())) + + def test_get_image_mismatch(self): + for provider in PROVIDERS: + with self.subTest(provider.NAME): + self.assertIsNone(provider.get_image_by_id_or_name("NONE")) + + if os.environ.get("OS_SNAPSHOT"): + def test_get_snapshot(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + self.assertEqual(SNAPSHOT_KEYS, + set(provider.get_volume_snapshot_by_id_or_name(configuration["snapshot_image"]).keys())) + + def test_create_volume_from_snapshot(self): + for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): + with self.subTest(provider.NAME): + volume_id = provider.create_volume_from_snapshot(configuration["snapshot_image"]) + volume = provider.get_volume_by_id_or_name(volume_id) + self.assertEqual(VOLUME_KEYS, set(volume.keys())) diff --git a/tests/resources/infrastructure_cloud.yml b/tests/resources/infrastructure_cloud.yml new file mode 100644 index 000000000..028c02470 --- /dev/null +++ b/tests/resources/infrastructure_cloud.yml @@ -0,0 +1,10 @@ + # See https://cloud.denbi.de/wiki/Tutorials/BiBiGrid/ (after update) + +- infrastructure: openstack # former mode. 
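+  # note: image, flavor, network and snapshot_image below are example values;
+  # replace them with resources that exist in your own OpenStack project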
+ cloud: openstack # name of clouds.yaml entry + image: 2e61eb1b-dbd2-4ed8-b62b-5ee9fe0510e6 + flavor: de.NBI tiny + network: network0 + snapshot_image: test + +#- [next configurations] \ No newline at end of file diff --git a/tests/startupTests.py b/tests/startupTests.py new file mode 100644 index 000000000..312991396 --- /dev/null +++ b/tests/startupTests.py @@ -0,0 +1,41 @@ +import logging +import os +import sys +import unittest +from contextlib import contextmanager + + +@contextmanager +def suppress_stdout(): + with open(os.devnull, "w") as devnull: + old_stdout = sys.stdout + sys.stdout = devnull + try: + yield + finally: + sys.stdout = old_stdout + + +logging.basicConfig(level=logging.ERROR) +if __name__ == '__main__': + """ + You need a network with a subnet in order to integration test the providers + Additionally you need to set: + OS_IMAGE=[any available image]; + OS_FLAVOR=[any available flavor]; + OS_NETWORK=[existing network in your project connected to an external network via a router]; + OS_KEY_NAME=[your keyname]; + OS_SNAPSHOT=[a snapshot you created] + The integration tests will create a volume that is not deleted. + """ + # Unittests + suite = unittest.TestLoader().discover("./", pattern='test_*.py') + with suppress_stdout(): + unittest.TextTestRunner(verbosity=2).run(suite) + + # Provider-Test + ## Configuration needs to contain providers and infrastructures + if os.environ.get("OS_KEY_NAME"): + suite = unittest.TestLoader().discover("./provider", pattern='test_*.py') + with suppress_stdout(): + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/tests/test_ValidateConfiguration.py b/tests/test_ValidateConfiguration.py new file mode 100644 index 000000000..7bfd7d83a --- /dev/null +++ b/tests/test_ValidateConfiguration.py @@ -0,0 +1,319 @@ +import os +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock, call + +import bibigrid2.core.utility.validate_configuration as validateConfiguration + + +class TestValidateConfiguration(TestCase): + def test_check_provider_data_count(self): + provider_data_1 = {"PROJECT_ID": "abcd", "PROJECT_NAME": "1234"} + provider_data_2 = {"PROJECT_ID": "9999", "PROJECT_NAME": "9999"} + vc = validateConfiguration + self.assertTrue(vc.check_provider_data([provider_data_1, provider_data_2], 2)) + self.assertFalse(vc.check_provider_data([provider_data_1, provider_data_2], 3)) + self.assertTrue(vc.check_provider_data([], 0)) + + def test_check_provider_data_unique(self): + provider_data_1 = {"PROJECT_ID": "abcd", "PROJECT_NAME": "1234"} + provider_data_2 = {"PROJECT_ID": "9999", "PROJECT_NAME": "9999"} + vc = validateConfiguration + self.assertTrue(vc.check_provider_data([provider_data_1, provider_data_2], 2)) + self.assertFalse(vc.check_provider_data([provider_data_1, provider_data_1], 2)) + self.assertTrue(vc.check_provider_data([], 0)) + + def test_check_master_vpn_worker_ordered(self): + master = {"masterInstance": "Value"} + vpn = {"vpnInstance": "Value"} + vpn_master = {} + vpn_master.update(master) + vpn_master.update(vpn) + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=[master]) + self.assertTrue(vc.check_master_vpn_worker()) + vc.configurations = [master, vpn] + self.assertTrue(vc.check_master_vpn_worker()) + vc.configurations = [vpn] + self.assertFalse(vc.check_master_vpn_worker()) + vc.configurations = [master, master] + self.assertFalse(vc.check_master_vpn_worker()) + + def test_check_master_vpn_worker_unique(self): + master = {"masterInstance": "Value"} + vpn = 
{"vpnInstance": "Value"} + vpn_master = {} + vpn_master.update(master) + vpn_master.update(vpn) + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=[vpn_master]) + self.assertFalse(vc.check_master_vpn_worker()) + vc.configurations = [master, vpn_master] + self.assertFalse(vc.check_master_vpn_worker()) + + def test_evaluate(self): + vc = validateConfiguration + self.assertTrue(vc.evaluate("some", True)) + self.assertFalse(vc.evaluate("some", False)) + + def test_check_provider_connection(self): + mock = Mock() + mock.conn = False + vc = validateConfiguration.ValidateConfiguration(providers=[mock], configurations=None) + self.assertFalse(vc.check_provider_connections()) + mock.conn = True + self.assertTrue(vc.check_provider_connections()) + + def test_check_instances_master(self): + vc = validateConfiguration.ValidateConfiguration(providers=["31"], configurations=[{"masterInstance": "42"}]) + with patch.object(vc, "check_instance") as mock: + vc.check_instances() + mock.assert_called_with("masterInstance", "42", "31") + + def test_check_instances_vpn(self): + vc = validateConfiguration.ValidateConfiguration(providers=["31"], configurations=[{"vpnInstance": "42"}]) + with patch.object(vc, "check_instance") as mock: + vc.check_instances() + mock.assert_called_with("vpnInstance", "42", "31") + + def test_check_instances_vpn_worker(self): + vc = validateConfiguration.ValidateConfiguration(providers=["31"], configurations=[ + {"masterInstance": "42", "workerInstances": ["42"]}]) + with patch.object(vc, "check_instance") as mock: + vc.check_instances() + mock.assert_called_with("workerInstance", "42", "31") + + def test_check_instances_vpn_master_missing(self): + vc = validateConfiguration.ValidateConfiguration(providers=["31"], configurations=[{}]) + self.assertFalse(vc.check_instances()) + vc = validateConfiguration.ValidateConfiguration(providers=["31"], configurations=[{"workerInstances": ["42"]}]) + self.assertFalse(vc.check_instances()) + + def test_check_instances_vpn_master_count(self): + for i in range(3): + vc = validateConfiguration.ValidateConfiguration(providers=["31"] * i, + configurations=[{"masterInstance": "42"}] * i) + with patch.object(vc, "check_instance") as mock: + vc.check_instances() + self.assertTrue(vc.required_resources_dict["floating_ips"] == i) + + def test_check_instance_image_not_found(self): + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=None) + provider = Mock() + provider.get_image_by_id_or_name = MagicMock(return_value=None) + self.assertFalse(vc.check_instance(None, {"count": 1, "image": 2}, provider)) + + def test_check_instance_image_not_active(self): + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=None) + provider = Mock() + provider.get_image_by_id_or_name = MagicMock(return_value={"status": None}) + self.assertFalse(vc.check_instance(None, {"count": 1, "image": 2}, provider)) + + def test_check_instance_image_active_combination_call(self): + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=None) + provider = Mock() + provider.get_image_by_id_or_name = MagicMock(return_value={"status": "active"}) + with patch.object(vc, "check_instance_type_image_combination") as mock: + vc.check_instance(42, {"count": 1, "image": 2, "type": 3}, provider) + mock.assert_called_with(3, {"status": "active"}, provider) + + def test_check_instance_image_not_found_count(self): + provider = Mock() + provider.get_image_by_id_or_name = 
MagicMock(return_value=None) + for i in range(1, 3): + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=None) + vc.check_instance(None, {"count": i, "image": 2}, provider) + self.assertTrue(vc.required_resources_dict["instances"] == i) + + def test_check_instance_type_image_combination_has_enough_calls(self): + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=None) + provider = MagicMock() + provider.get_flavor.return_value = {"disk": 42, "ram": 32, "vcpus": 10} + provider.get_image_by_id_or_name.return_value = {"minDisk": 22, "minRam": 12} + with patch.object(vc, "has_enough") as mock: + vc.check_instance_type_image_combination(instance_image=None, instance_type="de.NBI tiny", + provider=provider) + self.assertEquals(call(42, 22, "Type de.NBI tiny", "disk space"), mock.call_args_list[0]) + self.assertEquals(call(32, 12, "Type de.NBI tiny", "ram"), mock.call_args_list[1]) + + def test_check_instance_type_image_combination_result(self): + provider = MagicMock() + provider.get_flavor.return_value = {"disk": 42, "ram": 32, "vcpus": 10} + provider.get_image_by_id_or_name.return_value = {"minDisk": 22, "minRam": 12} + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=None) + with patch.object(vc, "has_enough") as mock: + mock.side_effect = [True, True, False, False, True, False, False, True] + # True True + self.assertTrue(vc.check_instance_type_image_combination(instance_image=None, instance_type="de.NBI tiny", + provider=provider)) + # False False + self.assertFalse(vc.check_instance_type_image_combination(instance_image=None, instance_type="de.NBI tiny", + provider=provider)) + # True False + self.assertFalse(vc.check_instance_type_image_combination(instance_image=None, instance_type="de.NBI tiny", + provider=provider)) + # False True + self.assertFalse(vc.check_instance_type_image_combination(instance_image=None, instance_type="de.NBI tiny", + provider=provider)) + + def test_check_instance_type_image_combination_count(self): + for i in range(3): + provider = MagicMock() + provider.get_flavor.return_value = {"disk": 42, "ram": i * 32, "vcpus": i * 10} + provider.get_image_by_id_or_name.return_value = {"minDisk": 22, "minRam": 12} + vc = validateConfiguration.ValidateConfiguration(providers=None, configurations=None) + with patch.object(vc, "has_enough") as mock: + vc.check_instance_type_image_combination(instance_image=None, instance_type="de.NBI tiny", + provider=provider) + self.assertEquals(32 * i, vc.required_resources_dict["total_ram"]) + self.assertEquals(10 * i, vc.required_resources_dict["total_cores"]) + mock.assert_called_with(32 * i, 12, 'Type de.NBI tiny', 'ram') + + def test_check_volumes_none(self): + vc = validateConfiguration.ValidateConfiguration(providers=[42], configurations=[{}]) + self.assertTrue(vc.check_volumes()) + + def test_check_volumes_mismatch(self): + provider = Mock() + provider.get_volume_by_id_or_name = MagicMock(return_value=None) + provider.get_volume_snapshot_by_id_or_name = MagicMock(return_value=None) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"masterMounts": ["Test"]}]) + self.assertFalse(vc.check_volumes()) + + def test_check_volumes_match_snapshot(self): + provider = Mock() + provider.get_volume_by_id_or_name = MagicMock(return_value=None) + provider.get_volume_snapshot_by_id_or_name = MagicMock(return_value={"size": 1}) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + 
configurations=[{"masterMounts": ["Test"]}]) + self.assertTrue(vc.check_volumes()) + + def test_check_volumes_match_snapshot_count(self): + for i in range(3): + provider = Mock() + provider.get_volume_by_id_or_name = MagicMock(return_value=None) + provider.get_volume_snapshot_by_id_or_name = MagicMock(return_value={"size": i}) + vc = validateConfiguration.ValidateConfiguration(providers=[provider] * i, + configurations=[{"masterMounts": ["Test"] * i}]) + self.assertTrue(vc.check_volumes()) + self.assertTrue(vc.required_resources_dict["Volumes"] == i) + self.assertTrue(vc.required_resources_dict["VolumeGigabytes"] == i ** 2) + + def test_check_volumes_match_volume(self): + provider = Mock() + provider.get_volume_by_id_or_name = MagicMock(return_value={"size": 1}) + provider.get_volume_snapshot_by_id_or_name = MagicMock(return_value=None) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"masterMounts": ["Test"]}]) + self.assertTrue(vc.check_volumes()) + self.assertTrue(vc.required_resources_dict["Volumes"] == 0) + self.assertTrue(vc.required_resources_dict["VolumeGigabytes"] == 0) + + def test_check_network_none(self): + provider = Mock() + provider.get_network_by_id_or_name = MagicMock(return_value=None) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{}]) + self.assertFalse(vc.check_network()) + + def test_check_network_no_network(self): + provider = Mock() + provider.get_subnet_by_id_or_name = MagicMock(return_value="network") + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"subnet": "subnet_name"}]) + self.assertTrue(vc.check_network()) + provider.get_subnet_by_id_or_name.assert_called_with("subnet_name") + + def test_check_network_no_network_mismatch_subnet(self): + provider = Mock() + provider.get_subnet_by_id_or_name = MagicMock(return_value=None) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"subnet": "subnet_name"}]) + self.assertFalse(vc.check_network()) + provider.get_subnet_by_id_or_name.assert_called_with("subnet_name") + + def test_check_network_no_subnet_mismatch_network(self): + provider = Mock() + provider.get_network_by_id_or_name = MagicMock(return_value=None) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"network": "network_name"}]) + self.assertFalse(vc.check_network()) + provider.get_network_by_id_or_name.assert_called_with("network_name") + + def test_check_network_no_subnet(self): + provider = Mock() + provider.get_network_by_id_or_name = MagicMock(return_value="network") + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"network": "network_name"}]) + self.assertTrue(vc.check_network()) + provider.get_network_by_id_or_name.assert_called_with("network_name") + + def test_check_network_subnet_network(self): + provider = Mock() + provider.get_network_by_id_or_name = MagicMock(return_value="network") + provider.get_subnet_by_id_or_name = MagicMock(return_value="network") + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"network": "network_name"}]) + self.assertTrue(vc.check_network()) + provider.get_network_by_id_or_name.assert_called_with("network_name") + + def test_check_server_group_none(self): + provider = Mock() + provider.get_network_by_id_or_name = MagicMock(return_value=None) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + 
configurations=[{}]) + self.assertTrue(vc.check_server_group()) + + def test_check_server_group_mismatch(self): + provider = Mock() + provider.get_server_group_by_id_or_name = MagicMock(return_value=None) + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"serverGroup": "GroupName"}]) + self.assertFalse(vc.check_server_group()) + provider.get_server_group_by_id_or_name.assert_called_with("GroupName") + + def test_check_server_group_match(self): + provider = Mock() + provider.get_server_group_by_id_or_name = MagicMock(return_value="Group") + vc = validateConfiguration.ValidateConfiguration(providers=[provider], + configurations=[{"serverGroup": "GroupName"}]) + self.assertTrue(vc.check_server_group()) + provider.get_server_group_by_id_or_name.assert_called_with("GroupName") + + def test_check_quotas_true(self): + provider = MagicMock() + provider.cloud_specification = {"auth": {"project_name": "name"}, "identifier": "identifier"} + test_dict = {'total_cores': 42, 'floating_ips': 42, 'instances': 42, 'total_ram': 42, + 'Volumes': 42, 'VolumeGigabytes': 42, 'Snapshots': 42, 'Backups': 42, 'BackupGigabytes': 42} + provider.get_free_resources.return_value = test_dict + vc = validateConfiguration.ValidateConfiguration(providers=[provider], configurations=None) + with patch.object(vc, "has_enough") as mock: + mock.side_effect = [True] * len(test_dict) + self.assertTrue(vc.check_quotas()) + provider.get_free_resources.assert_called() + for key in vc.required_resources_dict.keys(): + self.assertTrue(call(test_dict[key], vc.required_resources_dict[key], + f"Project identifier", key) in mock.call_args_list) + + def test_check_quotas_false(self): + provider = MagicMock() + test_dict = {'total_cores': 42, 'floating_ips': 42, 'instances': 42, 'total_ram': 42, + 'Volumes': 42, 'VolumeGigabytes': 42, 'Snapshots': 42, 'Backups': 42, 'BackupGigabytes': 42} + provider.get_free_resources.return_value = test_dict + os.environ['OS_PROJECT_NAME'] = "name" + vc = validateConfiguration.ValidateConfiguration(providers=[provider], configurations=None) + with patch.object(vc, "has_enough") as mock: + mock.side_effect = [True] * (len(test_dict) - 1) + [False] + self.assertFalse(vc.check_quotas()) + provider.get_free_resources.assert_called() + mock.assert_called() + + def test_has_enough_lower(self): + vc = validateConfiguration + self.assertTrue(vc.has_enough(2, 1, "", "")) + + def test_has_enough_equal(self): + vc = validateConfiguration + self.assertTrue(vc.has_enough(2, 2, "", "")) + + def test_has_enough_higher(self): + vc = validateConfiguration + self.assertFalse(vc.has_enough(1, 2, "", "")) diff --git a/tests/test_ansibleConfigurator.py b/tests/test_ansibleConfigurator.py new file mode 100644 index 000000000..85f232725 --- /dev/null +++ b/tests/test_ansibleConfigurator.py @@ -0,0 +1,307 @@ +from unittest import TestCase +from unittest.mock import MagicMock, Mock, patch, call, mock_open, ANY + +import bibigrid2.core.utility.ansible_configurator as ansibleConfigurator +import bibigrid2.core.utility.paths.ansible_resources_path as aRP +import bibigrid2.core.utility.yaml_dumper as yamlDumper + + +class TestAnsibleConfigurator(TestCase): + def test_generate_site_file_yaml_empty(self): + site_yaml = [{'hosts': 'master', "become": "yes", + "vars_files": ansibleConfigurator.VARS_FILES, "roles": ["common", "master"]}, + {"hosts": "worker", "become": "yes", "vars_files": + ansibleConfigurator.VARS_FILES, "roles": ["common", "worker"]}, + {"hosts": "vpnwkr", "become": "yes", 
"vars_files": + ansibleConfigurator.VARS_FILES, "roles": ["common", "vpnwkr"]}] + self.assertEqual(site_yaml, ansibleConfigurator.generate_site_file_yaml([])) + + def test_generate_site_file_yaml_role(self): + custom_roles = [{"file": "file", "hosts": "hosts", "name": "name", "vars": "vars", "vars_file": "varsFile"}] + vars_files = ['vars/login.yml', 'vars/instances.yml', 'vars/common_configuration.yml', 'varsFile'] + site_yaml = [{'hosts': 'master', "become": "yes", + "vars_files": vars_files, "roles": ["common", "master", "additional/name"]}, + {"hosts": "worker", "become": "yes", "vars_files": + vars_files, "roles": ["common", "worker", "additional/name"]}, + {"hosts": "vpnwkr", "become": "yes", "vars_files": + vars_files, "roles": ["common", "vpnwkr", "additional/name"]}] + self.assertEqual(site_yaml, ansibleConfigurator.generate_site_file_yaml(custom_roles)) + + def test_generate_instances(self): + cluster_dict = object() + self.assertEqual(cluster_dict, ansibleConfigurator.generate_instances_yaml(cluster_dict)) + + def test_generate_common_configuration_false(self): + cidrs = 42 + configuration = {} + common_configuration_yaml = {"cluster_cidrs": cidrs, + "local_fs": False, + "local_dns_lookup": False, + "use_master_as_compute": True, + "enable_slurm": False, + "enable_zabbix": False, + "enable_nfs": False, + "enable_ide": False + } + self.assertEqual(common_configuration_yaml, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)) + + def test_generate_common_configuration_true(self): + cidrs = 42 + configuration = {elem: "true" for elem in ["localFS", "localDNSlookup", "useMasterAsCompute", "slurm", + "zabbix", "ide"]} + common_configuration_yaml = {elem: "true" for elem in ["local_fs", "local_dns_lookup", "use_master_as_compute", + "enable_slurm", "enable_zabbix", "enable_ide"]} + common_configuration_yaml["cluster_cidrs"] = cidrs + common_configuration_yaml["enable_nfs"] = False + self.assertEqual(common_configuration_yaml, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)) + + def test_generate_common_configuration_nfs_shares(self): + cidrs = 42 + configuration = {"nfs": "True", "nfsShares": ["/vil/mil"]} + common_configuration_yaml = {'cluster_cidrs': 42, + 'enable_ide': False, + 'enable_nfs': 'True', + 'enable_slurm': False, + 'enable_zabbix': False, + 'ext_nfs_mounts': [], + 'local_dns_lookup': False, + 'local_fs': False, + 'nfs_mounts': [{'dst': '/vil/mil', 'src': '/vil/mil'}, + {'dst': '/vol/spool', 'src': '/vol/spool'}], + 'use_master_as_compute': True} + self.assertEqual(common_configuration_yaml, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)) + + def test_generate_common_configuration_nfs(self): + cidrs = 42 + configuration = {"nfs": "True"} + common_configuration_yaml = {'cluster_cidrs': 42, + 'enable_ide': False, + 'enable_nfs': 'True', + 'enable_slurm': False, + 'enable_zabbix': False, + 'ext_nfs_mounts': [], + 'local_dns_lookup': False, + 'local_fs': False, + 'nfs_mounts': [{'dst': '/vol/spool', 'src': '/vol/spool'}], + 'use_master_as_compute': True} + self.assertEqual(common_configuration_yaml, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)) + + def test_generate_common_configuration_ext_nfs_shares(self): + cidrs = 42 + configuration = {"nfs": "True", "extNfsShares": ["/vil/mil"]} + common_configuration_yaml = {'cluster_cidrs': 42, + 'enable_ide': False, + 'enable_nfs': 'True', + 'enable_slurm': False, + 'enable_zabbix': False, + 'ext_nfs_mounts': 
[{'dst': '/vil/mil', 'src': '/vil/mil'}], + 'local_dns_lookup': False, + 'local_fs': False, + 'nfs_mounts': [{'dst': '/vol/spool', 'src': '/vol/spool'}], + 'use_master_as_compute': True} + self.assertEqual(common_configuration_yaml, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)) + + def test_generate_common_configuration_ide(self): + cidrs = 42 + configuration = {"ide": "Some1", "ideConf": "Some2"} + common_configuration_yaml = {'cluster_cidrs': 42, + 'enable_ide': "Some1", + 'enable_nfs': False, + 'enable_slurm': False, + 'enable_zabbix': False, + 'ide_conf': 'Some2', + 'local_dns_lookup': False, + 'local_fs': False, + 'use_master_as_compute': True} + self.assertEqual(common_configuration_yaml, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)) + + @patch("bibigrid2.core.utility.ansibleConfigurator.get_ansible_roles") + def test_generate_common_configuration_ansible_roles_mock(self, mock_ansible_roles): + cidrs = 42 + ansible_roles = [{elem: elem for elem in ["file", "hosts", "name", "vars", "vars_file"]}] + mock_ansible_roles.return_value = 21 + configuration = {"ansibleRoles": ansible_roles} + self.assertEqual(21, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)["ansible_roles"]) + mock_ansible_roles.assert_called_with(ansible_roles) + + @patch("bibigrid2.core.utility.ansibleConfigurator.get_ansible_galaxy_roles") + def test_generate_common_configuration_ansible_galaxy_roles(self, mock_galaxy_roles): + cidrs = 42 + galaxy_roles = [{elem: elem for elem in ["hosts", "name", "galaxy", "git", "url", "vars", "vars_file"]}] + configuration = {"ansibleGalaxyRoles": galaxy_roles} + mock_galaxy_roles.return_value = 21 + self.assertEqual(21, + ansibleConfigurator.generate_common_configuration_yaml(cidrs, configuration)[ + "ansible_galaxy_roles"]) + mock_galaxy_roles.assert_called_with(galaxy_roles) + + @patch("bibigrid2.core.utility.ansibleConfigurator.to_instance_host_dict") + def test_generate_ansible_hosts(self, mock_instance_host_dict): + mock_instance_host_dict.side_effect = [0, 1, 2] + cluster_dict = {"workers": [{"private_v4": 21}], "vpnwkrs": [{"private_v4": 32}]} + expected = {'master': {'hosts': 0}, 'worker': {'hosts': {21: 1, 32: 2}}} + self.assertEqual(expected, ansibleConfigurator.generate_ansible_hosts_yaml(42, cluster_dict)) + call_list = mock_instance_host_dict.call_args_list + self.assertEqual(call(42), call_list[0]) + self.assertEqual(call(42, ip=21, local=False), call_list[1]) + self.assertEqual(call(42, ip=32, local=False), call_list[2]) + + def test_to_instance_host_local(self): + ip = 42 + ssh_user = 21 + local = {"ip": ip, "ansible_connection": "local", + "ansible_python_interpreter": ansibleConfigurator.PYTHON_INTERPRETER, + "ansible_user": ssh_user} + self.assertEqual(local, ansibleConfigurator.to_instance_host_dict(21, 42, True)) + + def test_to_instance_host_ssh(self): + ip = 42 + ssh_user = 21 + ssh = {"ip": ip, "ansible_connection": "ssh", + "ansible_python_interpreter": ansibleConfigurator.PYTHON_INTERPRETER, + "ansible_user": ssh_user} + self.assertEqual(ssh, ansibleConfigurator.to_instance_host_dict(21, 42, False)) + + def test_get_cidrs_single(self): + provider = Mock() + provider.get_subnet_by_id_or_name.return_value = {"cidr": 42} + configuration = {"subnet": 21} + expected = [{'provider': 'Mock', 'provider_cidrs': [42]}] + self.assertEqual(expected, ansibleConfigurator.get_cidrs([configuration], [provider])) + provider.get_subnet_by_id_or_name.assert_called_with(21) + + 
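+    # NOTE: the expected value {'provider': 'Mock', 'provider_cidrs': [42]} above suggests that
+    # the implementation derives the provider label from the class name of the provider object.
+    # A minimal sketch of get_cidrs consistent with this test and the list variant that follows
+    # (hypothetical, for illustration only -- not the actual source):
+    #
+    #     def get_cidrs(configurations, providers):
+    #         results = []
+    #         for configuration, provider in zip(configurations, providers):
+    #             subnets = configuration["subnet"]
+    #             if not isinstance(subnets, list):
+    #                 subnets = [subnets]
+    #             cidrs = [provider.get_subnet_by_id_or_name(subnet)["cidr"] for subnet in subnets]
+    #             results.append({"provider": type(provider).__name__, "provider_cidrs": cidrs})
+    #         return results
+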
def test_get_cidrs_list(self): + provider = Mock() + provider.get_subnet_by_id_or_name.return_value = {"cidr": 42} + configuration = {"subnet": [21, 22]} + expected = [{'provider': 'Mock', 'provider_cidrs': [42, 42]}] + self.assertEqual(expected, ansibleConfigurator.get_cidrs([configuration], [provider])) + call_list = provider.get_subnet_by_id_or_name.call_args_list + self.assertEqual(call(21), call_list[0]) + self.assertEqual(call(22), call_list[1]) + + def test_get_ansible_roles_empty(self): + self.assertEqual([], ansibleConfigurator.get_ansible_roles([])) + + def test_get_ansible_roles(self): + ansible_roles = [{elem: elem for elem in ["file", "hosts", "name", "vars", "vars_file"]}] + self.assertEqual(ansible_roles, ansibleConfigurator.get_ansible_roles(ansible_roles)) + + def test_get_ansible_roles_add(self): + ansible_roles = [{elem: elem for elem in ["file", "hosts", "name", "vars", "vars_file"]}] + ansible_roles_add = [{elem: elem for elem in ["file", "hosts", "name", "vars", "vars_file", "additional"]}] + self.assertEqual(ansible_roles, ansibleConfigurator.get_ansible_roles(ansible_roles_add)) + + def test_get_ansible_roles_minus(self): + ansible_roles = [{elem: elem for elem in ["file", "hosts"]}] + self.assertEqual(ansible_roles, ansibleConfigurator.get_ansible_roles(ansible_roles)) + + @patch("logging.warning") + def test_get_ansible_roles_mismatch_hosts(self, mock_log): + ansible_roles = [{"file": "file"}] + self.assertEqual([], ansibleConfigurator.get_ansible_roles(ansible_roles)) + mock_log.assert_called() + + @patch("logging.warning") + def test_get_ansible_roles_mismatch_file(self, mock_log): + ansible_roles = [{"hosts": "hosts"}] + self.assertEqual([], ansibleConfigurator.get_ansible_roles(ansible_roles)) + mock_log.assert_called() + + def test_get_ansible_galaxy_roles_empty(self): + self.assertEqual([], ansibleConfigurator.get_ansible_galaxy_roles([])) + + def test_get_ansible_galaxy_roles(self): + galaxy_roles = [{elem: elem for elem in ["hosts", "name", "galaxy", "git", "url", "vars", "vars_file"]}] + self.assertEqual(galaxy_roles, ansibleConfigurator.get_ansible_galaxy_roles(galaxy_roles)) + + def test_get_ansible_galaxy_roles_add(self): + galaxy_roles = [{elem: elem for elem in ["hosts", "name", "galaxy", "git", "url", "vars", "vars_file"]}] + galaxy_roles_add = [ + {elem: elem for elem in ["hosts", "name", "galaxy", "git", "url", "vars", "vars_file", "additional"]}] + self.assertEqual(galaxy_roles, ansibleConfigurator.get_ansible_galaxy_roles(galaxy_roles_add)) + + def test_get_ansible_galaxy_roles_minus(self): + galaxy_roles = [{elem: elem for elem in ["hosts", "name", "galaxy", "git", "vars", "vars_file"]}] + self.assertEqual(galaxy_roles, ansibleConfigurator.get_ansible_galaxy_roles(galaxy_roles)) + + @patch("logging.warning") + def test_get_ansible_galaxy_roles_mismatch(self, mock_log): + galaxy_roles = [{elem: elem for elem in ["hosts", "name", "vars", "vars_file"]}] + self.assertEqual([], ansibleConfigurator.get_ansible_galaxy_roles(galaxy_roles)) + mock_log.assert_called() + + def test_generate_login_file(self): + login_yaml = {"default_user": 99, + "ssh_user": 21, + "munge_key": 32} + self.assertEqual(login_yaml, ansibleConfigurator.generate_login_file_yaml(21, 32, 99)) + + def test_generate_worker_specification_file_yaml(self): + configuration = [{"workerInstances": [{elem: elem for elem in ["type", "image"]}], "network": [32]}] + expected = [{'IMAGE': 'image', 'NETWORK': [32], 'TYPE': 'type'}] + self.assertEqual(expected, 
ansibleConfigurator.generate_worker_specification_file_yaml(configuration)) + + def test_generate_worker_specification_file_yaml_empty(self): + configuration = [{}] + expected = [] + self.assertEqual(expected, ansibleConfigurator.generate_worker_specification_file_yaml(configuration)) + + @patch("yaml.dump") + def test_write_yaml_no_alias(self, mock_yaml): + with patch('builtins.open', mock_open()) as output_mock: + ansibleConfigurator.write_yaml("here", {"some": "yaml"}, False) + output_mock.assert_called_once_with("here", "w+") + mock_yaml.assert_called_with(data={"some": "yaml"}, stream=ANY, Dumper=yamlDumper.NoAliasSafeDumper) + + @patch("yaml.safe_dump") + def test_write_yaml_alias(self, mock_yaml): + with patch('builtins.open', mock_open()) as output_mock: + ansibleConfigurator.write_yaml("here", {"some": "yaml"}, True) + output_mock.assert_called_once_with("here", "w+") + mock_yaml.assert_called_with(data={"some": "yaml"}, stream=ANY) + + @patch("bibigrid2.core.utility.idGeneration.generate_munge_key") + @patch("bibigrid2.core.utility.ansibleConfigurator.generate_worker_specification_file_yaml") + @patch("bibigrid2.core.utility.ansibleConfigurator.generate_login_file_yaml") + @patch("bibigrid2.core.utility.ansibleConfigurator.generate_common_configuration_yaml") + @patch("bibigrid2.core.actions.listClusters.dict_clusters") + @patch("bibigrid2.core.utility.ansibleConfigurator.generate_instances_yaml") + @patch("bibigrid2.core.utility.ansibleConfigurator.generate_ansible_hosts_yaml") + @patch("bibigrid2.core.utility.ansibleConfigurator.get_ansible_roles") + @patch("bibigrid2.core.utility.ansibleConfigurator.generate_site_file_yaml") + @patch("bibigrid2.core.utility.ansibleConfigurator.write_yaml") + @patch("bibigrid2.core.utility.ansibleConfigurator.get_cidrs") + def test_configure_ansible_yaml(self, mock_cidrs, mock_yaml, mock_site, mock_roles, mock_hosts, + mock_instances, mock_list, mock_common, mock_login, mock_worker, mock_munge): + mock_munge.return_value = 420 + mock_cidrs.return_value = 421 + mock_list.return_value = {2: 422} + mock_roles.return_value = 423 + provider = MagicMock() + provider.cloud_specification = {"auth": {"username":"Tom"}} + ansibleConfigurator.configure_ansible_yaml([provider], [{"sshUser": 42, "ansibleRoles": 21}], 2) + mock_munge.assert_called() + mock_worker.assert_called_with([{"sshUser": 42, "ansibleRoles": 21}]) + mock_common.assert_called_with(421, configuration={"sshUser": 42, "ansibleRoles": 21}) + mock_login.assert_called_with(ssh_user=42, munge_key=420, default_user="Tom") + mock_list.assert_called_with([provider]) + mock_instances.assert_called_with(422) + mock_hosts.assert_called_with(42, 422) + mock_site.assert_called_with(423) + mock_roles.assert_called_with(21) + mock_cidrs.assert_called_with([{'sshUser': 42, 'ansibleRoles': 21}], [provider]) + expected = [call(aRP.WORKER_SPECIFICATION_FILE, mock_worker(), False), + call(aRP.COMMONS_LOGIN_FILE, mock_login(), False), + call(aRP.COMMONS_CONFIG_FILE, mock_common(), False), + call(aRP.COMMONS_INSTANCES_FILE, mock_instances(), False), + call(aRP.HOSTS_CONFIG_FILE, mock_hosts(), False), + call(aRP.SITE_CONFIG_FILE, mock_site(), False)] + self.assertEqual(expected, mock_yaml.call_args_list) diff --git a/tests/test_check.py b/tests/test_check.py new file mode 100644 index 000000000..c77361758 --- /dev/null +++ b/tests/test_check.py @@ -0,0 +1,33 @@ +from unittest import TestCase +from unittest.mock import patch + +import bibigrid2.core.actions.check as check +import 
bibigrid2.core.utility.validate_configuration as validateConfiguration
+
+
+class TestCheck(TestCase):
+
+    @patch("logging.info")
+    def test_check_true(self, mock_log):
+        providers = [42]
+        configurations = [32]
+        with patch.object(validateConfiguration.ValidateConfiguration, "validate", return_value=True) as mock_validate:
+            self.assertFalse(check.check(configurations, providers))
+        mock_validate.assert_called()
+        mock_log.assert_called_with("Total check returned True.")
+
+    @patch("logging.info")
+    def test_check_false(self, mock_log):
+        providers = [42]
+        configurations = [32]
+        with patch.object(validateConfiguration.ValidateConfiguration, "validate", return_value=False) as mock_validate:
+            self.assertFalse(check.check(configurations, providers))
+        mock_validate.assert_called()
+        mock_log.assert_called_with("Total check returned False.")
+
+    @patch("bibigrid2.core.utility.validate_configuration.ValidateConfiguration")
+    def test_check_init(self, mock_validator):
+        providers = [42]
+        configurations = [32]
+        self.assertFalse(check.check(configurations, providers))
+        mock_validator.assert_called_with(configurations, providers)
diff --git a/tests/test_configurationHandler.py b/tests/test_configurationHandler.py
new file mode 100644
index 000000000..8e491c004
--- /dev/null
+++ b/tests/test_configurationHandler.py
@@ -0,0 +1,196 @@
+import os
+from unittest import TestCase
+from unittest.mock import patch, mock_open, MagicMock
+
+import bibigrid2.core.utility.handler.configuration_handler as configurationHandler
+
+
+class TestConfigurationHandler(TestCase):
+    def test_get_list_by_key_none(self):
+        configurations = [{}, {}]
+        self.assertEqual([None, None], configurationHandler.get_list_by_key(configurations, "key1"))
+        self.assertEqual([], configurationHandler.get_list_by_key(configurations, "key1", False))
+
+    def test_get_list_by_key_values(self):
+        configurations = [{"key1": "value1", "key2": "value1"}, {"key1": "value2"}]
+        self.assertEqual(["value1", "value2"], configurationHandler.get_list_by_key(configurations, "key1"))
+        self.assertEqual(["value1", "value2"], configurationHandler.get_list_by_key(configurations, "key1", False))
+        self.assertEqual(["value1", None], configurationHandler.get_list_by_key(configurations, "key2"))
+        self.assertEqual(["value1"], configurationHandler.get_list_by_key(configurations, "key2", False))
+
+    @patch("os.path.isfile")
+    def test_read_configuration_no_file(self, mock_isfile):
+        mock_isfile.return_value = False
+        opener = MagicMock()
+        configuration = "Test: 42"
+        expected_result = None
+        with patch("builtins.open", mock_open(opener, read_data=configuration)):
+            result = configurationHandler.read_configuration("path")
+        mock_isfile.assert_called_with("path")
+        opener.assert_not_called()
+        self.assertEqual(expected_result, result)
+
+    @patch("os.path.isfile")
+    def test_read_configuration_file(self, mock_isfile):
+        mock_isfile.return_value = True
+        opener = MagicMock()
+        configuration = "Test: 42"
+        expected_result = {"Test": 42}
+        with patch("builtins.open", mock_open(opener, read_data=configuration)):
+            result = configurationHandler.read_configuration("path")
+        mock_isfile.assert_called_with("path")
+        opener.assert_called_with("path", "r")
+        self.assertEqual(expected_result, result)
+
+    @patch("os.path.isfile")
+    def test_read_configuration_file_yaml_exception(self, mock_isfile):
+        mock_isfile.return_value = True
+        opener = MagicMock()
+        configuration = "]unbalanced brackets["
+        expected_result = None
+        with patch("builtins.open",
mock_open(opener, read_data=configuration)):
+            result = configurationHandler.read_configuration("path")
+        mock_isfile.assert_called_with("path")
+        opener.assert_called_with("path", "r")
+        self.assertEqual(expected_result, result)
+
+    def test_find_file_in_folders_not_found_no_folder(self):
+        expected_result = None
+        result = configurationHandler.find_file_in_folders("true_file", [])
+        self.assertEqual(expected_result, result)
+
+    def test_find_file_in_folders_not_found_no_file(self):
+        expected_result = None
+        with patch("os.path.isfile") as mock_isfile:
+            mock_isfile.return_value = False
+            result = configurationHandler.find_file_in_folders("false_file", ["or_false_folder"])
+        self.assertEqual(expected_result, result)
+        mock_isfile.assert_called_with(os.path.expanduser(os.path.join("or_false_folder", "false_file")))
+
+    @patch("os.path.isfile")
+    @patch("bibigrid2.core.utility.handler.configurationHandler.read_configuration")
+    def test_find_file_in_folders(self, mock_read_configuration, mock_isfile):
+        expected_result = 42
+        mock_isfile.return_value = True
+        mock_read_configuration.return_value = 42
+        result = configurationHandler.find_file_in_folders("true_file", ["true_folder"])
+        self.assertEqual(expected_result, result)
+        mock_read_configuration.assert_called_with(os.path.expanduser(os.path.join("true_folder", "true_file")))
+
+    @patch("bibigrid2.core.utility.handler.configurationHandler.find_file_in_folders")
+    def test_get_cloud_files_none(self, mock_ffif):
+        mock_ffif.return_value = None
+        expected_result = None, None
+        result = configurationHandler.get_clouds_files()
+        self.assertEqual(expected_result, result)
+
+    @patch("bibigrid2.core.utility.handler.configurationHandler.find_file_in_folders")
+    def test_get_cloud_files_no_clouds_yaml(self, mock_ffif):
+        mock_ffif.side_effect = [None, {configurationHandler.CLOUD_PUBLIC_ROOT_KEY: 42}]
+        expected_result = None, 42
+        result = configurationHandler.get_clouds_files()
+        self.assertEqual(expected_result, result)
+
+    @patch("bibigrid2.core.utility.handler.configurationHandler.find_file_in_folders")
+    def test_get_cloud_files_no_public_clouds_yaml(self, mock_ffif):
+        mock_ffif.side_effect = [{configurationHandler.CLOUD_ROOT_KEY: 42}, None]
+        expected_result = 42, None
+        result = configurationHandler.get_clouds_files()
+        self.assertEqual(expected_result, result)
+
+    @patch("bibigrid2.core.utility.handler.configurationHandler.find_file_in_folders")
+    def test_get_cloud_files_no_root_key_public(self, mock_ffif):
+        mock_ffif.side_effect = [{configurationHandler.CLOUD_ROOT_KEY: 42}, {"name": 42}]
+        expected_result = 42, None
+        result = configurationHandler.get_clouds_files()
+        self.assertEqual(expected_result, result)
+
+    @patch("bibigrid2.core.utility.handler.configurationHandler.find_file_in_folders")
+    def test_get_cloud_files_no_root_key_cloud(self, mock_ffif):
+        mock_ffif.side_effect = [{"name": 42}, {configurationHandler.CLOUD_PUBLIC_ROOT_KEY: 42}]
+        expected_result = None, 42
+        result = configurationHandler.get_clouds_files()
+        self.assertEqual(expected_result, result)
+
+    @patch("bibigrid2.core.utility.handler.configurationHandler.find_file_in_folders")
+    def test_get_cloud_files(self, mock_ffif):
+        mock_ffif.side_effect = [{configurationHandler.CLOUD_ROOT_KEY: 22},
+                                 {configurationHandler.CLOUD_PUBLIC_ROOT_KEY: 42}]
+        expected_result = 22, 42
+        result = configurationHandler.get_clouds_files()
+        self.assertEqual(expected_result, result)
+        mock_ffif.assert_called_with(configurationHandler.CLOUDS_PUBLIC_YAML,
configurationHandler.CLOUDS_YAML_PATHS) + + @patch("bibigrid2.core.utility.handler.configurationHandler.get_cloud_specification") + @patch("bibigrid2.core.utility.handler.configurationHandler.get_clouds_files") + def test_get_cloud_specifications_none(self, mock_get_clouds_files, mock_get_clouds_specification): + mock_get_clouds_files.return_value = None, None + expected_result = [] + result = configurationHandler.get_cloud_specifications([{"cloud": 42}]) + self.assertEqual(expected_result, result) + mock_get_clouds_specification.assert_not_called() + mock_get_clouds_files.assert_called() + + @patch("bibigrid2.core.utility.handler.configurationHandler.get_cloud_specification") + @patch("bibigrid2.core.utility.handler.configurationHandler.get_clouds_files") + def test_get_cloud_specifications_no_cloud_configuration_key(self, mock_get_clouds_files, + mock_get_clouds_specification): + mock_get_clouds_files.return_value = {"Some"}, {"Some"} + expected_result = [] + result = configurationHandler.get_cloud_specifications([{"no_cloud": 42}]) + self.assertEqual(expected_result, result) + mock_get_clouds_specification.assert_not_called() + mock_get_clouds_files.assert_called() + + @patch("bibigrid2.core.utility.handler.configurationHandler.get_cloud_specification") + @patch("bibigrid2.core.utility.handler.configurationHandler.get_clouds_files") + def test_get_cloud_specifications_cloud(self, mock_get_clouds_files, mock_get_clouds_specification): + mock_get_clouds_files.return_value = {"1"}, {"2"} + mock_get_clouds_specification.return_value = 21 + expected_result = [21] + result = configurationHandler.get_cloud_specifications([{"cloud": 42}]) + self.assertEqual(expected_result, result) + mock_get_clouds_specification.assert_called_with(42, {"1"}, {"2"}) + mock_get_clouds_files.assert_called() + + @patch("bibigrid2.core.utility.handler.configurationHandler.get_cloud_specification") + @patch("bibigrid2.core.utility.handler.configurationHandler.get_clouds_files") + def test_get_cloud_specifications_no_config(self, mock_get_clouds_files, mock_get_clouds_specification): + mock_get_clouds_files.return_value = {"1"}, {"2"} + mock_get_clouds_specification.return_value = 21 + expected_result = [] + result = configurationHandler.get_cloud_specifications([]) + self.assertEqual(expected_result, result) + mock_get_clouds_specification.assert_not_called() + mock_get_clouds_files.assert_called() + + def test_get_cloud_specification_no_matching_cloud(self): + expected_result = {} + result = configurationHandler.get_cloud_specification("some_name", {}, {"some_some": "public"}) + self.assertEqual(expected_result, result) + + def test_get_cloud_specification_cloud(self): + expected_result = {42: 42} + result = configurationHandler.get_cloud_specification("some_name", {"some_name": {42: 42}}, None) + self.assertEqual(expected_result, result) + + def test_get_cloud_specification_no_public_cloud(self): + expected_result = {42: 42, "profile": "name2"} + result = configurationHandler.get_cloud_specification("some_name", {"some_name": expected_result}, + {"not_name2": {21: 21}}) + self.assertEqual(expected_result, result) + + def test_get_cloud_specification(self): + cloud_private_specification = {42: 42, "profile": "name2", "test": {"recursive": "foo"}} + expected_result = {42: 42, "profile": "name2", "test": {"recursive": "foo"}, "additional": "value"} + result = configurationHandler.get_cloud_specification("some_name", {"some_name": cloud_private_specification}, + {"name2": {42: 21, "test": {"recursive": "oof"}, + 
"additional": "value"}}) + self.assertEqual(expected_result, result) + + def test_get_cloud_specification_type_exception(self): + cloud_private_specification = {42: 42, "profile": "name2", "test": {"recursive": "foo"}} + result = configurationHandler.get_cloud_specification("some_name", {"some_name": cloud_private_specification}, + {"name2": {42: 21, "test": ["recursive", 22], + "additional": "value"}}) + self.assertEqual({}, result) diff --git a/tests/test_create.py b/tests/test_create.py new file mode 100644 index 000000000..3cc45257e --- /dev/null +++ b/tests/test_create.py @@ -0,0 +1,319 @@ +import os +from unittest import TestCase +from unittest.mock import patch, Mock, MagicMock, mock_open + +import bibigrid2.core.actions.create as create + + +class TestCreate(TestCase): + @patch("bibigrid2.core.utility.handler.sshHandler.get_add_ssh_public_key_commands") + @patch("bibigrid2.core.utility.idGeneration.generate_safe_cluster_id") + def test_init(self, mock_id, mock_ssh): + unique_id = 21 + provider = MagicMock() + provider.cloud_specification["auth"]["project_name"] = "name" + key_name = create.KEY_PREFIX + provider.cloud_specification["auth"]["project_name"] \ + + create.SEPARATOR + str(unique_id) + mock_id.return_value = str(unique_id) + mock_ssh.return_value = [32] + c = create.Create([provider], [{}], "path", False) + self.assertEqual(str(unique_id), c.cluster_id) + self.assertEqual("ubuntu", c.ssh_user) + self.assertEqual([32], c.ssh_add_public_key_commands) + self.assertEqual(c.key_name, key_name) + mock_id.assert_called_with([provider]) + + @patch("bibigrid2.core.utility.handler.sshHandler.get_add_ssh_public_key_commands") + @patch("bibigrid2.core.utility.idGeneration.generate_safe_cluster_id") + def test_init_username(self, mock_id, mock_ssh): + unique_id = 21 + mock_id.return_value = str(unique_id) + mock_ssh.return_value = [32] + c = create.Create([MagicMock()], [{"sshUser": "ssh"}], "path", False) + self.assertEqual("ssh", c.ssh_user) + + @patch("subprocess.check_output") + def test_generate_keypair(self, mock_subprocess): + provider = MagicMock() + provider.list_servers.return_value = [] + c = create.Create([provider], [{}], "") + public_key = "data" + with patch("builtins.open", mock_open(read_data=public_key)): + c.generate_keypair() + provider.create_keypair.assert_called_with(name=c.key_name, public_key=public_key) + mock_subprocess.assert_called_with(f'ssh-keygen -t ecdsa -f {create.KEY_FOLDER}{c.key_name} -P ""') + + def test_start_instance(self): + provider = MagicMock() + provider.list_servers.return_value = [] + provider.create_server.return_value = 42 + provider.add_auto_ip.return_value = {"floating_ip_address": 12} + c = create.Create([provider], [{}], "") + server_type = {"type": "testType", "image": "testImage"} + network = 21 + external_network = "testExternal" + c.start_instance(provider, create.MASTER_IDENTIFIER, server_type, network, worker=False, volumes=2, + external_network=external_network) + provider.create_server.assert_called_with(name=create.MASTER_IDENTIFIER + create.SEPARATOR + c.cluster_id, + flavor=server_type["type"], + key_name=c.key_name, + image=server_type["image"], + network=network, volumes=2) + provider.add_auto_ip.assert_called_with(network=external_network, server=42) + + def test_start_instance_worker(self): + provider = MagicMock() + provider.list_servers.return_value = [] + provider.create_server.return_value = 42 + provider.create_floating_ip.return_value = {"floating_ip_address": 12} + c = create.Create([provider], [{}], "") + 
server_type = {"type": "testType", "image": "testImage"}
+        network = 21
+        c.start_instance(provider, create.WORKER_IDENTIFIER, server_type, network, worker=True, volumes=None,
+                         external_network=None)
+        provider.create_server.assert_called_with(name=create.WORKER_IDENTIFIER.format(0) + create.SEPARATOR + c.cluster_id,
+                                                  flavor=server_type["type"],
+                                                  key_name=c.key_name,
+                                                  image=server_type["image"],
+                                                  network=network, volumes=None)
+        provider.create_floating_ip.assert_not_called()
+
+    @patch("bibigrid2.models.returnThreading.ReturnThread")
+    def test_start_instances(self, return_mock):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        external_network = "externalTest"
+        provider.get_external_network.return_value = external_network
+        configuration = {"network": 42}
+        c = create.Create([provider], [configuration], "")
+        provider.get_external_network.return_value = 32
+        with patch.object(c, "prepare_vpn_or_master_args", return_value=(0, 1, 2)) as prepare_mock:
+            prepare_mock.return_value = (0, 1, 2)
+            c.start_instances({"network": 42}, provider)
+            prepare_mock.assert_called_with(configuration, provider)
+            provider.get_external_network.assert_called_with(configuration["network"])
+            return_mock.assert_called_with(target=c.start_instance,
+                                           args=[provider, 0, 1, configuration["network"], False, 2, 32])
+
+    @patch("threading.Thread")
+    @patch("bibigrid2.models.returnThreading.ReturnThread")
+    def test_start_instances_workers(self, return_mock, thread_mock):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        external_network = "externalTest"
+        provider.get_external_network.return_value = external_network
+        configuration = {"network": 42, "workerInstances": [{"count": 1}]}
+        c = create.Create([provider], [configuration], "")
+        provider.get_external_network.return_value = 32
+        with patch.object(c, "prepare_vpn_or_master_args", return_value=(0, 1, 2)) as prepare_mock:
+            prepare_mock.return_value = (0, 1, 2)
+            c.start_instances(configuration, provider)
+            thread_mock.assert_called_with(target=c.start_instance,
+                                           args=[provider, create.WORKER_IDENTIFIER, configuration["workerInstances"][0],
+                                                 configuration["network"], True])
+            return_mock.assert_called()
+
+    def test_prepare_master_args(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        external_network = "externalTest"
+        provider.get_external_network.return_value = external_network
+        configuration = {"network": 42, "masterInstance": "Some"}
+        c = create.Create([provider], [configuration], "")
+        volume_return = [42]
+        with patch.object(c, "prepare_volumes", return_value=volume_return) as prepare_mock:
+            self.assertEqual((create.MASTER_IDENTIFIER, configuration["masterInstance"], volume_return),
+                             c.prepare_vpn_or_master_args(configuration, provider))
+            prepare_mock.assert_called_with(provider, [])
+
+    def test_prepare_vpn_args(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        external_network = "externalTest"
+        provider.get_external_network.return_value = external_network
+        configuration = {"network": 42, "vpnInstance": "Some"}
+        c = create.Create([provider], [configuration], "")
+        volume_return = [42]
+        with patch.object(c, "prepare_volumes", return_value=volume_return) as prepare_mock:
+            self.assertEqual((create.VPN_WORKER_IDENTIFIER, configuration["vpnInstance"], []),
+                             c.prepare_vpn_or_master_args(configuration, provider))
+            prepare_mock.assert_not_called()
+
+    def test_prepare_args_keyerror(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        external_network = "externalTest"
+        provider.get_external_network.return_value = external_network
+        configuration = {"network": 42}
+        c = create.Create([provider], [configuration], "")
+        volume_return = [42]
+        with patch.object(c, "prepare_volumes", return_value=volume_return) as prepare_mock:
+            with self.assertRaises(KeyError):
+                self.assertEqual((create.VPN_WORKER_IDENTIFIER, configuration["vpnInstance"], []),
+                                 c.prepare_vpn_or_master_args(configuration, provider))
+            prepare_mock.assert_not_called()
+
+    @patch("bibigrid2.core.utility.handler.sshHandler.ansible_preparation")
+    def test_setup_reachable_servers_master(self, mock_ansible):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        configuration = {"masterInstance": 42}
+        c = create.Create([provider], [configuration], "")
+        floating_ip = 21
+        c.setup_reachable_servers(configuration, floating_ip)
+        mock_ansible.assert_called_with(floating_ip=floating_ip,
+                                        private_key=create.KEY_FOLDER + c.key_name,
+                                        username=c.ssh_user,
+                                        commands=[])
+
+    def test_prepare_volumes_none(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        provider.get_volume_by_id_or_name.return_value = 42
+        provider.create_volume_from_snapshot.return_value = 21
+        configuration = {"vpnInstance": 42}
+        c = create.Create([provider], [configuration], "")
+        self.assertEqual([], c.prepare_volumes(provider, []))
+
+    def test_prepare_volumes_volume(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        provider.get_volume_by_id_or_name.return_value = 42
+        provider.create_volume_from_snapshot.return_value = 21
+        configuration = {"vpnInstance": 42}
+        c = create.Create([provider], [configuration], "")
+        self.assertEqual([42], c.prepare_volumes(provider, ["Test"]))
+
+    def test_prepare_volumes_snapshot(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        provider.get_volume_by_id_or_name.return_value = None
+        provider.create_volume_from_snapshot.return_value = 21
+        configuration = {"vpnInstance": 42}
+        c = create.Create([provider], [configuration], "")
+        self.assertEqual([21], c.prepare_volumes(provider, ["Test"]))
+
+    @patch("logging.warning")
+    def test_prepare_volumes_mismatch(self, mock_log):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        provider.get_volume_by_id_or_name.return_value = None
+        provider.create_volume_from_snapshot.return_value = None
+        configuration = {"vpnInstance": 42}
+        c = create.Create([provider], [configuration], "")
+        mount = "Test"
+        self.assertEqual([], c.prepare_volumes(provider, [mount]))
+        mock_log.assert_called_with(f"Mount {mount} is neither a snapshot nor a volume.")
+
+    def test_prepare_configurations_no_network(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        network = "network"
+        provider.get_network_id_by_subnet.return_value = network
+        configuration = {"subnet": 42}
+        c = create.Create([provider], [configuration], "")
+        c.prepare_configurations()
+        provider.get_network_id_by_subnet.assert_called_with(42)
+        self.assertEqual(network, configuration["network"])
+        self.assertEqual(c.ssh_user, configuration["sshUser"])
+
+    def test_prepare_configurations_no_subnet(self):
+        provider = MagicMock()
+        provider.list_servers.return_value = []
+        subnet = ["subnet"]
+        provider.get_subnet_ids_by_network.return_value = subnet
+        configuration = {"network": 42}
+        c = create.Create([provider], [configuration], "")
+        c.prepare_configurations()
+        provider.get_subnet_ids_by_network.assert_called_with(42)
+        self.assertEqual(subnet,
configuration["subnet"]) + self.assertEqual(c.ssh_user, configuration["sshUser"]) + + def test_prepare_configurations_none(self): + provider = MagicMock() + provider.list_servers.return_value = [] + configuration = {} + c = create.Create([provider], [configuration], "") + with self.assertRaises(KeyError): + c.prepare_configurations() + + @patch("bibigrid2.core.utility.ansibleConfigurator.configure_ansible_yaml") + @patch("bibigrid2.core.utility.handler.sshHandler.execute_ssh") + def test_upload_playbooks(self, mock_ssh, mock_configure_ansible): + provider = MagicMock() + provider.list_servers.return_value = [] + configuration = {} + c = create.Create([provider], [configuration], "") + c.master_ip = 42 + c.upload_data() + mock_configure_ansible.assert_called_with(providers=c.providers, + configurations=c.configurations, + cluster_id=c.cluster_id) + mock_ssh.assert_called_with(floating_ip=c.master_ip, private_key=create.KEY_FOLDER + c.key_name, + username=c.ssh_user, filepaths=[(os.path.expanduser("/Documents/Repos/bibigrid2/" + "resources/playbook/"), "playbook")], + commands=['echo ansible_start']) + + @patch("threading.Thread") + def test_start_start_instances_thread(self, mock_thread): + provider = MagicMock() + provider.list_servers.return_value = [] + configuration = {} + c = create.Create([provider], [configuration], "") + start_instances_mock_thread = Mock() + mock_thread.return_value = start_instances_mock_thread + c.start_start_instances_threads() + mock_thread.assert_called_with(target=c.start_instances, args=[configuration, provider]) + start_instances_mock_thread.start.assert_called() + start_instances_mock_thread.join.assert_called() + + @patch.object(create.Create, "generate_keypair") + @patch.object(create.Create, "prepare_configurations") + @patch.object(create.Create, "start_start_instances_threads") + @patch.object(create.Create, "upload_data") + @patch.object(create.Create, "print_cluster_start_info") + @patch("bibigrid2.core.actions.terminateCluster.terminate_cluster") + def test_create_non_debug(self, mock_terminate, mock_info, mock_up, mock_start, mock_conf, mock_key): + provider = MagicMock() + provider.list_servers.return_value = [] + configuration = {} + c = create.Create([provider], [configuration], "", False) + self.assertEqual(0, c.create()) + for mock in [mock_info, mock_up, mock_start, mock_conf, mock_key]: + mock.assert_called() + mock_terminate.assert_not_called() + + @patch.object(create.Create, "generate_keypair") + @patch.object(create.Create, "prepare_configurations") + @patch.object(create.Create, "start_start_instances_threads") + @patch.object(create.Create, "upload_data") + @patch.object(create.Create, "print_cluster_start_info") + @patch("bibigrid2.core.actions.terminateCluster.terminate_cluster") + def test_create_non_debug_upload_raise(self, mock_terminate, mock_info, mock_up, mock_start, mock_conf, mock_key): + provider = MagicMock() + provider.list_servers.return_value = [] + configuration = {} + c = create.Create([provider], [configuration], "", False) + mock_up.side_effect = [ConnectionError()] + self.assertEqual(1, c.create()) + for mock in [mock_start, mock_conf, mock_key, mock_up]: + mock.assert_called() + for mock in [mock_info]: + mock.assert_not_called() + mock_terminate.assert_called_with(cluster_id=c.cluster_id, providers=[provider], debug=False) + + @patch.object(create.Create, "generate_keypair") + @patch.object(create.Create, "prepare_configurations") + @patch.object(create.Create, "start_start_instances_threads") + 
@patch.object(create.Create, "upload_data") + @patch.object(create.Create, "print_cluster_start_info") + @patch("bibigrid2.core.actions.terminateCluster.terminate_cluster") + def test_create_debug(self, mock_terminate, mock_info, mock_up, mock_start, mock_conf, mock_key): + provider = MagicMock() + provider.list_servers.return_value = [] + configuration = {} + c = create.Create([provider], [configuration], "", True) + self.assertEqual(0, c.create()) + for mock in [mock_info, mock_up, mock_start, mock_conf, mock_key]: + mock.assert_called() + mock_terminate.assert_called_with(cluster_id=c.cluster_id, providers=[provider], debug=True) diff --git a/tests/test_idGeneration.py b/tests/test_idGeneration.py new file mode 100644 index 000000000..f139772e0 --- /dev/null +++ b/tests/test_idGeneration.py @@ -0,0 +1,42 @@ +from unittest import TestCase +from unittest.mock import Mock, MagicMock, patch + +import bibigrid2.core.actions.create as create +import bibigrid2.core.utility.id_generation as idGeneration + + +class Test(TestCase): + + def test_generate_cluster_id(self): + """ + This test is not ideal, but prevents worst changes within a reasonable runtime + :return: + """ + test_list = [] + for x in range(10000): + test_list.append(idGeneration.generate_cluster_id()) + self.assertTrue(len(set(test_list)) == len(test_list)) + + @patch("bibigrid2.core.utility.idGeneration.generate_cluster_id") + def test_generate_safe_cluster_id(self, mock_generate_cluster_id): + mock_generate_cluster_id.return_value = 21 + with patch("bibigrid2.core.utility.idGeneration.is_unique_cluster_id") as mock_is_unique: + mock_is_unique.side_effect = [True] + self.assertTrue(idGeneration.generate_safe_cluster_id([42])) + mock_is_unique.assert_called_with(21, [42]) + + def test_is_unique_cluster_id_duplicate(self): + cluster_id = 42 + provider = Mock() + provider.list_servers = MagicMock( + return_value=[{"name": create.MASTER_IDENTIFIER + create.SEPARATOR + str(cluster_id)}]) + self.assertFalse(idGeneration.is_unique_cluster_id(str(cluster_id), [provider])) + provider.list_servers.assert_called() + + def test_is_unique_cluster_id_unique(self): + cluster_id = 42 + provider = Mock() + provider.list_servers = MagicMock( + return_value=[{"name": create.MASTER_IDENTIFIER + create.SEPARATOR + str(cluster_id + 1)}]) + self.assertTrue(idGeneration.is_unique_cluster_id(str(cluster_id), [provider])) + provider.list_servers.assert_called() diff --git a/tests/test_listClusters.py b/tests/test_listClusters.py new file mode 100644 index 000000000..4c1fdfd3a --- /dev/null +++ b/tests/test_listClusters.py @@ -0,0 +1,46 @@ +from unittest import TestCase +from unittest.mock import Mock + +import bibigrid2.core.actions.create as create +import bibigrid2.core.actions.list_clusters as listClusters + + +class TestDictClusters(TestCase): + def test_setup(self): + for identifier in [create.WORKER_IDENTIFIER, create.VPN_WORKER_IDENTIFIER, create.MASTER_IDENTIFIER]: + cluster_id = 42 + test_provider = Mock() + test_provider.name = "name" + cluster_dict = {} + server = {"name": identifier + create.SEPARATOR + str(cluster_id)} + self.assertEqual(str(cluster_id), + listClusters.setup(server, + identifier, cluster_dict, test_provider)) + self.assertEqual({str(cluster_id): {'worker': [], 'vpnwkr': []}}, cluster_dict) + self.assertEqual(test_provider, server["provider"]) + + def test_setup_already(self): + for identifier in [create.WORKER_IDENTIFIER, create.VPN_WORKER_IDENTIFIER, create.MASTER_IDENTIFIER]: + cluster_id = 42 + test_provider = Mock() + 
test_provider.name = "name" + cluster_dict = {str(cluster_id): {'worker': ["some"], 'vpnwkr': ["some"]}} + server = {"name": identifier + create.SEPARATOR + str(cluster_id)} + self.assertEqual(str(cluster_id), + listClusters.setup(server, + identifier, cluster_dict, test_provider)) + self.assertEqual({str(cluster_id): {'worker': ["some"], 'vpnwkr': ["some"]}}, cluster_dict) + self.assertEqual(test_provider, server["provider"]) + + def test_dict_clusters(self): + cluster_id = 42 + expected = {str(cluster_id): {'workers': [{'name': f'bibigrid-worker-{str(cluster_id)}', 'provider': 'Mock'}], + 'vpnwkrs': [ + {'name': f'bibigrid-vpnwkr-{str(cluster_id)}', 'provider': 'Mock'}], + 'master': {'name': f'bibigrid-master-{str(cluster_id)}', 'provider': 'Mock'}}} + provider = Mock() + provider.list_servers.return_value = [{'name': identifier + create.SEPARATOR + str(cluster_id)} for identifier + in + [create.WORKER_IDENTIFIER, create.VPN_WORKER_IDENTIFIER, + create.MASTER_IDENTIFIER]] + self.assertEqual(expected, listClusters.dict_clusters([provider])) diff --git a/tests/test_providerHandler.py b/tests/test_providerHandler.py new file mode 100644 index 000000000..e6eeab17f --- /dev/null +++ b/tests/test_providerHandler.py @@ -0,0 +1,26 @@ +from unittest import TestCase +from unittest.mock import MagicMock, patch + +import bibigrid2.core.utility.handler.provider_handler as providerHandler + + +class TestProviderHandler(TestCase): + + @patch("bibigrid2.core.utility.handler.configurationHandler.get_cloud_specifications") + @patch("bibigrid2.core.utility.handler.providerHandler.get_provider_list_by_name_list") + def test_get_providers(self, mock_provider_list, mock_get_cloud_specifications): + mock_get_cloud_specifications.return_value = True # for if not false + configurations = [{"infrastructure": "some"}] + mock_provider_list.return_value = 42 + with patch("bibigrid2.core.utility.handler.configurationHandler.get_list_by_key") as mock_by_name: + self.assertEqual(42, providerHandler.get_providers(configurations)) + mock_by_name.assert_called_with(configurations, "infrastructure") + mock_get_cloud_specifications.assert_called_with(configurations) + + def test_get_provider_list_by_name_list(self): + keys = providerHandler.PROVIDER_NAME_DICT.keys() + values = [42] + with patch("bibigrid2.core.utility.handler.providerHandler.get_provider_by_name") as mock_by_name: + mock_by_name.return_value = MagicMock(return_value=42) + self.assertEqual(providerHandler.get_provider_list_by_name_list(keys, "nonempty_specification"), values) + mock_by_name.assert_called_with(list(keys)[0]) diff --git a/tests/test_returnThreading.py b/tests/test_returnThreading.py new file mode 100644 index 000000000..9256280c4 --- /dev/null +++ b/tests/test_returnThreading.py @@ -0,0 +1,17 @@ +from unittest import TestCase + +import bibigrid2.models.return_threading as returnThreading + + +def test_method(x): + return (42, x) + + +class TestReturnThread(TestCase): + + def test_ReturnThread(self): + return_thread = returnThreading.ReturnThread(target=test_method, + args=[42]) + return_thread.start() + return_value = return_thread.join() + self.assertTrue(return_value == (42, 42)) diff --git a/tests/test_sshHandler.py b/tests/test_sshHandler.py new file mode 100644 index 000000000..54c7dda6d --- /dev/null +++ b/tests/test_sshHandler.py @@ -0,0 +1,104 @@ +import socket +from unittest import TestCase +from unittest.mock import mock_open, Mock, MagicMock, patch, call + +from paramiko.ssh_exception import NoValidConnectionsError + +import 
bibigrid2.core.utility.handler.ssh_handler as sshHandler + + +class TestSshHandler(TestCase): + def test_get_add_ssh_public_key_commands_none(self): + ssh_public_key_files = [] + self.assertEqual([], sshHandler.get_add_ssh_public_key_commands(ssh_public_key_files)) + + def test_get_add_ssh_public_key_commands_line(self): + ssh_public_key_files = [42] + line = "42" + expected = [f"echo {line} >> .ssh/authorized_keys"] + with patch("builtins.open", mock_open(read_data=line)) as mock_file: + self.assertEqual(expected, sshHandler.get_add_ssh_public_key_commands(ssh_public_key_files)) + mock_file.assert_called_with(42) + + def test_copy_to_server_file(self): + sftp = Mock() + sftp.put = MagicMock(return_value=True) + with patch("os.path.isfile") as mock_isfile: + mock_isfile.return_value = True + sshHandler.copy_to_server(sftp, "Jim", "Joe") + sftp.put.assert_called_with("Jim", "Joe") + + @patch("os.listdir") + def test_copy_to_server_folder(self, mock_listdir): + sftp = Mock() + sftp.mkdir = MagicMock() + mock_listdir.return_value = [] + with patch("os.path.isfile") as mock_isfile: + mock_isfile.return_value = False + sshHandler.copy_to_server(sftp, "Jim", "Joe") + mock_listdir.assert_called_with("Jim") + sftp.mkdir.assert_called_with("Joe") + + @patch("logging.info") + def test_is_active(self, mock_log): + client = Mock() + client.connect = MagicMock(return_value=True) + self.assertFalse(sshHandler.is_active(client, 42, 32, 22, timeout=5)) + mock_log.assert_not_called() + + @patch("logging.info") + def test_is_active_second(self, mock_log): + client = Mock() + client.connect = MagicMock(side_effect=[NoValidConnectionsError({('127.0.0.1', 22): socket.error}), True]) + self.assertFalse(sshHandler.is_active(client, 42, 32, 22, timeout=5)) + mock_log.assert_called() + + @patch("logging.info") + def test_is_active_exception(self, mock_log): + client = Mock() + client.connect = MagicMock(side_effect=NoValidConnectionsError({('127.0.0.1', 22): socket.error})) + with self.assertRaises(ConnectionError): + sshHandler.is_active(client, 42, 32, 22, timeout=0) + client.connect.assert_called_with(hostname=42, username=22, pkey=32) + mock_log.assert_called() + + @patch("bibigrid2.core.utility.handler.sshHandler.execute_ssh_cml_commands") + @patch("paramiko.ECDSAKey.from_private_key_file") + @patch("paramiko.SSHClient") + def test_execute_ssh(self, mock_client, mock_paramiko_key, mock_exec): + mock_paramiko_key.return_value = 2 + client = Mock() + mock = Mock() + mock_client.return_value = mock + mock.__enter__ = client + mock.__exit__ = Mock(return_value=None) + with patch("bibigrid2.core.utility.handler.sshHandler.is_active") as mock_active: + sshHandler.execute_ssh(42, 32, 22, [12], None) + mock_client.assert_called_with() + mock_active.assert_called_with(client=client(), floating_ip_address=42, username=22, private_key=2) + mock_exec.assert_called_with(client(), [12]) + mock_paramiko_key.assert_called_with(32) + + @patch("bibigrid2.core.utility.handler.sshHandler.execute_ssh") + def test_ansible_preparation(self, mock_execute): + sshHandler.ansible_preparation(1, 2, 3, [], []) + mock_execute.assert_called_with(1, 2, 3, [] + sshHandler.ANSIBLE_SETUP, [(2, sshHandler.PRIVATE_KEY_FILE)]) + + @patch("bibigrid2.core.utility.handler.sshHandler.execute_ssh") + def test_ansible_preparation_elem(self, mock_execute): + sshHandler.ansible_preparation(1, 2, 3, [42], [42]) + mock_execute.assert_called_with(1, 2, 3, sshHandler.ANSIBLE_SETUP + [42], + [42, (2, sshHandler.PRIVATE_KEY_FILE)]) + + 
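+    # NOTE: the test below pins down the logging contract of execute_ssh_cml_commands: an exit
+    # status of 0 is logged via logging.info as "<command>:<status>", while a non-zero status is
+    # logged via logging.warning with stdout appended, "<command>:<status>|<stdout lines>".
+    # A minimal sketch consistent with that contract (hypothetical, not the actual source):
+    #
+    #     def execute_ssh_cml_commands(client, commands):
+    #         for command in commands:
+    #             _, stdout, _ = client.exec_command(command)
+    #             exit_status = stdout.channel.recv_exit_status()
+    #             if exit_status == 0:
+    #                 logging.info(f"{command}:{exit_status}")
+    #             else:
+    #                 logging.warning(f"{command}:{exit_status}|{stdout.readlines()}")
+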
@patch("logging.warning") + @patch("logging.info") + def test_execute_ssh_cml_commands(self, mock_log_info, mock_log_warning): + client = Mock() + stdout_mock = Mock() + stdout_mock.channel.recv_exit_status.side_effect = [0, 1] + stdout_mock.readlines.return_value = 49 + client.exec_command.return_value = (0, stdout_mock, 2) + commands = [42, 21] + sshHandler.execute_ssh_cml_commands(client, commands) + self.assertEqual([call('42:0')], mock_log_info.call_args_list) + self.assertEqual([call('21:1|49')], mock_log_warning.call_args_list) diff --git a/tests/test_startup.py b/tests/test_startup.py new file mode 100644 index 000000000..7138c808b --- /dev/null +++ b/tests/test_startup.py @@ -0,0 +1,113 @@ +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock + +import bibigrid2.core.startup as startup + + +class TestStartup(TestCase): + @patch('bibigrid2.core.utility.handler.providerHandler.get_providers') + def test_provider(self, mock_get_providers): + args = Mock() + args.list_clusters = True + args.version = False + args.cluster_id = 12 + provider = Mock + provider.close = MagicMock() + configurations = {} + mock_get_providers.return_value = [provider] + with patch("bibigrid2.core.actions.listClusters.print_list_clusters") as mock_lc: + mock_lc.return_value = 42 + self.assertTrue(startup.run_action(args, configurations, "") == 42) + mock_get_providers.assert_called_with(configurations) + provider.close.assert_called() + + @patch('bibigrid2.core.utility.handler.providerHandler.get_providers') + def test_list_clusters(self, get_providers): + provider_mock = Mock() + provider_mock.close = Mock() + get_providers.return_value = [provider_mock] + args = Mock() + args.list_clusters = True + args.version = False + args.cluster_id = 12 + configurations = {} + with patch("bibigrid2.core.actions.listClusters.print_list_clusters") as mock_lc: + mock_lc.return_value = 42 + self.assertTrue(startup.run_action(args, configurations, "") == 42) + mock_lc.assert_called_with(12, [provider_mock]) + + @patch('bibigrid2.core.utility.handler.providerHandler.get_providers') + def test_check(self, get_providers): + provider_mock = Mock() + provider_mock.close = Mock() + get_providers.return_value = [provider_mock] + args = Mock() + args.list_clusters = False + args.version = False + args.check = True + args.cluster_id = 12 + configurations = {} + with patch("bibigrid2.core.actions.check.check") as mock_lc: + mock_lc.return_value = 42 + self.assertTrue(startup.run_action(args, configurations, "") == 42) + mock_lc.assert_called_with(configurations, [provider_mock]) + + @patch('bibigrid2.core.utility.handler.providerHandler.get_providers') + @patch('bibigrid2.core.actions.create.Create') + def test_create(self, mock_create, get_providers): + provider_mock = Mock() + provider_mock.close = Mock() + get_providers.return_value = [provider_mock] + args = Mock() + args.list_clusters = False + args.version = False + args.check = False + args.create = True + args.cluster_id = 12 + args.debug = True + configurations = {} + creator = Mock() + creator.create = MagicMock(return_value=42) + mock_create.return_value = creator + self.assertTrue(startup.run_action(args, configurations, "") == 42) + mock_create.assert_called_with(providers=[provider_mock], configurations=configurations, debug=True, config_path="") + creator.create.assert_called() + + @patch('bibigrid2.core.utility.handler.providerHandler.get_providers') + def test_terminate(self, get_providers): + provider_mock = Mock() + provider_mock.close = 
Mock() + get_providers.return_value = [provider_mock] + args = Mock() + args.list_clusters = False + args.version = False + args.create = False + args.check = False + args.terminate_cluster = True + args.cluster_id = 12 + args.debug = True + configurations = {} + with patch("bibigrid2.core.actions.terminateCluster.terminate_cluster") as mock_tc: + mock_tc.return_value = 42 + self.assertTrue(startup.run_action(args, configurations, "") == 42) + mock_tc.assert_called_with(12, [provider_mock], True) + + @patch('bibigrid2.core.utility.handler.providerHandler.get_providers') + @patch("bibigrid2.core.actions.ide.ide") + def test_ide(self, mock_ide, get_providers): + provider_mock = Mock() + provider_mock.close = Mock() + get_providers.return_value = [provider_mock] + args = Mock() + args.list_clusters = False + args.version = False + args.create = False + args.check = False + args.terminate_cluster = False + args.ide = True + args.cluster_id = 12 + args.debug = True + configurations = {} + mock_ide.return_value = 42 + self.assertTrue(startup.run_action(args, configurations, "") == 42) + mock_ide.assert_called_with(12, [provider_mock], {}) diff --git a/tests/test_terminateCluster.py b/tests/test_terminateCluster.py new file mode 100644 index 000000000..68f0f5474 --- /dev/null +++ b/tests/test_terminateCluster.py @@ -0,0 +1,38 @@ +import os +from unittest import TestCase +from unittest.mock import MagicMock, Mock, patch, call + +import bibigrid2.core.actions.create as create +import bibigrid2.core.actions.terminate_cluster as terminateCluster + + +class TestTerminate(TestCase): + + @patch("bibigrid2.core.actions.terminateCluster.terminate_output") + @patch("logging.info") + def test_terminate_cluster(self, mock_log, mock_output): + provider = MagicMock() + provider.cloud_specification["auth"]["project_name"] = 32 + cluster_id = 42 + provider.list_servers.return_value = [ + {"name": create.MASTER_IDENTIFIER + create.SEPARATOR + str(cluster_id), "id": 21}] + provider.delete_server.return_value = True + provider.delete_keypair.return_value = True + terminateCluster.terminate_cluster(str(cluster_id), [provider], False) + provider.delete_server.assert_called_with(21) + provider.delete_keypair.assert_called_with( + create.KEY_PREFIX + provider.cloud_specification["auth"]["project_name"] + + create.SEPARATOR + str(cluster_id)) + mock_output.assert_called_with([provider.delete_server.return_value], + [provider.delete_keypair.return_value], str(cluster_id)) + @patch("logging.info") + def test_terminate_cluster_none(self, mock_log): + provider = MagicMock() + provider[0].specification["auth"]["project_name"] = "test_project_name" + cluster_id = 42 + provider.list_servers.return_value = [ + {"name": create.MASTER_IDENTIFIER + create.SEPARATOR + str(cluster_id + 1), "id": 21}] + provider.delete_keypair.return_value = False + terminateCluster.terminate_cluster(str(cluster_id), [provider], False) + provider.delete_server.assert_not_called() + provider.delete_keypair.assert_called_with('bibigrid42') # since keypair is not called
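+
+# NOTE: taken together, the two tests above pin down the expected contract of
+# terminate_cluster: delete every server whose name ends with SEPARATOR + cluster_id,
+# attempt to delete the cluster's keypair in either case, and report both result lists
+# via terminate_output. A hypothetical sketch of the matching predicate they imply:
+#
+#     def belongs_to_cluster(server, cluster_id):
+#         return server["name"].endswith(create.SEPARATOR + str(cluster_id))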