diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 7815ade..3824cb9 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -75,7 +75,7 @@ jobs: - name: Report test coverage if: ${{ matrix.coverage-report == true }} - uses: rhs/pr-comment@1.0.4 + uses: enlyze/pr-comment@v1.0.5 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.gitignore b/.gitignore index e72a38f..ddb46ad 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,6 @@ dmypy.json .pyre/ .vscode/ + +# Mac OS-specific storage files +.DS_Store diff --git a/README.rst b/README.rst index afcc523..2020fe8 100644 --- a/README.rst +++ b/README.rst @@ -64,3 +64,9 @@ so:* .. code-block:: console $ export PYENCHANT_LIBRARY_PATH=/opt/homebrew/lib/libenchant-2.dylib + + +Examples +-------------------------------- +You can find examples of how to use the Python SDK here: `Notebooks `_ + diff --git a/docs/api_clients/production_runs/models.rst b/docs/api_clients/production_runs/models.rst index 73188d9..03581d4 100644 --- a/docs/api_clients/production_runs/models.rst +++ b/docs/api_clients/production_runs/models.rst @@ -11,7 +11,7 @@ Models :exclude-members: model_config, model_fields :show-inheritance: -.. autoclass:: Appliance() +.. autoclass:: Machine() :members: :undoc-members: :exclude-members: model_config, model_fields diff --git a/docs/api_clients/timeseries/models.rst b/docs/api_clients/timeseries/models.rst index 19d57ee..dd03852 100644 --- a/docs/api_clients/timeseries/models.rst +++ b/docs/api_clients/timeseries/models.rst @@ -11,7 +11,7 @@ Models :exclude-members: model_config, model_fields :show-inheritance: -.. autoclass:: Appliance() +.. 
autoclass:: Machine() :members: :undoc-members: :exclude-members: model_config, model_fields diff --git a/docs/concepts.rst b/docs/concepts.rst index 9f9b54f..5e2c6c4 100644 --- a/docs/concepts.rst +++ b/docs/concepts.rst @@ -13,15 +13,15 @@ The concept of a *site* refers to a physical production site. Depending on its s your organization might have one or many of them. In the ENLYZE platform, each site has a name and an address, which makes it easy to identify for humans. -.. _appliance: +.. _machine: -Appliance +Machine --------- -An *appliance* refers to a machine that your organization uses to produce goods. For +A *machine* refers to a machine that your organization uses to produce goods. For example, a CNC-milling center, a blown film extrusion line or an injection molding -machine all represent an appliance in the ENLYZE platform. Just as a physical machine is -located at one production site, an appliance in the ENLYZE platform is associated with +machine all represent a machine in the ENLYZE platform. Just as a physical machine is +located at one production site, a machine in the ENLYZE platform is associated with exactly one :ref:`site `. .. _variable: @@ -29,9 +29,9 @@ exactly one :ref:`site `. Variable -------- -A *variable* represents a process measure of one :ref:`appliance ` of which -timeseries data is captured and stored in the ENLYZE platform. One appliance may have -many variables, whereas one variable is only associated with one appliance. +A *variable* represents a process measure of one :ref:`machine ` of which +timeseries data is captured and stored in the ENLYZE platform. One machine may have +many variables, whereas one variable is only associated with one machine. .. _production_order: @@ -45,7 +45,7 @@ MES and then synchronized into the ENLYZE platform. They are referenced by an id which oftentimes is a short combination of numbers and/or characters, like FA23000123. 
In the ENLYZE platform, a production order always encompasses the production of one -single :ref:`product ` on one single :ref:`appliance ` within one +single :ref:`product ` on one single :ref:`machine ` within one or more :ref:`production runs `. .. _production_run: @@ -54,15 +54,15 @@ Production Run -------------- A *production run* is a time frame within which a machine was producing a :ref:`product -` on an :ref:`appliance ` in order to complete a :ref:`production +` on a :ref:`machine ` in order to complete a :ref:`production order `. A production run always has a beginning and, if it's not still running, it also has an end. -Usually, the operator of the appliance uses an interface to log the time when a certain -production order has been worked on. For instance, this could be the appliance's HMI or +Usually, the operator of the machine uses an interface to log the time when a certain +production order has been worked on. For instance, this could be the machine's HMI or a tablet computer next to it. In German, this is often referred to as *Betriebsdatenerfassung* (BDE). It is common, that a production order is not completed in one go, but is interrupted -several times for very different reasons, like a breakdown of the appliance or a +several times for very different reasons, like a breakdown of the machine or a public holiday. These interruptions lead to the creation of multiple production runs for a single production order. @@ -71,9 +71,9 @@ for a single production order. Product ------- -A *product* is the output of the production process which is executed by an -:ref:`appliance `, driven by a :ref:`production order `. In -the real world, an appliance might have some additional outputs, but only the main +A *product* is the output of the production process which is executed by a +:ref:`machine `, driven by a :ref:`production order `. 
In +the real world, a machine might have some additional outputs, but only the main output (the product) is modeled in the ENLYZE platform. Similarly to the production order, a product is referenced by an identifier originating from a customer's system, that gets synchronized into the ENLYZE platform. diff --git a/docs/conf.py b/docs/conf.py index caac768..175b12f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,7 +33,7 @@ ] templates_path = ["_templates"] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", ".ipynb_checkpoints"] # -- Options for HTML output ------------------------------------------------- @@ -49,6 +49,9 @@ ("py:class", r".*\.T"), ("py:.*", r"httpx\..*"), ] +nitpick_ignore = [ + ("py:class", "ComputedFieldInfo"), +] autodoc_default_options = {"exclude-members": "__weakref__, __init__, __new__"} autodoc_member_order = "bysource" diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 0000000..036b345 --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,7 @@ +Examples +======== +In our examples section, you can find Jupyter Notebooks and other material to help you get started: + +`Notebooks `_: + +* `Introduction to the ENLYZE Python SDK `_ diff --git a/docs/index.rst b/docs/index.rst index 08dccf7..5d9247a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,6 +14,7 @@ User's Guide installation quickstart + examples concepts client models diff --git a/docs/models.rst b/docs/models.rst index 566c64c..3a2a9cb 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -8,7 +8,7 @@ Data models .. autoclass:: Site() :members: -.. autoclass:: Appliance() +.. autoclass:: Machine() :members: .. 
autoclass:: Variable() diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 4612c8c..d97518f 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -21,3 +21,4 @@ url uuid virtualenv whitespace +Jupyter \ No newline at end of file diff --git a/examples/notebooks/introduction_enlyze_python_sdk.ipynb b/examples/notebooks/introduction_enlyze_python_sdk.ipynb new file mode 100644 index 0000000..51869e6 --- /dev/null +++ b/examples/notebooks/introduction_enlyze_python_sdk.ipynb @@ -0,0 +1,2947 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "075c41c6-ea51-4542-a1c2-7a403a389b27", + "metadata": {}, + "source": [ + "# Introduction to the ENLYZE Python SDK\n", + "---\n", + "This guide introduces you to the concepts of the ENLYZE platform and shows you how they are implemented in the Python SDK.\n", + "We will use the SDK to query information about different locations and machines, and ultimately query time series data of one machine to further analyze and process it. \n", + "\n", + "In the second half, we will query production runs and OEE KPIs and prepare the data to be exported to Excel or used by Power BI.\n", + "\n", + "All is based on our demo organization, which represents an imaginary company with several locations, exemplary machines and dummy time series data." + ] + }, + { + "cell_type": "markdown", + "id": "a09447e1-85e5-4942-8855-3281ba81cee9", + "metadata": {}, + "source": [ + "## Installation\n", + "\n", + "The [enlyze-python package](https://github.com/enlyze/enlyze-python) is an open-source package that can be directly installed via pip. The full documentation of the SDK can be found [here](https://enlyze-python.readthedocs.io/en/latest/) and also provides a detailed installation guide for macOS/Linux and Windows.\n", + "\n", + "In short, all you need to do is run the following command in your shell. 
You can install it globally or in a virtualenv.\n", + "\n", + "```bash\n", + "$ pip install enlyze\n", + "```\n", + "\n", + "Installing the enlyze package will install all the required dependencies and would be enough to interact with the platform and process the data. But we will explore some more options within this tutorial, so if you want to follow along, you need to install the following additional packages:\n", + "\n", + " # loading secrets from .env files\n", + " pip install python-dotenv\n", + "\n", + " # visualization\n", + " pip install matplotlib\n", + " pip install seaborn\n", + "\n", + " # saving data to Excel\n", + " pip install openpyxl" + ] + }, + { + "cell_type": "markdown", + "id": "0479401f-b1cb-4ca4-82c5-f465147ac2d1", + "metadata": {}, + "source": [ + "## General imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "319ab2b0-4a7c-4d57-8d64-9de3d46f4bcb", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext dotenv\n", + "%dotenv ../.env" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "38f10cf7-c76c-4c05-818c-964e96a4ed08", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from datetime import datetime\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "mpl.rc(\"axes\", labelsize=14)\n", + "mpl.rc(\"xtick\", labelsize=12)\n", + "mpl.rc(\"ytick\", labelsize=12)\n", + "mpl.rc(\"legend\",fontsize=12)\n", + "mpl.rc(\"axes\", titlesize=14)\n", + "\n", + "import pandas as pd\n", + "# To display large DataFrames\n", + "pd.options.display.max_rows = 100\n", + "\n", + "# Set the color schema\n", + "import seaborn as sns\n", + "sns.set_style(\"whitegrid\")" + ] + }, + { + "cell_type": "markdown", + "id": "ed277287-997d-4fb3-844c-26169850c140", + "metadata": {}, + "source": [ + "## Client setup" + ] + }, + { + "cell_type": "markdown", + "id": "283dd0fc-e802-4b4d-a621-c154f860302a", + 
"metadata": {}, + "source": [ + "The only thing you need to interact with the ENLYZE platform is an API token. Reach out to us if you haven't received one already. This token will give you access to all the data of your organization, so please keep it safe! In case you have lost your token or you think it might have been compromised, please reach out to us as well.\n", + "\n", + "**Store your secrets and config variables in a special file** \n", + "You don't want to leak your token on GitHub or any other version control system. One way to do this is to create a `.env` file in your project root folder and store your token and other secrets in it. Make sure to add it to your `.gitignore` file, so it is kept out of version control. Here is an example:\n", + "\n", + "```\n", + "# example .env file\n", + "ENLYZE_API_TOKEN=\"XXX\"\n", + "```\n", + "\n", + "You can then use a package to load these variables automatically. Here we are using a package called [python-dotenv](https://github.com/theskumar/python-dotenv). 
\n", + "It loads up all the entries in this file as environment variables so they are accessible with `os.environ.get`/`os.getenv`.\n", + "\n", + "This happened a few cells above with the commands:\n", + "\n", + "```python\n", + "%load_ext dotenv\n", + "%dotenv ../.env\n", + "```\n", + "\n", + "Now, since the token is stored in an environment variable, we can use it to set up our client:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e7569b68-8c4b-4ec8-90ba-1854be649ab1", + "metadata": {}, + "outputs": [], + "source": [ + "from enlyze import EnlyzeClient" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "41606c72-52fd-4156-b45b-bdcd2835a67a", + "metadata": {}, + "outputs": [], + "source": [ + "enlyze = EnlyzeClient(os.getenv(\"ENLYZE_API_TOKEN\"))" + ] + }, + { + "cell_type": "markdown", + "id": "1be58d63-b9e6-4037-abd5-870e2fdac2be", + "metadata": {}, + "source": [ + "## Sites, Machines, and Variables\n", + "\n", + "These three are the core models to interact with the platform. An organization consists of one or multiple sites, which again hold one or multiple machines. Each machine then has a set of variables. A variable represents a process measure of one machine of which timeseries data is captured and stored in the ENLYZE platform. One machine can have many variables, whereas one variable is only associated with one machine.\n", + "\n", + "To get variables, we need to traverse down this tree until we get and select the variables we want, and then use these variables to query their data.\n", + "\n", + "There are different ways this can be done, but for now, we will simply go from top to bottom." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a2f84359-c596-4d6c-b774-cf3f974dc72b", + "metadata": {}, + "outputs": [], + "source": [ + "from enlyze.models import Site, Machine, Variable" + ] + }, + { + "cell_type": "markdown", + "id": "ca474120-c510-4f54-9345-f11808395324", + "metadata": {}, + "source": [ + "### Sites\n", + "\n", + "A list of all sites can be fetched by using the `get_sites` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e4b1d442-2521-49a8-8a67-468e8e96ea1e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Site(_id=49, display_name='Platform', address='Maybachstrasse 156'),\n", + " Site(_id=32, display_name='Köln', address='Heliosstrasse 6a'),\n", + " Site(_id=35, display_name='Stuttgart', address='Mahlestrasse 67')]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sites = enlyze.get_sites()\n", + "sites" + ] + }, + { + "cell_type": "markdown", + "id": "bb02f9a1-3c75-46e0-b925-a55cc6827aeb", + "metadata": {}, + "source": [ + "
\n", + "Side note: data analysis in Python 🫶 pandas\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "0e44713b-9a76-45c6-abbb-813a6c781f93", + "metadata": {}, + "source": [ + "If you are new to processing data in Python, you will inevitably come across pandas and pandas DataFrames. [Pandas](https://pandas.pydata.org/docs/index.html) is a great, open-source library providing high-performance, easy-to-use data structures and data analysis tools for the [Python](https://www.python.org/) programming language.\n", + "\n", + "Due to its many advantages for data processing and widespread use in the Python ecosystem, it is already part of the enlyze package and is installed alongside as a requirement.\n", + "\n", + "Especially in [Jupyter Notebooks](https://jupyterlab.readthedocs.io/en/stable/getting_started/overview.html) it makes working with and visualizing data straightforward. You will notice that models (Site, Machines, Variables etc.) have different attributes and are often returned in a list. For a few models and properties, printing the list in its raw form is still sufficient. But we soon reach the limits, and it becomes confusing and unmanageable. For this, we have this small utility function `models_to_dataframe` that takes a list of models and returns them as a DataFrame. This makes it a lot easier to explore data." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "80593ce2-f90c-407f-92cc-12d04d8f75f3", + "metadata": {}, + "outputs": [], + "source": [ + "def models_to_dataframe(models: list):\n", + " \"\"\"Convert a list of enlyze models to a DataFrame\"\"\"\n", + " return pd.DataFrame([m.__dict__ for m in models])\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "001cd893-d5a1-4c1a-a24a-b8c3468e03f8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_iddisplay_nameaddress
049PlatformMaybachstrasse 156
132KölnHeliosstrasse 6a
235StuttgartMahlestrasse 67
\n", + "
" + ], + "text/plain": [ + " _id display_name address\n", + "0 49 Platform Maybachstrasse 156\n", + "1 32 Köln Heliosstrasse 6a\n", + "2 35 Stuttgart Mahlestrasse 67" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "models_to_dataframe(sites)" + ] + }, + { + "cell_type": "markdown", + "id": "39500780-f10d-40b8-b1b8-4fa931c42171", + "metadata": {}, + "source": [ + "We will pick a site by selecting an element from the list." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "95f3ba77-25d1-4dff-add5-d15f65d84416", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Site(_id=32, display_name='Köln', address='Heliosstrasse 6a')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "site = sites[1]\n", + "site" + ] + }, + { + "cell_type": "markdown", + "id": "734921aa-f1c8-4254-9569-74f7e90e7db4", + "metadata": {}, + "source": [ + "### Machines" + ] + }, + { + "cell_type": "markdown", + "id": "1aae646d-d47b-434e-a90e-daec386a6802", + "metadata": {}, + "source": [ + "We can get a list of all machines by using the `get_machines` method. Optionally, we can pass a site to the function to filter by it." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "4f26f753-8a64-4950-9dab-ca7f6409e81d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuiddisplay_namegenesis_datesite
0fafdee59-933b-40bc-b3ce-c59c7363e358PLC Rack2023-03-21Site(_id=49, display_name='Platform', address=...
122a71d1e-8262-4c27-b5fb-66598027936aW&H Varex2022-03-18Site(_id=32, display_name='Köln', address='Hel...
2cc0d2dcb-564b-48cd-a342-71765a536058Macchi2021-01-15Site(_id=32, display_name='Köln', address='Hel...
3141e0927-62b3-4e76-8398-ad82d20f397fKiefel2020-08-26Site(_id=32, display_name='Köln', address='Hel...
4f5c0b9d4-e89e-43ad-b81c-16bd5cbe3646Reifenhäuser2021-01-21Site(_id=35, display_name='Stuttgart', address...
50eba5355-1dc2-49d5-b539-2ca191dadb76Alpine2021-01-15Site(_id=35, display_name='Stuttgart', address...
\n", + "
" + ], + "text/plain": [ + " uuid display_name genesis_date \\\n", + "0 fafdee59-933b-40bc-b3ce-c59c7363e358 PLC Rack 2023-03-21 \n", + "1 22a71d1e-8262-4c27-b5fb-66598027936a W&H Varex 2022-03-18 \n", + "2 cc0d2dcb-564b-48cd-a342-71765a536058 Macchi 2021-01-15 \n", + "3 141e0927-62b3-4e76-8398-ad82d20f397f Kiefel 2020-08-26 \n", + "4 f5c0b9d4-e89e-43ad-b81c-16bd5cbe3646 Reifenhäuser 2021-01-21 \n", + "5 0eba5355-1dc2-49d5-b539-2ca191dadb76 Alpine 2021-01-15 \n", + "\n", + " site \n", + "0 Site(_id=49, display_name='Platform', address=... \n", + "1 Site(_id=32, display_name='Köln', address='Hel... \n", + "2 Site(_id=32, display_name='Köln', address='Hel... \n", + "3 Site(_id=32, display_name='Köln', address='Hel... \n", + "4 Site(_id=35, display_name='Stuttgart', address... \n", + "5 Site(_id=35, display_name='Stuttgart', address... " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "models_to_dataframe(enlyze.get_machines())" + ] + }, + { + "cell_type": "markdown", + "id": "4c378054-420f-405f-a033-1270e948d538", + "metadata": {}, + "source": [ + "Filtering by site will only return the respective machines:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0957caa7-f777-4ebe-878d-dc92d799ccc0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuiddisplay_namegenesis_datesite
022a71d1e-8262-4c27-b5fb-66598027936aW&H Varex2022-03-18Site(_id=32, display_name='Köln', address='Hel...
1cc0d2dcb-564b-48cd-a342-71765a536058Macchi2021-01-15Site(_id=32, display_name='Köln', address='Hel...
2141e0927-62b3-4e76-8398-ad82d20f397fKiefel2020-08-26Site(_id=32, display_name='Köln', address='Hel...
\n", + "
" + ], + "text/plain": [ + " uuid display_name genesis_date \\\n", + "0 22a71d1e-8262-4c27-b5fb-66598027936a W&H Varex 2022-03-18 \n", + "1 cc0d2dcb-564b-48cd-a342-71765a536058 Macchi 2021-01-15 \n", + "2 141e0927-62b3-4e76-8398-ad82d20f397f Kiefel 2020-08-26 \n", + "\n", + " site \n", + "0 Site(_id=32, display_name='Köln', address='Hel... \n", + "1 Site(_id=32, display_name='Köln', address='Hel... \n", + "2 Site(_id=32, display_name='Köln', address='Hel... " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cologne_machines = enlyze.get_machines(site=site)\n", + "models_to_dataframe(cologne_machines)" + ] + }, + { + "cell_type": "markdown", + "id": "c6a57053-edac-4fcc-b3df-795ff9441d42", + "metadata": {}, + "source": [ + "**Selecting a machine by UUID**\n", + "\n", + "A more reliable way to select a machine from the list is filtering by `UUID`. Attributes like the name may change over time, but the assigned `UUID` will stay unchanged." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ec91bd4f-0819-46a1-8738-c09839315bd0", + "metadata": {}, + "outputs": [], + "source": [ + "from uuid import UUID" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d95c523e-78f9-4c68-ae0f-e1fc77cca9bb", + "metadata": {}, + "outputs": [], + "source": [ + "MACHINE_UUID = UUID(\"22a71d1e-8262-4c27-b5fb-66598027936a\")" + ] + }, + { + "cell_type": "markdown", + "id": "2daff658-0ed4-4564-8a9f-b182cbbc436a", + "metadata": {}, + "source": [ + "We can create a function that returns the machine if it matches the UUID:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c920fd8d-bc74-4c0b-ab78-d6f5651dcba1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_machine_by_uuid(uuid: UUID, machines: list[Machine]):\n", + " for machine in machines:\n", + " if machine.uuid == uuid:\n", + " return machine\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e0507860-468a-4923-a28b-861361355b5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-66598027936a'), display_name='W&H Varex', genesis_date=datetime.date(2022, 3, 18), site=Site(_id=32, display_name='Köln', address='Heliosstrasse 6a'))" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_machine_by_uuid(MACHINE_UUID, cologne_machines)" + ] + }, + { + "cell_type": "markdown", + "id": "4cdacd41-eb81-4cf8-b8b8-383e421cf4d5", + "metadata": {}, + "source": [ + "Or we can use Python's built-in `filter()` function, which is more compact but less readable:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "1ec26d71-2a44-427b-a4fb-ed05e92a539c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-66598027936a'), display_name='W&H Varex', genesis_date=datetime.date(2022, 3, 18), site=Site(_id=32, 
display_name='Köln', address='Heliosstrasse 6a'))" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = filter(lambda a: a.uuid == MACHINE_UUID, cologne_machines)\n", + "machine = next(results)\n", + "\n", + "machine" + ] + }, + { + "cell_type": "markdown", + "id": "09c87deb-7054-441d-893d-8b501abbb23f", + "metadata": {}, + "source": [ + "### Variables\n", + "\n", + "The last step we need to take is getting the variables for a machine. We use the `get_variables` method, which always requires the specification of a machine." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b45c1a3a-65e1-4ba0-8a5e-11d569ae4e9e", + "metadata": {}, + "outputs": [], + "source": [ + "variables = enlyze.get_variables(machine)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "81293554-d4a6-4d41-a9fe-5a858dfc8f30", + "metadata": {}, + "outputs": [], + "source": [ + "variables_df = models_to_dataframe(variables)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8624057f-f070-46a0-b1a6-33ae30cf3745", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuiddisplay_nameunitdata_typemachine
003e13e36-3a5f-4d9b-babf-58da68cde23fExtruder C Schneckenumdrehung1/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
1063e9d4a-1687-4d7e-95e3-fdd56be1c68cExtruder A SchmelzedruckbarVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
207420ec1-76da-4288-b9ee-b14891005144Extruder C SchmelzedruckbarVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
3094853e6-702d-43ab-a612-15ea0dfc90c3Extruder I Schneckenumdrehung1/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
40b8af028-7bea-4b71-8719-48bb8c57505dAnzahl MessungennVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
\n", + "
" + ], + "text/plain": [ + " uuid display_name unit \\\n", + "0 03e13e36-3a5f-4d9b-babf-58da68cde23f Extruder C Schneckenumdrehung 1/min \n", + "1 063e9d4a-1687-4d7e-95e3-fdd56be1c68c Extruder A Schmelzedruck bar \n", + "2 07420ec1-76da-4288-b9ee-b14891005144 Extruder C Schmelzedruck bar \n", + "3 094853e6-702d-43ab-a612-15ea0dfc90c3 Extruder I Schneckenumdrehung 1/min \n", + "4 0b8af028-7bea-4b71-8719-48bb8c57505d Anzahl Messungen n \n", + "\n", + " data_type machine \n", + "0 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "1 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "2 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "3 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "4 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variables_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c3f9cd8a-5880-40d0-9fdf-d38fdf79065e", + "metadata": {}, + "outputs": [], + "source": [ + "variables_df = (\n", + " variables_df\n", + " .sort_values(by=\"display_name\")\n", + " .reset_index(drop=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "0e5a8384-0812-4a30-979d-68f013658e0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuiddisplay_nameunitdata_typemachine
084802f7e-96a5-4e81-a61d-2c9c133bc58c2-Sigma%VariableDataType.FLOATMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
1163fb0cc-0e24-4092-8873-775ea9daccadAbzugsgeschwindigkeitm/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
20b8af028-7bea-4b71-8719-48bb8c57505dAnzahl MessungennVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
3f0e3d6cc-d302-4b0f-a660-8217f1b5d1eaDurchsatzkg/hVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
4378f5c4f-958e-4d72-98dc-88b6ac700b4bExtruder A Durchsatzkg/hVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
\n", + "
" + ], + "text/plain": [ + " uuid display_name unit \\\n", + "0 84802f7e-96a5-4e81-a61d-2c9c133bc58c 2-Sigma % \n", + "1 163fb0cc-0e24-4092-8873-775ea9daccad Abzugsgeschwindigkeit m/min \n", + "2 0b8af028-7bea-4b71-8719-48bb8c57505d Anzahl Messungen n \n", + "3 f0e3d6cc-d302-4b0f-a660-8217f1b5d1ea Durchsatz kg/h \n", + "4 378f5c4f-958e-4d72-98dc-88b6ac700b4b Extruder A Durchsatz kg/h \n", + "\n", + " data_type machine \n", + "0 VariableDataType.FLOAT Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "1 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "2 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "3 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "4 VariableDataType.INTEGER Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variables_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "8a99baeb-a034-4763-b980-9e1970989c0b", + "metadata": {}, + "source": [ + "We will select variables of the Extruder A and all melt pressures for the next steps:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e9c5ef33-4a87-4af4-8312-094b9f558b2f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuiddisplay_nameunitdata_typemachine
4378f5c4f-958e-4d72-98dc-88b6ac700b4bExtruder A Durchsatzkg/hVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
53bbcd24b-684c-49fc-b3d4-8e77f9f13a07Extruder A FolienstärkeµmVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
6dab58100-7bf2-4a37-ba9c-7130897051a8Extruder A Förderratekg/U/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
7063e9d4a-1687-4d7e-95e3-fdd56be1c68cExtruder A SchmelzedruckbarVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
85c5eb4a7-a6ed-47ff-a971-722e6334801bExtruder A Schmelzetemperatur°CVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
9c1a6fde4-d5e8-4b0f-a064-ae961d2bf962Extruder A Schneckenumdrehung1/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
\n", + "
" + ], + "text/plain": [ + " uuid display_name \\\n", + "4 378f5c4f-958e-4d72-98dc-88b6ac700b4b Extruder A Durchsatz \n", + "5 3bbcd24b-684c-49fc-b3d4-8e77f9f13a07 Extruder A Folienstärke \n", + "6 dab58100-7bf2-4a37-ba9c-7130897051a8 Extruder A Förderrate \n", + "7 063e9d4a-1687-4d7e-95e3-fdd56be1c68c Extruder A Schmelzedruck \n", + "8 5c5eb4a7-a6ed-47ff-a971-722e6334801b Extruder A Schmelzetemperatur \n", + "9 c1a6fde4-d5e8-4b0f-a064-ae961d2bf962 Extruder A Schneckenumdrehung \n", + "\n", + " unit data_type \\\n", + "4 kg/h VariableDataType.INTEGER \n", + "5 µm VariableDataType.INTEGER \n", + "6 kg/U/min VariableDataType.INTEGER \n", + "7 bar VariableDataType.INTEGER \n", + "8 °C VariableDataType.INTEGER \n", + "9 1/min VariableDataType.INTEGER \n", + "\n", + " machine \n", + "4 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "5 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "6 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "7 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "8 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "9 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "extruder_a_variables_df = (\n", + " variables_df.loc[\n", + " variables_df[\"display_name\"].str.contains(\"Extruder A\")\n", + " ]\n", + " .copy()\n", + ")\n", + "\n", + "# a filtered copy of the DataFrame\n", + "extruder_a_variables_df" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "28651ac9-b781-46d3-a3c2-3beb841d3ddf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuiddisplay_nameunitdata_typemachine
4378f5c4f-958e-4d72-98dc-88b6ac700b4bExtruder A Durchsatzkg/hVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
53bbcd24b-684c-49fc-b3d4-8e77f9f13a07Extruder A FolienstärkeµmVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
6dab58100-7bf2-4a37-ba9c-7130897051a8Extruder A Förderratekg/U/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
7063e9d4a-1687-4d7e-95e3-fdd56be1c68cExtruder A SchmelzedruckbarVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
85c5eb4a7-a6ed-47ff-a971-722e6334801bExtruder A Schmelzetemperatur°CVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
9c1a6fde4-d5e8-4b0f-a064-ae961d2bf962Extruder A Schneckenumdrehung1/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
10c9709564-395c-403e-be34-96bb2c41551cExtruder B Durchsatzkg/hVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
118057cb67-e711-4b43-93cc-0342ec7dae44Extruder B FolienstärkeµmVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
121a07c75f-6c00-4d21-8512-8578008928faExtruder B Förderratekg/U/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
13aa62c591-bd89-4423-8ea4-d973290f0dadExtruder B SchmelzedruckbarVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
14f84a0232-bd3b-4978-8d21-a10922846926Extruder B Schmelzetemperatur°CVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
158755d5bb-e3ea-4ab0-994b-2b7cca767b76Extruder B Schneckenumdrehung1/minVariableDataType.INTEGERMachine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6...
\n", + "
" + ], + "text/plain": [ + " uuid display_name \\\n", + "4 378f5c4f-958e-4d72-98dc-88b6ac700b4b Extruder A Durchsatz \n", + "5 3bbcd24b-684c-49fc-b3d4-8e77f9f13a07 Extruder A Folienstärke \n", + "6 dab58100-7bf2-4a37-ba9c-7130897051a8 Extruder A Förderrate \n", + "7 063e9d4a-1687-4d7e-95e3-fdd56be1c68c Extruder A Schmelzedruck \n", + "8 5c5eb4a7-a6ed-47ff-a971-722e6334801b Extruder A Schmelzetemperatur \n", + "9 c1a6fde4-d5e8-4b0f-a064-ae961d2bf962 Extruder A Schneckenumdrehung \n", + "10 c9709564-395c-403e-be34-96bb2c41551c Extruder B Durchsatz \n", + "11 8057cb67-e711-4b43-93cc-0342ec7dae44 Extruder B Folienstärke \n", + "12 1a07c75f-6c00-4d21-8512-8578008928fa Extruder B Förderrate \n", + "13 aa62c591-bd89-4423-8ea4-d973290f0dad Extruder B Schmelzedruck \n", + "14 f84a0232-bd3b-4978-8d21-a10922846926 Extruder B Schmelzetemperatur \n", + "15 8755d5bb-e3ea-4ab0-994b-2b7cca767b76 Extruder B Schneckenumdrehung \n", + "\n", + " unit data_type \\\n", + "4 kg/h VariableDataType.INTEGER \n", + "5 µm VariableDataType.INTEGER \n", + "6 kg/U/min VariableDataType.INTEGER \n", + "7 bar VariableDataType.INTEGER \n", + "8 °C VariableDataType.INTEGER \n", + "9 1/min VariableDataType.INTEGER \n", + "10 kg/h VariableDataType.INTEGER \n", + "11 µm VariableDataType.INTEGER \n", + "12 kg/U/min VariableDataType.INTEGER \n", + "13 bar VariableDataType.INTEGER \n", + "14 °C VariableDataType.INTEGER \n", + "15 1/min VariableDataType.INTEGER \n", + "\n", + " machine \n", + "4 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "5 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "6 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "7 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "8 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "9 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "10 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "11 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "12 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... 
\n", + "13 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "14 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... \n", + "15 Machine(uuid=UUID('22a71d1e-8262-4c27-b5fb-6... " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melt_pressures_df = (\n", + " variables_df.loc[\n", + " variables_df[\"display_name\"].str.contains(r\"Extruder [A-B]\")\n", + " ].copy()\n", + ")\n", + "\n", + "melt_pressures_df" + ] + }, + { + "cell_type": "markdown", + "id": "8bf00c18-7670-4ba5-9c0c-ca88273708dd", + "metadata": {}, + "source": [ + "The resulting DataFrames `extruder_a_variables_df` and `melt_pressures_df` are a copy of the filtered `variables_df` DataFrame. In this case, creating a copy is not necessary because we are not further changing the data, but it's good practice when you work with DataFrames.\n", + "\n", + "If you don't use `.copy()`, the assigned variable will only hold a view of the original DataFrame and changing it would also change the data in the original DataFrame. This can cause some unexpected behaviour and difficult to trace errors, so usually pandas will throw a `SettingWithCopyWarning` error if you do so.\n", + "\n", + "
\n", + "SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.\n", + "
\n", + "\n", + "Check out the [pandas documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.copy.html) for more details." + ] + }, + { + "cell_type": "markdown", + "id": "a3033a2d-b022-4559-8f98-cae16bf42043", + "metadata": {}, + "source": [ + "## Time series data" + ] + }, + { + "cell_type": "markdown", + "id": "03ebad8e-8e54-448f-9d6f-053487ca7d76", + "metadata": {}, + "source": [ + "Now that we have selected the variables we are interested in, we can use them to get time series data.\n", + "\n", + "Pandas built-in filtering allows to quickly select the variables we want, but `get_timeseries` expects a list of `enlyze.models.Variable`'s." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c64809fc-d6dc-4309-9f7a-bd256dc82f67", + "metadata": {}, + "outputs": [], + "source": [ + "extruder_a_variables = []\n", + "\n", + "# iterate over all variables\n", + "for variable in variables:\n", + " if variable.uuid in extruder_a_variables_df[\"uuid\"].tolist():\n", + " # if a uuid is in the uuid-column of the DataFrame\n", + " # add the variable to the list\n", + " extruder_a_variables.append(variable)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "593d289f-6958-4927-85f1-adecd309fb54", + "metadata": {}, + "outputs": [], + "source": [ + "melt_pressures = []\n", + "\n", + "for variable in variables:\n", + " if variable.uuid in melt_pressures_df[\"uuid\"].tolist():\n", + " melt_pressures.append(variable)\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "8ec97ec7-8e4b-4553-ac20-1e668108eabd", + "metadata": {}, + "source": [ + "### For small periods and quick explorations - get_timeseries()" + ] + }, + { + "cell_type": "markdown", + "id": "66e8c2ce-ceff-43d0-9d98-ceb4b23f2f9e", + "metadata": {}, + "source": [ + "The `get_timeseries` expects a start and end time, ideally with a timezone specified, and a list of variables.\n", + "\n", + "It then returns a `TimeseriesData` object, from which the 
data can be retrieved as a pandas DataFrame or dictionary (records). By default, the column names will be set to the `variable_uuids` but if `use_display_names` is set to True, it will return the DataFrame with human-readable names.\n", + "\n", + "
\n", + "Note: Display names are not guaranteed to be unique for a machine.\n", + "
\n", + "\n", + "If two or more variables with the same display name exist, there will be multiple columns with the same name. This can lead to unexpected behavior in later steps. However, display names can be adjusted in the app at any time, thus solving this problem.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "bd3d8705-511b-490d-bfcd-7af70d7bb174", + "metadata": {}, + "outputs": [], + "source": [ + "start = datetime.fromisoformat(\"2022-05-17 05:00:00+01:00\")\n", + "\n", + "end = datetime.fromisoformat(\"2022-05-28 18:00:00+01:00\")\n", + "\n", + "ts_data = enlyze.get_timeseries(start=start, end=end, variables=melt_pressures)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "971125fe-5763-4525-bc92-7905abf7206a", + "metadata": {}, + "outputs": [], + "source": [ + "data = ts_data.to_dataframe(use_display_names=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "cbee3902-5048-4bf6-9691-59e57a80910e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Extruder A SchmelzedruckExtruder B FörderrateExtruder A DurchsatzExtruder A FolienstärkeExtruder A SchmelzetemperaturExtruder B FolienstärkeExtruder B SchneckenumdrehungExtruder B SchmelzedruckExtruder A SchneckenumdrehungExtruder B DurchsatzExtruder A FörderrateExtruder B Schmelzetemperatur
time
2022-05-17 04:00:00+00:00387.0127.050.02.0211.03.071.0502.067.090.074.0210.0
2022-05-17 04:00:10+00:00388.0127.050.02.0211.03.071.0502.067.090.074.0210.0
2022-05-17 04:00:20+00:00388.0127.050.02.0211.03.071.0502.067.090.074.0210.0
2022-05-17 04:00:30+00:00388.0127.050.02.0211.03.071.0502.067.090.074.0210.0
2022-05-17 04:00:40+00:00388.0127.050.02.0211.03.071.0500.067.090.074.0210.0
\n", + "
" + ], + "text/plain": [ + " Extruder A Schmelzedruck Extruder B Förderrate \\\n", + "time \n", + "2022-05-17 04:00:00+00:00 387.0 127.0 \n", + "2022-05-17 04:00:10+00:00 388.0 127.0 \n", + "2022-05-17 04:00:20+00:00 388.0 127.0 \n", + "2022-05-17 04:00:30+00:00 388.0 127.0 \n", + "2022-05-17 04:00:40+00:00 388.0 127.0 \n", + "\n", + " Extruder A Durchsatz Extruder A Folienstärke \\\n", + "time \n", + "2022-05-17 04:00:00+00:00 50.0 2.0 \n", + "2022-05-17 04:00:10+00:00 50.0 2.0 \n", + "2022-05-17 04:00:20+00:00 50.0 2.0 \n", + "2022-05-17 04:00:30+00:00 50.0 2.0 \n", + "2022-05-17 04:00:40+00:00 50.0 2.0 \n", + "\n", + " Extruder A Schmelzetemperatur \\\n", + "time \n", + "2022-05-17 04:00:00+00:00 211.0 \n", + "2022-05-17 04:00:10+00:00 211.0 \n", + "2022-05-17 04:00:20+00:00 211.0 \n", + "2022-05-17 04:00:30+00:00 211.0 \n", + "2022-05-17 04:00:40+00:00 211.0 \n", + "\n", + " Extruder B Folienstärke \\\n", + "time \n", + "2022-05-17 04:00:00+00:00 3.0 \n", + "2022-05-17 04:00:10+00:00 3.0 \n", + "2022-05-17 04:00:20+00:00 3.0 \n", + "2022-05-17 04:00:30+00:00 3.0 \n", + "2022-05-17 04:00:40+00:00 3.0 \n", + "\n", + " Extruder B Schneckenumdrehung \\\n", + "time \n", + "2022-05-17 04:00:00+00:00 71.0 \n", + "2022-05-17 04:00:10+00:00 71.0 \n", + "2022-05-17 04:00:20+00:00 71.0 \n", + "2022-05-17 04:00:30+00:00 71.0 \n", + "2022-05-17 04:00:40+00:00 71.0 \n", + "\n", + " Extruder B Schmelzedruck \\\n", + "time \n", + "2022-05-17 04:00:00+00:00 502.0 \n", + "2022-05-17 04:00:10+00:00 502.0 \n", + "2022-05-17 04:00:20+00:00 502.0 \n", + "2022-05-17 04:00:30+00:00 502.0 \n", + "2022-05-17 04:00:40+00:00 500.0 \n", + "\n", + " Extruder A Schneckenumdrehung \\\n", + "time \n", + "2022-05-17 04:00:00+00:00 67.0 \n", + "2022-05-17 04:00:10+00:00 67.0 \n", + "2022-05-17 04:00:20+00:00 67.0 \n", + "2022-05-17 04:00:30+00:00 67.0 \n", + "2022-05-17 04:00:40+00:00 67.0 \n", + "\n", + " Extruder B Durchsatz Extruder A Förderrate \\\n", + "time \n", + "2022-05-17 
04:00:00+00:00 90.0 74.0 \n", + "2022-05-17 04:00:10+00:00 90.0 74.0 \n", + "2022-05-17 04:00:20+00:00 90.0 74.0 \n", + "2022-05-17 04:00:30+00:00 90.0 74.0 \n", + "2022-05-17 04:00:40+00:00 90.0 74.0 \n", + "\n", + " Extruder B Schmelzetemperatur \n", + "time \n", + "2022-05-17 04:00:00+00:00 210.0 \n", + "2022-05-17 04:00:10+00:00 210.0 \n", + "2022-05-17 04:00:20+00:00 210.0 \n", + "2022-05-17 04:00:30+00:00 210.0 \n", + "2022-05-17 04:00:40+00:00 210.0 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "dfd17fd7-6f21-47dd-9753-db8702b99674", + "metadata": {}, + "source": [ + "Note that the index of the DataFrame is the time-column and the timezone is always UTC, regardless of the timezone specified in the request.\n", + "\n", + "Using the `reset_index()` method will use the index as a column and set a default index instead:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "f17ebc8c-edd0-4479-a037-5e48c655d0fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeExtruder A SchmelzedruckExtruder B FörderrateExtruder A DurchsatzExtruder A FolienstärkeExtruder A SchmelzetemperaturExtruder B FolienstärkeExtruder B SchneckenumdrehungExtruder B SchmelzedruckExtruder A SchneckenumdrehungExtruder B DurchsatzExtruder A FörderrateExtruder B Schmelzetemperatur
02022-05-17 04:00:00+00:00387.0127.050.02.0211.03.071.0502.067.090.074.0210.0
12022-05-17 04:00:10+00:00388.0127.050.02.0211.03.071.0502.067.090.074.0210.0
22022-05-17 04:00:20+00:00388.0127.050.02.0211.03.071.0502.067.090.074.0210.0
32022-05-17 04:00:30+00:00388.0127.050.02.0211.03.071.0502.067.090.074.0210.0
42022-05-17 04:00:40+00:00388.0127.050.02.0211.03.071.0500.067.090.074.0210.0
\n", + "
" + ], + "text/plain": [ + " time Extruder A Schmelzedruck Extruder B Förderrate \\\n", + "0 2022-05-17 04:00:00+00:00 387.0 127.0 \n", + "1 2022-05-17 04:00:10+00:00 388.0 127.0 \n", + "2 2022-05-17 04:00:20+00:00 388.0 127.0 \n", + "3 2022-05-17 04:00:30+00:00 388.0 127.0 \n", + "4 2022-05-17 04:00:40+00:00 388.0 127.0 \n", + "\n", + " Extruder A Durchsatz Extruder A Folienstärke \\\n", + "0 50.0 2.0 \n", + "1 50.0 2.0 \n", + "2 50.0 2.0 \n", + "3 50.0 2.0 \n", + "4 50.0 2.0 \n", + "\n", + " Extruder A Schmelzetemperatur Extruder B Folienstärke \\\n", + "0 211.0 3.0 \n", + "1 211.0 3.0 \n", + "2 211.0 3.0 \n", + "3 211.0 3.0 \n", + "4 211.0 3.0 \n", + "\n", + " Extruder B Schneckenumdrehung Extruder B Schmelzedruck \\\n", + "0 71.0 502.0 \n", + "1 71.0 502.0 \n", + "2 71.0 502.0 \n", + "3 71.0 502.0 \n", + "4 71.0 500.0 \n", + "\n", + " Extruder A Schneckenumdrehung Extruder B Durchsatz Extruder A Förderrate \\\n", + "0 67.0 90.0 74.0 \n", + "1 67.0 90.0 74.0 \n", + "2 67.0 90.0 74.0 \n", + "3 67.0 90.0 74.0 \n", + "4 67.0 90.0 74.0 \n", + "\n", + " Extruder B Schmelzetemperatur \n", + "0 210.0 \n", + "1 210.0 \n", + "2 210.0 \n", + "3 210.0 \n", + "4 210.0 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = data.reset_index()\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "e2ac6c18-da31-4ebc-869c-b4c2b0911d3c", + "metadata": {}, + "source": [ + "To convert the timezone back to the local timezone you can use pandas Datetime functionality:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "879a3abc-8732-4395-8957-52d5e5c2b154", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timetime_localExtruder A Schmelzedruck
02022-05-17 04:00:00+00:002022-05-17 06:00:00+02:00387.0
12022-05-17 04:00:10+00:002022-05-17 06:00:10+02:00388.0
22022-05-17 04:00:20+00:002022-05-17 06:00:20+02:00388.0
32022-05-17 04:00:30+00:002022-05-17 06:00:30+02:00388.0
42022-05-17 04:00:40+00:002022-05-17 06:00:40+02:00388.0
............
996992022-05-28 16:59:10+00:002022-05-28 18:59:10+02:00338.0
997002022-05-28 16:59:20+00:002022-05-28 18:59:20+02:00338.0
997012022-05-28 16:59:30+00:002022-05-28 18:59:30+02:00338.0
997022022-05-28 16:59:40+00:002022-05-28 18:59:40+02:00338.0
997032022-05-28 16:59:50+00:002022-05-28 18:59:50+02:00338.0
\n", + "

99704 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " time time_local \\\n", + "0 2022-05-17 04:00:00+00:00 2022-05-17 06:00:00+02:00 \n", + "1 2022-05-17 04:00:10+00:00 2022-05-17 06:00:10+02:00 \n", + "2 2022-05-17 04:00:20+00:00 2022-05-17 06:00:20+02:00 \n", + "3 2022-05-17 04:00:30+00:00 2022-05-17 06:00:30+02:00 \n", + "4 2022-05-17 04:00:40+00:00 2022-05-17 06:00:40+02:00 \n", + "... ... ... \n", + "99699 2022-05-28 16:59:10+00:00 2022-05-28 18:59:10+02:00 \n", + "99700 2022-05-28 16:59:20+00:00 2022-05-28 18:59:20+02:00 \n", + "99701 2022-05-28 16:59:30+00:00 2022-05-28 18:59:30+02:00 \n", + "99702 2022-05-28 16:59:40+00:00 2022-05-28 18:59:40+02:00 \n", + "99703 2022-05-28 16:59:50+00:00 2022-05-28 18:59:50+02:00 \n", + "\n", + " Extruder A Schmelzedruck \n", + "0 387.0 \n", + "1 388.0 \n", + "2 388.0 \n", + "3 388.0 \n", + "4 388.0 \n", + "... ... \n", + "99699 338.0 \n", + "99700 338.0 \n", + "99701 338.0 \n", + "99702 338.0 \n", + "99703 338.0 \n", + "\n", + "[99704 rows x 3 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"time_local\"] = data[\"time\"].dt.tz_convert(\"Europe/Berlin\")\n", + "data[[\"time\",\"time_local\", \"Extruder A Schmelzedruck\"]]" + ] + }, + { + "cell_type": "markdown", + "id": "0239a449-efc0-4cae-a233-5987c6a45968", + "metadata": {}, + "source": [ + "### For large periods and more fine-grained control - get_timeseries_with_resampling()" + ] + }, + { + "cell_type": "markdown", + "id": "88bd9db6-6561-480f-bc96-ccddbe68cbcd", + "metadata": {}, + "source": [ + "The `get_timeseries` method returns the time series data in the currently highest resolution (10 seconds). For a long period and many variables, this quickly results in a large amount of data. 
Due to the size of the data, there might be a noticeable delay in receiving the results, and in some cases, it may even cause a request timeout.\n", + "\n", + "The `get_timeseries_with_resampling` method allows for finer control over the way in which the data is retrieved. By specifying a resampling method and a resampling interval, the amount of data can be adjusted for the use case. Resampling happens on the server side and thus reduces the payload.\n", + "\n", + "\n", + "A separate resampling method must be specified for each variable. This is necessary because not every resampling method is available for every data type. For example, categorical data types such as strings can only be aggregated with `first`, `last` or `count`. You can find all available resampling methods [in our documentation](https://enlyze-python.readthedocs.io/en/latest/models.html#enlyze.models.ResamplingMethod)." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "9c004353-b0e8-41e2-9fe5-a30ee22a3010", + "metadata": {}, + "outputs": [], + "source": [ + "from enlyze.models import ResamplingMethod" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "df79349c-f999-4754-8174-3433f3599c9e", + "metadata": {}, + "outputs": [], + "source": [ + "# pandas has some very handy Timestamp and Timedelta functionalities that are worth checking out\n", + "end = pd.Timestamp.utcnow()\n", + "start = end - pd.Timedelta(weeks=2)\n" + ] + }, + { + "cell_type": "markdown", + "id": "81aa8b85-a83c-4518-b94e-a83022211436", + "metadata": {}, + "source": [ + "
\n", + "Tip: You can access the Docstring of a function in Jupyter Notebooks, if you call it with a question mark right after the name, for example: `enlyze.get_timeseries_with_resampling?`\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "a92d1975-8cc2-4ade-8ab7-330c36598fc5", + "metadata": {}, + "source": [ + "For our example, we will use the same resampling method for all variables and a `resampling_interval` of 10 minutes" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "783d0c11-b5e2-4e7b-8cbb-fcb9f777012f", + "metadata": {}, + "outputs": [], + "source": [ + "ts_data_resampled = enlyze.get_timeseries_with_resampling(\n", + " start=start,\n", + " end=end,\n", + " variables={v: ResamplingMethod(\"avg\") for v in melt_pressures},\n", + " resampling_interval=600\n", + ")\n", + "\n", + "data_resampled = ts_data_resampled.to_dataframe(use_display_names=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "ca1b68ac-d0c0-45b7-aa4f-918c401ab171", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Extruder A SchmelzedruckExtruder B FörderrateExtruder A DurchsatzExtruder A FolienstärkeExtruder A SchmelzetemperaturExtruder B FolienstärkeExtruder B SchneckenumdrehungExtruder B SchmelzedruckExtruder A SchneckenumdrehungExtruder B DurchsatzExtruder A FörderrateExtruder B Schmelzetemperatur
time
2023-11-22 15:40:00+00:00-10.8333330.00.00.018.00.00.0-6.9444440.00.00.018.0
2023-11-22 15:50:00+00:00-10.8833330.00.00.018.00.00.0-7.0166670.00.00.018.0
2023-11-22 16:00:00+00:00-10.9333330.00.00.018.00.00.0-7.0000000.00.00.018.0
2023-11-22 16:10:00+00:00-10.9500000.00.00.018.00.00.0-7.0333330.00.00.018.0
2023-11-22 16:20:00+00:00-10.9333330.00.00.018.00.00.0-7.0000000.00.00.018.0
\n", + "
" + ], + "text/plain": [ + " Extruder A Schmelzedruck Extruder B Förderrate \\\n", + "time \n", + "2023-11-22 15:40:00+00:00 -10.833333 0.0 \n", + "2023-11-22 15:50:00+00:00 -10.883333 0.0 \n", + "2023-11-22 16:00:00+00:00 -10.933333 0.0 \n", + "2023-11-22 16:10:00+00:00 -10.950000 0.0 \n", + "2023-11-22 16:20:00+00:00 -10.933333 0.0 \n", + "\n", + " Extruder A Durchsatz Extruder A Folienstärke \\\n", + "time \n", + "2023-11-22 15:40:00+00:00 0.0 0.0 \n", + "2023-11-22 15:50:00+00:00 0.0 0.0 \n", + "2023-11-22 16:00:00+00:00 0.0 0.0 \n", + "2023-11-22 16:10:00+00:00 0.0 0.0 \n", + "2023-11-22 16:20:00+00:00 0.0 0.0 \n", + "\n", + " Extruder A Schmelzetemperatur \\\n", + "time \n", + "2023-11-22 15:40:00+00:00 18.0 \n", + "2023-11-22 15:50:00+00:00 18.0 \n", + "2023-11-22 16:00:00+00:00 18.0 \n", + "2023-11-22 16:10:00+00:00 18.0 \n", + "2023-11-22 16:20:00+00:00 18.0 \n", + "\n", + " Extruder B Folienstärke \\\n", + "time \n", + "2023-11-22 15:40:00+00:00 0.0 \n", + "2023-11-22 15:50:00+00:00 0.0 \n", + "2023-11-22 16:00:00+00:00 0.0 \n", + "2023-11-22 16:10:00+00:00 0.0 \n", + "2023-11-22 16:20:00+00:00 0.0 \n", + "\n", + " Extruder B Schneckenumdrehung \\\n", + "time \n", + "2023-11-22 15:40:00+00:00 0.0 \n", + "2023-11-22 15:50:00+00:00 0.0 \n", + "2023-11-22 16:00:00+00:00 0.0 \n", + "2023-11-22 16:10:00+00:00 0.0 \n", + "2023-11-22 16:20:00+00:00 0.0 \n", + "\n", + " Extruder B Schmelzedruck \\\n", + "time \n", + "2023-11-22 15:40:00+00:00 -6.944444 \n", + "2023-11-22 15:50:00+00:00 -7.016667 \n", + "2023-11-22 16:00:00+00:00 -7.000000 \n", + "2023-11-22 16:10:00+00:00 -7.033333 \n", + "2023-11-22 16:20:00+00:00 -7.000000 \n", + "\n", + " Extruder A Schneckenumdrehung \\\n", + "time \n", + "2023-11-22 15:40:00+00:00 0.0 \n", + "2023-11-22 15:50:00+00:00 0.0 \n", + "2023-11-22 16:00:00+00:00 0.0 \n", + "2023-11-22 16:10:00+00:00 0.0 \n", + "2023-11-22 16:20:00+00:00 0.0 \n", + "\n", + " Extruder B Durchsatz Extruder A Förderrate \\\n", + "time \n", + 
"2023-11-22 15:40:00+00:00 0.0 0.0 \n", + "2023-11-22 15:50:00+00:00 0.0 0.0 \n", + "2023-11-22 16:00:00+00:00 0.0 0.0 \n", + "2023-11-22 16:10:00+00:00 0.0 0.0 \n", + "2023-11-22 16:20:00+00:00 0.0 0.0 \n", + "\n", + " Extruder B Schmelzetemperatur \n", + "time \n", + "2023-11-22 15:40:00+00:00 18.0 \n", + "2023-11-22 15:50:00+00:00 18.0 \n", + "2023-11-22 16:00:00+00:00 18.0 \n", + "2023-11-22 16:10:00+00:00 18.0 \n", + "2023-11-22 16:20:00+00:00 18.0 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_resampled.head()" + ] + }, + { + "cell_type": "markdown", + "id": "609024e5-d407-49f8-9f28-0aa81463ca7f", + "metadata": {}, + "source": [ + "### Data preparation and exploration\n", + "\n", + "Data preparation and exploration is a bigger topic that will be covered in later tutorials, but here are a few examples of how to get started. " + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "d05a9b60-e954-416e-9258-d96084281d6f", + "metadata": {}, + "outputs": [], + "source": [ + "# quick step to sort the columns alphabetically\n", + "data_resampled = data_resampled[sorted(data_resampled.columns)]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "8f7fc1a4-b41e-48b9-b402-18d429f2c550", + "metadata": {}, + "outputs": [], + "source": [ + "# reset index and convert time to local timezone\n", + "data_resampled = data_resampled.reset_index()\n", + "data_resampled[\"time\"] = data_resampled[\"time\"].dt.tz_convert(\"Europe/Berlin\")" + ] + }, + { + "cell_type": "markdown", + "id": "f9223dfd-bbf6-4057-a4c8-791adac84e6a", + "metadata": {}, + "source": [ + "#### Simple plot" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "b678e183-e184-426f-bfd0-447fab8cc841", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(1, 1, figsize=(20,5))\n", + "data_resampled.plot(x=\"time\", ax=ax);" + ] + }, + { + "cell_type": "markdown", + "id": "809202b5-b86b-4f2e-8695-af1167e33c82", + "metadata": {}, + "source": [ + "#### Subplots" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "dcc24cf2-2d0f-4846-a2df-786c478eb342", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "y_values = [col for col in data_resampled.columns if \"Extruder A\" in col]\n", + "\n", + "rows = len(y_values) \n", + "cols = 1\n", + "\n", + "fig, axes = plt.subplots(rows, cols, figsize=(20, 2*rows), sharex=True)\n", + "\n", + "for i, col in enumerate(y_values):\n", + " data_resampled.plot(x=\"time\", y=col, ax=axes[i])" + ] + }, + { + "cell_type": "markdown", + "id": "5f237b06-8e03-4f46-8f4e-a99486d8aff5", + "metadata": {}, + "source": [ + "#### Pairplots" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "780a9b10-2ca0-40d3-8d4a-3b7db7715bab", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_theme(style=\"ticks\")\n", + "\n", + "columns = [col for col in data_resampled.columns if \"Durchsatz\" in col]\n", + "\n", + "sns.pairplot(data_resampled[columns]);" + ] + }, + { + "cell_type": "markdown", + "id": "40ec502a-9ce0-4fdc-be44-b114a1564580", + "metadata": {}, + "source": [ + "## Production runs\n", + "If your production runs are integrated into the ENLYZE Platform, you can also use the `EnlyzeClient` to query them, alongside the calculated OEE metrics per run.\n", + "You can fetch all runs at once or use filters to fetch runs for specific products, orders, machines or within a certain time range." + ] + }, + { + "cell_type": "markdown", + "id": "1b25c5f8-82b6-40b3-aef4-ee5058d61842", + "metadata": {}, + "source": [ + "We will use our previously selected machine from above to filter the runs. Since this is a demo account, the KPIs are not meaningful, but this should give you a good idea of the data and what is possible." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "1da46a78-75d2-41f2-9447-56c4486d0e4f", + "metadata": {}, + "outputs": [], + "source": [ + "runs = enlyze.get_production_runs(machine=machine)" + ] + }, + { + "cell_type": "markdown", + "id": "8086ac26-d0b9-4ec9-bf9b-6b0e26697725", + "metadata": {}, + "source": [ + "Unlike the time series data or the machines, the runs are returned as an `enlyze.models.ProductionRuns` model. This model has a `to_dataframe` method, which conveniently converts the runs into a pandas DataFrame for us." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "7bc79a88-14ed-4ee0-8704-0d3b79ce5673", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "enlyze.models.ProductionRuns" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(runs)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "ae8eba18-ed77-4594-b722-c295947c9121", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidaverage_throughputproduction_orderstartendquantity_totalquantity_yieldavailabilityperformancequality...quantity_yield.valueavailability.scoreavailability.time_lossperformance.scoreperformance.time_lossquality.scorequality.time_lossproductivity.scoreproductivity.time_lossquantity_scrap
099245861-840e-42f6-9888-494339e88eb4NaNFA-0A5A-12022-03-18 00:13:08+00:002022-03-18 01:16:09+00:00NaNNaNNaNNaNNaN...NaNNaNNaTNaNNaTNaNNaTNaNNaTNaN
13756a10c-b564-42bb-83f1-5f5d6aba6d30230.552952FA-0A5A-22022-03-18 01:16:19+00:002022-03-18 18:41:55+00:00NaNNaNNaNNaNNaN...3236.3194441.0000000 days 00:00:000.3842550 days 08:38:360.8055020 days 03:23:220.3095180 days 12:01:58NaN
21366390c-a1c5-4fc3-a4f5-74ade57e118d459.256323FA-0A5A-32022-03-18 18:42:01+00:002022-03-19 06:38:57+00:00NaNNaNNaNNaNNaN...5107.4527781.0000000 days 00:00:000.9076210 days 01:01:380.9307260 days 00:49:390.8447460 days 01:51:17NaN
398de1da6-69c5-4afa-bd0f-1c1f52440a8b486.231561FA-0A5A-42022-03-19 06:39:04+00:002022-03-20 22:34:24+00:00NaNNaNNaNNaNNaN...18144.6944441.0000000 days 00:00:000.8090380 days 07:07:340.9347420 days 02:36:180.7562410 days 09:43:52NaN
4b9efc595-ceb7-424f-99b0-f47d2d588cfe499.188133FA-0A5A-52022-03-20 22:34:30+00:002022-03-22 15:33:14+00:00NaNNaNNaNNaNNaN...19749.2694440.9991870 days 00:01:590.8183410 days 07:10:520.9654430 days 01:24:530.7894190 days 08:37:44NaN
\n", + "

5 rows × 34 columns

\n", + "
" + ], + "text/plain": [ + " uuid average_throughput production_order \\\n", + "0 99245861-840e-42f6-9888-494339e88eb4 NaN FA-0A5A-1 \n", + "1 3756a10c-b564-42bb-83f1-5f5d6aba6d30 230.552952 FA-0A5A-2 \n", + "2 1366390c-a1c5-4fc3-a4f5-74ade57e118d 459.256323 FA-0A5A-3 \n", + "3 98de1da6-69c5-4afa-bd0f-1c1f52440a8b 486.231561 FA-0A5A-4 \n", + "4 b9efc595-ceb7-424f-99b0-f47d2d588cfe 499.188133 FA-0A5A-5 \n", + "\n", + " start end quantity_total \\\n", + "0 2022-03-18 00:13:08+00:00 2022-03-18 01:16:09+00:00 NaN \n", + "1 2022-03-18 01:16:19+00:00 2022-03-18 18:41:55+00:00 NaN \n", + "2 2022-03-18 18:42:01+00:00 2022-03-19 06:38:57+00:00 NaN \n", + "3 2022-03-19 06:39:04+00:00 2022-03-20 22:34:24+00:00 NaN \n", + "4 2022-03-20 22:34:30+00:00 2022-03-22 15:33:14+00:00 NaN \n", + "\n", + " quantity_yield availability performance quality ... \\\n", + "0 NaN NaN NaN NaN ... \n", + "1 NaN NaN NaN NaN ... \n", + "2 NaN NaN NaN NaN ... \n", + "3 NaN NaN NaN NaN ... \n", + "4 NaN NaN NaN NaN ... \n", + "\n", + " quantity_yield.value availability.score availability.time_loss \\\n", + "0 NaN NaN NaT \n", + "1 3236.319444 1.000000 0 days 00:00:00 \n", + "2 5107.452778 1.000000 0 days 00:00:00 \n", + "3 18144.694444 1.000000 0 days 00:00:00 \n", + "4 19749.269444 0.999187 0 days 00:01:59 \n", + "\n", + " performance.score performance.time_loss quality.score quality.time_loss \\\n", + "0 NaN NaT NaN NaT \n", + "1 0.384255 0 days 08:38:36 0.805502 0 days 03:23:22 \n", + "2 0.907621 0 days 01:01:38 0.930726 0 days 00:49:39 \n", + "3 0.809038 0 days 07:07:34 0.934742 0 days 02:36:18 \n", + "4 0.818341 0 days 07:10:52 0.965443 0 days 01:24:53 \n", + "\n", + " productivity.score productivity.time_loss quantity_scrap \n", + "0 NaN NaT NaN \n", + "1 0.309518 0 days 12:01:58 NaN \n", + "2 0.844746 0 days 01:51:17 NaN \n", + "3 0.756241 0 days 09:43:52 NaN \n", + "4 0.789419 0 days 08:37:44 NaN \n", + "\n", + "[5 rows x 34 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "runs_df = runs.to_dataframe()\n", + "runs_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "bdab41a4-3558-49a9-8a0b-613d828ddb7f", + "metadata": {}, + "source": [ + "## Exporting Data" + ] + }, + { + "cell_type": "markdown", + "id": "352d7f7d-e41e-4a3d-8b47-33bcde4bcd99", + "metadata": {}, + "source": [ + "Pandas DataFrames can be exported to many different data formats. For a full list of supported methods, you can check out the [IO tools](https://pandas.pydata.org/docs/user_guide/io.html#io) section of the pandas documentation or the [Importing and exporting data](https://pandas.pydata.org/docs/user_guide/10min.html#importing-and-exporting-data) part of the **10 minutes to pandas** guide." + ] + }, + { + "cell_type": "markdown", + "id": "b7648dcc-ec46-4f1f-80c8-43c6b5ea2ae4", + "metadata": {}, + "source": [ + "### To CSV\n", + "\n", + "Probably the most used method and quite straightforward:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9afb2b53-0528-4f42-bba7-dfd2b44a357b", + "metadata": {}, + "outputs": [], + "source": [ + "# path to directory with filename\n", + "runs_df.to_csv(\"./filename.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "1c772885-39dc-4be7-83b5-628357069ed3", + "metadata": {}, + "source": [ + "If a different separator or decimal character is required, they can be specified in the export method.\n", + "\n", + "A full, detailed list of all the options can be found [here](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html). Specifying the separator and the decimal character is especially useful when opening the file in German Excel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bce5ebd8-9e1b-4cdf-8771-741ecfb8a32e", + "metadata": {}, + "outputs": [], + "source": [ + "runs_df.to_csv(\"./filename.csv\", sep=\";\", decimal=\",\", index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "505dbb46-cf71-4126-9ea7-2e2f7ef056a6", + "metadata": {}, + "source": [ + "### Excel and Power BI" + ] + }, + { + "cell_type": "markdown", + "id": "f614084c-99c0-42d7-b1d1-b2a622306f66", + "metadata": {}, + "source": [ + "
\n", + "Note: Excel and Power BI don't support all Python data types.\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "84ec8124-28ef-4488-aef5-620c1e38868a", + "metadata": {}, + "source": [ + "There is an option to directly save the data as an Excel file. This requires the additional `openpyxl` package, which can be easily installed via pip:\n", + "\n", + " pip install openpyxl\n", + "\n", + "If you use the [Python Scripts](https://learn.microsoft.com/de-de/power-bi/connect-data/desktop-python-scripts) data connector in Power BI to load data into Power BI, you can directly use the API but need to convert the unsupported data types before returning the DataFrame.\n", + "\n", + "To avoid errors or false data representations, some Python data types need to be converted to supported types. Excel and Power BI don't support timestamps with time zone information and timedeltas. You can use the `DataFrame.info()` method, to see which data types your DataFrame contains." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "71534943-20b0-42bd-9d9a-ded3d4e129f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 227 entries, 0 to 226\n", + "Data columns (total 34 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 uuid 227 non-null object \n", + " 1 average_throughput 217 non-null float64 \n", + " 2 production_order 227 non-null object \n", + " 3 start 227 non-null datetime64[ns, UTC]\n", + " 4 end 226 non-null datetime64[ns, UTC]\n", + " 5 quantity_total 0 non-null float64 \n", + " 6 quantity_yield 0 non-null float64 \n", + " 7 availability 0 non-null float64 \n", + " 8 performance 0 non-null float64 \n", + " 9 quality 0 non-null float64 \n", + " 10 productivity 0 non-null float64 \n", + " 11 machine.uuid 227 non-null object \n", + " 12 machine.display_name 227 non-null object \n", + " 13 machine.genesis_date 227 non-null object \n", + " 14 machine.site._id 227 non-null int64 \n", + " 15 
machine.site.display_name 227 non-null object \n", + " 16 machine.site.address 227 non-null object \n", + " 17 product.code 227 non-null object \n", + " 18 product.name 0 non-null object \n", + " 19 quantity_scrap.unit 213 non-null object \n", + " 20 quantity_scrap.value 213 non-null float64 \n", + " 21 quantity_total.unit 221 non-null object \n", + " 22 quantity_total.value 221 non-null float64 \n", + " 23 quantity_yield.unit 208 non-null object \n", + " 24 quantity_yield.value 208 non-null float64 \n", + " 25 availability.score 217 non-null float64 \n", + " 26 availability.time_loss 217 non-null timedelta64[ns] \n", + " 27 performance.score 217 non-null float64 \n", + " 28 performance.time_loss 217 non-null timedelta64[ns] \n", + " 29 quality.score 204 non-null float64 \n", + " 30 quality.time_loss 204 non-null timedelta64[ns] \n", + " 31 productivity.score 204 non-null float64 \n", + " 32 productivity.time_loss 204 non-null timedelta64[ns] \n", + " 33 quantity_scrap 0 non-null float64 \n", + "dtypes: datetime64[ns, UTC](2), float64(15), int64(1), object(12), timedelta64[ns](4)\n", + "memory usage: 60.4+ KB\n" + ] + } + ], + "source": [ + "runs_df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "0806290e-36d6-49d9-867d-c7ee6a9ed87f", + "metadata": {}, + "source": [ + "**Removing timezone information:**\n", + "\n", + "To remove the time zone information, we will first convert the timestamps to the desired time zone (if it's UTC, then this can be skipped) and then remove the timezone information, making them timezone-naive." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "514652f5-283e-46ef-9150-f159ebdee339", + "metadata": {}, + "outputs": [], + "source": [ + "# convert timestamps to local time zone\n", + "runs_df[\"start\"] = runs_df[\"start\"].dt.tz_convert(\"Europe/Berlin\")\n", + "runs_df[\"end\"] = runs_df[\"end\"].dt.tz_convert(\"Europe/Berlin\")\n", + "\n", + "# remove time zone information\n", + "runs_df[\"start\"] = runs_df[\"start\"].dt.tz_localize(None)\n", + "runs_df[\"end\"] = runs_df[\"end\"].dt.tz_localize(None)" + ] + }, + { + "cell_type": "markdown", + "id": "add2d1cf-6f83-432c-9b76-9bedea7b97c3", + "metadata": {}, + "source": [ + "**Convert timedeltas into total_seconds:**\n", + "\n", + "The timedelta datatype is best transformed into the total seconds equivalent of the duration. From there, it can be converted to other proprietary data types." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "dceb8113-5985-4861-9560-24c34e1bf6c6", + "metadata": {}, + "outputs": [], + "source": [ + "runs_df[\"availability.time_loss\"] = runs_df[\"availability.time_loss\"].dt.total_seconds()\n", + "runs_df[\"performance.time_loss\"] = runs_df[\"performance.time_loss\"].dt.total_seconds()\n", + "runs_df[\"quality.time_loss\"] = runs_df[\"quality.time_loss\"].dt.total_seconds()\n", + "runs_df[\"productivity.time_loss\"] = runs_df[\"productivity.time_loss\"].dt.total_seconds()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "9051d479-3fbb-4337-964e-91cc5e49833b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 227 entries, 0 to 226\n", + "Data columns (total 34 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 uuid 227 non-null object \n", + " 1 average_throughput 217 non-null float64 \n", + " 2 production_order 227 non-null object \n", + " 3 start 227 non-null datetime64[ns]\n", + " 4 end 226 non-null 
datetime64[ns]\n", + " 5 quantity_total 0 non-null float64 \n", + " 6 quantity_yield 0 non-null float64 \n", + " 7 availability 0 non-null float64 \n", + " 8 performance 0 non-null float64 \n", + " 9 quality 0 non-null float64 \n", + " 10 productivity 0 non-null float64 \n", + " 11 machine.uuid 227 non-null object \n", + " 12 machine.display_name 227 non-null object \n", + " 13 machine.genesis_date 227 non-null object \n", + " 14 machine.site._id 227 non-null int64 \n", + " 15 machine.site.display_name 227 non-null object \n", + " 16 machine.site.address 227 non-null object \n", + " 17 product.code 227 non-null object \n", + " 18 product.name 0 non-null object \n", + " 19 quantity_scrap.unit 213 non-null object \n", + " 20 quantity_scrap.value 213 non-null float64 \n", + " 21 quantity_total.unit 221 non-null object \n", + " 22 quantity_total.value 221 non-null float64 \n", + " 23 quantity_yield.unit 208 non-null object \n", + " 24 quantity_yield.value 208 non-null float64 \n", + " 25 availability.score 217 non-null float64 \n", + " 26 availability.time_loss 217 non-null float64 \n", + " 27 performance.score 217 non-null float64 \n", + " 28 performance.time_loss 217 non-null float64 \n", + " 29 quality.score 204 non-null float64 \n", + " 30 quality.time_loss 204 non-null float64 \n", + " 31 productivity.score 204 non-null float64 \n", + " 32 productivity.time_loss 204 non-null float64 \n", + " 33 quantity_scrap 0 non-null float64 \n", + "dtypes: datetime64[ns](2), float64(19), int64(1), object(12)\n", + "memory usage: 60.4+ KB\n" + ] + } + ], + "source": [ + "runs_df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "51d8d167-3bc4-44f5-acf5-576f85bc4d0d", + "metadata": {}, + "source": [ + "With the necessary data transformations done, the DataFrame can now be saved as an Excel file or returned to Power BI." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9525437a-28a3-4811-9163-f7e4c075d72e", + "metadata": {}, + 
"outputs": [], + "source": [ + "runs_df.to_excel(\"./filename.xlsx\")" + ] + }, + { + "cell_type": "markdown", + "id": "030dc29e-ba51-4413-bd34-e9301e87049d", + "metadata": {}, + "source": [ + "🎉 This is it for now. Thank you for reading, and please [reach out to us]() if you have questions or would like us to cover a certain topic. \n", + "Best \n", + "Clemens" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index e3a81d6..9aafb9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,15 +4,12 @@ requires = [ "setuptools", ] -[tool.setuptools.dynamic] -version = {attr = "enlyze._version.VERSION"} - [project] name = "enlyze" description = "Python SDK for interacting with the ENLYZE platform https://www.enlyze.com" readme = "README.rst" -license = {text = "MIT"} -authors = [{name = "ENLYZE GmbH", email = "hello@enlyze.com"},] +license = { text = "MIT" } +authors = [ { name = "ENLYZE GmbH", email = "hello@enlyze.com" } ] requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3 :: Only", @@ -21,21 +18,20 @@ classifiers = [ "Programming Language :: Python :: 3.12", ] dynamic = [ - 'version', + "version", ] dependencies = [ "httpx", "pandas>=2", "pydantic>=2", ] -[project.optional-dependencies] -docs = [ +optional-dependencies.docs = [ "sphinx", "sphinx-rtd-theme", "sphinx-tabs", "sphinxcontrib-spelling", ] -lint = [ +optional-dependencies.lint = [ "bandit", "black", "flake8", @@ -45,7 +41,7 @@ lint = [ "safety", "tox-ini-fmt", ] -test = [ +optional-dependencies.test = [ "coverage", "hypothesis", "pandas-stubs", @@ 
-57,8 +53,11 @@ test = [ "respx", ] -[tool.mypy] -exclude="tests" +[tool.setuptools.dynamic] +version = { attr = "enlyze._version.VERSION" } [tool.isort] profile = "black" + +[tool.mypy] +exclude = "tests" diff --git a/src/enlyze/__init__.py b/src/enlyze/__init__.py index 547a6ec..dd25043 100644 --- a/src/enlyze/__init__.py +++ b/src/enlyze/__init__.py @@ -1,9 +1,9 @@ from .client import EnlyzeClient from .errors import EnlyzeError, InvalidTokenError -from .models import Appliance, ResamplingMethod, Site, Variable +from .models import Machine, ResamplingMethod, Site, Variable __all__ = [ - "Appliance", + "Machine", "EnlyzeClient", "EnlyzeError", "InvalidTokenError", diff --git a/src/enlyze/api_clients/base.py b/src/enlyze/api_clients/base.py index 557deb3..1eec15c 100644 --- a/src/enlyze/api_clients/base.py +++ b/src/enlyze/api_clients/base.py @@ -8,10 +8,20 @@ import httpx from pydantic import BaseModel, ValidationError +from enlyze._version import VERSION from enlyze.auth import TokenAuth -from enlyze.constants import HTTPX_TIMEOUT +from enlyze.constants import HTTPX_TIMEOUT, USER_AGENT from enlyze.errors import EnlyzeError, InvalidTokenError +USER_AGENT_NAME_VERSION_SEPARATOR = "/" + + +@cache +def _construct_user_agent( + *, user_agent: str = USER_AGENT, version: str = VERSION +) -> str: + return f"{user_agent}{USER_AGENT_NAME_VERSION_SEPARATOR}{version}" + class ApiBaseModel(BaseModel): """Base class for ENLYZE platform API object models using pydantic @@ -60,6 +70,7 @@ def __init__( auth=TokenAuth(token), base_url=httpx.URL(base_url), timeout=timeout, + headers={"user-agent": _construct_user_agent()}, ) @cache diff --git a/src/enlyze/api_clients/production_runs/models.py b/src/enlyze/api_clients/production_runs/models.py index 192c2d3..d52fe06 100644 --- a/src/enlyze/api_clients/production_runs/models.py +++ b/src/enlyze/api_clients/production_runs/models.py @@ -3,6 +3,8 @@ from typing import Any, Optional from uuid import UUID +from pydantic import Field + 
import enlyze.models as user_models from enlyze.api_clients.base import ApiBaseModel @@ -59,14 +61,14 @@ def to_user_model(self) -> user_models.Quantity: ) -class Appliance(ApiBaseModel): +class Machine(ApiBaseModel): name: str uuid: UUID class ProductionRun(ProductionRunsApiModel): uuid: UUID - appliance: Appliance + machine: Machine = Field(alias="appliance") average_throughput: Optional[float] production_order: str product: Product @@ -81,7 +83,7 @@ class ProductionRun(ProductionRunsApiModel): productivity: Optional[OEEComponent] def to_user_model( - self, appliances_by_uuid: dict[UUID, user_models.Appliance] + self, machines_by_uuid: dict[UUID, user_models.Machine] ) -> user_models.ProductionRun: """Convert into a :ref:`user model `""" @@ -101,7 +103,7 @@ def to_user_model( return user_models.ProductionRun( uuid=self.uuid, - appliance=appliances_by_uuid[self.appliance.uuid], + machine=machines_by_uuid[self.machine.uuid], average_throughput=self.average_throughput, production_order=self.production_order, product=self.product.to_user_model(), diff --git a/src/enlyze/api_clients/timeseries/models.py b/src/enlyze/api_clients/timeseries/models.py index 5f84820..632657f 100644 --- a/src/enlyze/api_clients/timeseries/models.py +++ b/src/enlyze/api_clients/timeseries/models.py @@ -32,16 +32,16 @@ def to_user_model(self) -> user_models.Site: ) -class Appliance(TimeseriesApiModel): +class Machine(TimeseriesApiModel): uuid: UUID name: str genesis_date: date site: int - def to_user_model(self, site: user_models.Site) -> user_models.Appliance: + def to_user_model(self, site: user_models.Site) -> user_models.Machine: """Convert into a :ref:`user model `""" - return user_models.Appliance( + return user_models.Machine( uuid=self.uuid, display_name=self.name, genesis_date=self.genesis_date, @@ -55,7 +55,7 @@ class Variable(TimeseriesApiModel): unit: Optional[str] data_type: user_models.VariableDataType - def to_user_model(self, appliance: user_models.Appliance) -> 
user_models.Variable: + def to_user_model(self, machine: user_models.Machine) -> user_models.Variable: """Convert into a :ref:`user model `.""" return user_models.Variable( @@ -63,7 +63,7 @@ def to_user_model(self, appliance: user_models.Appliance) -> user_models.Variabl display_name=self.display_name, unit=self.unit, data_type=self.data_type, - appliance=appliance, + machine=machine, ) @@ -75,6 +75,31 @@ def extend(self, other: "TimeseriesData") -> None: """Add records from ``other`` after the existing records.""" self.records.extend(other.records) + def merge(self, other: "TimeseriesData") -> "TimeseriesData": + """Merge records from ``other`` into the existing records.""" + slen, olen = len(self.records), len(other.records) + if olen < slen: + raise ValueError( + "Cannot merge. Attempted to merge" + f" an instance with {olen} records into an instance with {slen}" + " records. The instance to merge must have a number" + " of records greater than or equal to the number of records of" + " the instance you're trying to merge into." + ) + + self.columns.extend(other.columns[1:]) + + for s, o in zip(self.records, other.records[:slen]): + if s[0] != o[0]: + raise ValueError( + "Cannot merge. 
Attempted to merge records " + f"with mismatched timestamps {s[0]}, {o[0]}" + ) + + s.extend(o[1:]) + + return self + def to_user_model( self, start: datetime, diff --git a/src/enlyze/client.py b/src/enlyze/client.py index ec4aa11..a29f6bf 100644 --- a/src/enlyze/client.py +++ b/src/enlyze/client.py @@ -1,6 +1,7 @@ +from collections import abc from datetime import datetime -from functools import cache -from typing import Iterator, Mapping, Optional, Sequence +from functools import cache, reduce +from typing import Any, Iterator, Mapping, Optional, Sequence, Tuple, Union from uuid import UUID import enlyze.api_clients.timeseries.models as timeseries_api_models @@ -10,9 +11,11 @@ from enlyze.api_clients.timeseries.client import TimeseriesApiClient from enlyze.constants import ( ENLYZE_BASE_URL, + MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST, VARIABLE_UUID_AND_RESAMPLING_METHOD_SEPARATOR, ) -from enlyze.errors import EnlyzeError +from enlyze.errors import EnlyzeError, ResamplingValidationError +from enlyze.iterable_tools import chunk from enlyze.validators import ( validate_datetime, validate_resampling_interval, @@ -21,6 +24,8 @@ validate_timeseries_arguments, ) +FETCHING_TIMESERIES_DATA_ERROR_MSG = "Error occurred when fetching timeseries data." 
+ def _get_timeseries_data_from_pages( pages: Iterator[timeseries_api_models.TimeseriesData], @@ -42,6 +47,38 @@ def _get_timeseries_data_from_pages( return timeseries_data +def _get_variables_sequence_and_query_parameter_list( + variables: Union[ + Sequence[user_models.Variable], + Mapping[user_models.Variable, user_models.ResamplingMethod], + ], + resampling_interval: Optional[int], +) -> Tuple[Sequence[user_models.Variable], Sequence[str]]: + if isinstance(variables, abc.Sequence) and resampling_interval is not None: + raise ResamplingValidationError( + "`variables` must be a mapping {variable: ResamplingMethod}" + ) + + if resampling_interval: + validate_resampling_interval(resampling_interval) + variables_sequence = [] + variables_query_parameter_list = [] + for variable, resampling_method in variables.items(): # type: ignore + variables_sequence.append(variable) + variables_query_parameter_list.append( + f"{variable.uuid}" + f"{VARIABLE_UUID_AND_RESAMPLING_METHOD_SEPARATOR}" + f"{resampling_method.value}" + ) + + validate_resampling_method_for_data_type( + resampling_method, variable.data_type + ) + return variables_sequence, variables_query_parameter_list + + return variables, [str(v.uuid) for v in variables] # type: ignore + + class EnlyzeClient: """Main entrypoint for interacting with the ENLYZE platform. 
@@ -82,28 +119,27 @@ def get_sites(self) -> list[user_models.Site]: """ return [site.to_user_model() for site in self._get_sites()] - def _get_appliances(self) -> Iterator[timeseries_api_models.Appliance]: - """Get all appliances from the API""" + def _get_machines(self) -> Iterator[timeseries_api_models.Machine]: + """Get all machines from the API""" return self._timeseries_api_client.get_paginated( - "appliances", timeseries_api_models.Appliance + "appliances", timeseries_api_models.Machine ) @cache - def get_appliances( + def get_machines( self, site: Optional[user_models.Site] = None - ) -> list[user_models.Appliance]: - """Retrieve all :ref:`appliances `, optionally filtered by site. + ) -> list[user_models.Machine]: + """Retrieve all :ref:`machines `, optionally filtered by site. - :param site: Only get appliances of this site. Gets all appliances of the + :param site: Only get machines of this site. Gets all machines of the organization if None. - :type site: :class:`~enlyze.models.Site` or None :raises: |token-error| :raises: |generic-error| - :returns: Appliances - :rtype: list[:class:`~enlyze.models.Appliance`] + :returns: Machines + :rtype: list[:class:`~enlyze.models.Machine`] """ @@ -112,46 +148,134 @@ def get_appliances( else: sites_by_id = {site._id: site for site in self.get_sites()} - appliances = [] - for appliance_api in self._get_appliances(): - site_ = sites_by_id.get(appliance_api.site) + machines = [] + for machine_api in self._get_machines(): + site_ = sites_by_id.get(machine_api.site) if not site_: continue - appliances.append(appliance_api.to_user_model(site_)) + machines.append(machine_api.to_user_model(site_)) - return appliances + return machines def _get_variables( - self, appliance_uuid: UUID + self, machine_uuid: UUID ) -> Iterator[timeseries_api_models.Variable]: - """Get variables for an appliance from the API.""" + """Get variables for a machine from the API.""" return self._timeseries_api_client.get_paginated( "variables", 
timeseries_api_models.Variable, - params={"appliance": str(appliance_uuid)}, + params={"appliance": str(machine_uuid)}, ) def get_variables( - self, appliance: user_models.Appliance + self, machine: user_models.Machine ) -> Sequence[user_models.Variable]: - """Retrieve all variables of an :ref:`appliance `. + """Retrieve all variables of a :ref:`machine `. - :param appliance: The appliance for which to get all variables. - :type appliance: :class:`~enlyze.models.Appliance` + :param machine: The machine for which to get all variables. :raises: |token-error| :raises: |generic-error| - :returns: Variables of ``appliance`` + :returns: Variables of ``machine`` """ return [ - variable.to_user_model(appliance) - for variable in self._get_variables(appliance.uuid) + variable.to_user_model(machine) + for variable in self._get_variables(machine.uuid) + ] + + def _get_paginated_timeseries( + self, + *, + machine_uuid: str, + start: datetime, + end: datetime, + variables: Sequence[str], + resampling_interval: Optional[int], + ) -> Iterator[timeseries_api_models.TimeseriesData]: + params: dict[str, Any] = { + "appliance": machine_uuid, + "start_datetime": start.isoformat(), + "end_datetime": end.isoformat(), + "variables": ",".join(variables), + } + + if resampling_interval: + params["resampling_interval"] = resampling_interval + + return self._timeseries_api_client.get_paginated( + "timeseries", timeseries_api_models.TimeseriesData, params=params + ) + + def _get_timeseries( + self, + start: datetime, + end: datetime, + variables: Union[ + Sequence[user_models.Variable], + Mapping[user_models.Variable, user_models.ResamplingMethod], + ], + resampling_interval: Optional[int] = None, + ) -> Optional[user_models.TimeseriesData]: + variables_sequence, variables_query_parameter_list = ( + _get_variables_sequence_and_query_parameter_list( + variables, resampling_interval + ) + ) + + start, end, machine_uuid = validate_timeseries_arguments( + start, end, variables_sequence + ) + + 
try: + chunks = chunk( + variables_query_parameter_list, + MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST, + ) + except ValueError as e: + raise EnlyzeError(FETCHING_TIMESERIES_DATA_ERROR_MSG) from e + + chunks_pages = ( + self._get_paginated_timeseries( + machine_uuid=machine_uuid, + start=start, + end=end, + variables=chunk, + resampling_interval=resampling_interval, + ) + for chunk in chunks + ) + + timeseries_data_chunked = [ + _get_timeseries_data_from_pages(pages) for pages in chunks_pages ] + if not timeseries_data_chunked or all( + data is None for data in timeseries_data_chunked + ): + return None + + if any(data is None for data in timeseries_data_chunked) and any( + data is not None for data in timeseries_data_chunked + ): + raise EnlyzeError( + "The timeseries API didn't return data for some of the variables." + ) + + try: + timeseries_data = reduce(lambda x, y: x.merge(y), timeseries_data_chunked) # type: ignore # noqa + except ValueError as e: + raise EnlyzeError(FETCHING_TIMESERIES_DATA_ERROR_MSG) from e + + return timeseries_data.to_user_model( # type: ignore + start=start, + end=end, + variables=variables_sequence, + ) + def get_timeseries( self, start: datetime, @@ -161,7 +285,7 @@ def get_timeseries( """Get timeseries data of :ref:`variables ` for a given time frame. Timeseries data for multiple variables can be requested at once. However, all - variables must belong to the same appliance. + variables must belong to the same machine. You should always pass :ref:`timezone-aware datetime ` objects to this method! 
If you don't, naive @@ -182,30 +306,7 @@ def get_timeseries( """ - start, end, appliance_uuid = validate_timeseries_arguments( - start, end, variables - ) - - pages = self._timeseries_api_client.get_paginated( - "timeseries", - timeseries_api_models.TimeseriesData, - params={ - "appliance": appliance_uuid, - "start_datetime": start.isoformat(), - "end_datetime": end.isoformat(), - "variables": ",".join(str(v.uuid) for v in variables), - }, - ) - - timeseries_data = _get_timeseries_data_from_pages(pages) - if timeseries_data is None: - return None - - return timeseries_data.to_user_model( - start=start, - end=end, - variables=variables, - ) + return self._get_timeseries(start, end, variables) def get_timeseries_with_resampling( self, @@ -217,7 +318,7 @@ def get_timeseries_with_resampling( """Get resampled timeseries data of :ref:`variables ` for a given time frame. Timeseries data for multiple variables can be requested at once. However, all - variables must belong to the same appliance. + variables must belong to the same machine. You should always pass :ref:`timezone-aware datetime ` objects to this method! 
If you don't, naive @@ -245,55 +346,14 @@ def get_timeseries_with_resampling( request """ # noqa: E501 - variables_sequence = [] - variables_query_parameter_list = [] - for variable, resampling_method in variables.items(): - variables_sequence.append(variable) - variables_query_parameter_list.append( - f"{variable.uuid}" - f"{VARIABLE_UUID_AND_RESAMPLING_METHOD_SEPARATOR}" - f"{resampling_method.value}" - ) - - validate_resampling_method_for_data_type( - resampling_method, variable.data_type - ) - - start, end, appliance_uuid = validate_timeseries_arguments( - start, - end, - variables_sequence, - ) - validate_resampling_interval(resampling_interval) - - pages = self._timeseries_api_client.get_paginated( - "timeseries", - timeseries_api_models.TimeseriesData, - params={ - "appliance": appliance_uuid, - "start_datetime": start.isoformat(), - "end_datetime": end.isoformat(), - "variables": ",".join(variables_query_parameter_list), - "resampling_interval": resampling_interval, - }, - ) - - timeseries_data = _get_timeseries_data_from_pages(pages) - if timeseries_data is None: - return None - - return timeseries_data.to_user_model( - start=start, - end=end, - variables=variables_sequence, - ) + return self._get_timeseries(start, end, variables, resampling_interval) def _get_production_runs( self, *, production_order: Optional[str] = None, product: Optional[str] = None, - appliance: Optional[UUID] = None, + machine: Optional[UUID] = None, start: Optional[datetime] = None, end: Optional[datetime] = None, ) -> Iterator[ProductionRun]: @@ -302,7 +362,7 @@ def _get_production_runs( filters = { "production_order": production_order, "product": product, - "appliance": appliance, + "appliance": machine, "start": start.isoformat() if start else None, "end": end.isoformat() if end else None, } @@ -316,13 +376,13 @@ def get_production_runs( *, production_order: Optional[str] = None, product: Optional[user_models.Product | str] = None, - appliance: Optional[user_models.Appliance] = 
None, + machine: Optional[user_models.Machine] = None, start: Optional[datetime] = None, end: Optional[datetime] = None, ) -> user_models.ProductionRuns: """Retrieve optionally filtered list of :ref:`production runs `. - :param appliance: The appliance for which to get all production runs. + :param machine: The machine for which to get all production runs. :param product: Filter production runs by product. :param production_order: Filter production runs by production order. @@ -344,12 +404,12 @@ def get_production_runs( product_filter = ( product.code if isinstance(product, user_models.Product) else product ) - appliances_by_uuid = {a.uuid: a for a in self.get_appliances()} + machines_by_uuid = {a.uuid: a for a in self.get_machines()} return user_models.ProductionRuns( [ - production_run.to_user_model(appliances_by_uuid) + production_run.to_user_model(machines_by_uuid) for production_run in self._get_production_runs( - appliance=appliance.uuid if appliance else None, + machine=machine.uuid if machine else None, production_order=production_order, product=product_filter, start=start, diff --git a/src/enlyze/constants.py b/src/enlyze/constants.py index 90c8eaf..1989dbe 100644 --- a/src/enlyze/constants.py +++ b/src/enlyze/constants.py @@ -9,7 +9,7 @@ #: HTTP timeout for requests to the Timeseries API. #: -#: Reference: https://www.python-httpx.org/advanced/#timeout-configuration +#: Reference: https://www.python-httpx.org/advanced/timeouts/ HTTPX_TIMEOUT = 30.0 #: The separator to use when to separate the variable UUID and the resampling method @@ -18,3 +18,10 @@ #: The minimum allowed resampling interval when resampling timeseries data. MINIMUM_RESAMPLING_INTERVAL = 10 + +#: The maximum number of variables that can be used in a single request when querying +#: timeseries data. 
+MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST = 100 + +#: The user agent that the SDK identifies itself as when making HTTP requests +USER_AGENT = "enlyze-python" diff --git a/src/enlyze/iterable_tools.py b/src/enlyze/iterable_tools.py new file mode 100644 index 0000000..64ca17c --- /dev/null +++ b/src/enlyze/iterable_tools.py @@ -0,0 +1,12 @@ +from typing import Iterable, Sequence, TypeVar + +MINIMUM_CHUNK_SIZE = 1 + +T = TypeVar("T") + + +def chunk(seq: Sequence[T], chunk_size: int) -> Iterable[Sequence[T]]: + if chunk_size < MINIMUM_CHUNK_SIZE: + raise ValueError(f"{chunk_size=} is less than {MINIMUM_CHUNK_SIZE=}") + + return (seq[i : i + chunk_size] for i in range(0, len(seq), chunk_size)) diff --git a/src/enlyze/models.py b/src/enlyze/models.py index a47d6a5..637b294 100644 --- a/src/enlyze/models.py +++ b/src/enlyze/models.py @@ -7,6 +7,8 @@ import pandas +from enlyze.schema import dataframe_ensure_schema + @dataclass(frozen=True) class Site: @@ -26,23 +28,23 @@ class Site: @dataclass(frozen=True) -class Appliance: - """Representation of an :ref:`appliance ` in the ENLYZE platform. +class Machine: + """Representation of a :ref:`machine ` in the ENLYZE platform. - Contains details about the appliance. + Contains details about the machine. """ - #: Stable identifier of the appliance. + #: Stable identifier of the machine. uuid: UUID - #: Display name of the appliance. + #: Display name of the machine. display_name: str - #: The date when the appliance has been connected to the ENLYZE platform. + #: The date when the machine has been connected to the ENLYZE platform. genesis_date: date - #: The site where the appliance is located. + #: The site where the machine is located. site: Site @@ -99,8 +101,8 @@ class Variable: #: The underlying data type of the variable. data_type: VariableDataType - #: The appliance on which this variable is read out. - appliance: Appliance + #: The machine on which this variable is read out. 
+ machine: Machine @dataclass(frozen=True) @@ -233,7 +235,7 @@ class Quantity: @dataclass(frozen=True) class Product: - """Representation of a product that is produced on an appliance""" + """Representation of a product that is produced on a machine""" #: The identifier of the product code: str @@ -253,8 +255,8 @@ class ProductionRun: #: The UUID of the production run uuid: UUID - #: The appliance the production run was executed on. - appliance: Appliance + #: The machine the production run was executed on. + machine: Machine #: The average throughput of the production run excluding downtimes. average_throughput: Optional[float] @@ -280,10 +282,10 @@ class ProductionRun: #: The amount of product produced that can be sold. quantity_yield: Optional[Quantity] - #: OEE component that reflects when the appliance did not produce. + #: OEE component that reflects when the machine did not produce. availability: Optional[OEEComponent] - #: OEE component that reflects how fast the appliance has run. + #: OEE component that reflects how fast the machine has run. performance: Optional[OEEComponent] #: OEE component that reflects how much defects have been produced. @@ -303,13 +305,15 @@ def to_dataframe(self) -> pandas.DataFrame: ``end`` of every production run will be represented as :ref:`timezone-aware ` :py:class:`datetime.datetime` localized in UTC. - :returns: DataFrame with production runs - + :returns: DataFrame with production runs. """ if not self: return pandas.DataFrame() - df = pandas.json_normalize([asdict(run) for run in self]) + path_separator = "." 
+ + df = pandas.json_normalize([asdict(run) for run in self], sep=path_separator) df.start = pandas.to_datetime(df.start, utc=True, format="ISO8601") df.end = pandas.to_datetime(df.end, utc=True, format="ISO8601") - return df + + return dataframe_ensure_schema(df, ProductionRun, path_separator=path_separator) diff --git a/src/enlyze/schema.py b/src/enlyze/schema.py new file mode 100644 index 0000000..ebd31a9 --- /dev/null +++ b/src/enlyze/schema.py @@ -0,0 +1,62 @@ +import typing +from dataclasses import is_dataclass +from types import UnionType +from typing import Any, Protocol + +import pandas + + +class DataclassTypeOrInstance(Protocol): + __dataclass_fields__: dict[str, Any] + + +def _flat_dataclass_schema( + dataclass_obj_or_type: DataclassTypeOrInstance, + path_separator: str, + _parent_path: list[str] = [], +) -> list[str]: + """Derive flat schema of potentially nested dataclass ``dataclass_obj_or_type``""" + + flat: list[str] = [] + + for field, typ in typing.get_type_hints(dataclass_obj_or_type).items(): + current_path = _parent_path + [field] + field_types = (typ,) + + # expand union types (includes typing.Optional) + origin_type = typing.get_origin(typ) + if origin_type is UnionType or origin_type is typing.Union: + field_types = typing.get_args(typ) + + for field_type in field_types: + if is_dataclass(field_type): + flat.extend( + _flat_dataclass_schema(field_type, path_separator, current_path) + ) + elif field_type is not type(None): + flat.append(path_separator.join(current_path)) + + # dedupe while preserving order + return list(dict.fromkeys(flat)) + + +def dataframe_ensure_schema( + df: pandas.DataFrame, + dataclass_obj_or_type: DataclassTypeOrInstance, + path_separator: str = ".", +) -> pandas.DataFrame: + """Add missing columns to ``df`` based on flattened dataclass schema""" + + flat_schema = _flat_dataclass_schema( + dataclass_obj_or_type, + path_separator=path_separator, + ) + + add_colums = set(flat_schema) - set(df.columns) + remove_columns 
= set(df.columns) - set(flat_schema) + + return df.assign( + **{col: None for col in add_colums}, + ).drop( + columns=list(remove_columns), + ) diff --git a/src/enlyze/validators.py b/src/enlyze/validators.py index dd95fde..50bb992 100644 --- a/src/enlyze/validators.py +++ b/src/enlyze/validators.py @@ -56,14 +56,14 @@ def validate_timeseries_arguments( start, end = validate_start_and_end(start, end) - appliance_uuids = frozenset(v.appliance.uuid for v in variables) + machine_uuids = frozenset(v.machine.uuid for v in variables) - if len(appliance_uuids) != 1: + if len(machine_uuids) != 1: raise EnlyzeError( - "Cannot request timeseries data for more than one appliance per request." + "Cannot request timeseries data for more than one machine per request." ) - return start, end, str(next(iter(appliance_uuids))) + return start, end, str(next(iter(machine_uuids))) def validate_resampling_interval( diff --git a/tests/conftest.py b/tests/conftest.py index 3cb10a2..2129782 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,22 @@ hypothesis.settings.register_profile("ci", deadline=None) hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default")) +# https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations +PANDAS_MIN_DATETIME = datetime(1677, 9, 21, 0, 12, 44) +PANDAS_MAX_DATETIME = datetime(2262, 4, 11, 23, 47, 16) + +# https://github.com/python/cpython/issues/94414 +WINDOWS_MIN_DATETIME = datetime(1970, 1, 2, 1, 0, 0) +WINDOWS_MAX_DATETIME = datetime(3001, 1, 19, 7, 59, 59) + +st.register_type_strategy( + datetime, + st.datetimes( + min_value=max(PANDAS_MIN_DATETIME, WINDOWS_MIN_DATETIME), + max_value=min(PANDAS_MAX_DATETIME, WINDOWS_MAX_DATETIME), + ), +) + datetime_today_until_now_strategy = st.datetimes( min_value=datetime.now().replace(hour=0), max_value=datetime.now(), diff --git a/tests/enlyze/api_clients/test_base.py b/tests/enlyze/api_clients/test_base.py index 922db32..897cd81 100644 --- 
a/tests/enlyze/api_clients/test_base.py +++ b/tests/enlyze/api_clients/test_base.py @@ -7,11 +7,15 @@ from hypothesis import HealthCheck, given, settings from hypothesis import strategies as st +from enlyze._version import VERSION from enlyze.api_clients.base import ( + USER_AGENT_NAME_VERSION_SEPARATOR, ApiBaseClient, ApiBaseModel, PaginatedResponseBaseModel, + _construct_user_agent, ) +from enlyze.constants import USER_AGENT from enlyze.errors import EnlyzeError, InvalidTokenError @@ -83,6 +87,46 @@ def base_client(auth_token, string_model, base_url): yield client +@pytest.fixture +def custom_user_agent(): + return "custom-user-agent" + + +@pytest.fixture +def custom_user_agent_version(): + return "3.4.5" + + +class TestConstructUserAgent: + def test__construct_user_agent_with_defaults(self): + ua, version = _construct_user_agent().split(USER_AGENT_NAME_VERSION_SEPARATOR) + assert ua == USER_AGENT + assert version == VERSION + + def test__construct_user_agent_custom_agent(self, custom_user_agent): + ua, version = _construct_user_agent(user_agent=custom_user_agent).split( + USER_AGENT_NAME_VERSION_SEPARATOR + ) + assert ua == custom_user_agent + assert version == VERSION + + def test__construct_user_agent_custom_version(self, custom_user_agent_version): + ua, version = _construct_user_agent(version=custom_user_agent_version).split( + USER_AGENT_NAME_VERSION_SEPARATOR + ) + assert ua == USER_AGENT + assert version == custom_user_agent_version + + def test__construct_user_agent_custom_agent_and_version( + self, custom_user_agent, custom_user_agent_version + ): + ua, version = _construct_user_agent( + user_agent=custom_user_agent, version=custom_user_agent_version + ).split(USER_AGENT_NAME_VERSION_SEPARATOR) + assert ua == custom_user_agent + assert version == custom_user_agent_version + + @settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) @given( token=st.text(string.printable, min_size=1), diff --git 
a/tests/enlyze/api_clients/timeseries/test_models.py b/tests/enlyze/api_clients/timeseries/test_models.py new file mode 100644 index 0000000..7e6683c --- /dev/null +++ b/tests/enlyze/api_clients/timeseries/test_models.py @@ -0,0 +1,113 @@ +import itertools +import random +from datetime import datetime, timedelta, timezone + +import pytest + +from enlyze.api_clients.timeseries.models import TimeseriesData + +# We use this to skip columns that contain the timestamp assuming +# it starts at the beginning of the sequence. We also use it +# when computing lengths to account for a timestamp column. +TIMESTAMP_OFFSET = 1 +NOW = datetime.now(tz=timezone.utc) + + +def _generate_timeseries_data(*, columns, number_of_records): + timeseries_columns = ["time"] + timeseries_columns.extend(columns) + + counter = itertools.count(start=10) + + return TimeseriesData( + columns=timeseries_columns, + records=[ + [ + (NOW - timedelta(minutes=next(counter))).isoformat(), + *[random.randint(1, 100) for _ in range(len(columns))], + ] + for _ in range(number_of_records) + ], + ) + + +class TestTimeseriesData: + @pytest.mark.parametrize( + "data_parameters,data_to_merge_parameters", + [ + ( + {"columns": ["var1", "var2"], "number_of_records": 1}, + {"columns": ["var3"], "number_of_records": 1}, + ), + ( + {"columns": ["var1", "var2"], "number_of_records": 1}, + {"columns": ["var3"], "number_of_records": 3}, + ), + ], + ) + def test_merge(self, data_parameters, data_to_merge_parameters): + data = _generate_timeseries_data(**data_parameters) + data_to_merge = _generate_timeseries_data(**data_to_merge_parameters) + data_records_len = len(data.records) + data_columns_len = len(data.columns) + data_to_merge_columns_len = len(data_to_merge.columns) + expected_merged_record_len = len(data.records[0]) + len( + data_to_merge.records[0][TIMESTAMP_OFFSET:] + ) + + merged = data.merge(data_to_merge) + + assert merged is data + assert len(merged.records) == data_records_len + assert ( + 
len(merged.columns) + == data_columns_len + data_to_merge_columns_len - TIMESTAMP_OFFSET + ) + + for r in merged.records: + assert len(r) == expected_merged_record_len == len(merged.columns) + + @pytest.mark.parametrize( + "data_parameters,data_to_merge_parameters", + [ + ( + {"columns": ["var1", "var2"], "number_of_records": 2}, + {"columns": ["var3"], "number_of_records": 1}, + ), + ], + ) + def test_merge_raises_number_of_records_to_merge_less_than_existing( + self, data_parameters, data_to_merge_parameters + ): + data = _generate_timeseries_data(**data_parameters) + data_to_merge = _generate_timeseries_data(**data_to_merge_parameters) + + with pytest.raises( + ValueError, + match=( + "The instance to merge must have a number of" + " records greater than or equal to the number" + " of records of the instance you're trying to merge into." + ), + ): + data.merge(data_to_merge) + + @pytest.mark.parametrize( + "data_parameters,data_to_merge_parameters", + [ + ( + {"columns": ["var1", "var2"], "number_of_records": 1}, + {"columns": ["var3"], "number_of_records": 1}, + ), + ], + ) + def test_merge_raises_mismatched_timestamps( + self, data_parameters, data_to_merge_parameters + ): + data = _generate_timeseries_data(**data_parameters) + data_to_merge = _generate_timeseries_data(**data_to_merge_parameters) + + data_to_merge.records[0][0] = (NOW - timedelta(days=1)).isoformat() + + with pytest.raises(ValueError, match="mismatched timestamps"): + data.merge(data_to_merge) diff --git a/tests/enlyze/test_client.py b/tests/enlyze/test_client.py index 4c3e1aa..9bfe547 100644 --- a/tests/enlyze/test_client.py +++ b/tests/enlyze/test_client.py @@ -23,10 +23,11 @@ from enlyze.client import EnlyzeClient from enlyze.constants import ( ENLYZE_BASE_URL, + MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST, PRODUCTION_RUNS_API_SUB_PATH, TIMESERIES_API_SUB_PATH, ) -from enlyze.errors import EnlyzeError +from enlyze.errors import EnlyzeError, ResamplingValidationError from 
tests.conftest import ( datetime_before_today_strategy, datetime_today_until_now_strategy, @@ -34,7 +35,7 @@ MOCK_RESPONSE_HEADERS = {"Content-Type": "application/json"} -APPLIANCE_UUID = "ebef7e5a-5921-4cf3-9a52-7ff0e98e8306" +MACHINE_UUID = "ebef7e5a-5921-4cf3-9a52-7ff0e98e8306" PRODUCT_CODE = "product-code" PRODUCTION_ORDER = "production-order" SITE_ID = 1 @@ -62,7 +63,7 @@ start=datetime_before_today_strategy, end=datetime_today_until_now_strategy, appliance=st.builds( - production_runs_api_models.Appliance, uuid=st.just(APPLIANCE_UUID) + production_runs_api_models.Machine, uuid=st.just(MACHINE_UUID) ), product=st.builds( production_runs_api_models.Product, @@ -142,58 +143,58 @@ def test_get_sites(site1, site2): @given( site1=st.builds(timeseries_api_models.Site, id=st.just(1)), site2=st.builds(timeseries_api_models.Site, id=st.just(2)), - appliance1=st.builds(timeseries_api_models.Appliance, site=st.just(1)), - appliance2=st.builds(timeseries_api_models.Appliance, site=st.just(2)), + machine1=st.builds(timeseries_api_models.Machine, site=st.just(1)), + machine2=st.builds(timeseries_api_models.Machine, site=st.just(2)), ) -def test_get_appliances(site1, site2, appliance1, appliance2): +def test_get_machines(site1, site2, machine1, machine2): client = make_client() with respx_mock_with_base_url(TIMESERIES_API_SUB_PATH) as mock: mock.get("appliances").mock( - PaginatedTimeseriesApiResponse(data=[appliance1, appliance2]) + PaginatedTimeseriesApiResponse(data=[machine1, machine2]) ) mock.get("sites").mock(PaginatedTimeseriesApiResponse(data=[site1, site2])) - all_appliances = client.get_appliances() - assert all_appliances == [ - appliance1.to_user_model(site1.to_user_model()), - appliance2.to_user_model(site2.to_user_model()), + all_machines = client.get_machines() + assert all_machines == [ + machine1.to_user_model(site1.to_user_model()), + machine2.to_user_model(site2.to_user_model()), ] - appliances_site2 = client.get_appliances(site2.to_user_model()) - assert 
appliances_site2 == [ - appliance2.to_user_model(site2.to_user_model()), + machines_site2 = client.get_machines(site2.to_user_model()) + assert machines_site2 == [ + machine2.to_user_model(site2.to_user_model()), ] @given( - appliance=st.builds(timeseries_api_models.Appliance), + machine=st.builds(timeseries_api_models.Machine), ) -def test_get_appliances_site_not_found(appliance): +def test_get_machines_site_not_found(machine): client = make_client() with respx_mock_with_base_url(TIMESERIES_API_SUB_PATH) as mock: mock.get("sites").mock(PaginatedTimeseriesApiResponse(data=[])) - mock.get("appliances").mock(PaginatedTimeseriesApiResponse(data=[appliance])) + mock.get("appliances").mock(PaginatedTimeseriesApiResponse(data=[machine])) - assert client.get_appliances() == [] + assert client.get_machines() == [] @given( - appliance=st.builds(user_models.Appliance), + machine=st.builds(user_models.Machine), var1=st.builds(timeseries_api_models.Variable), var2=st.builds(timeseries_api_models.Variable), ) -def test_get_variables(appliance, var1, var2): +def test_get_variables(machine, var1, var2): client = make_client() with respx_mock_with_base_url(TIMESERIES_API_SUB_PATH) as mock: mock.get("variables").mock(PaginatedTimeseriesApiResponse(data=[var1, var2])) - variables = client.get_variables(appliance) + variables = client.get_variables(machine) assert variables == [ - var1.to_user_model(appliance), - var2.to_user_model(appliance), + var1.to_user_model(machine), + var2.to_user_model(machine), ] @@ -335,6 +336,74 @@ def test_get_timeseries_returns_none_on_empty_response( ) +@given( + data_strategy=st.data(), + records=st.lists( + st.tuples( + datetime_today_until_now_strategy.map(datetime.isoformat), + st.integers(), + ), + min_size=2, + max_size=5, + ), + machine=st.builds(timeseries_api_models.Machine, uuid=st.just(MACHINE_UUID)), +) +@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) +def test__get_timeseries_raises_on_mixed_response( + 
data_strategy, + start_datetime, + end_datetime, + records, + machine, +): + """ + Tests that an `EnlyzeError` is raised if the timeseries API returns + data for some of the variables but not all of them. + """ + client = make_client() + variables = data_strategy.draw( + st.lists( + st.builds( + user_models.Variable, + data_type=st.just("INTEGER"), + machine=st.just(machine), + ), + min_size=MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST + 1, + max_size=MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST + 5, + ) + ) + + with respx_mock_with_base_url(TIMESERIES_API_SUB_PATH) as mock: + mock.get("timeseries").mock( + side_effect=[ + PaginatedTimeseriesApiResponse( + data=timeseries_api_models.TimeseriesData( + columns=[ + "time", + *[ + str(variable.uuid) + for variable in variables[ + :MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST + ] + ], + ], + records=records, + ).model_dump(), + ), + PaginatedTimeseriesApiResponse( + data=timeseries_api_models.TimeseriesData( + columns=[], + records=[], + ).model_dump(), + ), + ] + ) + with pytest.raises( + EnlyzeError, match="didn't return data for some of the variables" + ): + client._get_timeseries(start_datetime, end_datetime, variables) + + def test_get_timeseries_raises_no_variables(start_datetime, end_datetime): client = make_client() with pytest.raises(EnlyzeError, match="at least one variable"): @@ -354,17 +423,17 @@ def test_get_timeseries_raises_invalid_time_bounds(variable): @given( - # we rely on variable{1,2}.appliance.uuid to be different because they are random + # we rely on variable{1,2}.machine.uuid to be different because they are random variable1=st.builds(user_models.Variable), variable2=st.builds(user_models.Variable), ) @settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) -def test_get_timeseries_raises_variables_of_different_appliances( +def test_get_timeseries_raises_variables_of_different_machines( variable1, variable2, start_datetime, end_datetime ): client = make_client() - 
with pytest.raises(EnlyzeError, match="for more than one appliance"): + with pytest.raises(EnlyzeError, match="for more than one machine"): client.get_timeseries(start_datetime, end_datetime, [variable1, variable2]) @@ -390,16 +459,89 @@ def test_get_timeseries_raises_api_returned_no_timestamps( client.get_timeseries(start_datetime, end_datetime, [variable]) +@given( + variable=st.builds(user_models.Variable), +) +@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) +def test__get_timeseries_raises_variables_without_resampling_method( + start_datetime, end_datetime, variable +): + """ + Test that `get_timeseries` will raise an `EnlyzeError` when a + `resampling_interval` is specified but variables don't have + resampling methods. + """ + client = make_client() + with pytest.raises(ResamplingValidationError): + client._get_timeseries(start_datetime, end_datetime, [variable], 30) + + +@given( + variable=st.builds(user_models.Variable), +) +@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) +def test__get_timeseries_raises_on_chunk_value_error( + start_datetime, end_datetime, variable, monkeypatch +): + monkeypatch.setattr( + "enlyze.client.MAXIMUM_NUMBER_OF_VARIABLES_PER_TIMESERIES_REQUEST", 0 + ) + client = make_client() + with pytest.raises(EnlyzeError) as exc_info: + client._get_timeseries(start_datetime, end_datetime, [variable]) + assert isinstance(exc_info.value.__cause__, ValueError) + + +@given( + start=datetime_before_today_strategy, + end=datetime_today_until_now_strategy, + variable=st.builds( + user_models.Variable, + data_type=st.just("INTEGER"), + machine=st.builds(timeseries_api_models.Machine), + ), + records=st.lists( + st.tuples( + datetime_today_until_now_strategy.map(datetime.isoformat), + st.integers(), + ), + min_size=2, + ), +) +@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) +def test__get_timeseries_raises_on_merge_value_error( + start, end, variable, records, monkeypatch +): + 
client = make_client() + + def f(*args, **kwargs): + raise ValueError + + monkeypatch.setattr("enlyze.client.reduce", f) + + with respx_mock_with_base_url(TIMESERIES_API_SUB_PATH) as mock: + mock.get("timeseries").mock( + PaginatedTimeseriesApiResponse( + data=timeseries_api_models.TimeseriesData( + columns=["time", str(variable.uuid)], + records=records, + ).model_dump() + ) + ) + with pytest.raises(EnlyzeError): + client._get_timeseries(start, end, [variable]) + + @given( production_order=st.just(PRODUCTION_ORDER), product=st.one_of( st.builds(user_models.Product, code=st.just(PRODUCT_CODE)), st.text(), ), - appliance=st.builds( - timeseries_api_models.Appliance, + machine=st.builds( + timeseries_api_models.Machine, site=st.just(SITE_ID), - uuid=st.just(APPLIANCE_UUID), + uuid=st.just(MACHINE_UUID), ), site=st.builds(timeseries_api_models.Site, id=st.just(SITE_ID)), start=st.one_of(datetime_before_today_strategy, st.none()), @@ -409,7 +551,7 @@ def test_get_timeseries_raises_api_returned_no_timestamps( def test_get_production_runs( production_order, product, - appliance, + machine, site, start, end, @@ -418,37 +560,38 @@ def test_get_production_runs( client = make_client() site_user_model = site.to_user_model() - appliance_user_model = appliance.to_user_model(site_user_model) - appliances_by_uuid = {appliance.uuid: appliance_user_model} - - with respx_mock_with_base_url( - TIMESERIES_API_SUB_PATH - ) as timeseries_api_mock, respx_mock_with_base_url( - PRODUCTION_RUNS_API_SUB_PATH - ) as production_runs_api_mock: + machine_user_model = machine.to_user_model(site_user_model) + machines_by_uuid = {machine.uuid: machine_user_model} + + with ( + respx_mock_with_base_url(TIMESERIES_API_SUB_PATH) as timeseries_api_mock, + respx_mock_with_base_url( + PRODUCTION_RUNS_API_SUB_PATH + ) as production_runs_api_mock, + ): timeseries_api_mock.get("appliances").mock( - PaginatedTimeseriesApiResponse(data=[appliance]) + PaginatedTimeseriesApiResponse(data=[machine]) ) 
timeseries_api_mock.get("sites").mock( PaginatedTimeseriesApiResponse(data=[site]) ) production_runs_api_mock.get("production-runs").mock( PaginatedProductionRunsApiResponse( - data=[p.model_dump() for p in production_runs] + data=[p.model_dump(by_alias=True) for p in production_runs] ) ) result = client.get_production_runs( production_order=production_order, product=product, - appliance=appliance_user_model, + machine=machine_user_model, start=start, end=end, ) assert ( user_models.ProductionRuns( - [pr.to_user_model(appliances_by_uuid) for pr in production_runs] + [pr.to_user_model(machines_by_uuid) for pr in production_runs] ) == result ) diff --git a/tests/enlyze/test_iterable_tools.py b/tests/enlyze/test_iterable_tools.py new file mode 100644 index 0000000..eb9f034 --- /dev/null +++ b/tests/enlyze/test_iterable_tools.py @@ -0,0 +1,26 @@ +from typing import Sequence + +import pytest +from hypothesis import given +from hypothesis.strategies import integers, lists + +from enlyze.iterable_tools import MINIMUM_CHUNK_SIZE, chunk + + +@given( + seq=lists(integers()), + chunk_size=integers(min_value=MINIMUM_CHUNK_SIZE), +) +def test_chunk(seq: Sequence[int], chunk_size: int): + result = list(chunk(seq, chunk_size)) + assert sum(len(sublist) for sublist in result) == len(seq) + assert all(len(sublist) <= chunk_size for sublist in result) + + +@given( + seq=lists(integers()), + chunk_size=integers(max_value=MINIMUM_CHUNK_SIZE - 1), +) +def test_chunk_raises_invalid_chunk_size(seq: Sequence[int], chunk_size: int): + with pytest.raises(ValueError): + chunk(seq, chunk_size) diff --git a/tests/enlyze/test_models.py b/tests/enlyze/test_models.py new file mode 100644 index 0000000..3bba305 --- /dev/null +++ b/tests/enlyze/test_models.py @@ -0,0 +1,30 @@ +from dataclasses import replace + +import hypothesis.strategies as st +from hypothesis import given + +from enlyze.models import ProductionRun, ProductionRuns + + +@given(runs=st.lists(st.from_type(ProductionRun), 
max_size=10)) +def test_production_runs_to_dataframe(runs: list[ProductionRun]): + runs = ProductionRuns(runs) + runs.to_dataframe() + + +@given(run=st.from_type(ProductionRun)) +def test_production_runs_to_dataframe_no_empty_columns_for_optional_dataclasses( + run: ProductionRun, +): + df = ProductionRuns( + [ + replace( + run, + average_throughput=None, + quantity_total=None, + ) + ] + ).to_dataframe() + + assert "quantity_total" not in df.columns + assert "average_throughput" in df.columns diff --git a/tests/enlyze/test_schema.py b/tests/enlyze/test_schema.py new file mode 100644 index 0000000..4137d31 --- /dev/null +++ b/tests/enlyze/test_schema.py @@ -0,0 +1,32 @@ +from dataclasses import dataclass + +import pandas + +from enlyze.schema import dataframe_ensure_schema + + +@dataclass +class Some: + a: int + + +@dataclass +class Thing: + number: int + maybe_string: str | None + maybe_some: Some | None + multiple_but_required: float | str | Some + + +def test_dataframe_ensure_schema(): + df = pandas.DataFrame() + + added_columns = set(dataframe_ensure_schema(df, Thing, path_separator="|").columns) + + assert added_columns == { + "number", + "maybe_string", + "maybe_some|a", + "multiple_but_required", + "multiple_but_required|a", + } diff --git a/tests/enlyze/test_validators.py b/tests/enlyze/test_validators.py index 132d9ac..6bc7859 100644 --- a/tests/enlyze/test_validators.py +++ b/tests/enlyze/test_validators.py @@ -69,12 +69,10 @@ class TestValidateTimeseriesArguments: variable=VARIABLE_STRATEGY, ) def test_validate_timeseries_arguments(self, start, end, variable): - start, end, appliance_uuid = validate_timeseries_arguments( - start, end, [variable] - ) + start, end, machine_uuid = validate_timeseries_arguments(start, end, [variable]) assert start assert end - assert UUID(appliance_uuid) + assert UUID(machine_uuid) @given(variable=VARIABLE_STRATEGY) def test_validate_start_must_be_earlier_than_end(self, variable): @@ -97,7 +95,7 @@ def 
test_empty_variables(self, start, end): variable1=VARIABLE_STRATEGY, variable2=VARIABLE_STRATEGY, ) - def test_variables_with_different_appliance(self, start, end, variable1, variable2): + def test_variables_with_different_machine(self, start, end, variable1, variable2): with pytest.raises(EnlyzeError): validate_timeseries_arguments(start, end, [variable1, variable2])