From 0c10924a2ccfca38481342cfecf113614ae04d5f Mon Sep 17 00:00:00 2001 From: tmichela Date: Fri, 13 Sep 2024 14:56:46 +0200 Subject: [PATCH 1/5] get techniques information from mymdc --- damnit/ctxsupport/ctxrunner.py | 96 ++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/damnit/ctxsupport/ctxrunner.py b/damnit/ctxsupport/ctxrunner.py index 0253751b..7f61ce85 100644 --- a/damnit/ctxsupport/ctxrunner.py +++ b/damnit/ctxsupport/ctxrunner.py @@ -17,8 +17,10 @@ import traceback from datetime import timezone from enum import Enum +from functools import wraps from graphlib import CycleError, TopologicalSorter from pathlib import Path +from typing import Any from unittest.mock import MagicMock import extra_data @@ -70,45 +72,61 @@ def __init__(self, proposal, timeout=10, init_server="https://exfldadev01.desy.d self._headers = { "X-API-key": self.token } - def _run_info(self, run): - key = (run, "run_info") - if key not in self._cache: - response = requests.get(f"{self.server}/api/mymdc/proposals/by_number/{self.proposal}/runs/{run}", - headers=self._headers, timeout=self.timeout) - response.raise_for_status() - json = response.json() - if len(json["runs"]) == 0: - raise RuntimeError(f"Couldn't get run information from mymdc for p{self.proposal}, r{run}") - - self._cache[key] = json["runs"][0] - - return self._cache[key] - - def sample_name(self, run): - key = (run, "sample_name") - if key not in self._cache: - run_info = self._run_info(run) - sample_id = run_info["sample_id"] - response = requests.get(f"{self.server}/api/mymdc/samples/{sample_id}", - headers=self._headers, timeout=self.timeout) - response.raise_for_status() - - self._cache[key] = response.json()["name"] - - return self._cache[key] - - def run_type(self, run): - key = (run, "run_type") - if key not in self._cache: - run_info = self._run_info(run) - experiment_id = run_info["experiment_id"] - response = 
requests.get(f"{self.server}/api/mymdc/experiments/{experiment_id}", - headers=self._headers, timeout=self.timeout) - response.raise_for_status() - - self._cache[key] = response.json()["name"] - - return self._cache[key] + @staticmethod + def _cache(func): + @wraps(func) + def wrapper(self, run): + key = (run, func.__name__) + if key in self._cache: + return self._cache[key] + self._cache[key] = func(self, run) + return self._cache[key] + return wrapper + + @_cache + def _run_info(self, run: int) -> dict[str, Any]: + response = requests.get(f"{self.server}/api/mymdc/proposals/by_number/{self.proposal}/runs/{run}", + headers=self._headers, timeout=self.timeout) + response.raise_for_status() + json = response.json() + if len(json["runs"]) == 0: + raise RuntimeError(f"Couldn't get run information from mymdc for p{self.proposal}, r{run}") + + return json["runs"][0] + + @_cache + def _techniques_info(self, run: int) -> dict[str, Any]: + run_info = self._run_info(run) + response = requests.get(f'{self.server}/api/mymed/runs/{run_info["id"]}', + headers=self._headers, timeout=self.timeout) + response.raise_for_status() + return response.json()['techniques'] + + @_cache + def sample_name(self, run: int) -> str: + run_info = self._run_info(run) + sample_id = run_info["sample_id"] + response = requests.get(f"{self.server}/api/mymdc/samples/{sample_id}", + headers=self._headers, timeout=self.timeout) + response.raise_for_status() + + return response.json()["name"] + + @_cache + def run_type(self, run: int) -> str: + run_info = self._run_info(run) + experiment_id = run_info["experiment_id"] + response = requests.get(f"{self.server}/api/mymdc/experiments/{experiment_id}", + headers=self._headers, timeout=self.timeout) + response.raise_for_status() + + return response.json()["name"] + + def techniques_name(self, run: int) -> str: + return ', '.join(t['name'] for t in self._techniques_info(run)) + + def techniques_identifier(self, run: int) -> str: + return ', 
'.join(t['indentifier'] for t in self._techniques_info(run)) class ContextFileErrors(RuntimeError): From 3494545c5d49ca3e29e4324d3f3e0e035d52815c Mon Sep 17 00:00:00 2001 From: tmichela Date: Mon, 16 Sep 2024 14:34:13 +0200 Subject: [PATCH 2/5] add test for new metadata --- damnit/ctxsupport/ctxrunner.py | 14 +++++--------- tests/test_backend.py | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/damnit/ctxsupport/ctxrunner.py b/damnit/ctxsupport/ctxrunner.py index 7f61ce85..ad1bfd9a 100644 --- a/damnit/ctxsupport/ctxrunner.py +++ b/damnit/ctxsupport/ctxrunner.py @@ -97,7 +97,7 @@ def _run_info(self, run: int) -> dict[str, Any]: @_cache def _techniques_info(self, run: int) -> dict[str, Any]: run_info = self._run_info(run) - response = requests.get(f'{self.server}/api/mymed/runs/{run_info["id"]}', + response = requests.get(f'{self.server}/api/mymdc/runs/{run_info["id"]}', headers=self._headers, timeout=self.timeout) response.raise_for_status() return response.json()['techniques'] @@ -126,7 +126,7 @@ def techniques_name(self, run: int) -> str: return ', '.join(t['name'] for t in self._techniques_info(run)) def techniques_identifier(self, run: int) -> str: - return ', '.join(t['indentifier'] for t in self._techniques_info(run)) + return ', '.join(t['identifier'] for t in self._techniques_info(run)) class ContextFileErrors(RuntimeError): @@ -191,7 +191,7 @@ def check(self): for name, var in self.vars.items(): mymdc_args = var.arg_dependencies("mymdc#") for arg_name, annotation in mymdc_args.items(): - if annotation not in ["sample_name", "run_type"]: + if annotation not in ["sample_name", "run_type", "techniques_name", "techniques_identifier"]: problems.append(f"Argument '{arg_name}' of variable '{name}' has an invalid MyMdC dependency: '{annotation}'") if problems: @@ -319,12 +319,8 @@ def execute(self, run_data, run_number, proposal, input_vars) -> 'Results': elif annotation.startswith("mymdc#"): if mymdc is None: mymdc = 
MyMetadataClient(proposal) - - mymdc_field = annotation.removeprefix("mymdc#") - if mymdc_field == "sample_name": - kwargs[arg_name] = mymdc.sample_name(run_number) - elif mymdc_field == "run_type": - kwargs[arg_name] = mymdc.run_type(run_number) + metadata = annotation.removeprefix('mymdc#') + kwargs[arg_name] = getattr(mymdc, metadata)(run_number) elif annotation == "meta#run_number": kwargs[arg_name] = run_number diff --git a/tests/test_backend.py b/tests/test_backend.py index 58714b66..a0cac8c1 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -398,6 +398,14 @@ def sample(run, x: "mymdc#sample_name"): @Variable(title="Run type") def run_type(run, x: "mymdc#run_type"): return x + + @Variable(title="Run Techniques ID") + def techniques_id(run, x: "mymdc#techniques_identifier"): + return x + + @Variable(title="Run Techniques name") + def techniques_name(run, x: "mymdc#techniques_name"): + return x """ mymdc_ctx = mkcontext(mymdc_code) @@ -416,11 +424,16 @@ def mock_get(url, headers, timeout): assert headers["X-API-key"] == "foo" if "proposals/by_number" in url: - result = dict(runs=[dict(sample_id=1, experiment_id=1)]) + result = dict(runs=[dict(id=1, sample_id=1, experiment_id=1)]) elif "samples" in url: result = dict(name="mithril") elif "experiments" in url: result = dict(name="alchemy") + elif "/runs/" in url: + result = {'techniques': [ + {'identifier': 'PaNET01168', 'name': 'SFX'}, + {'identifier': 'PaNET01188', 'name': 'SAXS'}, + ]} response = MagicMock() response.json.return_value = result @@ -430,8 +443,11 @@ def mock_get(url, headers, timeout): with patch.object(requests, "get", side_effect=mock_get), \ patch.object(ed.read_machinery, "find_proposal", return_value=tmp_path): results = results_create(mymdc_ctx) + assert results.cells["sample"].data == "mithril" assert results.cells["run_type"].data == "alchemy" + assert results.cells["techniques_id"].data == "PaNET01168, PaNET01188" + assert results.cells["techniques_name"].data == "SFX, 
SAXS" def test_return_bool(mock_run, tmp_path): From 6ac96d1f7f6f5dbe44acc32701ba013fef741024 Mon Sep 17 00:00:00 2001 From: tmichela Date: Tue, 17 Sep 2024 18:06:56 +0200 Subject: [PATCH 3/5] documentation update documentation --- docs/backend.md | 17 ++++++++++++++--- tests/test_backend.py | 12 ++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/docs/backend.md b/docs/backend.md index 271bb1b9..527d0705 100644 --- a/docs/backend.md +++ b/docs/backend.md @@ -76,16 +76,27 @@ Variable functions can return any of: The functions must always take in one argument, `run`, to which is passed a [`DataCollection`](https://extra-data.readthedocs.io/en/latest/reading_files.html#data-structure) -of the data in the run. In addition, a function can take some other special -arguments if they have the right _annotations_: +of the data in the run. In addition, a function can take some other special +arguments if they have the right _annotations_. +`meta` annotations provide internal arguments: - `meta#run_number`: The number of the current run being processed. - `meta#proposal`: The number of the current proposal. - `meta#proposal_dir`: The root [Path](https://docs.python.org/3/library/pathlib.html) to the current proposal. -- `mymdc#sample_name`: The sample name from myMdC. + +`mymdc` annotations request information from the EuXFEL data management portal +[myMdC](https://in.xfel.eu/metadata/): + - `mymdc#run_type`: The run type from myMdC. +- `mymdc#sample_name`: The sample name from myMdC. +- `mymdc#technique_identifiers`: list of + [technique](https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html) + identifiers associated with the run. +- `mymdc#technique_names`: list of + [technique](https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html) + names associated with the run. 
You can also use annotations to express a dependency between `Variable`'s using the `var#` annotation: diff --git a/tests/test_backend.py b/tests/test_backend.py index a0cac8c1..38dc9376 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -399,12 +399,12 @@ def sample(run, x: "mymdc#sample_name"): def run_type(run, x: "mymdc#run_type"): return x - @Variable(title="Run Techniques ID") - def techniques_id(run, x: "mymdc#techniques_identifier"): + @Variable(title="Run Technique IDs") + def technique_ids(run, x: "mymdc#technique_identifiers"): return x - @Variable(title="Run Techniques name") - def techniques_name(run, x: "mymdc#techniques_name"): + @Variable(title="Run Technique names") + def technique_names(run, x: "mymdc#technique_names"): return x """ mymdc_ctx = mkcontext(mymdc_code) @@ -446,8 +446,8 @@ def mock_get(url, headers, timeout): assert results.cells["sample"].data == "mithril" assert results.cells["run_type"].data == "alchemy" - assert results.cells["techniques_id"].data == "PaNET01168, PaNET01188" - assert results.cells["techniques_name"].data == "SFX, SAXS" + assert results.cells["technique_ids"].data == "PaNET01168, PaNET01188" + assert results.cells["technique_names"].data == "SFX, SAXS" def test_return_bool(mock_run, tmp_path): From 1078828e88b15aee10f91f1fc2818da0617a161c Mon Sep 17 00:00:00 2001 From: tmichela Date: Wed, 18 Sep 2024 10:15:56 +0200 Subject: [PATCH 4/5] fix method names in mymdcclient --- damnit/ctxsupport/ctxrunner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/damnit/ctxsupport/ctxrunner.py b/damnit/ctxsupport/ctxrunner.py index ad1bfd9a..721f21bb 100644 --- a/damnit/ctxsupport/ctxrunner.py +++ b/damnit/ctxsupport/ctxrunner.py @@ -122,10 +122,10 @@ def run_type(self, run: int) -> str: return response.json()["name"] - def techniques_name(self, run: int) -> str: + def technique_names(self, run: int) -> str: return ', '.join(t['name'] for t in self._techniques_info(run)) - def 
techniques_identifier(self, run: int) -> str: + def technique_identifiers(self, run: int) -> str: return ', '.join(t['identifier'] for t in self._techniques_info(run)) From b402ec86354dece54ecb0e5daad4c6a6e1f57d59 Mon Sep 17 00:00:00 2001 From: tmichela Date: Mon, 30 Sep 2024 15:59:39 +0200 Subject: [PATCH 5/5] return the techniques information --- damnit/ctxsupport/ctxrunner.py | 10 ++-------- docs/backend.md | 9 ++++----- tests/test_backend.py | 13 ++++--------- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/damnit/ctxsupport/ctxrunner.py b/damnit/ctxsupport/ctxrunner.py index 721f21bb..88b919e6 100644 --- a/damnit/ctxsupport/ctxrunner.py +++ b/damnit/ctxsupport/ctxrunner.py @@ -95,7 +95,7 @@ def _run_info(self, run: int) -> dict[str, Any]: return json["runs"][0] @_cache - def _techniques_info(self, run: int) -> dict[str, Any]: + def techniques(self, run: int) -> dict[str, Any]: run_info = self._run_info(run) response = requests.get(f'{self.server}/api/mymdc/runs/{run_info["id"]}', headers=self._headers, timeout=self.timeout) @@ -122,12 +122,6 @@ def run_type(self, run: int) -> str: return response.json()["name"] - def technique_names(self, run: int) -> str: - return ', '.join(t['name'] for t in self._techniques_info(run)) - - def technique_identifiers(self, run: int) -> str: - return ', '.join(t['identifier'] for t in self._techniques_info(run)) - class ContextFileErrors(RuntimeError): def __init__(self, problems): @@ -191,7 +185,7 @@ def check(self): for name, var in self.vars.items(): mymdc_args = var.arg_dependencies("mymdc#") for arg_name, annotation in mymdc_args.items(): - if annotation not in ["sample_name", "run_type", "techniques_name", "techniques_identifier"]: + if annotation not in ["sample_name", "run_type", "techniques"]: problems.append(f"Argument '{arg_name}' of variable '{name}' has an invalid MyMdC dependency: '{annotation}'") if problems: diff --git a/docs/backend.md b/docs/backend.md index 527d0705..c8e3651f 100644 --- 
a/docs/backend.md +++ b/docs/backend.md @@ -91,12 +91,11 @@ arguments if they have the right _annotations_, currently. - `mymdc#run_type`: The run type from myMdC. - `mymdc#sample_name`: The sample name from myMdC. -- `mymdc#technique_identifiers`: list of +- `mymdc#techniques`: list of [technique](https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html) - identifiers associated with the run. -- `mymdc#technique_names`: list of - [technique](https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html) - names associated with the run. + entries associated with the run. Each entry is a `dict` containing the + following keys: `description`, `flg_available`, `id`, `identifier`, `name`, + `runs_techniques_id`, `url`. You can also use annotations to express a dependency between `Variable`'s using the `var#` annotation: diff --git a/tests/test_backend.py b/tests/test_backend.py index 38dc9376..39046d48 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -399,13 +399,9 @@ def sample(run, x: "mymdc#sample_name"): def run_type(run, x: "mymdc#run_type"): return x - @Variable(title="Run Technique IDs") - def technique_ids(run, x: "mymdc#technique_identifiers"): - return x - - @Variable(title="Run Technique names") - def technique_names(run, x: "mymdc#technique_names"): - return x + @Variable(title="Run Techniques") + def techniques(run, x: "mymdc#techniques"): + return ', '.join(t['name'] for t in x) """ mymdc_ctx = mkcontext(mymdc_code) @@ -446,8 +442,7 @@ def mock_get(url, headers, timeout): assert results.cells["sample"].data == "mithril" assert results.cells["run_type"].data == "alchemy" - assert results.cells["technique_ids"].data == "PaNET01168, PaNET01188" - assert results.cells["technique_names"].data == "SFX, SAXS" + assert results.cells["techniques"].data == "SFX, SAXS" def test_return_bool(mock_run, tmp_path):