Skip to content

Commit

Permalink
Merge pull request #338 : Adds mymdc techniques annotation
Browse files Browse the repository at this point in the history
Adds mymdc techniques annotation
  • Loading branch information
tmichela authored Oct 2, 2024
2 parents d511cd2 + b402ec8 commit b211df3
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 50 deletions.
100 changes: 54 additions & 46 deletions damnit/ctxsupport/ctxrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
import traceback
from datetime import timezone
from enum import Enum
from functools import wraps
from graphlib import CycleError, TopologicalSorter
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock

import extra_data
Expand Down Expand Up @@ -70,45 +72,55 @@ def __init__(self, proposal, timeout=10, init_server="https://exfldadev01.desy.d

self._headers = { "X-API-key": self.token }

def _run_info(self, run):
    """Return the myMdC run record for `run` of this proposal.

    The record is requested from the server the first time a run is asked
    for and stored in ``self._cache``; later calls for the same run return
    the stored record without another request.

    Raises whatever ``raise_for_status()`` raises for an error response, and
    RuntimeError if the "runs" list in the reply is empty.
    """
    cache_key = (run, "run_info")
    if cache_key in self._cache:
        return self._cache[cache_key]

    endpoint = f"{self.server}/api/mymdc/proposals/by_number/{self.proposal}/runs/{run}"
    reply = requests.get(endpoint, headers=self._headers, timeout=self.timeout)
    reply.raise_for_status()
    payload = reply.json()
    if len(payload["runs"]) == 0:
        raise RuntimeError(f"Couldn't get run information from mymdc for p{self.proposal}, r{run}")

    # Only the first entry of the "runs" list is kept.
    self._cache[cache_key] = payload["runs"][0]
    return self._cache[cache_key]

def sample_name(self, run):
    """Return the sample name myMdC has on record for `run`.

    The run record supplies a ``sample_id``; the "name" field of that sample
    is fetched, stored in ``self._cache`` and returned. Repeated calls for
    the same run are answered from the cache.
    """
    cache_key = (run, "sample_name")
    if cache_key in self._cache:
        return self._cache[cache_key]

    sample_id = self._run_info(run)["sample_id"]
    endpoint = f"{self.server}/api/mymdc/samples/{sample_id}"
    reply = requests.get(endpoint, headers=self._headers, timeout=self.timeout)
    reply.raise_for_status()

    self._cache[cache_key] = reply.json()["name"]
    return self._cache[cache_key]

def run_type(self, run):
    """Return the run type myMdC has on record for `run`.

    The run record supplies an ``experiment_id``; the "name" field of that
    experiment is fetched, stored in ``self._cache`` and returned. Repeated
    calls for the same run are answered from the cache.
    """
    cache_key = (run, "run_type")
    if cache_key in self._cache:
        return self._cache[cache_key]

    experiment_id = self._run_info(run)["experiment_id"]
    endpoint = f"{self.server}/api/mymdc/experiments/{experiment_id}"
    reply = requests.get(endpoint, headers=self._headers, timeout=self.timeout)
    reply.raise_for_status()

    self._cache[cache_key] = reply.json()["name"]
    return self._cache[cache_key]
# NOTE: deliberately a plain function rather than a ``@staticmethod``. It is
# applied as ``@_cache`` while the class body is still executing, and
# ``staticmethod`` objects are only callable from Python 3.10 onwards, so the
# wrapped form raises ``TypeError`` at class-definition time on Python 3.9.
def _cache(func):
    """Decorate a ``(self, run)`` method so its result is stored per run.

    Results are kept in the instance's ``self._cache`` mapping under the key
    ``(run, func.__name__)``. The wrapped method is only called when that key
    is absent; if it raises, nothing is stored.

    The wrapper reads and writes ``self._cache``, so instances must carry
    their own ``_cache`` mapping; that instance attribute takes precedence
    over this class-level function of the same name.
    """
    @wraps(func)
    def wrapper(self, run):
        key = (run, func.__name__)
        if key not in self._cache:
            self._cache[key] = func(self, run)
        return self._cache[key]
    return wrapper

@_cache
def _run_info(self, run: int) -> dict[str, Any]:
    """Fetch the myMdC run record for `run` of this proposal.

    Returns the first entry of the "runs" list in the server's reply.

    Raises whatever ``raise_for_status()`` raises for an error response, and
    RuntimeError if the "runs" list is empty.
    """
    endpoint = f"{self.server}/api/mymdc/proposals/by_number/{self.proposal}/runs/{run}"
    reply = requests.get(endpoint, headers=self._headers, timeout=self.timeout)
    reply.raise_for_status()
    payload = reply.json()
    if len(payload["runs"]) == 0:
        raise RuntimeError(f"Couldn't get run information from mymdc for p{self.proposal}, r{run}")

    return payload["runs"][0]

@_cache
def techniques(self, run: int) -> list[dict[str, Any]]:
    """Return the techniques myMdC associates with `run`.

    The run record supplies the run's myMdC ``id``; the "techniques" field of
    that run is returned as-is. It is a list with one dict per technique
    (callers read e.g. the "name" key of each entry), not a single dict.

    Raises whatever ``raise_for_status()`` raises for an error response.
    """
    run_info = self._run_info(run)
    response = requests.get(f'{self.server}/api/mymdc/runs/{run_info["id"]}',
                            headers=self._headers, timeout=self.timeout)
    response.raise_for_status()
    return response.json()['techniques']

@_cache
def sample_name(self, run: int) -> str:
    """Return the sample name myMdC has on record for `run`.

    The run record supplies a ``sample_id``; the "name" field of that sample
    is returned.
    """
    sample_id = self._run_info(run)["sample_id"]
    endpoint = f"{self.server}/api/mymdc/samples/{sample_id}"
    reply = requests.get(endpoint, headers=self._headers, timeout=self.timeout)
    reply.raise_for_status()

    return reply.json()["name"]

@_cache
def run_type(self, run: int) -> str:
    """Return the run type myMdC has on record for `run`.

    The run record supplies an ``experiment_id``; the "name" field of that
    experiment is returned.
    """
    experiment_id = self._run_info(run)["experiment_id"]
    endpoint = f"{self.server}/api/mymdc/experiments/{experiment_id}"
    reply = requests.get(endpoint, headers=self._headers, timeout=self.timeout)
    reply.raise_for_status()

    return reply.json()["name"]


class ContextFileErrors(RuntimeError):
Expand Down Expand Up @@ -173,7 +185,7 @@ def check(self):
for name, var in self.vars.items():
mymdc_args = var.arg_dependencies("mymdc#")
for arg_name, annotation in mymdc_args.items():
if annotation not in ["sample_name", "run_type"]:
if annotation not in ["sample_name", "run_type", "techniques"]:
problems.append(f"Argument '{arg_name}' of variable '{name}' has an invalid MyMdC dependency: '{annotation}'")

if problems:
Expand Down Expand Up @@ -301,12 +313,8 @@ def execute(self, run_data, run_number, proposal, input_vars) -> 'Results':
elif annotation.startswith("mymdc#"):
if mymdc is None:
mymdc = MyMetadataClient(proposal)

mymdc_field = annotation.removeprefix("mymdc#")
if mymdc_field == "sample_name":
kwargs[arg_name] = mymdc.sample_name(run_number)
elif mymdc_field == "run_type":
kwargs[arg_name] = mymdc.run_type(run_number)
metadata = annotation.removeprefix('mymdc#')
kwargs[arg_name] = getattr(mymdc, metadata)(run_number)

elif annotation == "meta#run_number":
kwargs[arg_name] = run_number
Expand Down
16 changes: 13 additions & 3 deletions docs/backend.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,26 @@ Variable functions can return any of:

The functions must always take in one argument, `run`, to which is passed a
[`DataCollection`](https://extra-data.readthedocs.io/en/latest/reading_files.html#data-structure)
of the data in the run. In addition, a function can take some other special
arguments if they have the right _annotations_:
of the data in the run. In addition, a function can take some other special
arguments if they have the right _annotations_. The following annotations are
currently supported. `meta` accesses internal arguments:

- `meta#run_number`: The number of the current run being processed.
- `meta#proposal`: The number of the current proposal.
- `meta#proposal_dir`: The root
[Path](https://docs.python.org/3/library/pathlib.html) to the current
proposal.
- `mymdc#sample_name`: The sample name from myMdC.

`mymdc` requests information from the EuXFEL data management portal
[myMdC](https://in.xfel.eu/metadata/):

- `mymdc#run_type`: The run type from myMdC.
- `mymdc#sample_name`: The sample name from myMdC.
- `mymdc#techniques`: A list of the
[techniques](https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html)
associated with the run. Each technique in the list is a `dict` containing the
following keys: `description`, `flg_available`, `id`, `identifier`, `name`,
`runs_techniques_id`, `url`.

You can also use annotations to express a dependency between `Variable`'s using
the `var#<name>` annotation:
Expand Down
13 changes: 12 additions & 1 deletion tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ def sample(run, x: "mymdc#sample_name"):
@Variable(title="Run type")
def run_type(run, x: "mymdc#run_type"):
return x
@Variable(title="Run Techniques")
def techniques(run, x: "mymdc#techniques"):
return ', '.join(t['name'] for t in x)
"""
mymdc_ctx = mkcontext(mymdc_code)

Expand All @@ -416,11 +420,16 @@ def mock_get(url, headers, timeout):
assert headers["X-API-key"] == "foo"

if "proposals/by_number" in url:
result = dict(runs=[dict(sample_id=1, experiment_id=1)])
result = dict(runs=[dict(id=1, sample_id=1, experiment_id=1)])
elif "samples" in url:
result = dict(name="mithril")
elif "experiments" in url:
result = dict(name="alchemy")
elif "/runs/" in url:
result = {'techniques': [
{'identifier': 'PaNET01168', 'name': 'SFX'},
{'identifier': 'PaNET01188', 'name': 'SAXS'},
]}

response = MagicMock()
response.json.return_value = result
Expand All @@ -430,8 +439,10 @@ def mock_get(url, headers, timeout):
with patch.object(requests, "get", side_effect=mock_get), \
patch.object(ed.read_machinery, "find_proposal", return_value=tmp_path):
results = results_create(mymdc_ctx)

assert results.cells["sample"].data == "mithril"
assert results.cells["run_type"].data == "alchemy"
assert results.cells["techniques"].data == "SFX, SAXS"


def test_return_bool(mock_run, tmp_path):
Expand Down

0 comments on commit b211df3

Please sign in to comment.