From bda5117640fd268967e3d873beec59b1f9a310ca Mon Sep 17 00:00:00 2001 From: Xiao Gui Date: Tue, 27 Aug 2024 11:53:00 +0200 Subject: [PATCH] fix: sparse map reference cache: all maps serialization (ds/dsv caching) --- new_api/v3/data_handlers/map/__init__.py | 23 ++++- new_api/v3/serialization/map.py | 123 +++++++++++++++++------ 2 files changed, 113 insertions(+), 33 deletions(-) diff --git a/new_api/v3/data_handlers/map/__init__.py b/new_api/v3/data_handlers/map/__init__.py index d8687a5..8264769 100644 --- a/new_api/v3/data_handlers/map/__init__.py +++ b/new_api/v3/data_handlers/map/__init__.py @@ -98,9 +98,28 @@ def resampled_template(parcellation_id: str, space_id: str): def warmup_maps(): import siibra from siibra.atlases.sparsemap import SparseMap - maps: List[SparseMap] = siibra.find_maps(maptype="statistical") - for map in maps: + from siibra.atlases.parcellationmap import Map + smaps: List[SparseMap] = siibra.find_maps(maptype="statistical") + for map in smaps: try: map._get_readable_sparseindex(warmup=True) except Exception as e: logger.warning(f"Failed to save sparseindex: {str(e)}") + try: + instance_to_model(map, detail=True) + except Exception as e: + logger.warning(f"map {map.space_id=!r} {map.parcellation_id=!r} instance to model {str(e)}") + + maps: List[Map] = siibra.find_maps(maptype="labelled") + + def try_instance_to_model(m: Map): + logger.info(f"Caching map {map.space_id=!r} {map.parcellation_id=!r}") + try: + return instance_to_model(m, detail=True) + except Exception as e: + logger.warning(f"map {map.space_id=!r} {map.parcellation_id=!r} instance to model {str(e)}") + + all_maps = [*smaps, *maps] + for mp in all_maps: + try_instance_to_model(mp) + diff --git a/new_api/v3/serialization/map.py b/new_api/v3/serialization/map.py index bb82259..859ccc0 100644 --- a/new_api/v3/serialization/map.py +++ b/new_api/v3/serialization/map.py @@ -1,5 +1,7 @@ -from typing import Union +from typing import Union, List from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor +from tqdm import tqdm from . import serialize @@ -14,7 +16,7 @@ from siibra.attributes.descriptions import Name, EbrainsRef from siibra.attributes.dataitems.base import Archive from siibra.attributes.dataitems.volume.base import Volume, MESH_FORMATS, IMAGE_FORMATS -from siibra.factory.livequery.ebrains import EbrainsQuery +from siibra.factory.livequery.ebrains import EbrainsQuery, DatasetVersion def parse_archive_options(archive: Union[Archive, None]): if archive is None: @@ -46,11 +48,39 @@ def clear_name(name: str): return " ".join(w for w in result.split(" ") if len(w)) @fn_call_cache -def retrieve_dsv(mp: Map): - got_dsv = [ EbrainsQuery.get_dsv(dsv) - for ref in mp._find(EbrainsRef) - for dsv in ref._dataset_verion_ids] - return got_dsv +def retrieve_dsv_ds(mp: Map): + unique_dsvs = list({dsv for ref in mp._find(EbrainsRef) for dsv in ref._dataset_verion_ids}) + with ThreadPoolExecutor() as ex: + got_dsv = list( + tqdm( + ex.map( + EbrainsQuery.get_dsv, + unique_dsvs + ), + desc="Populating all unique DSVs", + total=len(unique_dsvs), + leave=True + ) + ) + unique_ds = list( + { + is_version_of["id"].split("/")[-1] + for dsv in got_dsv + for is_version_of in dsv["isVersionOf"] + } + ) + got_ds = list( + tqdm( + ex.map( + EbrainsQuery.get_ds, + unique_ds + ), + desc="Populating all unique DSs", + total=len(unique_ds), + leave=True + ) + ) + return got_dsv, got_ds @serialize(SparseMap) @serialize(Map) @@ -65,36 +95,67 @@ def map_to_model(mp: Map, **kwargs): publications = [SiibraPublication(citation=pub.text, url=pub.value) for pub in mp.publications] - got_dsv = retrieve_dsv(mp) + got_dsv, got_ds = retrieve_dsv_ds(mp) + + dsv_dict = {dsv["id"]: dsv for dsv in got_dsv} + ds_dict = {ds["id"]: ds for ds in got_ds} + + def get_description(dsv: DatasetVersion): + if dsv["description"]: + return dsv["description"] + for v in dsv.get("isVersionOf", []): + if v["id"] in ds_dict: + return ds_dict[v["id"]]["description"] + + def dsv_id_to_model(id: str): + id = "https://kg.ebrains.eu/api/instances/" + id.replace("https://kg.ebrains.eu/api/instances/", "") + assert id in dsv_dict, f"{id} not found in dsv_dict" + dsv = dsv_dict[id] + return EbrainsDatasetModel(id=id, + name=dsv["fullName"] or "", + urls=[{"url": dsv["homepage"]}] if dsv["homepage"] else [], + description=get_description(dsv), + contributors=[EbrainsDsPerson(id=author["id"], + identifier=author["id"], + shortName=author["shortName"] or f"{author['givenName']} {author['familyName']}", + name=author["fullName"] or f"{author['givenName']} {author['familyName']}") for author in dsv["author"]], + custodians=[]) + # TODO check for any non empty entry of custodian and transform properly - datasets = [EbrainsDatasetModel(id=dsv["id"], - name=dsv["fullName"] or "", - urls=[{"url": dsv["homepage"]}] if dsv["homepage"] else [], - description=dsv["description"], - contributors=[EbrainsDsPerson(id=author["id"], - identifier=author["id"], - shortName=author["shortName"] or f"{author['givenName']} {author['familyName']}", - name=author["fullName"] or f"{author['givenName']} {author['familyName']}") for author in dsv["author"]], - custodians=[]) for dsv in got_dsv] + untargeted_datasets = [dsv_id_to_model(dsv) + for ref in mp._find(EbrainsRef) + for dsv in ref._dataset_verion_ids + if ref.annotates is None] # specific to map model species = mp.species # TODO fix datasets all_volumes = mp._find(Volume) - volumes = [VolumeModel(name="", - formats=[vol.format], - provides_mesh=vol.format in MESH_FORMATS, - provides_image=vol.format in IMAGE_FORMATS, - fragments={}, - variant=None, - provided_volumes={ - f"{parse_archive_options(vol.archive_options)[0]}{vol.format}": f"{vol.url}{parse_archive_options(vol.archive_options)[0]}" - }, - space={ - "@id": vol.space_id - }, - datasets=[]) for vol in all_volumes] + volumes: List[VolumeModel] = [] + + for vol in all_volumes: + vol_ds: List[EbrainsDatasetModel] = [] + if vol.id: + vol_ds = [dsv_id_to_model(dsv) + for ref in mp._find(EbrainsRef) + for dsv in ref._dataset_verion_ids + if ref.annotates == vol.id] + + volumes.append( + VolumeModel(name="", + formats=[vol.format], + provides_mesh=vol.format in MESH_FORMATS, + provides_image=vol.format in IMAGE_FORMATS, + fragments={}, + variant=None, + provided_volumes={ + f"{parse_archive_options(vol.archive_options)[0]}{vol.format}": f"{vol.url}{parse_archive_options(vol.archive_options)[0]}" + }, + space={ + "@id": vol.space_id + }, + datasets=vol_ds)) indices = defaultdict(list) for idx, vol in enumerate(all_volumes): @@ -112,7 +173,7 @@ def map_to_model(mp: Map, **kwargs): shortname=shortname, description=description, publications=publications, - datasets=datasets, + datasets=untargeted_datasets, species=species, indices=indices, volumes=volumes,