def known_forth_of(model):
    """
    Look up hand-written forth code and awkward form for a model, if any.

    Args:
        model: The :doc:`uproot.model.Model` to look up known forth for

    Returns an object with attributes `forth_code` and `awkward_form` if a known
    special case exists, else None
    """
    # The type name is taken from the first of these that exists:
    # ``model.typename``, ``model.classname``, the decoded ``model.__name__``.
    try:
        typename = model.typename
    except AttributeError:
        try:
            typename = model.classname
        except AttributeError:
            decoded = uproot.model.classname_decode(model.__name__)
            # NOTE(review): classname_decode is documented as returning a
            # (classname, version) 2-tuple -- confirm. A tuple can never equal
            # the string keys of KNOWN_FORTH_DICT, so only the class name is
            # kept; a plain string result is passed through unchanged.
            typename = decoded[0] if isinstance(decoded, tuple) else decoded

    # Single lookup; the registered value is a class that is instantiated
    # with the type name it was found under.
    known_forth_cls = KNOWN_FORTH_DICT.get(typename)
    if known_forth_cls is None:
        return None

    return known_forth_cls(typename)
class VectorVectorElementLink:
    """
    Known forth and awkward form for ``std::vector<std::vector<ElementLink<T>>>``
    types in ATLAS (D)AODs

    The forth code was adjusted from what was provided in
    ``branch._complete_forth_code`` after running ``.array()`` once.

    The binary data of one ``vector<vector<ElementLink<T>>>`` looks as follows:

    * 6 bytes header for the outer vector
    * 4 bytes big endian uint for the size of the outer vector (node1)
    * for each outer vector element:
        * 4 bytes big endian uint for the size of the inner vector (node2)
        * for each inner vector element:
            * 20 bytes header for the ElementLink object
            * 4 bytes big endian uint for the ``m_persKey`` member (node3)
            * 4 bytes big endian uint for the ``m_persIndex`` member (node4)

    Attributes set by the constructor:

    * ``typename``: the full C++ type name this instance was created for
    * ``inner_typename``: ``typename`` with the two enclosing ``std::vector``
      layers removed (unchanged if ``typename`` does not have that shape)
    """

    # Class-level forth program; it is the same for every element type because
    # only the two uint32 members of each ElementLink are read.
    forth_code = """
input stream
input byteoffsets
input bytestops
output node1-offsets int64
output node2-offsets int64
output node3-data uint32
output node4-data uint32

0 node1-offsets <- stack
0 node2-offsets <- stack

0 do
  byteoffsets I-> stack
  stream seek
  6 stream skip
  stream !I-> stack
  dup node1-offsets +<- stack
  0 do
    stream !I-> stack
    dup node2-offsets +<- stack
    0 do
      20 stream skip
      stream !I-> node3-data
      stream !I-> node4-data
    loop
  loop
loop
"""

    def __init__(self, typename):
        """
        Args:
            typename (str): full C++ type name of the branch, expected to look
                like ``std::vector<std::vector<...>>``
        """
        self.typename = typename
        # The pattern needs a capturing group: the replacement refers to
        # group 1, and ``re.sub`` rejects a reference to a group that the
        # pattern does not define.
        self.inner_typename = re.sub(
            r"std::vector<std::vector<(.*)>>", r"\1", self.typename
        )

    @property
    def awkward_form(self):
        """
        Awkward form, as a plain dict, matching the outputs of ``forth_code``:
        two nested ``ListOffsetArray`` layers (node1, node2) around a record
        with the uint32 fields ``m_persKey`` (node3) and ``m_persIndex``
        (node4). The record is named after ``inner_typename``.
        """
        return {
            "class": "ListOffsetArray",
            "offsets": "i64",
            "form_key": "node1",
            "content": {
                "class": "ListOffsetArray",
                "offsets": "i64",
                "form_key": "node2",
                "content": {
                    "class": "RecordArray",
                    "fields": ["m_persKey", "m_persIndex"],
                    "contents": [
                        {
                            "class": "NumpyArray",
                            "primitive": "uint32",
                            "inner_shape": [],
                            "parameters": {},
                            "form_key": "node3",
                        },
                        {
                            "class": "NumpyArray",
                            "primitive": "uint32",
                            "inner_shape": [],
                            "parameters": {},
                            "form_key": "node4",
                        },
                    ],
                    "parameters": {"__record__": self.inner_typename},
                },
            },
        }
# Branches of the test file that are read in the tests below.
VECTOR_VECTOR_ELEMENTLINK_BRANCHES = [
    "AnalysisHLT_e12_lhloose_nod0_2mu10AuxDyn.TrigMatchedObjects",
    "AnalysisElectronsAuxDyn.caloClusterLinks",
    "AnalysisPhotonsAuxDyn.vertexLinks",
    "TruthMuonsAuxDyn.childLinks",
    "AnalysisElectronsAuxDyn.trackParticleLinks",
    "PrimaryVerticesAuxDyn.neutralParticleLinks",
    "AnalysisTauJetsAuxDyn.tauTrackLinks",
]


@pytest.mark.parametrize("key", VECTOR_VECTOR_ELEMENTLINK_BRANCHES)
def test_pickup_vector_vector_elementlink(key):
    """The interpretation of each listed branch has forth code and a form set."""
    source = {skhep_testdata.data_path("uproot-issue-123a.root"): "CollectionTree"}
    with uproot.open(source) as tree:
        interpretation = tree[key].interpretation
        assert interpretation._complete_forth_code is not None
        assert interpretation._form is not None


def test_consistent_library_np_vector_vector_elementlink():
    """Reading with ``library="np"`` and with the default library must agree."""
    path = skhep_testdata.data_path("uproot-issue-123a.root")

    # The file is opened separately for each library, as two independent reads.
    with uproot.open({path: "CollectionTree"}) as tree:
        as_numpy = {
            name: tree[name].array(library="np")
            for name in VECTOR_VECTOR_ELEMENTLINK_BRANCHES
        }
    with uproot.open({path: "CollectionTree"}) as tree:
        as_awkward = {
            name: tree[name].array() for name in VECTOR_VECTOR_ELEMENTLINK_BRANCHES
        }

    for name, numpy_objects in as_numpy.items():
        expected = as_awkward[name]
        # Convert the numpy object array using the form of the awkward result,
        # so both sides can be compared field by field.
        converted = uproot.interpretation.library._object_to_awkward_array(
            awkward, expected.layout.form.to_dict(), numpy_objects
        )
        for field in expected.fields:
            assert awkward.all(converted[field] == expected[field])