From c4eaa7f2e9ddb8ddbb3fa884f255cff798d34a1c Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 11 Mar 2024 16:02:34 +0100 Subject: [PATCH 1/3] Example of a converted DATS metadata record --- .../DatasetVersionObject-dats.yaml | 223 ++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 src/examples/dataset-version/DatasetVersionObject-dats.yaml diff --git a/src/examples/dataset-version/DatasetVersionObject-dats.yaml b/src/examples/dataset-version/DatasetVersionObject-dats.yaml new file mode 100644 index 0000000..4ceb7d4 --- /dev/null +++ b/src/examples/dataset-version/DatasetVersionObject-dats.yaml @@ -0,0 +1,223 @@ +# +# DATS-compliant record taken from +# https://portal.conp.ca/dataset?id=projects/multicenter-phantom +# +title: Multicenter Single Subject Human MRI Phantom +description: >- + Longitudinal brain scans of a single human phantom acquired on multiple + MRI devices across North America over a period of 11 years. In addition + to the human brain images, lego phantom scans have been acquired in + parallel for quality assessments over time across sites. +#"dates": [ +# { +# "date": "2008-04-25 00:00:00", +# "type": { +# "@type": "Annotation", +# "value": "start date" +# } +# }, +# { +# "date": "2019-04-15 00:00:00", +# "type": { +# "@type": "Annotation", +# "value": "end date" +# } +# } +#], +was_attributed_to: + - meta_type: dlco:OrganizationObject + meta_code: MCIN + name: McGill Center for Integrative Neuroscience + - meta_type: dlco:ResearchContributorObject + meta_code: ACEvans + name: Alan C. 
Evans +qualified_attribution: + - agent: ACEvans + had_role: + # conceptor + - marcrel:ccp + # creator + - marcrel:cre + # laboratory dirctor + - marcrel:ldr + # research team head + - marcrel:rth +#"types": [ +# { +# "information": { +# "value": "MRI", +# "valueIRI": "" +# } +# }, +# { +# "information": { +# "value": "quality control subject", +# "valueIRI": "http://uri.interlex.org/base/ilx_0381893" +# } +# } +#], +version: "2.0" +#"privacy": "open", +#"distributions": [ +# { +# "@type": "DatasetDistribution", +# "formats": [ +# "MINC", +# "JSON" +# ], +# "size" : 40, +# "unit" : { +# "value": "GB" +# }, +# "access" : { +# "landingPage" : "https://phantom.loris.ca", +# "authorizations": [ +# { +# "value": "private" +# } +# ] +# } +# } +#], +# "isAbout": [ +# { +# "identifier": { +# "identifier": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606", +# "identifierSource": "NCBI Taxonomy Database" +# }, +# "name":"Homo sapiens" +# }, +# { +# "name": "adult" +# } +# ], +# "spatialCoverage": [ +# { +# "name": "North America", +# "description": "13 scanners in 6 sites accross North America" +# } +license: licenses:CC-BY-ND-4.0 +# "aggregation": "instance of dataset", +# "dimensions": [ +# { +# "name" : { +# "value": "resting BOLD" +# } +# }, +# { +# "name" : { +# "value": "fieldmap BOLD" +# } +# }, +# { +# "name" : { +# "value": "fieldmap DWI" +# } +# }, +# { +# "name" : { +# "value": "DWI 65 directions" +# } +# }, +# { +# "name" : { +# "value": "DWI 25 directions" +# } +# }, +# { +# "name" : { +# "value": "T1W" +# } +# }, +# { +# "name" : { +# "value": "T2W" +# } +# } +# ], +keyword: + - phantom + - MRI +# "extraProperties": [ +# { +# "category": "Total number of sessions", +# "values": [ +# { +# "value": "521" +# } +# ] +# }, +# { +# "category": "logo", +# "values": [ +# { +# "value": "logo.png" +# } +# ] +# }, +# { +# "category": "CONP_status", +# "values": [ +# { +# "value": "CONP" +# } +# ] +# }, +# { +# "category": "files", +# "values": [ +# { +# 
"value": "3771" +# } +# ] +# }, +# { +# "category": "subjects", +# "values": [ +# { +# "value": "1" +# } +# ] +# }, +# { +# "category": "origin_consortium", +# "values": [ +# { +# "value": "IBIS" +# } +# ] +# }, +# { +# "category": "origin_institution", +# "values": [ +# { +# "value": "McGill Centre for Integrative Neuroscience" +# } +# ] +# }, +# { +# "category": "origin_city", +# "values": [ +# { +# "value": "Montreal" +# } +# ] +# }, +# { +# "category": "origin_province", +# "values": [ +# { +# "value": "Quebec" +# } +# ] +# }, +# { +# "category": "origin_country", +# "values": [ +# { +# "value": "Canada" +# } +# ] +# } +# ] + From fc2ebcd6b7c7526e6a7626924a6a421c263f0fa2 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 13 Mar 2024 09:29:00 +0100 Subject: [PATCH 2/3] Make better use of Activity annotation --- .../DatasetVersionObject-dats.yaml | 28 +++++-------------- src/linkml/ontology/datasets.yaml | 1 + src/linkml/ontology/provenance.yaml | 19 +++++++++++++ src/linkml/schemas/dataset-version.yaml | 25 +++++++++++++++++ 4 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/examples/dataset-version/DatasetVersionObject-dats.yaml b/src/examples/dataset-version/DatasetVersionObject-dats.yaml index 4ceb7d4..ccfadb9 100644 --- a/src/examples/dataset-version/DatasetVersionObject-dats.yaml +++ b/src/examples/dataset-version/DatasetVersionObject-dats.yaml @@ -8,22 +8,13 @@ description: >- MRI devices across North America over a period of 11 years. In addition to the human brain images, lego phantom scans have been acquired in parallel for quality assessments over time across sites. 
-#"dates": [ -# { -# "date": "2008-04-25 00:00:00", -# "type": { -# "@type": "Annotation", -# "value": "start date" -# } -# }, -# { -# "date": "2019-04-15 00:00:00", -# "type": { -# "@type": "Annotation", -# "value": "end date" -# } -# } -#], +was_generated_by: + # data collection + - started_at: "2008-04-25T00:00:00" + ended_at: "2019-04-15T00:00:00" + at_location: + name: North America + description: 13 scanners in 6 sites accross North America was_attributed_to: - meta_type: dlco:OrganizationObject meta_code: MCIN @@ -91,11 +82,6 @@ version: "2.0" # "name": "adult" # } # ], -# "spatialCoverage": [ -# { -# "name": "North America", -# "description": "13 scanners in 6 sites accross North America" -# } license: licenses:CC-BY-ND-4.0 # "aggregation": "instance of dataset", # "dimensions": [ diff --git a/src/linkml/ontology/datasets.yaml b/src/linkml/ontology/datasets.yaml index dd44f86..b4fb907 100644 --- a/src/linkml/ontology/datasets.yaml +++ b/src/linkml/ontology/datasets.yaml @@ -189,6 +189,7 @@ classes: - type - version - was_attributed_to + - was_generated_by slot_usage: has_part: range: Resource diff --git a/src/linkml/ontology/provenance.yaml b/src/linkml/ontology/provenance.yaml index c98a4cb..693a71b 100644 --- a/src/linkml/ontology/provenance.yaml +++ b/src/linkml/ontology/provenance.yaml @@ -123,6 +123,18 @@ slots: notes: - we have our own `datetime`, because of a linkml problem with handling the actual one, see https://github.com/linkml/linkml/issues/1806 + started_at: + slot_uri: dlco:started_at + description: >- + Start is when an activity is deemed to have been started by an entity, + known as trigger. The activity did not exist before its start. + Any usage, generation, or invalidation involving an activity follows + the activity's start. 
+ domain: Activity + range: W3CISO8601 + exact_mappings: + - prov:startedAtTime + # Commit generated_at DateTime (committer time) generated_at: slot_uri: dlco:generated_at @@ -218,6 +230,10 @@ classes: An activity is something that occurs over a period of time and acts upon or with entities; it may include consuming, processing, transforming, modifying, relocating, using, or generating entities. + slots: + - at_location + - started_at + - ended_at exact_mappings: - prov:Activity @@ -369,5 +385,8 @@ classes: row, or column. As such, there are numerous ways in which location can be expressed, such as by a coordinate, address, landmark, and so forth. + slots: + - name + - description exact_mappings: - prov:Location diff --git a/src/linkml/schemas/dataset-version.yaml b/src/linkml/schemas/dataset-version.yaml index deb1e49..6ce3ee4 100644 --- a/src/linkml/schemas/dataset-version.yaml +++ b/src/linkml/schemas/dataset-version.yaml @@ -156,6 +156,11 @@ classes: inlined_as_list: true multivalued: true range: AgentObject + was_generated_by: + inlined: true + inlined_as_list: true + multivalued: true + range: ActivityObject qualified_attribution: inlined: true inlined_as_list: true @@ -172,6 +177,18 @@ classes: multivalued: true range: EntityObject + ActivityObject: + class_uri: dlco:ActivityObject + is_a: MetadataObject + mixins: + - Activity + description: >- + Schema-specific implementation of [ActivityObject](https://concepts.datalad.org/ontology/ActivityObject). + slot_usage: + at_location: + inlined: true + range: LocationObject + AgentObject: class_uri: dlco:AgentObject is_a: MetadataObject @@ -313,3 +330,11 @@ classes: - Publication description: >- Schema-specific implementation of [PublicationObject](https://concepts.datalad.org/ontology/PublicationObject). 
+ + LocationObject: + class_uri: dlco:LocationObject + is_a: MetadataObject + mixins: + - Location + description: >- + Schema-specific implementation of [LocationObject](https://concepts.datalad.org/ontology/LocationObject). From 8b396634736adfbc30c111fa3f92d7d1055384f8 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 13 Mar 2024 13:41:51 +0100 Subject: [PATCH 3/3] Possibility to express arbitrary properties (precisely) This is simply using a basic implementation of `rdfs:Property`. This document ```yaml name: growing-dataset properties: - type: http://purl.obolibrary.org/obo/NCIT_C90437 label: Planned Number of Female Subjects range: xsd:nonNegativeInteger value: 54 ``` will translate to this set of triples (blank node identifiers shortened) ```ntriples OBJ0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://concepts.datalad.org/ontology/DatasetVersionObject> . OBJ0 <https://concepts.datalad.org/ontology/name> "growing-dataset" . OBJ0 <http://www.w3.org/2000/01/rdf-schema#property> OBJ1 . OBJ1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Property> . OBJ1 <http://www.w3.org/2000/01/rdf-schema#type> "http://purl.obolibrary.org/obo/NCIT_C90437"^^<http://www.w3.org/2001/XMLSchema#anyURI> . OBJ1 <http://www.w3.org/2000/01/rdf-schema#value> "54" . OBJ1 <http://www.w3.org/2000/01/rdf-schema#label> "Planned Number of Female Subjects" . OBJ1 <http://www.w3.org/2000/01/rdf-schema#range> "xsd:nonNegativeInteger"^^<http://www.w3.org/2001/XMLSchema#anyURI> . ``` --- ...DatasetVersionObject-customproperties.json | 14 +++ ...DatasetVersionObject-customproperties.yaml | 6 + .../DatasetVersionObject-dats.json | 108 +++++++++++++++++ .../DatasetVersionObject-dats.yaml | 110 ++++-------------- src/linkml/ontology/common.yaml | 4 +- src/linkml/ontology/datalad.yaml | 2 +- src/linkml/ontology/datasets.yaml | 9 -- src/linkml/ontology/projects.yaml | 2 +- src/linkml/ontology/rdfs.yaml | 67 +++++++++++ src/linkml/ontology/schema_utils.yaml | 3 + src/linkml/schemas/dataset-version.yaml | 5 + src/linkml/schemas/ontology.yaml | 1 + 12 files changed, 233 insertions(+), 98 deletions(-) create mode 100644 src/examples/dataset-version/DatasetVersionObject-customproperties.json create mode 100644 src/examples/dataset-version/DatasetVersionObject-customproperties.yaml create mode 100644 src/examples/dataset-version/DatasetVersionObject-dats.json create mode 100644 src/linkml/ontology/rdfs.yaml diff --git a/src/examples/dataset-version/DatasetVersionObject-customproperties.json
b/src/examples/dataset-version/DatasetVersionObject-customproperties.json new file mode 100644 index 0000000..45cf50d --- /dev/null +++ b/src/examples/dataset-version/DatasetVersionObject-customproperties.json @@ -0,0 +1,14 @@ +{ + "properties": [ + { + "label": "Planned Number of Female Subjects", + "range": "xsd:nonNegativeInteger", + "type": "http://purl.obolibrary.org/obo/NCIT_C90437", + "value": [ + "54" + ] + } + ], + "name": "growing-dataset", + "@type": "DatasetVersionObject" +} diff --git a/src/examples/dataset-version/DatasetVersionObject-customproperties.yaml b/src/examples/dataset-version/DatasetVersionObject-customproperties.yaml new file mode 100644 index 0000000..94af395 --- /dev/null +++ b/src/examples/dataset-version/DatasetVersionObject-customproperties.yaml @@ -0,0 +1,6 @@ +name: growing-dataset +properties: + - type: http://purl.obolibrary.org/obo/NCIT_C90437 + label: Planned Number of Female Subjects + range: xsd:nonNegativeInteger + value: 54 diff --git a/src/examples/dataset-version/DatasetVersionObject-dats.json b/src/examples/dataset-version/DatasetVersionObject-dats.json new file mode 100644 index 0000000..8819394 --- /dev/null +++ b/src/examples/dataset-version/DatasetVersionObject-dats.json @@ -0,0 +1,108 @@ +{ + "properties": [ + { + "label": "Total number of sessions", + "value": [ + "521" + ] + }, + { + "label": "logo", + "value": [ + "logo.png" + ] + }, + { + "label": "CONP_status", + "value": [ + "CONP" + ] + }, + { + "label": "files", + "value": [ + "3771" + ] + }, + { + "label": "subjects", + "type": "http://purl.obolibrary.org/obo/NCIT_C98703", + "value": [ + "1" + ] + }, + { + "label": "origin_consortium", + "value": [ + "IBIS" + ] + }, + { + "label": "origin_institution", + "value": [ + "McGill Centre for Integrative Neuroscience" + ] + }, + { + "label": "origin_city", + "value": [ + "Montreal" + ] + }, + { + "label": "origin_province", + "value": [ + "Quebec" + ] + }, + { + "label": "origin_country", + "type": 
"http://purl.obolibrary.org/obo/HSO_0000360", + "value": [ + "Canada" + ] + } + ], + "license": "licenses:CC-BY-ND-4.0", + "was_attributed_to": [ + { + "meta_code": "MCIN", + "meta_type": "dlco:OrganizationObject", + "name": "McGill Center for Integrative Neuroscience" + }, + { + "meta_code": "ACEvans", + "meta_type": "dlco:ResearchContributorObject", + "name": "Alan C. Evans" + } + ], + "was_generated_by": [ + { + "at_location": { + "name": "North America", + "description": "13 scanners in 6 sites across North America" + }, + "started_at": "2008-04-25T00:00:00", + "ended_at": "2019-04-15T00:00:00" + } + ], + "qualified_attribution": [ + { + "agent": "ACEvans", + "had_role": [ + "marcrel:ccp", + "marcrel:cre", + "marcrel:ldr", + "marcrel:rth" + ] + } + ], + "description": "Longitudinal brain scans of a single human phantom acquired on multiple MRI devices across North America over a period of 11 years. In addition to the human brain images, lego phantom scans have been acquired in parallel for quality assessments over time across sites.", + "keyword": [ + "phantom", + "MRI" + ], + "title": "Multicenter Single Subject Human MRI Phantom", + "version": "2.0", + "@type": "DatasetVersionObject" +} diff --git a/src/examples/dataset-version/DatasetVersionObject-dats.yaml b/src/examples/dataset-version/DatasetVersionObject-dats.yaml index ccfadb9..2e7240d 100644 --- a/src/examples/dataset-version/DatasetVersionObject-dats.yaml +++ b/src/examples/dataset-version/DatasetVersionObject-dats.yaml @@ -14,7 +14,7 @@ was_generated_by: ended_at: "2019-04-15T00:00:00" at_location: name: North America - description: 13 scanners in 6 sites accross North America + description: 13 scanners in 6 sites across North America was_attributed_to: - meta_type: dlco:OrganizationObject meta_code: MCIN @@ -29,7 +29,7 @@ qualified_attribution: - marcrel:ccp # creator - marcrel:cre - # laboratory dirctor + # laboratory director - marcrel:ldr # research team head - marcrel:rth @@ -124,86 +124,26 @@ 
license: licenses:CC-BY-ND-4.0 keyword: - phantom - MRI -# "extraProperties": [ -# { -# "category": "Total number of sessions", -# "values": [ -# { -# "value": "521" -# } -# ] -# }, -# { -# "category": "logo", -# "values": [ -# { -# "value": "logo.png" -# } -# ] -# }, -# { -# "category": "CONP_status", -# "values": [ -# { -# "value": "CONP" -# } -# ] -# }, -# { -# "category": "files", -# "values": [ -# { -# "value": "3771" -# } -# ] -# }, -# { -# "category": "subjects", -# "values": [ -# { -# "value": "1" -# } -# ] -# }, -# { -# "category": "origin_consortium", -# "values": [ -# { -# "value": "IBIS" -# } -# ] -# }, -# { -# "category": "origin_institution", -# "values": [ -# { -# "value": "McGill Centre for Integrative Neuroscience" -# } -# ] -# }, -# { -# "category": "origin_city", -# "values": [ -# { -# "value": "Montreal" -# } -# ] -# }, -# { -# "category": "origin_province", -# "values": [ -# { -# "value": "Quebec" -# } -# ] -# }, -# { -# "category": "origin_country", -# "values": [ -# { -# "value": "Canada" -# } -# ] -# } -# ] - +properties: + - label: Total number of sessions + value: 521 + - label: logo + value: logo.png + - label: CONP_status + value: CONP + - label: files + value: 3771 + - label: subjects + type: http://purl.obolibrary.org/obo/NCIT_C98703 + value: 1 + - label: origin_consortium + value: IBIS + - label: origin_institution + value: McGill Centre for Integrative Neuroscience + - label: origin_city + value: Montreal + - label: origin_province + value: Quebec + - label: origin_country + type: http://purl.obolibrary.org/obo/HSO_0000360 + value: Canada diff --git a/src/linkml/ontology/common.yaml b/src/linkml/ontology/common.yaml index 77eac35..2e6fdf6 100644 --- a/src/linkml/ontology/common.yaml +++ b/src/linkml/ontology/common.yaml @@ -10,7 +10,7 @@ prefixes: linkml: https://w3id.org/linkml/ iao: http://purl.obolibrary.org/obo/ pav: http://purl.org/pav/ - rdfs: http://www.w3.org/2000/01/rdf-schema# + RDFS: http://www.w3.org/2000/01/rdf-schema# 
schema: http://schema.org/ sio: http://semanticscience.org/resource/ spdx: http://spdx.org/rdf/terms# @@ -93,7 +93,7 @@ slots: slot_uri: dlco:name description: Name of an item or entity. exact_mappings: - - rdfs:label + - RDFS:label - schema:name - foaf:name range: string diff --git a/src/linkml/ontology/datalad.yaml b/src/linkml/ontology/datalad.yaml index 3e260da..3e33a72 100644 --- a/src/linkml/ontology/datalad.yaml +++ b/src/linkml/ontology/datalad.yaml @@ -8,7 +8,7 @@ description: > prefixes: dlco: https://concepts.datalad.org/ontology/ dcterms: http://purl.org/dc/terms/ - rdfs: http://www.w3.org/2000/01/rdf-schema# + RDFS: http://www.w3.org/2000/01/rdf-schema# imports: - ../ontology/datasets - ../ontology/git diff --git a/src/linkml/ontology/datasets.yaml b/src/linkml/ontology/datasets.yaml index b4fb907..d26ea27 100644 --- a/src/linkml/ontology/datasets.yaml +++ b/src/linkml/ontology/datasets.yaml @@ -148,14 +148,6 @@ slots: todos: - Rename to `named_part`? See also todo for `QualifiedPart` - type: - slot_uri: dlco:type - description: >- - The nature of the resource. 
- range: string - exact_mappings: - - dcterms:type - keyword: slot_uri: dlco:keyword description: >- @@ -186,7 +178,6 @@ classes: - qualified_part - relation - title - - type - version - was_attributed_to - was_generated_by diff --git a/src/linkml/ontology/projects.yaml b/src/linkml/ontology/projects.yaml index 02069af..b012ac0 100644 --- a/src/linkml/ontology/projects.yaml +++ b/src/linkml/ontology/projects.yaml @@ -9,7 +9,7 @@ prefixes: dlco: https://concepts.datalad.org/ontology/ linkml: https://w3id.org/linkml/ iao: http://purl.obolibrary.org/obo/ - rdfs: http://www.w3.org/2000/01/rdf-schema# + RDFS: http://www.w3.org/2000/01/rdf-schema# schema: http://schema.org/ spdx: http://spdx.org/rdf/terms# reproduceme: https://w3id.org/reproduceme# diff --git a/src/linkml/ontology/rdfs.yaml b/src/linkml/ontology/rdfs.yaml new file mode 100644 index 0000000..189ec39 --- /dev/null +++ b/src/linkml/ontology/rdfs.yaml @@ -0,0 +1,67 @@ +id: https://concepts.datalad.org/ontology/rdfs +name: meta_utils +title: RDFS elements +description: >- + TODO +prefixes: + RDFS: http://www.w3.org/2000/01/rdf-schema# +emit_prefixes: + - RDFS +imports: + - ../ontology/types + + +slots: + comment: + slot_uri: RDFS:comment + description: >- + A human-readable description. + range: string + + label: + slot_uri: RDFS:label + description: >- + A human-readable version of a resource's name. + range: string + + range: + slot_uri: RDFS:range + description: >- + State that the values of a property are instances of a class. + range: uriorcurie + + type: + slot_uri: RDFS:type + description: >- + State that the subject is an instance of a class. + range: uriorcurie + + value: + slot_uri: RDFS:value + description: >- + Value of a resource. + range: string + multivalued: true + relational_role: OBJECT + + properties: + slot_uri: RDFS:property + description: >- + Container for arbitrary (extra) properties that are not covered by + other dedicated properties.
+ range: Property + multivalued: true + relational_role: PREDICATE + + +classes: + Property: + class_uri: RDFS:Property + description: >- + RDFS based class to describe arbitrary properties. + slots: + - comment + - label + - range + - type + - value diff --git a/src/linkml/ontology/schema_utils.yaml b/src/linkml/ontology/schema_utils.yaml index a07236b..d4f2ad6 100644 --- a/src/linkml/ontology/schema_utils.yaml +++ b/src/linkml/ontology/schema_utils.yaml @@ -8,7 +8,9 @@ prefixes: dcterms: http://purl.org/dc/terms/ dlco: https://concepts.datalad.org/ontology/ linkml: https://w3id.org/linkml/ + RDFS: http://www.w3.org/2000/01/rdf-schema# imports: + - ../ontology/rdfs - ../ontology/types default_prefix: dlco @@ -48,6 +50,7 @@ slots: exact_mappings: - dcterms:type + classes: HasMetaTypeDesignator: mixin: true diff --git a/src/linkml/schemas/dataset-version.yaml b/src/linkml/schemas/dataset-version.yaml index 6ce3ee4..229322b 100644 --- a/src/linkml/schemas/dataset-version.yaml +++ b/src/linkml/schemas/dataset-version.yaml @@ -21,6 +21,7 @@ prefixes: dpv: https://w3id.org/dpv# linkml: https://w3id.org/linkml/ prov: http://www.w3.org/ns/prov# + RDFS: http://www.w3.org/2000/01/rdf-schema# schema: http://schema.org/ spdx: http://spdx.org/rdf/terms# licenses: http://spdx.org/licenses/ @@ -126,6 +127,8 @@ classes: range: LicenseDocumentObject description: >- Container to declare custom licenses. 
+ slots: + - properties slot_usage: distribution: inlined: true @@ -161,6 +164,8 @@ classes: inlined_as_list: true multivalued: true range: ActivityObject + properties: + inlined: true qualified_attribution: inlined: true inlined_as_list: true diff --git a/src/linkml/schemas/ontology.yaml b/src/linkml/schemas/ontology.yaml index 09d8bae..1ca2be8 100644 --- a/src/linkml/schemas/ontology.yaml +++ b/src/linkml/schemas/ontology.yaml @@ -22,6 +22,7 @@ imports: - ../ontology/common - ../ontology/projects - ../ontology/provenance + - ../ontology/rdfs - ../ontology/schema_utils - ../ontology/schema_objects - ../ontology/types