diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index dabd00fa14..49b97cfb8f 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -42,7 +42,7 @@ ) from pudl.metadata.resources import FOREIGN_KEYS, RESOURCE_METADATA, eia861 from pudl.metadata.sources import SOURCES -from pudl.workspace.datastore import Datastore +from pudl.workspace.datastore import Datastore, ZenodoDoi from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -869,6 +869,27 @@ class Contributor(Base): role: Literal[ "author", "contributor", "maintainer", "publisher", "wrangler" ] = "contributor" + zenodo_role: Literal[ + "contact person", + "data collector", + "data curator", + "data manager", + "distributor", + "editor", + "hosting institution", + "other", + "producer", + "project leader", + "project member", + "registration agency", + "registration authority", + "related person", + "researcher", + "rights holder", + "sponsor", + "supervisor", + "work package leader", + ] = "project member" organization: String = None orcid: String = None @@ -911,10 +932,10 @@ class DataSource(Base): field_namespace: String = None keywords: list[str] = [] path: HttpUrl = None - contributors: list[Contributor] = [] # Or should this be compiled from Resources? + contributors: list[Contributor] = [] license_raw: License license_pudl: License - # concept_doi: Doi = None # Need to define a Doi type? + concept_doi: ZenodoDoi = None working_partitions: dict[SnakeCase, Any] = {} source_file_dict: dict[SnakeCase, Any] = {} # agency: Agency # needs to be defined diff --git a/src/pudl/metadata/constants.py b/src/pudl/metadata/constants.py index a7906dc7ec..95bffa6b56 100644 --- a/src/pudl/metadata/constants.py +++ b/src/pudl/metadata/constants.py @@ -62,7 +62,7 @@ "us-govt": { "name": "other-pd", "title": "U.S. 
Government Works", - "path": "https://www.usa.gov/government-works", + "path": "https://www.usa.gov/publicdomain/label/1.0/", }, } """License attributes.""" @@ -83,6 +83,7 @@ "email": "pudl@catalyst.coop", "path": "https://catalyst.coop", "role": "publisher", + "zenodo_role": "distributor", "organization": "Catalyst Cooperative", }, "zane-selvans": { @@ -90,6 +91,7 @@ "email": "zane.selvans@catalyst.coop", "path": "https://amateurearthling.org", "role": "wrangler", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", "orcid": "0000-0002-9961-7208", }, @@ -97,6 +99,7 @@ "title": "Christina Gosnell", "email": "christina.gosnell@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", "orcid": "0009-0004-2979-6142", }, @@ -104,12 +107,14 @@ "title": "Steven Winter", "email": "steven.winter@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", }, "alana-wilson": { "title": "Alana Wilson", "email": "alana.wilson@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", }, "karl-dunkle-werner": { @@ -117,6 +122,7 @@ "email": "karldw@berkeley.edu", "path": "https://karldw.org", "role": "contributor", + "zenodo_role": "project member", "organization": "UC Berkeley", "orcid": "0000-0003-0523-7309", }, @@ -124,6 +130,7 @@ "title": "Greg Schivley", "path": "https://gschivley.github.io", "role": "contributor", + "zenodo_role": "project member", "organization": "Carbon Impact Consulting", "orcid": "0000-0002-8947-694X", }, @@ -131,30 +138,35 @@ "title": "Austen Sharpe", "email": "austen.sharpe@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", }, "katherine-lamb": { "title": "Katherine Lamb", "email": "katherine.lamb@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst 
Cooperative", }, "bennett-norman": { "title": "Bennett Norman", "email": "bennett.norman@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", }, "trenton-bush": { "title": "Trenton Bush", "email": "trenton.bush@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", }, "ethan-welty": { "title": "Ethan Welty", "email": "ethan.welty@gmail.com", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", "orcid": "0000-0001-8046-2210", }, @@ -162,12 +174,14 @@ "title": "Dazhong Xia", "email": "dazhong.xia@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", }, "ella-belfer": { "title": "Ella Belfer", "email": "ella.belfer@catalyst.coop", "role": "contributor", + "zenodo_role": "project member", "organization": "Catalyst Cooperative", "orcid": "0000-0001-9784-8531", }, diff --git a/src/pudl/metadata/sources.py b/src/pudl/metadata/sources.py index 303999da31..f941b6a6e6 100644 --- a/src/pudl/metadata/sources.py +++ b/src/pudl/metadata/sources.py @@ -20,6 +20,7 @@ ), "license_raw": LICENSES["us-govt"], "license_pudl": LICENSES["cc-by-4.0"], + "contributors": [CONTRIBUTORS["catalyst-cooperative"]], }, "eia176": { "title": "EIA Form 176 -- Annual Report of Natural and Supplemental Gas Supply and Disposition", @@ -75,10 +76,6 @@ "field_namespace": "eia", "contributors": [ CONTRIBUTORS["catalyst-cooperative"], - CONTRIBUTORS["zane-selvans"], - CONTRIBUTORS["christina-gosnell"], - CONTRIBUTORS["steven-winter"], - CONTRIBUTORS["alana-wilson"], ], "working_partitions": { "years": sorted(set(range(2001, 2023))), @@ -195,10 +192,6 @@ }, "contributors": [ CONTRIBUTORS["catalyst-cooperative"], - CONTRIBUTORS["zane-selvans"], - CONTRIBUTORS["christina-gosnell"], - CONTRIBUTORS["steven-winter"], - CONTRIBUTORS["katherine-lamb"], ], "keywords": sorted( set( @@ -240,8 
+233,6 @@ "working_partitions": {}, "contributors": [ CONTRIBUTORS["catalyst-cooperative"], - CONTRIBUTORS["zane-selvans"], - CONTRIBUTORS["trenton-bush"], ], "keywords": sorted( set( @@ -286,8 +277,6 @@ }, "contributors": [ CONTRIBUTORS["catalyst-cooperative"], - CONTRIBUTORS["karl-dunkle-werner"], - CONTRIBUTORS["zane-selvans"], ], "keywords": sorted( set( @@ -325,7 +314,6 @@ "working_partitions": {}, "contributors": [ CONTRIBUTORS["catalyst-cooperative"], - CONTRIBUTORS["austen-sharpe"], ], "keywords": sorted( set( @@ -369,11 +357,6 @@ }, "contributors": [ CONTRIBUTORS["catalyst-cooperative"], - CONTRIBUTORS["zane-selvans"], - CONTRIBUTORS["christina-gosnell"], - CONTRIBUTORS["steven-winter"], - CONTRIBUTORS["alana-wilson"], - CONTRIBUTORS["austen-sharpe"], ], "keywords": sorted( set( @@ -580,9 +563,6 @@ ), "contributors": [ CONTRIBUTORS["catalyst-cooperative"], - CONTRIBUTORS["zane-selvans"], - CONTRIBUTORS["christina-gosnell"], - CONTRIBUTORS["steven-winter"], ], "field_namespace": "pudl", "keywords": ["us", "electricity", "open data", "open source"], diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index e49087ac6e..94fdfa5ade 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -27,7 +27,9 @@ logger = pudl.logging_helpers.get_logger(__name__) PUDL_YML = Path.home() / ".pudl.yml" -ZenodoDoi = constr(regex=r"(10\.5072|10\.5281)/zenodo.([\d]+)") +ZenodoDoi = constr( + strict=True, min_length=16, regex=r"(10\.5072|10\.5281)/zenodo\.([\d]+)$" +) class ChecksumMismatchError(ValueError):