diff --git a/osm/schemas/schema_helpers.py b/osm/schemas/schema_helpers.py
index 8d9502d9..5ea35f05 100644
--- a/osm/schemas/schema_helpers.py
+++ b/osm/schemas/schema_helpers.py
@@ -26,6 +26,10 @@ def rtransparent_pub_data_processing(row):
     return row
 
 
+def theneuro_data_processing(row):
+    return row
+
+
 def types_mapper(pa_type):
     if pa.types.is_int64(pa_type):
         # Map pyarrow int64 to pandas Int64 (nullable integer)
diff --git a/scripts/invocation_upload.py b/scripts/invocation_upload.py
index 84f82138..5935314e 100644
--- a/scripts/invocation_upload.py
+++ b/scripts/invocation_upload.py
@@ -23,7 +23,11 @@
     "user_comment": "Bulk upload of NIH-IRP data",
     "components": [Component(name="Sciencebeam parser/RTransparent", version="x.x.x")],
 }
-
+theneuro_kwargs = {
+    "data_tags": ["Th Neuro"],
+    "user_comment": "Bulk upload of The Neuro data containing OddPub metrics underlying RTransparent metrics for open code/data.",
+    "components": [Component(name="TheNeuroOddPub", version="x.x.x")],
+}
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
@@ -58,6 +62,8 @@ def get_data(args):
     file_in = Path(args.input_file)
     if file_in.is_dir() or file_in.suffix == ".parquet":
         tb = ds.dataset(file_in, format="parquet").to_table()
+    else:
+        raise ValueError("Only parquet files are supported")
     return tb
 
 
@@ -70,6 +76,8 @@ def get_upload_kwargs(args):
         kwargs = rtrans_publication_kwargs
     elif args.custom_processing == "irp_data_processing":
         kwargs = irp_kwargs
+    elif args.custom_processing == "theneuro_data_processing":
+        kwargs = theneuro_kwargs
     else:
         raise ValueError(
             f"Kwargs associated with {args.custom_processing} not found"