From 8da9f4cd24c8ce48098e671b7505e68c456b1c36 Mon Sep 17 00:00:00 2001 From: leej3 Date: Tue, 27 Aug 2024 14:18:32 +0100 Subject: [PATCH 1/2] adjust bulk upload labels --- scripts/invocation_upload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/invocation_upload.py b/scripts/invocation_upload.py index f80b1869..84f82138 100644 --- a/scripts/invocation_upload.py +++ b/scripts/invocation_upload.py @@ -14,12 +14,12 @@ rtrans_publication_kwargs = { - "data_tags": ["bulk_upload", "rtransparent-publication"], - "user_comment": "Bulk upload of rtransparent publication data", + "data_tags": ["PMC-OA 2020"], + "user_comment": "Bulk upload of rtransparent publication data that processed all open access XMLs from pubmed central in 2020", "components": [Component(name="rtransparent-publication", version="x.x.x")], } irp_kwargs = { - "data_tags": ["bulk_upload", "NIH-IRP"], + "data_tags": ["NIH-IRP"], "user_comment": "Bulk upload of NIH-IRP data", "components": [Component(name="Sciencebeam parser/RTransparent", version="x.x.x")], } From 01e5893948c74aeaa1f84a210a92cb6e2df2badf Mon Sep 17 00:00:00 2001 From: leej3 Date: Tue, 27 Aug 2024 15:50:16 +0100 Subject: [PATCH 2/2] add config for the neuro upload --- osm/schemas/schema_helpers.py | 4 ++++ scripts/invocation_upload.py | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/osm/schemas/schema_helpers.py b/osm/schemas/schema_helpers.py index 8d9502d9..5ea35f05 100644 --- a/osm/schemas/schema_helpers.py +++ b/osm/schemas/schema_helpers.py @@ -26,6 +26,10 @@ def rtransparent_pub_data_processing(row): return row +def theneuro_data_processing(row): + return row + + def types_mapper(pa_type): if pa.types.is_int64(pa_type): # Map pyarrow int64 to pandas Int64 (nullable integer) diff --git a/scripts/invocation_upload.py b/scripts/invocation_upload.py index 84f82138..5935314e 100644 --- a/scripts/invocation_upload.py +++ b/scripts/invocation_upload.py @@ -23,7 +23,11 @@ "user_comment": "Bulk upload of NIH-IRP data", "components": [Component(name="Sciencebeam parser/RTransparent", version="x.x.x")], } - +theneuro_kwargs = { + "data_tags": ["Th Neuro"], + "user_comment": "Bulk upload of The Neuro data containing OddPub metrics underlying RTransparent metrics for open code/data.", + "components": [Component(name="TheNeuroOddPub", version="x.x.x")], +} logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) @@ -58,6 +62,8 @@ def get_data(args): file_in = Path(args.input_file) if file_in.is_dir() or file_in.suffix == ".parquet": tb = ds.dataset(file_in, format="parquet").to_table() + else: + raise ValueError("Only parquet files are supported") return tb @@ -70,6 +76,8 @@ def get_upload_kwargs(args): kwargs = rtrans_publication_kwargs elif args.custom_processing == "irp_data_processing": kwargs = irp_kwargs + elif args.custom_processing == "theneuro_data_processing": + kwargs = theneuro_kwargs else: raise ValueError( f"Kwargs associated with {args.custom_processing} not found"