Skip to content

Commit

Permalink
add config for the neuro upload
Browse files Browse the repository at this point in the history
  • Loading branch information
leej3 committed Aug 27, 2024
1 parent 8da9f4c commit 01e5893
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
4 changes: 4 additions & 0 deletions osm/schemas/schema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def rtransparent_pub_data_processing(row):
return row


def theneuro_data_processing(row):
    """Return ``row`` unchanged.

    No per-row transformation is applied; the input object is handed
    back as is.

    Args:
        row: A single record. It is neither inspected nor modified here.

    Returns:
        The very same ``row`` object that was passed in.
    """
    return row


def types_mapper(pa_type):
if pa.types.is_int64(pa_type):
# Map pyarrow int64 to pandas Int64 (nullable integer)
Expand Down
10 changes: 9 additions & 1 deletion scripts/invocation_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@
"user_comment": "Bulk upload of NIH-IRP data",
"components": [Component(name="Sciencebeam parser/RTransparent", version="x.x.x")],
}

# Upload metadata used when custom processing is "theneuro_data_processing".
theneuro_kwargs = {
    # Tag spelled "The Neuro" to match the wording of the user comment below
    # (the previous value "Th Neuro" was missing a letter).
    "data_tags": ["The Neuro"],
    "user_comment": (
        "Bulk upload of The Neuro data containing OddPub metrics "
        "underlying RTransparent metrics for open code/data."
    ),
    # NOTE(review): "x.x.x" looks like a placeholder version -- confirm.
    "components": [Component(name="TheNeuroOddPub", version="x.x.x")],
}
# Emit INFO-and-above records with timestamp, logger name and level prefixed.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
Expand Down Expand Up @@ -58,6 +62,8 @@ def get_data(args):
file_in = Path(args.input_file)
if file_in.is_dir() or file_in.suffix == ".parquet":
tb = ds.dataset(file_in, format="parquet").to_table()
else:
raise ValueError("Only parquet files are supported")
return tb


Expand All @@ -70,6 +76,8 @@ def get_upload_kwargs(args):
kwargs = rtrans_publication_kwargs
elif args.custom_processing == "irp_data_processing":
kwargs = irp_kwargs
elif args.custom_processing == "theneuro_data_processing":
kwargs = theneuro_kwargs
else:
raise ValueError(
f"Kwargs associated with {args.custom_processing} not found"
Expand Down

0 comments on commit 01e5893

Please sign in to comment.