Skip to content

Commit

Permalink
improve error handling
Browse files: browse the repository at this point in the history
  • Loading branch information
vemonet committed Apr 2, 2024
1 parent d970a4c commit 885aa98
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 13 deletions.
36 changes: 24 additions & 12 deletions backend/src/decentriq.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from decentriq_platform.analytics import (
AnalyticsDcrBuilder,
Column,
PrimitiveType,
FormatType,
PythonComputeNodeDefinition,
RawDataNodeDefinition,
PreviewComputeNodeDefinition,
TableDataNodeDefinition,
)
from fastapi import APIRouter, Depends, HTTPException

Expand All @@ -25,11 +26,11 @@ def get_cohort_schema(cohort_dict: Cohort) -> list[Column]:
"""Convert cohort variables to Decentriq schema"""
schema = []
for variable_id, variable_info in cohort_dict.variables.items():
prim_type = PrimitiveType.STRING
prim_type = FormatType.STRING
if variable_info.var_type == "FLOAT":
prim_type = PrimitiveType.FLOAT
prim_type = FormatType.FLOAT
if variable_info.var_type == "INT":
prim_type = PrimitiveType.INTEGER
prim_type = FormatType.INTEGER
nullable = bool(variable_info.na != 0)

schema.append(Column(name=variable_id, format_type=prim_type, is_nullable=nullable))
Expand All @@ -54,9 +55,9 @@ def create_provision_dcr(user: Any, cohort: Cohort) -> dict[str, Any]:

# Create data node for cohort
data_node_id = cohort.cohort_id.replace(" ", "-")
builder.add_node_definition(RawDataNodeDefinition(name=data_node_id, is_required=True))
# builder.add_node_definition(RawDataNodeDefinition(name=data_node_id, is_required=True))
# TODO: providing schema is broken in new SDK
# builder.add_node_definition(TableDataNodeDefinition(name=data_node_id, columns=get_cohort_schema(cohort), is_required=True))
builder.add_node_definition(TableDataNodeDefinition(name=data_node_id, columns=get_cohort_schema(cohort), is_required=True))

builder.add_participant(
user["email"],
Expand Down Expand Up @@ -122,14 +123,15 @@ async def create_compute_dcr(
# Get metadata for selected cohorts and variables
selected_cohorts = {}
# We generate a pandas script to automatically prepare the data from the cohort based on known metadata
pandas_script = "import pandas as pd\n\n"
pandas_script = "import pandas as pd\nimport decentriq_util\n\n"

for cohort_id, requested_vars in cohorts_request["cohorts"].items():
cohort_meta = deepcopy(all_cohorts[cohort_id])
df_var = f"df_{cohort_id.replace(' ', '_').replace('-', '_').replace('(', '').replace(')', '')}"
if isinstance(requested_vars, list):
# Direct cohort variables list
pandas_script += f"{df_var} = pd.read_csv('{cohort_id}.csv')\n"
# pandas_script += f"{df_var} = pd.read_csv('{cohort_id}.csv')\n"
pandas_script += f'{df_var} = decentriq_util.read_tabular_data("/input/{cohort_id}")\n'

if len(requested_vars) <= len(cohort_meta.variables):
# Add filter variables to pandas script
Expand All @@ -145,6 +147,8 @@ async def create_compute_dcr(
# TODO: add merged cohorts schema to selected_cohorts
else:
raise HTTPException(status_code=400, detail=f"Invalid structure for cohort {cohort_id}")
pandas_script += f'{df_var}.to_csv("/output/{cohort_id}.csv", index=False, header=True)\n\n'


# TODO: Add pandas_script to the DCR?
# print(pandas_script)
Expand All @@ -161,6 +165,7 @@ async def create_compute_dcr(
.with_name(dcr_title)
.with_owner(user["email"])
.with_description("A data clean room to run computations on cohorts for the iCARE4CVD project")
.with_airlock()
)

# builder = dq.DataRoomBuilder(f"iCare4CVD DCR compute {dcr_count}", enclave_specs=enclave_specs)
Expand All @@ -169,9 +174,9 @@ async def create_compute_dcr(
for cohort_id, cohort in selected_cohorts.items():
# Create data node for cohort
data_node_id = cohort_id.replace(" ", "-")
builder.add_node_definition(RawDataNodeDefinition(name=data_node_id, is_required=True))
# builder.add_node_definition(RawDataNodeDefinition(name=data_node_id, is_required=True))
# TODO: providing schema is broken in new SDK
# builder.add_node_definition(TableDataNodeDefinition(name=data_node_id, columns=get_cohort_schema(cohort), is_required=True))
builder.add_node_definition(TableDataNodeDefinition(name=data_node_id, columns=get_cohort_schema(cohort), is_required=True))
data_nodes.append(data_node_id)

# Add python data preparation script
Expand All @@ -182,6 +187,14 @@ async def create_compute_dcr(
# Add users permissions
builder.add_participant(user["email"], data_owner_of=[data_node_id], analyst_of=["prepare-data"])

# Add airlock node to make it easy to access a small part of the dataset
builder.add_node_definition(PreviewComputeNodeDefinition(
name="preview-data",
dependency="prepare-data",
quota_bytes=52428800, # 50MB
))


# Build and publish DCR
dcr_definition = builder.build()
dcr = client.publish_analytics_dcr(dcr_definition)
Expand All @@ -190,7 +203,6 @@ async def create_compute_dcr(
"message": f"Data Clean Room available for compute at {dcr_url}",
"dcr_url": dcr_url,
"dcr_title": dcr_title,
# "dcr": dcr_desc,
"merge_script": pandas_script,
**cohorts_request,
}
2 changes: 1 addition & 1 deletion backend/src/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def load_cohort_dict_file(dict_path: str, cohort_id: str, user_email: str) -> Da
except Exception as e:
raise HTTPException(
status_code=422,
detail=e,
detail=str(e),
)
return g

Expand Down
1 change: 1 addition & 0 deletions frontend/src/components/Nav.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ export function Nav() {
Close
</button>
</div>
{/* TODO: {isLoading && <div className="loader"></div>} */}
{publishedDCR && (
<div className="card card-compact">
<div className="card-body bg-success mt-5 rounded-lg text-slate-900">
Expand Down

0 comments on commit 885aa98

Please sign in to comment.