Skip to content

Commit

Permalink
fix dcr participants
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Apr 11, 2024
1 parent 5684a84 commit 4ff898d
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ It aims to enable *data custodians* and *data scientists* to:
>
> If you logged in with a Decentriq user that does not have access to the Cohort Explorer and need to log in again with another user, you will need to clear your cache and cookies: Auth0 remembers your login for some time, and it can be quite tricky to reset (they don't provide proper tools for managing that).
> [!IMPORTANT]
>
> Only the owner of the cohort (as described in the spreadsheet holding the generic metadata of all cohorts) and the platform admins can upload the data dictionary or edit mappings for a cohort.
> [!NOTE]
>
> You can reupload a cohort dictionary that has already been uploaded (in case you need to fix something). The mappings defined via the Cohort Explorer will be kept, as long as the variable names do not change.
## 🗺️ Technical overview

This platform is composed of 3 main components:
Expand Down
12 changes: 5 additions & 7 deletions backend/src/decentriq.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,18 @@ def pandas_script_filter_cohort_vars(cohort: Cohort, requested_vars: list[str],

def pandas_script_merge_cohorts(merged_cohorts: dict[str, list[str]], all_cohorts: dict[str, Cohort]) -> str:
    """Generate a pandas script that concatenates the requested cohorts.

    Args:
        merged_cohorts: Maps each cohort ID to the list of variable names
            requested from that cohort.
        all_cohorts: Known cohorts keyed by ID; only used here to check that
            every requested cohort exists.

    Returns:
        Python source text made of one ``df_<cohort_id> = ...`` line per
        requested cohort, followed by a ``merged_df = pd.concat(...)`` line.

    Raises:
        ValueError: If a cohort ID in ``merged_cohorts`` is not in ``all_cohorts``.
    """
    # TODO: to be fixed, just here as a starter example
    # (``pd.DataFrame(<cohort_id>)`` is still a placeholder for real data retrieval).
    script_lines = []
    dfs_to_merge = []
    for cohort_id, vars_requested in merged_cohorts.items():
        if cohort_id not in all_cohorts:
            raise ValueError(f"Cohort {cohort_id} does not exist.")
        df_name = f"df_{cohort_id}"
        dfs_to_merge.append(df_name)
        # Emit the repr of the plain name list so the generated code reads
        # ``[['a', 'b']]``. Pre-quoting each name and then formatting the list
        # produced ``[["'a'", "'b'"]]``, i.e. column names containing quotes.
        columns = list(vars_requested)
        script_lines.append(f"{df_name} = pd.DataFrame({cohort_id})[{columns!r}]\n")

    script_lines.append(f"merged_df = pd.concat([{', '.join(dfs_to_merge)}], ignore_index=True)\n")
    return "".join(script_lines)
Expand Down Expand Up @@ -156,7 +154,7 @@ async def create_compute_dcr(
AnalyticsDcrBuilder(client=client)
.with_name(dcr_title)
.with_owner(settings.decentriq_email)
.with_description("A data clean room to run computations on cohorts for the iCARE4CVD project")
.with_description("A data clean room to run analyses on cohorts for the iCARE4CVD project")
)

participants = {}
Expand Down Expand Up @@ -184,7 +182,7 @@ async def create_compute_dcr(
for owner in cohort.cohort_email:
if owner not in participants:
participants[owner] = {"data_owner_of": set(), "analyst_of": set()}
participants[owner]["data_owner_of"].add(owner)
participants[owner]["data_owner_of"].add(data_node_id)

# Add pandas preparation script
pandas_script = "import pandas as pd\nimport decentriq_util\n\n"
Expand Down

0 comments on commit 4ff898d

Please sign in to comment.