From 4ff898d9d3afe057facb3a8fa05426b7b16a0a96 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Thu, 11 Apr 2024 18:54:09 +0200 Subject: [PATCH] fix dcr participants --- README.md | 8 ++++++++ backend/src/decentriq.py | 12 +++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e75fc57..56f8c9e 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,14 @@ It aims to enable *data custodians* and *data scientists* to: > > If you logged in with a Decentriq user that does not have access to the Cohort Explorer, and need to re-login with another user: you will need to clear cache and cookies. Because Auth0 will keep your login in mind for some time, and it can be quite tricky to reset (they don't give the tools for managing that properly). +> [!IMPORTANT] +> +> Only the owner of the cohort (as described in the spreadsheet holding the generic metadata of all cohorts), and the platform admins, can upload the data dictionary or edit mappings for a cohort. + +> [!NOTE] +> +> You can reupload a cohort dictionary that has already been uploaded (in case you need to fix something). The mappings defined via the Cohort Explorer will be kept, as long as the variable names do not change. 
+ ## 🗺️ Technical overview This platform is composed of 3 main components: diff --git a/backend/src/decentriq.py b/backend/src/decentriq.py index 9add12a..f306a63 100644 --- a/backend/src/decentriq.py +++ b/backend/src/decentriq.py @@ -90,20 +90,18 @@ def pandas_script_filter_cohort_vars(cohort: Cohort, requested_vars: list[str], def pandas_script_merge_cohorts(merged_cohorts: dict[str, list[str]], all_cohorts: dict[str, Cohort]) -> str: """Generate pandas script for merging cohorts on variables mapped_id""" - # TODO: to be fixed + # TODO: to be fixed, just here as a starter example merge_script = "" dfs_to_merge = [] for cohort_id, vars_requested in merged_cohorts.items(): if cohort_id not in all_cohorts: raise ValueError(f"Cohort {cohort_id} does not exist.") - # Assuming you have a way to get dataframe variable names (mapped_id) from vars_requested df_name = f"df_{cohort_id}" - vars_mapped = [f"'{var}'" for var in vars_requested] # Example to generate a list of variable names + vars_mapped = [f"'{var}'" for var in vars_requested] dfs_to_merge.append(df_name) merge_script += ( - f"{df_name} = pd.DataFrame({cohort_id})[{vars_mapped}]\n" # Placeholder for actual data retrieval + f"{df_name} = pd.DataFrame({cohort_id})[{vars_mapped}]\n" ) - # Assuming all dataframes have a common column for merging merge_script += f"merged_df = pd.concat([{', '.join(dfs_to_merge)}], ignore_index=True)\n" return merge_script @@ -156,7 +154,7 @@ async def create_compute_dcr( AnalyticsDcrBuilder(client=client) .with_name(dcr_title) .with_owner(settings.decentriq_email) - .with_description("A data clean room to run computations on cohorts for the iCARE4CVD project") + .with_description("A data clean room to run analyses on cohorts for the iCARE4CVD project") ) participants = {} @@ -184,7 +182,7 @@ async def create_compute_dcr( for owner in cohort.cohort_email: if owner not in participants: participants[owner] = {"data_owner_of": set(), "analyst_of": set()} - 
participants[owner]["data_owner_of"].add(owner) + participants[owner]["data_owner_of"].add(data_node_id) # Add pandas preparation script pandas_script = "import pandas as pd\nimport decentriq_util\n\n"