fix dcr participants

MaastrichtU-IDS · Apr 11, 2024 · 4ff898d · 4ff898d
1 parent 5684a84
commit 4ff898d
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -25,6 +25,14 @@ It aims to enable *data custodians* and *data scientists* to:
 >
 > If you logged in with a Decentriq user that does not have access to the Cohort Explorer and need to log in again with another user, you will need to clear your cache and cookies: Auth0 keeps your login in mind for some time, and it can be quite tricky to reset (they don't give the tools for managing that properly).
 
+> [!IMPORTANT]
+>
+> Only the owner of the cohort (as described in the spreadsheet holding the generic metadata for all cohorts) and the platform admins can upload the data dictionary or edit mappings for a cohort.
+
+> [!NOTE]
+>
+> You can re-upload a cohort dictionary that has already been uploaded (in case you need to fix something). The mappings defined via the Cohort Explorer will be kept, as long as the variable names do not change.
+
 ## 🗺️ Technical overview
 
 This platform is composed of 3 main components:

diff --git a/backend/src/decentriq.py b/backend/src/decentriq.py
@@ -90,20 +90,18 @@ def pandas_script_filter_cohort_vars(cohort: Cohort, requested_vars: list[str],
 
 def pandas_script_merge_cohorts(merged_cohorts: dict[str, list[str]], all_cohorts: dict[str, Cohort]) -> str:
     """Generate pandas script for merging cohorts on variables mapped_id"""
-    # TODO: to be fixed
+    # TODO: to be fixed, just here as a starter example
     merge_script = ""
     dfs_to_merge = []
     for cohort_id, vars_requested in merged_cohorts.items():
         if cohort_id not in all_cohorts:
             raise ValueError(f"Cohort {cohort_id} does not exist.")
-        # Assuming you have a way to get dataframe variable names (mapped_id) from vars_requested
         df_name = f"df_{cohort_id}"
-        vars_mapped = [f"'{var}'" for var in vars_requested]  # Example to generate a list of variable names
+        vars_mapped = [f"'{var}'" for var in vars_requested]
         dfs_to_merge.append(df_name)
         merge_script += (
-            f"{df_name} = pd.DataFrame({cohort_id})[{vars_mapped}]\n"  # Placeholder for actual data retrieval
+            f"{df_name} = pd.DataFrame({cohort_id})[{vars_mapped}]\n"
         )
-
     # Assuming all dataframes have a common column for merging
     merge_script += f"merged_df = pd.concat([{', '.join(dfs_to_merge)}], ignore_index=True)\n"
     return merge_script
@@ -156,7 +154,7 @@ async def create_compute_dcr(
         AnalyticsDcrBuilder(client=client)
         .with_name(dcr_title)
         .with_owner(settings.decentriq_email)
-        .with_description("A data clean room to run computations on cohorts for the iCARE4CVD project")
+        .with_description("A data clean room to run analyses on cohorts for the iCARE4CVD project")
     )
 
     participants = {}
@@ -184,7 +182,7 @@ async def create_compute_dcr(
         for owner in cohort.cohort_email:
             if owner not in participants:
                 participants[owner] = {"data_owner_of": set(), "analyst_of": set()}
-            participants[owner]["data_owner_of"].add(owner)
+            participants[owner]["data_owner_of"].add(data_node_id)
 
         # Add pandas preparation script
         pandas_script = "import pandas as pd\nimport decentriq_util\n\n"