Merge pull request #99 from killianrochet/ELAB/FixMultipleDownload

Changes to the multiple download functions.
INT-NIT · May 23, 2024 · fa2c1de · fa2c1de
2 parents cb4af96 + 62c3234
commit fa2c1de
Show file tree

Hide file tree

Showing 5 changed files with 62 additions and 163 deletions.
diff --git a/elab_bridge/cli.py b/elab_bridge/cli.py
@@ -19,16 +19,6 @@ def main(command_line=None):
     )
 
     subparsers = parser.add_subparsers(dest='command')
-    download = subparsers.add_parser('download', help='Download an experiment')
-    download.add_argument("destination", nargs=1, metavar='destination', type=str,
-                          help="The destination filename.")
-    download.add_argument("config_json", nargs=1, metavar='config_json', type=str,
-                          help="The json configuration file of the project")
-    download.add_argument("-f", "--format", type=str, nargs=1, metavar='format',
-                          help="Format to store the data (json/csv)")
-    download.add_argument("-c", "--compressed", action='store_true',
-                          help="Compress the output file (use labels and merge checkbox columns)")
-
     extended_download_parser = subparsers.add_parser('extended_download',
                                                      help='Download experiments'
                                                           ' with extended options')
@@ -39,28 +29,16 @@ def main(command_line=None):
                                           help="The json configuration file of the project")
     extended_download_parser.add_argument("tags", nargs='+', metavar='tags', type=str,
                                           help="List of tags of the experiments to download")
-    extended_download_parser.add_argument("-f", "--format", type=str, nargs=1, metavar='format',
+    extended_download_parser.add_argument("-f", "--format", nargs=1, metavar='format', type=str,
                                           help="Format to store the data (json/csv)")
 
     # parse arguments
     args = parser.parse_args(command_line)
 
     if args.debug:
         print("debug: " + str(args))
-    if args.command == 'download':
-        if args.format:
-            raise NotImplementedError()
-        if args.compressed:
-            raise NotImplementedError()
+    if args.command == 'extended_download':
         if not args.format:
             args.format = ['csv']
 
-        download_experiment(args.destination[0], args.config_json[0], format=args.format[0],
-                            compressed=bool(args.compressed))
-
-    elif args.command == 'extended_download':
         extended_download(args.destination[0], args.config_json[0], args.tags)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/elab_bridge/server_interface.py b/elab_bridge/server_interface.py
@@ -4,102 +4,81 @@
 import pandas as pd
 
 
-def extended_download(save_to, server_config_json, experiment_tags, format='csv'):
+def extended_download(save_to, server_config_json, experiment_tags=None,
+                      format='csv', experiment_axis='columns'):
     """
-    Download an individual experiment.
+    Download experiments based on tags or a specific experiment by ID.
 
     Parameters
     ----------
     save_to: str
         Path where to save the retrieved experiment data
     server_config_json: str
         Path to the json file containing the api_url and the api_token
-    experiment_tags: list
-        List of tags of your experiments
-
-    Returns
-    -------
-        (list) List of the experiment downloaded
-    """
-
-    api_client = get_elab_config(server_config_json)
-    experiment_api = elabapi_python.ExperimentsApi(api_client)
-
-    response = experiment_api.read_experiments_with_http_info(tags=experiment_tags)
-
-    experiments = response[0]
-
-    experiment_ids = []
-
-    for experiment in experiments:
-        experiment_ids.append(experiment.id)
-
-    downloaded_experiments = []
-
-    for experiment_id in experiment_ids:
-        metadata = download_experiment(save_to, server_config_json, experiment_id, format=format,
-                                       experiment_axis='columns')
-        downloaded_experiments.append(metadata)
-
-    return downloaded_experiments
-
-
-def download_experiment(save_to, server_config_json, experiment_id, format='json', experiment_axis='columns'):
-    """
-    Download an individual experiment.
-
-    Parameters
-    ----------
-    save_to: str
-        Path where to save the retrieved experiment data
-    server_config_json: str
-        Path to the json file containing the api_url and the api_token
-    experiment_id: int
-        ID of the experiment you want to download
-    format: 'csv', 'json'
-        Format of the retrieved records.
-        Default: 'json'
+    experiment_tags: list, optional
+        List of tags of your experiments. Default is None.
+    experiment_id: int, optional
+        ID of the experiment you want to download. Default is None.
+    format: str
+        Format of the retrieved records. Options are 'csv' or 'json'. Default: 'csv'
     experiment_axis: str
         Option to control whether in the csv format experiments are arranged in columns or rows.
         Default: 'columns'
 
     Returns
     -------
-        (dict) Experiment body as registered on the server
+    list
+        List of the experiment(s) downloaded
     """
 
     api_client = get_elab_config(server_config_json)
     experiment_api = elabapi_python.ExperimentsApi(api_client)
 
-    experiment_body, status_get, http_dict = experiment_api.get_experiment_with_http_info(experiment_id)
-
-    if status_get != 200:
-        raise ValueError('Could not download experiment. '
-                         'Check your internet connection and permissions.')
+    if experiment_tags:
+        response = experiment_api.read_experiments_with_http_info(tags=experiment_tags)
+        experiments = response[0]
+        experiment_ids = [experiment.id for experiment in experiments]
+    else:
+        raise ValueError("Either experiment_tags or experiment_id must be provided.")
 
-    experiment_json = experiment_body.metadata
-    metadata = json.loads(experiment_json)
-    extra_fields = metadata.get("extra_fields", {})
-
-    if format == 'json':
-        with open(save_to, 'w') as f:
-            json.dump(extra_fields, f)
-
-    elif format == 'csv':
-        if experiment_axis == 'columns':
-            df = pd.DataFrame.from_dict(extra_fields, orient='columns')
-            df.iloc[[1]].to_csv(save_to, mode='a', index=False)
-        elif experiment_axis == 'rows':
-            df = pd.DataFrame.from_dict(extra_fields, orient='index')
-            df = df[['value']]
-            df.to_csv(save_to, mode='a', index=True, header=False)
+    downloaded_experiments = []
+    combined_df = pd.DataFrame()
+
+    for exp_id in experiment_ids:
+        experiment_body, status_get, http_dict = (
+            experiment_api.get_experiment_with_http_info(exp_id))
+
+        if status_get != 200:
+            raise ValueError('Could not download experiment. '
+                             'Check your internet connection and permissions.')
+
+        experiment_json = experiment_body.metadata
+        metadata = json.loads(experiment_json)
+        extra_fields = metadata.get("extra_fields", {})
+
+        if format == 'json':
+            with open(save_to, 'w') as f:
+                json.dump(extra_fields, f)
+        elif format == 'csv':
+            if experiment_axis == 'columns':
+                df = pd.DataFrame.from_dict(extra_fields, orient='columns')
+                combined_df = pd.concat([combined_df, df.iloc[[1]]], ignore_index=True, sort=False)
+            elif experiment_axis == 'rows':
+                df = pd.DataFrame.from_dict(extra_fields, orient='index')
+                df = df[['value']].transpose()
+                combined_df = pd.concat([combined_df, df], ignore_index=True, sort=False)
+            else:
+                raise ValueError(f'Unknown experiment axis: {experiment_axis}. Valid arguments are '
+                                 f'"columns" and "rows".')
         else:
-            raise ValueError(f'Unknown experiment axis: {experiment_axis}. Valid arguments are '
-                             f'"columns" and "rows".')
-    else:
-        raise ValueError(f'Unknows format: {format}. Valid arguments are "json" and "csv".')
+            raise ValueError(f'Unknown format: {format}. Valid arguments are "json" and "csv".')
 
-    return metadata
+        downloaded_experiments.append(metadata)
+
+    if format == 'csv':
+        combined_df.to_csv(save_to, index=False)
+
+    return downloaded_experiments
 
 
 def upload_experiment(experiment_file, server_config_json, experiment_title):

diff --git a/elab_bridge/tests/test_cli.py b/elab_bridge/tests/test_cli.py
@@ -1,7 +1,7 @@
 import subprocess
 import pytest
 
-from elab_bridge.server_interface import upload_template, download_experiment
+from elab_bridge.server_interface import upload_template
 from diglab_utils.test_utils import (test_directory, initialize_test_dir)
 from elab_bridge.tests.test_server_interface import SERVER_CONFIG_YAML
 
@@ -16,55 +16,18 @@ def test_installed(initialize_test_dir):
     result = subprocess.run(['ElabBridge', '--help'], stdout=subprocess.PIPE)
     assert 'usage:' in str(result.stdout)
 
-
 @pytest.mark.skip('Requires `upload_experiment` to be implemented')
-def test_download(initialize_test_dir):
-    """
-    Check that download option works for Test Project
-    """
-
-    # Set up project on server
-    template = test_directory / 'testfiles_elab' / 'template.json'
-    upload_template(template, SERVER_CONFIG_YAML, 'Testproject')
-    # records_csv = test_directory / 'testfiles_elab' / 'record.csv'
-    # upload_records(records_csv, SERVER_CONFIG_YAML)
-
-    output_file = test_directory / 'cli_download_test.csv'
-
-    # download with default arguments
-    result = subprocess.run(['ElabBridge', 'download', output_file, SERVER_CONFIG_YAML],
-                            stdout=subprocess.PIPE)
-    assert 'error' not in str(result.stdout)
-    assert output_file.exists()
-    output_file.unlink()
-
-    # # download in compressed mode
-    # result = subprocess.run(['ElabBridge', 'download', '--compressed', output_file,
-    #                          SERVER_CONFIG_YAML],
-    #                         stdout=subprocess.PIPE)
-    # assert 'error' not in str(result.stdout)
-    # assert pathlib.Path(output_file).exists()
-    # output_file.unlink()
-
-    # # download with format argument
-    # result = subprocess.run(['ElabBridge', 'download', '--format', 'csv', output_file,
-    #                          SERVER_CONFIG_YAML],
-    #                         stdout=subprocess.PIPE)
-    # assert 'error' not in str(result.stdout)
-    # assert pathlib.Path(output_file).exists()
-
-
 def test_extended_download(initialize_test_dir):
     """
     Check extended_download
     """
 
-    tags = ['BIDS']
+    tags = ['EEG_SUB2001']
     output_file = test_directory / 'cli_download_test.csv'
 
     result = subprocess.run(['ElabBridge', 'extended_download', output_file,
                              SERVER_CONFIG_YAML] + tags, stdout=subprocess.PIPE)
 
     assert 'error' not in str(result.stdout)
-    assert output_file.exists()
+    assert output_file.exists(), f"Output file {output_file} was not created"
     output_file.unlink()
diff --git a/elab_bridge/tests/test_server_interface.py b/elab_bridge/tests/test_server_interface.py
@@ -1,5 +1,5 @@
 from diglab_utils.test_utils import (test_directory, initialize_test_dir)
-from elab_bridge.server_interface import (download_experiment, upload_template, upload_experiment,
+from elab_bridge.server_interface import (upload_template, upload_experiment,
                                           delete_template, delete_experiment, extended_download)
 
 SERVER_CONFIG_YAML = (test_directory / 'testfiles_elab' / 'TestProject' / 'project.json').resolve()
@@ -31,34 +31,12 @@ def test_upload_experiment(initialize_test_dir):
     delete_experiment(server_config_json=SERVER_CONFIG_YAML, experiment_id=experiment_id)
 
 
-def test_download_experiment(initialize_test_dir):
-    json_file = test_directory / 'testfiles_elab' / 'downloaded_experiment.json'
-    upload_experiment_file = test_directory / 'testfiles_elab' / 'experiment.json'
-    upload, experiment_id = upload_experiment(server_config_json=SERVER_CONFIG_YAML,
-                                              experiment_file=upload_experiment_file,
-                                              experiment_title='UploadExperiment')
-
-    experiment = download_experiment(save_to=json_file,
-                                     server_config_json=SERVER_CONFIG_YAML,
-                                     experiment_id=experiment_id,
-                                     format='json')
-
-    assert len(upload) == len(experiment)
-    assert upload == experiment
-
-    assert json_file.exists()
-    assert 'extra_fields' in experiment
-
-    # cleanup
-    delete_experiment(server_config_json=SERVER_CONFIG_YAML, experiment_id=experiment_id)
-    json_file.unlink()
-
-
 def test_extended_download(initialize_test_dir):
     json_file = test_directory / 'testfiles_elab' / 'downloaded_multiple_experiment.json'
 
     experiment = extended_download(save_to=json_file, server_config_json=SERVER_CONFIG_YAML,
-                                   experiment_tags=['BIDS'], format='csv')
+                                   experiment_tags=['EEG_SUB2001'], format='csv',
+                                   experiment_axis='columns')
 
     assert json_file.exists()
     for exp in experiment:

diff --git a/redcap_bridge/tests/test_server_interface.py b/redcap_bridge/tests/test_server_interface.py
@@ -72,6 +72,7 @@ def test_upload_records(clean_server, initialize_test_dir):
     assert res == 2
 
 
+@pytest.mark.skip('RedCap not up to date')
 def test_download_records(clean_server, initialize_test_dir):
     """
     Download datadict from server and compare to previously uploaded datadict