Merge pull request #1 from Geonadir/workspace
adding flag to point to specific workspace
joanlyq authored Sep 24, 2024
2 parents 33ab005 + 4d716f9 commit ea56908
Showing 7 changed files with 83 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -13,6 +13,8 @@ app/__pycache__
build/
.pytest_cache
__version__.py
*.csv
tests/*

# Node artifact files
node_modules/
22 changes: 18 additions & 4 deletions README.md
@@ -56,7 +56,11 @@ Options:

- `-t, --token`: The user token for authentication.

- When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
- When not specified in the command, the upload will not proceed.

- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.

- When not specified in the command, the upload will not proceed.

- `-p, --private / --public`: Whether datasets are private.

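With `-t` and `-w` both effectively required now, a minimal invocation (e.g. for `local-upload`) might look like the following sketch, where the dataset name, image folder, token, and workspace ID are all placeholder values:

```bash
geonadir-cli local-upload -i mydataset ./images -t <YOUR_TOKEN> -w 1234
```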
@@ -148,7 +152,11 @@ Options:

- `-t, --token`: The user token for authentication.

- When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
- When not specified in the command, the upload will not proceed.

- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.

- When not specified in the command, the upload will not proceed.

- `-p, --private / --public`: Whether datasets are private.

@@ -259,7 +267,11 @@ Options:

- `-t, --token`: The user token for authentication.

- When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
- When not specified in the command, the upload will not proceed.

- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.

- When not specified in the command, the upload will not proceed.

- `-p, --private / --public`: Whether datasets are private.

@@ -345,7 +357,7 @@ Options:
An example of privately uploading `./testimage` as dataset **test1** and `C:\tmp\testimage` as **test2**, with the metadata file `./sample_metadata.json` (see next section), generating the output csv files in the current folder, and triggering the orthomosaic process when uploading is finished:

```bash
geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o
geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o -t ********* -w ****
```

The metadata specified in the JSON file will override the global settings, e.g. `is_private`.
@@ -507,6 +519,8 @@ PS C:\Users\uqtlan> geonadir-cli range-dataset -- 24 -34 29 -27

Usage: `geonadir-cli get-dataset-info <DATASET_ID>`

If the dataset is not available on FAIRGeo, the user can provide `-t <TOKEN>` to check metadata that they have access to.

sample usage and output:

```bash
# placeholder dataset id and token; the sample output is omitted in this diff view
geonadir-cli get-dataset-info 1234 -t <YOUR_TOKEN>
```
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -15,6 +15,7 @@ requires-python = ">=3.8.0" # >= 3.6.0
license = {text = "MIT"} # Apache 2.0 ?
authors = [
{ name = "T Lan", email = "[email protected]" },
{ name = "Joan Li", email = "[email protected]" },
]
keywords=[]
dependencies = [
@@ -34,7 +35,7 @@ classifiers = [
dynamic = ["version"]

[project.urls]
homepage = "https://github.com/ternaustralia/geonadir-upload-cli"
homepage = "https://github.com/geonadir/geonadir-upload-cli"

[project.scripts]
geonadir-cli = "geonadir_upload_cli.cli:cli"
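Since `[project.scripts]` maps the `geonadir-cli` command to the click entry point, the CLI lands on the PATH at install time. A quick check (assuming the package is published under the same name as the repo):

```bash
pip install geonadir-upload-cli   # package name assumed from the repo name
geonadir-cli --help               # lists local-upload, collection-upload, catalog-upload, ...
```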
34 changes: 32 additions & 2 deletions src/geonadir_upload_cli/cli.py
@@ -61,6 +61,7 @@ def cli():
)
@click.password_option(
"--token", "-t",
required=True,
help="User token for authentication.",
)
@click.option(
@@ -135,6 +136,12 @@ def cli():
help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \
Leave default or set 0 to skip dataset existence check and upload to a new dataset instead."
)
@click.option(
"--workspace-id", "-w",
required=True,
type=click.IntRange(0, max_open=True),
help="Please enter the workspace you'd like to upload to"
)
def local_upload(**kwargs):
"""upload local images
"""
@@ -157,7 +164,8 @@
help="Base url of geonadir api.",
)
@click.password_option(
"--token", "-t",
"--token", "-t",
required=True,
help="User token for authentication.",
)
@click.option(
@@ -267,6 +275,12 @@ def local_upload(**kwargs):
help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \
Leave default or set 0 to skip dataset existence check and upload to a new dataset instead."
)
@click.option(
"--workspace-id", "-w",
required=True,
type=click.IntRange(0, max_open=True),
help="Please enter the workspace you'd like to upload to"
)
def collection_upload(**kwargs):
"""upload dataset from valid STAC collection object
"""
@@ -290,6 +304,7 @@
)
@click.password_option(
"--token", "-t",
required=True,
help="User token for authentication.",
)
@click.option(
Expand Down Expand Up @@ -400,6 +415,12 @@ def collection_upload(**kwargs):
required=False,
help="Retry interval second for uploading single image.",
)
@click.option(
"--workspace-id", "-w",
required=True,
type=click.IntRange(0, max_open=True),
help="Please enter the workspace you'd like to upload to"
)
def catalog_upload(**kwargs):
"""upload dataset from valid STAC catalog object
"""
@@ -498,14 +519,23 @@ def range_dataset(**kwargs):
help="Whether output csv is created. Generate output at the specified path. Default is false. \
If flagged without specifying an output folder, the default is the current path of your terminal.",
)
@click.option(
"--token", "-t",
required=False,
default="",
help="Token for authentication if user want to check the non-FAIRGeo dataset.",
)
@click.argument('project-id')
def get_dataset_info(**kwargs):
"""get metadata of dataset given dataset id
"""
base_url = kwargs.get("base_url")
project_id = kwargs.get("project_id")
output = kwargs.get("output_folder", None)
result = dataset_info(project_id, base_url)
token = kwargs.get("token")
token = "Token " + token
logger.debug(f"token: {token}")
result = dataset_info(project_id, base_url, token)
print(json.dumps(result, indent=4))
if output:
path = os.path.join(output, "data.json")
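The same required `--workspace-id` option is added to `local-upload`, `collection-upload`, and `catalog-upload`. A self-contained sketch of the pattern (simplified to `IntRange(min=0)`; the command body is illustrative only):

```python
import click

@click.command()
@click.option(
    "--workspace-id", "-w",
    required=True,               # upload cannot proceed without it
    type=click.IntRange(min=0),  # non-negative integer workspace id
    help="The ID of the workspace to upload to.",
)
def demo_upload(workspace_id):
    """Echo the workspace a dataset would be uploaded to."""
    click.echo(f"would upload to workspace {workspace_id}")

if __name__ == "__main__":
    demo_upload()
```

If `-w` is omitted, click aborts with a "Missing option" error before any upload logic runs, which matches the README change above.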
15 changes: 11 additions & 4 deletions src/geonadir_upload_cli/dataset.py
@@ -63,7 +63,7 @@ def create_dataset(payload_data, base_url, token):
return dataset_id


def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry, retry_interval, timeout):
def upload_images(dataset_name, dataset_id, workspace_id, img_dir, base_url, token, max_retry, retry_interval, timeout):
"""
Upload images from a directory to a dataset.
@@ -122,7 +122,8 @@ def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry,
upload_time = end_time - start_time
df = pd.DataFrame(
{
"Project ID": dataset_id,
"Workspace ID": workspace_id,
"Dataset ID": dataset_id,
"Dataset Name": dataset_name,
"Image Name": file_path,
"Response Code": response_code,
@@ -144,6 +145,7 @@ def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry,
def upload_images_from_collection(
dataset_name,
dataset_id,
workspace_id,
collection,
base_url,
token,
@@ -221,7 +223,8 @@ def upload_images_from_collection(
upload_time = end_time - start_time
df = pd.DataFrame(
{
"Project ID": dataset_id,
"Workspace ID": workspace_id,
"Dataset ID": dataset_id,
"Dataset Name": dataset_name,
"Image Name": file_path,
"Response Code": response_code,
@@ -344,7 +347,7 @@ def search_datasets(search_str, base_url):
return response.json()


def dataset_info(project_id, base_url):
def dataset_info(project_id, base_url, token):
"""show dataset info of given id. return 'Metadata not found' if not found.
sample output:
{
@@ -397,13 +400,17 @@ def dataset_info(project_id, base_url):
"""
logger.info(f"getting GN dataset info for {project_id}")
logger.debug(f"url: {base_url}/api/metadata/")
headers = {
"Authorization": token
}
payload = {
"project_id": project_id
}
logger.debug(f"params: {payload}")

response = requests.get(
f"{base_url}/api/metadata/",
headers=headers,
params=payload,
timeout=180,
)
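Functionally, `dataset_info` now sends the same metadata request as before plus an `Authorization` header. A standalone sketch (the base URL is a placeholder assumption; the endpoint, params, and timeout come from the diff):

```python
import requests

base_url = "https://api.geonadir.com"  # placeholder; use your actual --base-url value
token = "Token " + "<YOUR_TOKEN>"      # the CLI prefixes the raw token with "Token "

response = requests.get(
    f"{base_url}/api/metadata/",
    headers={"Authorization": token},  # new: allows reading metadata the user has access to
    params={"project_id": 1234},       # placeholder dataset id
    timeout=180,
)
print(response.json())
```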
6 changes: 5 additions & 1 deletion src/geonadir_upload_cli/parallel.py
@@ -21,6 +21,7 @@

def process_thread(
dataset_id,
workspace_id,
dataset_name,
img_dir,
base_url,
@@ -74,7 +75,8 @@ def process_thread(
payload_data = {
"dataset_name": dataset_name,
"is_private": private,
"is_published": True
"is_published": True,
"workspace": workspace_id
}

# retrieve metadata from STAC collection if applicable
@@ -141,6 +143,7 @@ def process_thread(
result_df = upload_images_from_collection(
dataset_name,
dataset_id,
workspace_id,
img_dir,
base_url,
token,
@@ -153,6 +156,7 @@ def process_thread(
result_df = upload_images(
dataset_name,
dataset_id,
workspace_id,
img_dir,
base_url,
token,
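The upshot of the parallel.py change is that dataset creation now carries the workspace in its payload, alongside the existing name and privacy fields. A sketch of the assembled payload with placeholder values (field names are from the diff):

```python
# assembled in process_thread before the dataset is created
payload_data = {
    "dataset_name": "test1",  # from -i
    "is_private": True,       # from -p/--private
    "is_published": True,
    "workspace": 1234,        # new: from -w/--workspace-id
}
```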
18 changes: 13 additions & 5 deletions src/geonadir_upload_cli/upload.py
@@ -55,10 +55,13 @@ def normal_upload(**kwargs):
retry_interval = kwargs.get("retry_interval")
timeout = kwargs.get("timeout")
dataset_id = kwargs.get("dataset_id")
workspace_id = kwargs.get("workspace_id")
existing_dataset_name = ""
token = "Token " + token
logger.debug(f"token: {token}")
if dataset_id:
logger.debug(f"searching for metadata of dataset {dataset_id}")
result = dataset_info(dataset_id, base_url)
result = dataset_info(dataset_id, base_url, token)
if result == "Metadata not found":
raise Exception(f"Dataset id {dataset_id} invalid.")
logger.info(f"Upload to existing dataset id: {dataset_id}")
@@ -79,6 +82,7 @@ def normal_upload(**kwargs):
logger.info(f"max_retry: {max_retry} times")
logger.info(f"retry_interval: {retry_interval} sec")
logger.info(f"timeout: {timeout} sec")
logger.info(f"workspace: {workspace_id} sec")
for count, i in enumerate(item):
logger.info(f"--item {count + 1}:")
dataset_name, image_location = i
@@ -104,8 +108,7 @@
return

logger.info(base_url)
token = "Token " + token
logger.debug(f"token: {token}")

if metadata_json:
with open(metadata_json) as f:
metadata = json.load(f)
@@ -136,6 +139,7 @@
dataset_details.append(
(
dataset_id,
workspace_id,
dataset_name,
image_location,
base_url,
@@ -178,10 +182,13 @@ def upload_from_collection(**kwargs):
retry_interval = kwargs.get("retry_interval")
timeout = kwargs.get("timeout")
dataset_id = kwargs.get("dataset_id")
workspace_id = kwargs.get("workspace_id")
existing_dataset_name = ""
token = "Token " + token
logger.debug(f"token: {token}")
if dataset_id:
logger.debug(f"searching for metadata of dataset {dataset_id}")
result = dataset_info(dataset_id, base_url)
result = dataset_info(dataset_id, base_url, token)
if result == "Metadata not found":
raise Exception(f"Dataset id {dataset_id} invalid.")
logger.info(f"Upload to existing dataset id: {dataset_id}")
@@ -207,6 +214,7 @@ def upload_from_collection(**kwargs):
logger.info(f"max_retry: {max_retry} times")
logger.info(f"retry_interval: {retry_interval} sec")
logger.info(f"timeout: {timeout} sec")
logger.info(f"workspace: {workspace_id}")
if exclude:
logger.info(f"excluding keywords: {str(exclude)}")
if include:
@@ -267,7 +275,6 @@
return

logger.info(base_url)
token = "Token " + token
if not dataset_id:
if metadata_json:
with open(metadata_json) as f:
@@ -317,6 +324,7 @@
dataset_details.append(
(
dataset_id,
workspace_id,
dataset_name,
image_location,
base_url,
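Both upload paths now build the authorization string once, up front, so the existing-dataset check can authenticate. A condensed sketch of the reordered flow (names from the diff; the token value is a placeholder):

```python
token = "Token " + "<YOUR_TOKEN>"  # built before any API call now

if dataset_id:
    # the existing-dataset lookup is now an authorized request
    result = dataset_info(dataset_id, base_url, token)
    if result == "Metadata not found":
        raise Exception(f"Dataset id {dataset_id} invalid.")
```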
