From e1d67761ff75eec2525be5c42a67548e290d112d Mon Sep 17 00:00:00 2001 From: joanlyq Date: Thu, 22 Aug 2024 18:33:42 +1000 Subject: [PATCH 1/7] updating to accommodate workspace implementation --- src/geonadir_upload_cli/cli.py | 23 ++++++++++++++++++++++- src/geonadir_upload_cli/dataset.py | 9 ++++++--- src/geonadir_upload_cli/parallel.py | 6 +++++- src/geonadir_upload_cli/upload.py | 6 ++++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/geonadir_upload_cli/cli.py b/src/geonadir_upload_cli/cli.py index 903ceb7..38e1c68 100644 --- a/src/geonadir_upload_cli/cli.py +++ b/src/geonadir_upload_cli/cli.py @@ -61,6 +61,7 @@ def cli(): ) @click.password_option( "--token", "-t", + required=True, help="User token for authentication.", ) @click.option( @@ -135,6 +136,12 @@ def cli(): help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \ Leave default or set 0 to skip dataset existence check and upload to new dataset insetad." ) +@click.option( + "--workspace-id", "-w", + required=True, + type=click.IntRange(0, max_open=True), + help="Please enter the workspace you'd like to upload to" +) def local_upload(**kwargs): """upload local images """ @@ -157,7 +164,8 @@ def local_upload(**kwargs): help="Base url of geonadir api.", ) @click.password_option( - "--token", "-t", + "--token", "-t", + required=True, help="User token for authentication.", ) @click.option( @@ -267,6 +275,12 @@ def local_upload(**kwargs): help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \ Leave default or set 0 to skip dataset existence check and upload to new dataset insetad." 
) +@click.option( + "--workspace-id", "-w", + required=True, + type=click.IntRange(0, max_open=True), + help="Please enter the workspace you'd like to upload to" +) def collection_upload(**kwargs): """upload dataset from valid STAC collection object """ @@ -290,6 +304,7 @@ def collection_upload(**kwargs): ) @click.password_option( "--token", "-t", + required=True, help="User token for authentication.", ) @click.option( @@ -400,6 +415,12 @@ def collection_upload(**kwargs): required=False, help="Retry interval second for uploading single image.", ) +@click.option( + "--workspace-id", "-w", + required=True, + type=click.IntRange(0, max_open=True), + help="Please enter the workspace you'd like to upload to" +) def catalog_upload(**kwargs): """upload dataset from valid STAC catalog object """ diff --git a/src/geonadir_upload_cli/dataset.py b/src/geonadir_upload_cli/dataset.py index f07e14d..1d11380 100644 --- a/src/geonadir_upload_cli/dataset.py +++ b/src/geonadir_upload_cli/dataset.py @@ -63,7 +63,7 @@ def create_dataset(payload_data, base_url, token): return dataset_id -def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry, retry_interval, timeout): +def upload_images(dataset_name, dataset_id, workspace_id, img_dir, base_url, token, max_retry, retry_interval, timeout): """ Upload images from a directory to a dataset. 
@@ -122,7 +122,8 @@ def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry, upload_time = end_time - start_time df = pd.DataFrame( { - "Project ID": dataset_id, + "Workspace ID": workspace_id, + "Dataset ID": dataset_id, "Dataset Name": dataset_name, "Image Name": file_path, "Response Code": response_code, @@ -144,6 +145,7 @@ def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry, def upload_images_from_collection( dataset_name, dataset_id, + workspace_id, collection, base_url, token, @@ -221,7 +223,8 @@ def upload_images_from_collection( upload_time = end_time - start_time df = pd.DataFrame( { - "Project ID": dataset_id, + "Workspace ID": workspace_id, + "Dataset ID": dataset_id, "Dataset Name": dataset_name, "Image Name": file_path, "Response Code": response_code, diff --git a/src/geonadir_upload_cli/parallel.py b/src/geonadir_upload_cli/parallel.py index f032ae1..509229e 100644 --- a/src/geonadir_upload_cli/parallel.py +++ b/src/geonadir_upload_cli/parallel.py @@ -21,6 +21,7 @@ def process_thread( dataset_id, + workspace_id, dataset_name, img_dir, base_url, @@ -74,7 +75,8 @@ def process_thread( payload_data = { "dataset_name": dataset_name, "is_private": private, - "is_published": True + "is_published": True, + "workspace": workspace_id } # retrieve metadata from STAC collection if applicable @@ -141,6 +143,7 @@ def process_thread( result_df = upload_images_from_collection( dataset_name, dataset_id, + workspace_id, img_dir, base_url, token, @@ -153,6 +156,7 @@ def process_thread( result_df = upload_images( dataset_name, dataset_id, + workspace_id, img_dir, base_url, token, diff --git a/src/geonadir_upload_cli/upload.py b/src/geonadir_upload_cli/upload.py index 5da2351..39eb8fd 100644 --- a/src/geonadir_upload_cli/upload.py +++ b/src/geonadir_upload_cli/upload.py @@ -55,6 +55,7 @@ def normal_upload(**kwargs): retry_interval = kwargs.get("retry_interval") timeout = kwargs.get("timeout") dataset_id = 
kwargs.get("dataset_id") + workspace_id = kwargs.get("workspace_id") existing_dataset_name = "" if dataset_id: logger.debug(f"searching for metadata of dataset {dataset_id}") @@ -79,6 +80,7 @@ def normal_upload(**kwargs): logger.info(f"max_retry: {max_retry} times") logger.info(f"retry_interval: {retry_interval} sec") logger.info(f"timeout: {timeout} sec") + logger.info(f"workspace: {workspace_id}") for count, i in enumerate(item): logger.info(f"--item {count + 1}:") dataset_name, image_location = i @@ -136,6 +138,7 @@ def normal_upload(**kwargs): dataset_details.append( ( dataset_id, + workspace_id, dataset_name, image_location, base_url, @@ -178,6 +181,7 @@ def upload_from_collection(**kwargs): retry_interval = kwargs.get("retry_interval") timeout = kwargs.get("timeout") dataset_id = kwargs.get("dataset_id") + workspace_id = kwargs.get("workspace_id") existing_dataset_name = "" if dataset_id: logger.debug(f"searching for metadata of dataset {dataset_id}") @@ -207,6 +211,7 @@ def upload_from_collection(**kwargs): logger.info(f"max_retry: {max_retry} times") logger.info(f"retry_interval: {retry_interval} sec") logger.info(f"timeout: {timeout} sec") + logger.info(f"workspace: {workspace_id}") if exclude: logger.info(f"excluding keywords: {str(exclude)}") if include: @@ -317,6 +322,7 @@ def upload_from_collection(**kwargs): dataset_details.append( ( dataset_id, + workspace_id, dataset_name, image_location, base_url, From 011ca858d4d09bcb9743c6a6a2e7826fcdfbcbc7 Mon Sep 17 00:00:00 2001 From: joanlyq Date: Thu, 22 Aug 2024 18:46:57 +1000 Subject: [PATCH 2/7] update README instructions --- .gitignore | 1 + README.md | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index de6f411..3679738 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ app/__pycache__ build/ .pytest_cache __version__.py +*.csv # Node artifact files node_modules/ diff --git a/README.md b/README.md index c982b70..05900a9 100644 
--- a/README.md +++ b/README.md @@ -56,7 +56,11 @@ Options: - `-t, --token`: The user token for authentication. - - When not specified in command, there will be a password prompt for it. (recommended for security’s sake) + - When not specified in command, the upload will not proceed. + +- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to. + + - When not specified in command, the upload will not proceed. - `-p, --private / --public`: Whether datasets are private. @@ -148,7 +152,11 @@ Options: - `-t, --token`: The user token for authentication. - - When not specified in command, there will be a password prompt for it. (recommended for security’s sake) + - When not specified in command, the upload will not proceed. + +- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to. + + - When not specified in command, the upload will not proceed. - `-p, --private / --public`: Whether datasets are private. @@ -259,7 +267,11 @@ Options: - `-t, --token`: The user token for authentication. - - When not specified in command, there will be a password prompt for it. (recommended for security’s sake) + - When not specified in command, the upload will not proceed. + +- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to. + + - When not specified in command, the upload will not proceed. - `-p, --private / --public`: Whether datasets are private. 
From ed24f7e7c1d37f3aa866041698b76a2cc5f41109 Mon Sep 17 00:00:00 2001 From: joanlyq Date: Thu, 22 Aug 2024 18:54:02 +1000 Subject: [PATCH 3/7] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 05900a9..beaeef1 100644 --- a/README.md +++ b/README.md @@ -357,7 +357,7 @@ Options: An example of privately uploading `./testimage` as dataset **test1** and `C:\tmp\testimage` as **test2** with metadata file in `./sample_metadata.json` (see next section), generating the output csv files in the current folder, and trigger the orthomosaic process when uploading is finished: ```bash -geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o +geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o -t ********* -w **** ``` The metadata specified in the json file will override the global settings, e.g. `is_private`. From 89b63533dd225baccdd306a96219736a779decd7 Mon Sep 17 00:00:00 2001 From: joanlyq Date: Mon, 26 Aug 2024 17:10:58 +1000 Subject: [PATCH 4/7] Allow checking non FARGEO metadata with a token --- .gitignore | 1 + src/geonadir_upload_cli/cli.py | 10 +++++++++- src/geonadir_upload_cli/dataset.py | 6 +++++- src/geonadir_upload_cli/upload.py | 12 +++++++----- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 3679738..7551630 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ build/ .pytest_cache __version__.py *.csv +tests/api_request_test.py # Node artifact files node_modules/ diff --git a/src/geonadir_upload_cli/cli.py b/src/geonadir_upload_cli/cli.py index 38e1c68..763065e 100644 --- a/src/geonadir_upload_cli/cli.py +++ b/src/geonadir_upload_cli/cli.py @@ -519,6 +519,11 @@ def range_dataset(**kwargs): help="Whether output csv is created. Generate output at the specified path. Default is false. 
\ If flagged without specifing output folder, default is the current path of your terminal.", ) +@click.password_option( + "--token", "-t", + required=True, + help="Token for authentication if user want to check the non-FAIRGeo dataset.", +) @click.argument('project-id') def get_dataset_info(**kwargs): """get metadata of dataset given dataset id @@ -526,7 +531,10 @@ def get_dataset_info(**kwargs): base_url = kwargs.get("base_url") project_id = kwargs.get("project_id") output = kwargs.get("output_folder", None) - result = dataset_info(project_id, base_url) + token = kwargs.get("token") + token = "Token " + token + logger.debug(f"token: {token}") + result = dataset_info(project_id, base_url, token) print(json.dumps(result, indent=4)) if output: path = os.path.join(output, "data.json") diff --git a/src/geonadir_upload_cli/dataset.py b/src/geonadir_upload_cli/dataset.py index 1d11380..02f4907 100644 --- a/src/geonadir_upload_cli/dataset.py +++ b/src/geonadir_upload_cli/dataset.py @@ -347,7 +347,7 @@ def search_datasets(search_str, base_url): return response.json() -def dataset_info(project_id, base_url): +def dataset_info(project_id, base_url, token): """show dataset info of given id. return 'Metadata not found' if not found. 
sample output: { @@ -400,6 +400,9 @@ def dataset_info(project_id, base_url): """ logger.info(f"getting GN dataset info for {project_id}") logger.debug(f"url: {base_url}/api/metadata/") + headers = { + "Authorization": token + } payload = { "project_id": project_id } @@ -407,6 +410,7 @@ def dataset_info(project_id, base_url): response = requests.get( f"{base_url}/api/metadata/", + headers=headers, params=payload, timeout=180, ) diff --git a/src/geonadir_upload_cli/upload.py b/src/geonadir_upload_cli/upload.py index 39eb8fd..d5ea23c 100644 --- a/src/geonadir_upload_cli/upload.py +++ b/src/geonadir_upload_cli/upload.py @@ -57,9 +57,11 @@ def normal_upload(**kwargs): dataset_id = kwargs.get("dataset_id") workspace_id = kwargs.get("workspace_id") existing_dataset_name = "" + token = "Token " + token + logger.debug(f"token: {token}") if dataset_id: logger.debug(f"searching for metadata of dataset {dataset_id}") - result = dataset_info(dataset_id, base_url) + result = dataset_info(dataset_id, base_url, token) if result == "Metadata not found": raise Exception(f"Dataset id {dataset_id} invalid.") logger.info(f"Upload to existing dataset id: {dataset_id}") @@ -106,8 +108,7 @@ def normal_upload(**kwargs): return logger.info(base_url) - token = "Token " + token - logger.debug(f"token: {token}") + if metadata_json: with open(metadata_json) as f: metadata = json.load(f) @@ -183,9 +184,11 @@ def upload_from_collection(**kwargs): dataset_id = kwargs.get("dataset_id") workspace_id = kwargs.get("workspace_id") existing_dataset_name = "" + token = "Token " + token + logger.debug(f"token: {token}") if dataset_id: logger.debug(f"searching for metadata of dataset {dataset_id}") - result = dataset_info(dataset_id, base_url) + result = dataset_info(dataset_id, base_url, token) if result == "Metadata not found": raise Exception(f"Dataset id {dataset_id} invalid.") logger.info(f"Upload to existing dataset id: {dataset_id}") @@ -272,7 +275,6 @@ def upload_from_collection(**kwargs): return 
logger.info(base_url) - token = "Token " + token if not dataset_id: if metadata_json: with open(metadata_json) as f: From e52309c83c6ee63a573f432e5f12c383766b922e Mon Sep 17 00:00:00 2001 From: joanlyq Date: Mon, 26 Aug 2024 17:11:26 +1000 Subject: [PATCH 5/7] update README --- README.md | 2 ++ src/geonadir_upload_cli/cli.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index beaeef1..ffa8da1 100644 --- a/README.md +++ b/README.md @@ -519,6 +519,8 @@ PS C:\Users\uqtlan> geonadir-cli range-dataset -- 24 -34 29 -27 Usage: `geonadir-cli get-dataset-info ` +If the dataset is not available on FAIRGEO, user can provide a `-t ` to check metadata that they have access to. + sample usage and output: ```bash diff --git a/src/geonadir_upload_cli/cli.py b/src/geonadir_upload_cli/cli.py index 763065e..e2e5892 100644 --- a/src/geonadir_upload_cli/cli.py +++ b/src/geonadir_upload_cli/cli.py @@ -519,9 +519,10 @@ def range_dataset(**kwargs): help="Whether output csv is created. Generate output at the specified path. Default is false. 
\ If flagged without specifing output folder, default is the current path of your terminal.", ) -@click.password_option( +@click.option( "--token", "-t", - required=True, + required=False, + default="", help="Token for authentication if user want to check the non-FAIRGeo dataset.", ) @click.argument('project-id') From f1b27c41ab4b3d6771c3441893f4d98db0daa004 Mon Sep 17 00:00:00 2001 From: joanlyq Date: Wed, 25 Sep 2024 07:19:08 +1000 Subject: [PATCH 6/7] update gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7551630..f683509 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ build/ .pytest_cache __version__.py *.csv -tests/api_request_test.py +tests/* # Node artifact files node_modules/ From 4d716f95cb83e709a570150eb95f60ce7ff58ec0 Mon Sep 17 00:00:00 2001 From: joanlyq Date: Wed, 25 Sep 2024 07:52:06 +1000 Subject: [PATCH 7/7] update pyproject --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 27a64e3..bf794eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ requires-python = ">=3.8.0" # >= 3.6.0 license = {text = "MIT"} # Apache 2.0 ? authors = [ { name = "T Lan", email = "t.lan@uq.edu.au" }, + { name = "Joan Li", email = "joan@geonadir.com" }, ] keywords=[] dependencies = [ @@ -34,7 +35,7 @@ classifiers = [ dynamic = ["version"] [project.urls] -homepage = "https://github.com/ternaustralia/geonadir-upload-cli" +homepage = "https://github.com/geonadir/geonadir-upload-cli" [project.scripts] geonadir-cli = "geonadir_upload_cli.cli:cli"