diff --git a/.gitignore b/.gitignore
index de6f411..f683509 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,8 @@ app/__pycache__
 build/
 .pytest_cache
 __version__.py
+*.csv
+tests/*
 
 # Node artifact files
 node_modules/
diff --git a/README.md b/README.md
index c982b70..ffa8da1 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,11 @@
 Options:
 
 - `-t, --token`: The user token for authentication.
 
-  - When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
+  - When not specified in the command, the upload will not proceed.
+
+- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.
+
+  - When not specified in the command, the upload will not proceed.
 
 - `-p, --private / --public`: Whether datasets are private.
@@ -148,7 +152,11 @@
 Options:
 
 - `-t, --token`: The user token for authentication.
 
-  - When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
+  - When not specified in the command, the upload will not proceed.
+
+- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.
+
+  - When not specified in the command, the upload will not proceed.
 
 - `-p, --private / --public`: Whether datasets are private.
@@ -259,7 +267,11 @@
 Options:
 
 - `-t, --token`: The user token for authentication.
 
-  - When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
+  - When not specified in the command, the upload will not proceed.
+
+- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.
+
+  - When not specified in the command, the upload will not proceed.
 
 - `-p, --private / --public`: Whether datasets are private.
@@ -345,7 +357,7 @@
 An example of privately uploading `./testimage` as dataset **test1** and `C:\tmp\testimage` as **test2** with metadata file in `./sample_metadata.json` (see next section), generating the output csv files in the current folder, and triggering the orthomosaic process when uploading is finished:
 
 ```bash
-geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o
+geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o -t ********* -w ****
 ```
 
 The metadata specified in the json file will override the global settings, e.g. `is_private`.
@@ -507,6 +519,8 @@ PS C:\Users\uqtlan> geonadir-cli range-dataset -- 24 -34 29 -27
 
 Usage: `geonadir-cli get-dataset-info `
 
+If the dataset is not available on FAIRGEO, the user can provide a token via `-t` to check metadata that they have access to.
+
 sample usage and output:
 
 ```bash
diff --git a/pyproject.toml b/pyproject.toml
index 27a64e3..bf794eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ requires-python = ">=3.8.0" # >= 3.6.0
 license = {text = "MIT"} # Apache 2.0 ?
 authors = [
     { name = "T Lan", email = "t.lan@uq.edu.au" },
+    { name = "Joan Li", email = "joan@geonadir.com" },
 ]
 keywords=[]
 dependencies = [
@@ -34,7 +35,7 @@ classifiers = [
 dynamic = ["version"]
 
 [project.urls]
-homepage = "https://github.com/ternaustralia/geonadir-upload-cli"
+homepage = "https://github.com/geonadir/geonadir-upload-cli"
 
 [project.scripts]
 geonadir-cli = "geonadir_upload_cli.cli:cli"
diff --git a/src/geonadir_upload_cli/cli.py b/src/geonadir_upload_cli/cli.py
index 903ceb7..e2e5892 100644
--- a/src/geonadir_upload_cli/cli.py
+++ b/src/geonadir_upload_cli/cli.py
@@ -61,6 +61,7 @@ def cli():
 )
 @click.password_option(
     "--token", "-t",
+    required=True,
     help="User token for authentication.",
 )
 @click.option(
@@ -135,6 +136,12 @@ def cli():
     help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \
         Leave default or set 0 to skip dataset existence check and upload to new dataset instead."
 )
+@click.option(
+    "--workspace-id", "-w",
+    required=True,
+    type=click.IntRange(0, max_open=True),
+    help="ID of the workspace that the dataset will be uploaded to."
+)
 def local_upload(**kwargs):
     """upload local images
     """
@@ -157,7 +164,8 @@ def local_upload(**kwargs):
     help="Base url of geonadir api.",
 )
 @click.password_option(
-    "--token", "-t", 
+    "--token", "-t",
+    required=True,
     help="User token for authentication.",
 )
 @click.option(
@@ -267,6 +275,12 @@
     help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \
         Leave default or set 0 to skip dataset existence check and upload to new dataset instead."
 )
+@click.option(
+    "--workspace-id", "-w",
+    required=True,
+    type=click.IntRange(0, max_open=True),
+    help="ID of the workspace that the dataset will be uploaded to."
+)
 def collection_upload(**kwargs):
     """upload dataset from valid STAC collection object
     """
@@ -290,6 +304,7 @@
 )
 @click.password_option(
     "--token", "-t",
+    required=True,
     help="User token for authentication.",
 )
 @click.option(
@@ -400,6 +415,12 @@
     required=False,
     help="Retry interval second for uploading single image.",
 )
+@click.option(
+    "--workspace-id", "-w",
+    required=True,
+    type=click.IntRange(0, max_open=True),
+    help="ID of the workspace that the dataset will be uploaded to."
+)
 def catalog_upload(**kwargs):
     """upload dataset from valid STAC catalog object
     """
@@ -498,6 +519,12 @@ def range_dataset(**kwargs):
     help="Whether output csv is created. Generate output at the specified path. Default is false. \
         If flagged without specifying output folder, default is the current path of your terminal.",
 )
+@click.option(
+    "--token", "-t",
+    required=False,
+    default="",
+    help="Token for authentication if the user wants to check a non-FAIRGEO dataset.",
+)
 @click.argument('project-id')
 def get_dataset_info(**kwargs):
     """get metadata of dataset given dataset id
     """
@@ -505,7 +532,10 @@ def get_dataset_info(**kwargs):
     base_url = kwargs.get("base_url")
     project_id = kwargs.get("project_id")
     output = kwargs.get("output_folder", None)
-    result = dataset_info(project_id, base_url)
+    token = kwargs.get("token")
+    token = "Token " + token if token else ""
+    logger.debug(f"token: {token}")
+    result = dataset_info(project_id, base_url, token)
     print(json.dumps(result, indent=4))
     if output:
         path = os.path.join(output, "data.json")
diff --git a/src/geonadir_upload_cli/dataset.py b/src/geonadir_upload_cli/dataset.py
index f07e14d..02f4907 100644
--- a/src/geonadir_upload_cli/dataset.py
+++ b/src/geonadir_upload_cli/dataset.py
@@ -63,7 +63,7 @@ def create_dataset(payload_data, base_url, token):
     return dataset_id
 
 
-def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry, retry_interval, timeout):
+def upload_images(dataset_name, dataset_id, workspace_id, img_dir, base_url, token, max_retry, retry_interval, timeout):
     """
     Upload images from a directory to a dataset.
 
@@ -122,7 +122,8 @@ def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry,
         upload_time = end_time - start_time
         df = pd.DataFrame(
             {
-                "Project ID": dataset_id,
+                "Workspace ID": workspace_id,
+                "Dataset ID": dataset_id,
                 "Dataset Name": dataset_name,
                 "Image Name": file_path,
                 "Response Code": response_code,
@@ -144,6 +145,7 @@
 def upload_images_from_collection(
     dataset_name,
     dataset_id,
+    workspace_id,
     collection,
     base_url,
     token,
@@ -221,7 +223,8 @@ def upload_images_from_collection(
         upload_time = end_time - start_time
         df = pd.DataFrame(
             {
-                "Project ID": dataset_id,
+                "Workspace ID": workspace_id,
+                "Dataset ID": dataset_id,
                 "Dataset Name": dataset_name,
                 "Image Name": file_path,
                 "Response Code": response_code,
@@ -344,7 +347,7 @@ def search_datasets(search_str, base_url):
     return response.json()
 
 
-def dataset_info(project_id, base_url):
+def dataset_info(project_id, base_url, token):
     """show dataset info of given id. return 'Metadata not found' if not found.
     sample output: {
@@ -397,6 +400,9 @@ def dataset_info(project_id, base_url):
     """
     logger.info(f"getting GN dataset info for {project_id}")
     logger.debug(f"url: {base_url}/api/metadata/")
+    headers = {}
+    if token:
+        headers["Authorization"] = token
     payload = {
         "project_id": project_id
     }
@@ -404,6 +410,7 @@ def dataset_info(project_id, base_url):
 
     response = requests.get(
         f"{base_url}/api/metadata/",
+        headers=headers,
         params=payload,
         timeout=180,
     )
diff --git a/src/geonadir_upload_cli/parallel.py b/src/geonadir_upload_cli/parallel.py
index f032ae1..509229e 100644
--- a/src/geonadir_upload_cli/parallel.py
+++ b/src/geonadir_upload_cli/parallel.py
@@ -21,6 +21,7 @@
 def process_thread(
     dataset_id,
+    workspace_id,
     dataset_name,
     img_dir,
     base_url,
@@ -74,7 +75,8 @@
     payload_data = {
         "dataset_name": dataset_name,
         "is_private": private,
-        "is_published": True
+        "is_published": True,
+        "workspace": workspace_id
     }
 
     # retrieve metadata from STAC collection if applicable
@@ -141,6 +143,7 @@ def process_thread(
         result_df = upload_images_from_collection(
             dataset_name,
             dataset_id,
+            workspace_id,
             img_dir,
             base_url,
             token,
@@ -153,6 +156,7 @@ def process_thread(
         result_df = upload_images(
             dataset_name,
             dataset_id,
+            workspace_id,
             img_dir,
             base_url,
             token,
diff --git a/src/geonadir_upload_cli/upload.py b/src/geonadir_upload_cli/upload.py
index 5da2351..d5ea23c 100644
--- a/src/geonadir_upload_cli/upload.py
+++ b/src/geonadir_upload_cli/upload.py
@@ -55,10 +55,13 @@ def normal_upload(**kwargs):
     retry_interval = kwargs.get("retry_interval")
     timeout = kwargs.get("timeout")
     dataset_id = kwargs.get("dataset_id")
+    workspace_id = kwargs.get("workspace_id")
     existing_dataset_name = ""
+    token = "Token " + token
+    logger.debug(f"token: {token}")
     if dataset_id:
         logger.debug(f"searching for metadata of dataset {dataset_id}")
-        result = dataset_info(dataset_id, base_url)
+        result = dataset_info(dataset_id, base_url, token)
         if result == "Metadata not found":
             raise Exception(f"Dataset id {dataset_id} invalid.")
         logger.info(f"Upload to existing dataset id: {dataset_id}")
@@ -79,6 +82,7 @@
     logger.info(f"max_retry: {max_retry} times")
     logger.info(f"retry_interval: {retry_interval} sec")
     logger.info(f"timeout: {timeout} sec")
+    logger.info(f"workspace: {workspace_id}")
     for count, i in enumerate(item):
         logger.info(f"--item {count + 1}:")
         dataset_name, image_location = i
@@ -104,8 +108,7 @@
         return
 
     logger.info(base_url)
-    token = "Token " + token
-    logger.debug(f"token: {token}")
+
     if metadata_json:
         with open(metadata_json) as f:
             metadata = json.load(f)
@@ -136,6 +139,7 @@
         dataset_details.append(
             (
                 dataset_id,
+                workspace_id,
                 dataset_name,
                 image_location,
                 base_url,
@@ -178,10 +182,13 @@ def upload_from_collection(**kwargs):
     retry_interval = kwargs.get("retry_interval")
     timeout = kwargs.get("timeout")
     dataset_id = kwargs.get("dataset_id")
+    workspace_id = kwargs.get("workspace_id")
     existing_dataset_name = ""
+    token = "Token " + token
+    logger.debug(f"token: {token}")
     if dataset_id:
         logger.debug(f"searching for metadata of dataset {dataset_id}")
-        result = dataset_info(dataset_id, base_url)
+        result = dataset_info(dataset_id, base_url, token)
         if result == "Metadata not found":
             raise Exception(f"Dataset id {dataset_id} invalid.")
         logger.info(f"Upload to existing dataset id: {dataset_id}")
@@ -207,6 +214,7 @@
     logger.info(f"max_retry: {max_retry} times")
     logger.info(f"retry_interval: {retry_interval} sec")
logger.info(f"timeout: {timeout} sec") + logger.info(f"workspace: {workspace_id}") if exclude: logger.info(f"excluding keywords: {str(exclude)}") if include: @@ -267,7 +275,6 @@ def upload_from_collection(**kwargs): return logger.info(base_url) - token = "Token " + token if not dataset_id: if metadata_json: with open(metadata_json) as f: @@ -317,6 +324,7 @@ def upload_from_collection(**kwargs): dataset_details.append( ( dataset_id, + workspace_id, dataset_name, image_location, base_url,