Merge pull request #1 from Geonadir/workspace
adding flag to point to specific workspace
joanlyq authored Sep 24, 2024
2 parents 33ab005 + 4d716f9 commit ea56908
Showing 7 changed files with 83 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -13,6 +13,8 @@ app/__pycache__
build/
.pytest_cache
__version__.py
*.csv
tests/*

# Node artifact files
node_modules/
22 changes: 18 additions & 4 deletions README.md
@@ -56,7 +56,11 @@ Options:

- `-t, --token`: The user token for authentication.

- When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
- When not specified in the command, the upload will not proceed.

- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.

- When not specified in the command, the upload will not proceed.

- `-p, --private / --public`: Whether datasets are private.

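With `-t` and `-w` both effectively required now, a minimal invocation (e.g. for `local-upload`) might look like the following sketch, where the dataset name, image folder, token, and workspace ID are all placeholder values:

```bash
geonadir-cli local-upload -i mydataset ./images -t <YOUR_TOKEN> -w 1234
```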
@@ -148,7 +152,11 @@ Options:

- `-t, --token`: The user token for authentication.

- When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
- When not specified in the command, the upload will not proceed.

- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.

- When not specified in the command, the upload will not proceed.

- `-p, --private / --public`: Whether datasets are private.

@@ -259,7 +267,11 @@ Options:

- `-t, --token`: The user token for authentication.

- When not specified in command, there will be a password prompt for it. (recommended for security’s sake)
- When not specified in the command, the upload will not proceed.

- `-w, --workspace-id`: The workspace that the user would like the data to be uploaded to.

- When not specified in the command, the upload will not proceed.

- `-p, --private / --public`: Whether datasets are private.

@@ -345,7 +357,7 @@ Options:
An example of privately uploading `./testimage` as dataset **test1** and `C:\tmp\testimage` as **test2**, with the metadata file `./sample_metadata.json` (see next section), generating the output csv files in the current folder, and triggering the orthomosaic process when uploading is finished:

```bash
geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o
geonadir-cli local-upload -i test1 testimage -i test2 C:\tmp\testimage -p -m sample_metadata.json -o -t ********* -w ****
```

The metadata specified in the JSON file will override the global settings, e.g. `is_private`.
@@ -507,6 +519,8 @@ PS C:\Users\uqtlan> geonadir-cli range-dataset -- 24 -34 29 -27

Usage: `geonadir-cli get-dataset-info <DATASET_ID>`

If the dataset is not available on FAIRGeo, the user can provide `-t <TOKEN>` to check metadata that they have access to.

sample usage and output:

```bash
# placeholder dataset id and token; the sample output is omitted in this diff view
geonadir-cli get-dataset-info 1234 -t <YOUR_TOKEN>
```
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -15,6 +15,7 @@ requires-python = ">=3.8.0" # >= 3.6.0
license = {text = "MIT"} # Apache 2.0 ?
authors = [
{ name = "T Lan", email = "[email protected]" },
{ name = "Joan Li", email = "[email protected]" },
]
keywords=[]
dependencies = [
@@ -34,7 +35,7 @@ classifiers = [
dynamic = ["version"]

[project.urls]
homepage = "https://github.com/ternaustralia/geonadir-upload-cli"
homepage = "https://github.com/geonadir/geonadir-upload-cli"

[project.scripts]
geonadir-cli = "geonadir_upload_cli.cli:cli"
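Since `[project.scripts]` maps the `geonadir-cli` command to the click entry point, the CLI lands on the PATH at install time. A quick check (assuming the package is published under the same name as the repo):

```bash
pip install geonadir-upload-cli   # package name assumed from the repo name
geonadir-cli --help               # lists local-upload, collection-upload, catalog-upload, ...
```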
34 changes: 32 additions & 2 deletions src/geonadir_upload_cli/cli.py
@@ -61,6 +61,7 @@ def cli():
)
@click.password_option(
"--token", "-t",
required=True,
help="User token for authentication.",
)
@click.option(
@@ -135,6 +136,12 @@ def cli():
help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \
Leave default or set 0 to skip dataset existence check and upload to a new dataset instead."
)
@click.option(
"--workspace-id", "-w",
required=True,
type=click.IntRange(0, max_open=True),
help="Please enter the workspace you'd like to upload to"
)
def local_upload(**kwargs):
"""upload local images
"""
@@ -157,7 +164,8 @@
help="Base url of geonadir api.",
)
@click.password_option(
"--token", "-t",
"--token", "-t",
required=True,
help="User token for authentication.",
)
@click.option(
@@ -267,6 +275,12 @@ def local_upload(**kwargs):
help="Existing Geonadir dataset id to be uploaded to. Only works when dataset id is valid. \
Leave default or set 0 to skip dataset existence check and upload to a new dataset instead."
)
@click.option(
"--workspace-id", "-w",
required=True,
type=click.IntRange(0, max_open=True),
help="Please enter the workspace you'd like to upload to"
)
def collection_upload(**kwargs):
"""upload dataset from valid STAC collection object
"""
@@ -290,6 +304,7 @@
)
@click.password_option(
"--token", "-t",
required=True,
help="User token for authentication.",
)
@click.option(
Expand Down Expand Up @@ -400,6 +415,12 @@ def collection_upload(**kwargs):
required=False,
help="Retry interval second for uploading single image.",
)
@click.option(
"--workspace-id", "-w",
required=True,
type=click.IntRange(0, max_open=True),
help="Please enter the workspace you'd like to upload to"
)
def catalog_upload(**kwargs):
"""upload dataset from valid STAC catalog object
"""
@@ -498,14 +519,23 @@ def range_dataset(**kwargs):
help="Whether output csv is created. Generate output at the specified path. Default is false. \
If flagged without specifying an output folder, the default is the current path of your terminal.",
)
@click.option(
"--token", "-t",
required=False,
default="",
help="Token for authentication if user want to check the non-FAIRGeo dataset.",
)
@click.argument('project-id')
def get_dataset_info(**kwargs):
"""get metadata of dataset given dataset id
"""
base_url = kwargs.get("base_url")
project_id = kwargs.get("project_id")
output = kwargs.get("output_folder", None)
result = dataset_info(project_id, base_url)
token = kwargs.get("token")
token = "Token " + token
logger.debug(f"token: {token}")
result = dataset_info(project_id, base_url, token)
print(json.dumps(result, indent=4))
if output:
path = os.path.join(output, "data.json")
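The same required `--workspace-id` option is added to `local-upload`, `collection-upload`, and `catalog-upload`. A self-contained sketch of the pattern (simplified to `IntRange(min=0)`; the command body is illustrative only):

```python
import click

@click.command()
@click.option(
    "--workspace-id", "-w",
    required=True,               # upload cannot proceed without it
    type=click.IntRange(min=0),  # non-negative integer workspace id
    help="The ID of the workspace to upload to.",
)
def demo_upload(workspace_id):
    """Echo the workspace a dataset would be uploaded to."""
    click.echo(f"would upload to workspace {workspace_id}")

if __name__ == "__main__":
    demo_upload()
```

If `-w` is omitted, click aborts with a "Missing option" error before any upload logic runs, which matches the README change above.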
15 changes: 11 additions & 4 deletions src/geonadir_upload_cli/dataset.py
@@ -63,7 +63,7 @@ def create_dataset(payload_data, base_url, token):
return dataset_id


def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry, retry_interval, timeout):
def upload_images(dataset_name, dataset_id, workspace_id, img_dir, base_url, token, max_retry, retry_interval, timeout):
"""
Upload images from a directory to a dataset.
@@ -122,7 +122,8 @@ def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry,
upload_time = end_time - start_time
df = pd.DataFrame(
{
"Project ID": dataset_id,
"Workspace ID": workspace_id,
"Dataset ID": dataset_id,
"Dataset Name": dataset_name,
"Image Name": file_path,
"Response Code": response_code,
@@ -144,6 +145,7 @@ def upload_images(dataset_name, dataset_id, img_dir, base_url, token, max_retry,
def upload_images_from_collection(
dataset_name,
dataset_id,
workspace_id,
collection,
base_url,
token,
@@ -221,7 +223,8 @@ def upload_images_from_collection(
upload_time = end_time - start_time
df = pd.DataFrame(
{
"Project ID": dataset_id,
"Workspace ID": workspace_id,
"Dataset ID": dataset_id,
"Dataset Name": dataset_name,
"Image Name": file_path,
"Response Code": response_code,
@@ -344,7 +347,7 @@ def search_datasets(search_str, base_url):
return response.json()


def dataset_info(project_id, base_url):
def dataset_info(project_id, base_url, token):
"""show dataset info of given id. return 'Metadata not found' if not found.
sample output:
{
@@ -397,13 +400,17 @@ def dataset_info(project_id, base_url):
"""
logger.info(f"getting GN dataset info for {project_id}")
logger.debug(f"url: {base_url}/api/metadata/")
headers = {
"Authorization": token
}
payload = {
"project_id": project_id
}
logger.debug(f"params: {payload}")

response = requests.get(
f"{base_url}/api/metadata/",
headers=headers,
params=payload,
timeout=180,
)
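Functionally, `dataset_info` now sends the same metadata request as before plus an `Authorization` header. A standalone sketch (the base URL is a placeholder assumption; the endpoint, params, and timeout come from the diff):

```python
import requests

base_url = "https://api.geonadir.com"  # placeholder; use your actual --base-url value
token = "Token " + "<YOUR_TOKEN>"      # the CLI prefixes the raw token with "Token "

response = requests.get(
    f"{base_url}/api/metadata/",
    headers={"Authorization": token},  # new: allows reading metadata the user has access to
    params={"project_id": 1234},       # placeholder dataset id
    timeout=180,
)
print(response.json())
```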
6 changes: 5 additions & 1 deletion src/geonadir_upload_cli/parallel.py
@@ -21,6 +21,7 @@

def process_thread(
dataset_id,
workspace_id,
dataset_name,
img_dir,
base_url,
@@ -74,7 +75,8 @@ def process_thread(
payload_data = {
"dataset_name": dataset_name,
"is_private": private,
"is_published": True
"is_published": True,
"workspace": workspace_id
}

# retrieve metadata from STAC collection if applicable
@@ -141,6 +143,7 @@ def process_thread(
result_df = upload_images_from_collection(
dataset_name,
dataset_id,
workspace_id,
img_dir,
base_url,
token,
@@ -153,6 +156,7 @@ def process_thread(
result_df = upload_images(
dataset_name,
dataset_id,
workspace_id,
img_dir,
base_url,
token,
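The upshot of the parallel.py change is that dataset creation now carries the workspace in its payload, alongside the existing name and privacy fields. A sketch of the assembled payload with placeholder values (field names are from the diff):

```python
# assembled in process_thread before the dataset is created
payload_data = {
    "dataset_name": "test1",  # from -i
    "is_private": True,       # from -p/--private
    "is_published": True,
    "workspace": 1234,        # new: from -w/--workspace-id
}
```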
18 changes: 13 additions & 5 deletions src/geonadir_upload_cli/upload.py
@@ -55,10 +55,13 @@ def normal_upload(**kwargs):
retry_interval = kwargs.get("retry_interval")
timeout = kwargs.get("timeout")
dataset_id = kwargs.get("dataset_id")
workspace_id = kwargs.get("workspace_id")
existing_dataset_name = ""
token = "Token " + token
logger.debug(f"token: {token}")
if dataset_id:
logger.debug(f"searching for metadata of dataset {dataset_id}")
result = dataset_info(dataset_id, base_url)
result = dataset_info(dataset_id, base_url, token)
if result == "Metadata not found":
raise Exception(f"Dataset id {dataset_id} invalid.")
logger.info(f"Upload to existing dataset id: {dataset_id}")
@@ -79,6 +82,7 @@ def normal_upload(**kwargs):
logger.info(f"max_retry: {max_retry} times")
logger.info(f"retry_interval: {retry_interval} sec")
logger.info(f"timeout: {timeout} sec")
logger.info(f"workspace: {workspace_id} sec")
for count, i in enumerate(item):
logger.info(f"--item {count + 1}:")
dataset_name, image_location = i
@@ -104,8 +108,7 @@
return

logger.info(base_url)
token = "Token " + token
logger.debug(f"token: {token}")

if metadata_json:
with open(metadata_json) as f:
metadata = json.load(f)
@@ -136,6 +139,7 @@
dataset_details.append(
(
dataset_id,
workspace_id,
dataset_name,
image_location,
base_url,
@@ -178,10 +182,13 @@ def upload_from_collection(**kwargs):
retry_interval = kwargs.get("retry_interval")
timeout = kwargs.get("timeout")
dataset_id = kwargs.get("dataset_id")
workspace_id = kwargs.get("workspace_id")
existing_dataset_name = ""
token = "Token " + token
logger.debug(f"token: {token}")
if dataset_id:
logger.debug(f"searching for metadata of dataset {dataset_id}")
result = dataset_info(dataset_id, base_url)
result = dataset_info(dataset_id, base_url, token)
if result == "Metadata not found":
raise Exception(f"Dataset id {dataset_id} invalid.")
logger.info(f"Upload to existing dataset id: {dataset_id}")
@@ -207,6 +214,7 @@ def upload_from_collection(**kwargs):
logger.info(f"max_retry: {max_retry} times")
logger.info(f"retry_interval: {retry_interval} sec")
logger.info(f"timeout: {timeout} sec")
logger.info(f"workspace: {workspace_id}")
if exclude:
logger.info(f"excluding keywords: {str(exclude)}")
if include:
@@ -267,7 +275,6 @@
return

logger.info(base_url)
token = "Token " + token
if not dataset_id:
if metadata_json:
with open(metadata_json) as f:
@@ -317,6 +324,7 @@
dataset_details.append(
(
dataset_id,
workspace_id,
dataset_name,
image_location,
base_url,
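Both upload paths now build the authorization string once, up front, so the existing-dataset check can authenticate. A condensed sketch of the reordered flow (names from the diff; the token value is a placeholder):

```python
token = "Token " + "<YOUR_TOKEN>"  # built before any API call now

if dataset_id:
    # the existing-dataset lookup is now an authorized request
    result = dataset_info(dataset_id, base_url, token)
    if result == "Metadata not found":
        raise Exception(f"Dataset id {dataset_id} invalid.")
```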
