Commit 8a05b40

John Major committed Nov 18, 2024
1 parent 19e479c commit 8a05b40
Showing 6 changed files with 169 additions and 40 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -288,7 +288,7 @@ Similar to `pytest`, but more extensive. Largely useful in development work. Th
- `python tx.py 2 `.

### Run the bloom UI
- `source source run_bloomui.sh`
- `source run_bloomui.sh`

### Run the pgadmin UI
- `source bloom_lims/env/install_pgadmin.sh`
113 changes: 92 additions & 21 deletions bloom_lims/bobjs.py
@@ -17,6 +17,8 @@
from .logging_config import setup_logging
from datetime import datetime, timedelta, date, UTC

from bloom_lims.db import BLOOMdb3


os.makedirs("logs", exist_ok=True)

@@ -671,7 +673,7 @@ def get_by_euid(self, euid):
else:
return combined_result[0]

# This is the mechanism for finding the database object(s) which math the template reference pattern
# This is the mechanism for finding the database object(s) which match the template reference pattern
# V2... why?
def query_instance_by_component_v2(
self, super_type=None, btype=None, b_sub_type=None, version=None
@@ -2828,6 +2830,8 @@ def create_file(
):
file_properties = {"properties": file_metadata}

import_or_remote = file_metadata['import_or_remote']

new_file = self.create_instance(
self.query_template_by_component_v2("file", "file", "generic", "1.0")[
0
@@ -2876,7 +2880,7 @@ def create_file(
if file_data or url or full_path_to_file or s3_uri:
try:
new_file = self.add_file_data(
new_file.euid, file_data, file_name, url, full_path_to_file, s3_uri, addl_tags=addl_tags
new_file.euid, file_data, file_name, url, full_path_to_file, s3_uri, addl_tags=addl_tags, import_or_remote=import_or_remote
)
except Exception as e:
logging.exception(f"Error adding file data: {e}")
@@ -2885,8 +2889,8 @@
self.session.commit()
raise Exception(e)
else:
logging.warning(f"No data provided for file creation: {file_data, url}")
new_file.bstatus = "no file data provided"
logging.warning(f"No data provided for file creation, or import skipped ({import_or_remote}): {file_data, url}")
new_file.bstatus = f"no file data provided or {import_or_remote} is not 'import'"
self.session.commit()

if create_locked:
@@ -2895,14 +2899,32 @@
return new_file


def sanitize_tag(self, value):
"""Sanitize the tag value to conform to AWS tag requirements."""
sanitized_value = urllib.parse.quote(value, safe='_.:/=+-@ ')
# Trim the string to the maximum allowed length (256 characters for tag values)
sanitized_value = sanitized_value[:255]
return sanitized_value  # sanitized_value if sanitized_value != value else value
def sanitize_tag(self, value, is_key=False):
"""
Sanitize the tag key or value to conform to AWS tag requirements by replacing disallowed characters.

Parameters:
- value (str): The tag key or value to sanitize.
- is_key (bool): If True, sanitize as a tag key (128-character limit).
If False, sanitize as a tag value (256-character limit).

Returns:
- str: Sanitized tag key or value.
"""
# AWS tag key or value allowed characters
allowed_characters_regex = r'[^a-zA-Z0-9 _\.:/=+\-@]'

# Replace disallowed characters with '_'
sanitized_value = re.sub(allowed_characters_regex, '_', value)

# Trim leading and trailing spaces (not allowed by AWS)
sanitized_value = sanitized_value.strip()

# Enforce maximum length
max_length = 128 if is_key else 256
sanitized_value = sanitized_value[:max_length]

return sanitized_value
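
For orientation, a minimal standalone sketch of the behavior the new `sanitize_tag` implements (regex and length limits copied from the diff above; the helper name and sample value are hypothetical):

import re

def sanitize_tag_sketch(value, is_key=False):
    # Replace anything outside AWS's allowed tag characters with '_', drop edge spaces, enforce length limits
    sanitized = re.sub(r'[^a-zA-Z0-9 _\.:/=+\-@]', '_', value).strip()
    return sanitized[:128] if is_key else sanitized[:256]

print(sanitize_tag_sketch("my file (v2)#final.vcf"))  # -> my file _v2__final.vcf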

def format_addl_tags(self, add_tags):
if not isinstance(add_tags, dict):
@@ -2927,11 +2949,15 @@ def add_file_data(
full_path_to_file=None,
s3_uri=None,
addl_tags={},
import_or_remote=None
):
file_instance = self.get_by_euid(euid)
s3_bucket_name = file_instance.json_addl["properties"]["current_s3_bucket_name"]
file_properties = {}

if import_or_remote in ["Remote", "remote"] and (file_data is not None or url is not None or full_path_to_file is not None):
raise ValueError("Remote file management is only supported with internal S3 URI.")

addl_tag_string = self.format_addl_tags(addl_tags)
if len(addl_tag_string) > 0:
addl_tag_string = f"&{addl_tag_string}"
@@ -2954,6 +2980,8 @@
# Check if a file with the same EUID already exists in the bucket
s3_key_path = "/".join(s3_key.split("/")[:-1])
s3_key_path = s3_key_path + "/" if len(s3_key_path) > 0 else ""


existing_files = self.s3_client.list_objects_v2(
Bucket=s3_bucket_name, Prefix=f"{s3_key_path}{euid}."
)
@@ -2965,17 +2993,52 @@
f"A file with EUID {euid} already exists in bucket {s3_bucket_name} {s3_key_path}."
)

if import_or_remote in ["Remote", "remote"] and s3_uri is not None:
# Check if a remote file with the same metadata already exists

search_criteria = {"properties": {"current_s3_uri": s3_uri}}
existing_euids = self.search_objs_by_addl_metadata(search_criteria,True,super_type="file", btype="file",b_sub_type="generic")

if len(existing_euids) > 0:
raise Exception(f"Remote file with URI {s3_uri} already exists in the database as {existing_euids}.")

# Store metadata for the remote file
file_properties = {
"remote_s3_uri": s3_uri,
"original_file_name": file_name,
"name": file_name,
"original_file_size_bytes": None, # Size is unknown for remote files
"original_file_suffix": file_suffix,
"original_file_data_type": "remote",
"file_type": file_suffix,
"current_s3_uri": s3_uri,
"original_s3_uri": s3_uri,
"current_s3_key": "/".join(s3_uri.split("/")[3:]),
"current_s3_bucket_name": s3_uri.split("/")[2],
}

_update_recursive(file_instance.json_addl["properties"], file_properties)
flag_modified(file_instance, "json_addl")
self.session.commit()
return file_instance
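# Illustrative aside (not part of this commit): for a hypothetical URI such as
# s3_uri = "s3://my-bucket/projects/run1/sample.vcf.gz", the two derived fields above are
#   s3_uri.split("/")[2]            -> "my-bucket"                    (current_s3_bucket_name)
#   "/".join(s3_uri.split("/")[3:]) -> "projects/run1/sample.vcf.gz"  (current_s3_key)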

try:
if file_data:
file_data.seek(0) # Ensure the file pointer is at the beginning
file_size = len(file_data.read())
file_data.seek(0) # Reset the file pointer after reading
self.s3_client.put_object(
Bucket=s3_bucket_name,
Key=s3_key,
Body=file_data,
Tagging=f"creating_service=dewey&original_file_name={self.sanitize_tag(file_name)}&original_file_path=N/A&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}"
)

try:
self.s3_client.put_object(
Bucket=s3_bucket_name,
Key=s3_key,
Body=file_data,
Tagging=f"creating_service=dewey&original_file_name={self.sanitize_tag(file_name)}&original_file_path=N/A&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}"
)

except Exception as e:
self.logger.exception(f"Error uploading file data: {e}. Possibly tag related: {self.sanitize_tag(file_name)}, {self.sanitize_tag(str(file_size))}, {self.sanitize_tag(file_suffix)}, {self.sanitize_tag(euid)} ")
raise Exception(e)
odirectory, ofilename = os.path.split(file_name)

file_properties = {
@@ -3079,14 +3142,17 @@ def add_file_data(
}

# Delete the old file and create a marker file
self.s3_client.delete_object(Bucket=source_bucket, Key=source_key)
marker_key = f"{source_key}.dewey.moved"
marker_key = f"{source_key}.dewey.{euid}.moved"
if len(marker_key) >= 1024:
raise Exception(f"Marker key length is too long, >1024chrar : {len(marker_key)},not deleting original file: {source_key}")
self.s3_client.put_object(
Bucket=source_bucket,
Key=marker_key,
Body=b"",
Tagging=f"euid={euid}&original_s3_uri={self.sanitize_tag(s3_uri)}{addl_tag_string}",
)
self.s3_client.delete_object(Bucket=source_bucket, Key=source_key)


else:
self.logger.exception("No file data provided.")
@@ -3125,7 +3191,7 @@ def download_file(
euid,
save_pattern="dewey",
include_metadata=False,
save_path=".",
save_path="./tmp/",
delete_if_exists=False,
):
"""
@@ -3137,7 +3203,11 @@
:param save_path: Directory where the file will be saved. Defaults to ./tmp/, which will be created if not present.
:return: Path of the saved file.
"""

import random
# Download into a fresh per-call random subdirectory to avoid filename collisions
save_path = os.path.join(save_path, str(random.randint(1, 99999999)))
os.system(f"mkdir -p {save_path}")

if not os.path.exists(save_path):
os.makedirs(save_path)
else:
@@ -3172,7 +3242,7 @@

# Save metadata as a YAML file if requested
if include_metadata:
metadata_file_path = f"{local_file_path}.dewey.yaml"
metadata_file_path = f"{local_file_path}.{euid}.dewey.yaml"
if os.path.exists(metadata_file_path):
self.logger.exception(
f"Metadata file already exists: {metadata_file_path}"
@@ -3197,6 +3267,7 @@
except Exception as e:
raise Exception(f"An error occurred while downloading the file: {e}")

os.system(f"(sleep 2000 && rm -rf {save_path}) &")
return local_file_path
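# Illustrative usage sketch (not part of this commit); the object name and EUID value are hypothetical:
#   local_path = bloom_file_obj.download_file("FX42", include_metadata=True)
# With the changes above, the download lands under ./tmp/<random-int>/, a sidecar
# <local_file_path>.FX42.dewey.yaml is written when metadata is requested, and the temp
# directory is removed in the background.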

def get_s3_uris(self, euids, include_metadata=False):
12 changes: 10 additions & 2 deletions bloom_lims/config/file/file.json
@@ -32,7 +32,9 @@
"sub_variable":"",
"category":"",
"sub_category":"",
"sub_category_2":""
"sub_category_2":"",
"file_tags": [],
"import_or_remote": ""
},
"ui_form_properties" : [
{"property_key":"patient_id","form_label":"Patient ID", "required": false, "value_type": "uid-interactive"},
@@ -46,13 +48,19 @@
{"property_key":"sub_category_2","form_label":"Sub Category 2", "required": false, "value_type":"controlled"},
{"property_key":"study_id","form_label":"Study ID", "required": false, "value_type":"uid-static"},
{"property_key":"comments","form_label":"Comments", "required": false, "value_type":"string"},
{"property_key": "creating_user", "form_label":"Creating User", "required":false, "value_type":"uid-static"}
{"property_key": "creating_user", "form_label":"Creating User", "required":false, "value_type":"uid-static"},
{"property_key": "file_tags", "form_label":"File Tags", "required":false, "value_type":"list"},
{"property_key": "import_or_remote", "form_label":"Import or Leave Remote", "required":false, "value_type":"controlled"}
],
"controlled_properties": {
"purpose": {
"type": "string",
"enum": ["","Clinical", "Research", "Other"]
},
"import_or_remote": {
"type": "string",
"enum": ["Import", "Remote"]
},
"purpose_subtype": {
"type": "string",
"enum": [
8 changes: 6 additions & 2 deletions main.py
@@ -1980,6 +1980,8 @@ async def create_file(
sub_category_2: str = Form(""),
variable: str = Form(""),
sub_variable: str = Form(""),
file_tags: str = Form(""),
import_or_remote: str = Form("import_or_remote"),
):

if directory and len(directory) > 1000:
@@ -2019,7 +2021,9 @@
"sub_category": sub_category,
"sub_category_2": sub_category_2,
"variable": variable,
"sub_variable": sub_variable
"sub_variable": sub_variable,
"file_tags": file_tags,
"import_or_remote": import_or_remote,
}

results = []
@@ -2210,7 +2214,7 @@ async def download_file(

metadata_yaml_path = None
if create_metadata_file == "yes":
metadata_yaml_path = downloaded_file_path + ".dewey.yaml"
metadata_yaml_path = downloaded_file_path + "." + euid + ".dewey.yaml"
if not os.path.exists(metadata_yaml_path):
return HTMLResponse(
f"Metadata file for EUID {euid} not found.", status_code=404
70 changes: 56 additions & 14 deletions run_bloomui.sh
100644 → 100755
@@ -1,11 +1,39 @@
#!/bin/bash
#!/usr/bin/env bash

# Check if $1 is null and set host accordingly
if [ -z "$1" ]; then
host="0.0.0.0"
else
host="$1"
fi
# Default values
host="0.0.0.0"
port=8911
mode="dev"

# Parse arguments with nicer flags
while [[ $# -gt 0 ]]; do
case $1 in
--host)
host="$2"
shift 2
;;
--port)
port="$2"
shift 2
;;
--mode)
mode="$2"
shift 2
;;
--help|-h)
echo "Usage: $0 [--host <host>] [--port <port>] [--mode <dev|prod>]"
echo " --host Host to bind (default: 0.0.0.0)"
echo " --port Port to bind (default: 8911)"
echo " --mode Run mode: 'dev' for development, 'prod' for production (default: dev)"
exit 0
;;
*)
echo "Unknown option: $1"
echo "Use --help or -h for usage information."
exit 1
;;
esac
done

# Detect the number of CPU cores
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
@@ -21,13 +49,27 @@ fi
num_workers=$(( (num_cores * 2) - 1 ))

# Run Uvicorn for development or Gunicorn for production
if [ -z "$2" ]; then
echo "Running in dev mode with 1 worker on $host"
if [[ "$mode" == "dev" ]]; then
echo "Running in development mode on $host:$port"
sleep 2
uvicorn main:app --reload --log-level trace --port 8911 --timeout-keep-alive 303 --host $host
else
echo "RUNNING IN PRODUCTION MODE"
echo "Running with $num_workers workers on $host"
uvicorn main:app \
--reload \
--log-level trace \
--port $port \
--timeout-keep-alive 303 \
--host $host
elif [[ "$mode" == "prod" ]]; then
echo "Running in production mode on $host:$port"
echo "Using $num_workers workers"
sleep 4
gunicorn main:app -w $num_workers -k uvicorn.workers.UvicornWorker --log-level trace --timeout 303 --bind $host:8911
gunicorn main:app \
-w $num_workers \
-k uvicorn.workers.UvicornWorker \
--log-level trace \
--timeout 303 \
--bind $host:$port
else
echo "Invalid mode: $mode"
echo "Use --mode <dev|prod> to specify the run mode."
exit 1
fi
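
Usage note (not from the diff; host and port values below are only illustrative): the README invocation `source run_bloomui.sh` still runs the development defaults (`0.0.0.0:8911`), while something like `source run_bloomui.sh --host 127.0.0.1 --port 9000 --mode prod` selects the Gunicorn worker path.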
4 changes: 4 additions & 0 deletions templates/euid_details.html
@@ -61,6 +61,10 @@ <h1>Object Details : {{ object.euid }}</h1>
</table>

<table>
<tr>
<td>S3 URI</td>
<td>{{ object.json_addl['properties']['current_s3_uri'] }}</td>
</tr>
<tr>
<td>EUID</td>
<td><a class="reference-link" href="euid_details?euid={{ object.euid }}">{{ object.euid }}</a></td>