From c3b030566d24eb320d154dac1f8c1f418ad61c95 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Thu, 25 Apr 2024 16:52:03 -0500 Subject: [PATCH 01/11] Update the trait endpoints --- src/modules/site-v2/base/views/data/data.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/modules/site-v2/base/views/data/data.py b/src/modules/site-v2/base/views/data/data.py index f20054e4..2b7253d2 100644 --- a/src/modules/site-v2/base/views/data/data.py +++ b/src/modules/site-v2/base/views/data/data.py @@ -80,7 +80,7 @@ def protocols(): # # Submit Trait # -@data_bp.route('/submit-trait') +@data_bp.route('/trait/start-submit') @jwt_required() def submit_trait_start(): """ Submit Trait start page """ @@ -92,7 +92,7 @@ def submit_trait_start(): # # Submit Trait Form # -@data_bp.route('/submit-trait/new-submission', methods=['GET', 'POST']) +@data_bp.route('/trait/create', methods=['GET', 'POST']) @jwt_required() def submit_trait_form(): """ Trait Submission Form """ @@ -230,7 +230,7 @@ def submit_trait_form(): # # File Upload # -@data_bp.route('/submit-trait/parse-file', methods=['POST']) +@data_bp.route('/trait/parse-file', methods=['POST']) @jwt_required() def parse_trait_file(): """ Parse the trait file and return the data """ @@ -259,3 +259,5 @@ def parse_trait_file(): except Exception as ex: logger.error(f'Failed to parse the file: {ex}') return jsonify({ 'message': 'Failed to parse the file. Please try again later.' 
}), 500 + + From 2e21cc919bfb4210ba96e2b1e215fba0508dce51 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Tue, 30 Apr 2024 11:30:20 -0500 Subject: [PATCH 02/11] Modularize trait submission pipeline --- src/modules/site-v2/base/utils/trait.py | 121 +++++++++++++++++ src/modules/site-v2/base/views/data/data.py | 140 +++----------------- 2 files changed, 139 insertions(+), 122 deletions(-) create mode 100644 src/modules/site-v2/base/utils/trait.py diff --git a/src/modules/site-v2/base/utils/trait.py b/src/modules/site-v2/base/utils/trait.py new file mode 100644 index 00000000..fcf3c6b8 --- /dev/null +++ b/src/modules/site-v2/base/utils/trait.py @@ -0,0 +1,121 @@ +import bleach +import csv + +from caendr.models.error import FileUploadError +from caendr.models.datastore import TraitFile, Species +from caendr.models.status import PublishStatus +from caendr.models.sql import PhenotypeMetadata, PhenotypeDatabase +from caendr.services.cloud.storage import upload_blob_from_file_object, check_blob_exists +from caendr.services.cloud.postgresql import rollback_on_error_handler +from caendr.services.logger import logger +from caendr.services.validate import validate_file, StrainValidator, NumberValidator +from caendr.utils.data import unique_id +from caendr.utils.env import get_env_var +from caendr.utils.local_files import LocalUploadFile +from constants import TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS + +MODULE_DB_OPERATIONS_BUCKET_NAME = get_env_var('MODULE_DB_OPERATIONS_BUCKET_NAME') +MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH = get_env_var('MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH') + + +def add_trait(form_data, user): + """ + Add a trait to the database by performing the following operations: + 1. Create a new TraitFile object with the user submitted data and save it to Datastore. + 2. Seed the trait data to Phenotype Metadata SQL table. + 3. Save the file to GCP bucket. + 4. Parse and seed the file data to Phenotype Database SQL table.
+ On the failure of any of the above operations rolls back to the initial state and return an error message. + """ + try: + # Create a new TraitFile oject + tf = TraitFile(unique_id()) + + # Create a unique filename for file uload + hashed_filename = f'{unique_id()}.tsv' + tf.set_properties(**{ + # User submitted data + 'trait_name_user': bleach.clean(form_data.trait_name_user.data), + 'trait_name_display_1': bleach.clean(form_data.trait_name_display_1.data), + 'trait_name_display_2': bleach.clean(form_data.trait_name_display_2.data), + 'trait_name_display_3': bleach.clean(form_data.trait_name_display_2.data), + 'filename': bleach.clean(form_data.file.data.filename), + 'species': bleach.clean(form_data.species.data), + 'description_short': bleach.clean(form_data.description_short.data), + 'description_long': bleach.clean(form_data.description_long.data), + 'units': bleach.clean(form_data.units.data), + 'tags': [ bleach.clean(tag) for tag in form_data.tags.data ], + 'institution': bleach.clean(form_data.institution.data), + 'source_lab': bleach.clean(form_data.source_lab.data), + 'protocols': bleach.clean(form_data.protocols.data), + 'publication': bleach.clean(form_data.publication.data), + + # Internally used data + 'dataset': 'public', + 'publish_status': PublishStatus.UPLOADED, + 'is_bulk_file': False, + 'hashed_filename': hashed_filename, + }) + + tf.set_user(user) + + # Save the TraitFile object to Datastore + tf.save() + except Exception as ex: + logger.error(f'Failed to create a trait file {form_data.trait_name_user.data}: {ex}') + return {'message': 'Failed to submit a form.
Please try again later.'}, 500 + + # Seed to Phenotype Metadata SQL table + with rollback_on_error_handler(): + new_trait = PhenotypeMetadata() + new_trait.add_trait(tf) + + # Save file to GCP bucket + species_name = Species.get(form_data.species.data).name + blob_name = f'{MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH}/{species_name}/{user.name}/{hashed_filename}' + + # Check if the file already exists + if check_blob_exists(MODULE_DB_OPERATIONS_BUCKET_NAME, blob_name): + return {'message': 'File already exists.'}, 400 + else: + upload_blob_from_file_object(MODULE_DB_OPERATIONS_BUCKET_NAME, form_data.file.data, blob_name) + + # Reset the file pointer + form_data.file.data.seek(0) + + try: + # Seed the file data to Phenotype Database SQL table + with LocalUploadFile(form_data.file.data, valid_file_extensions=TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS) as file: + # Validate the file + try: + validate_file(file, [ + StrainValidator( 'strain', species=tf['species'], force_unique=True, force_unique_msgs={} ), + NumberValidator( None, accept_float=True, accept_na=True ), + ]) + except Exception as ex: + return {'message': f'Failed to validate the file: {ex.msg}'}, 400 + + # Parse the trait file + trait_data_list = [] + with open(file) as f: + for idx, row in enumerate( csv.reader(f, delimiter='\t') ): + if idx == 0: + continue + else: + trait_data = { + 'trait_name': tf['trait_name_user'], + 'strain_name': row[0], + 'trait_value': row[1], + 'metadata_id': tf.name + } + trait_data_list.append(trait_data) + + with rollback_on_error_handler(): + trait_data = PhenotypeDatabase() + trait_data.add_trait_data(trait_data_list) + + except FileUploadError as ex: + logger.error(f'Failed to upload a file {form_data.file.data.filename}: {ex}') + return {'message': 'Failed to submit a form. 
Please try again later.'}, 500 + + return {'message': 'Trait submitted successfully.'}, 200 \ No newline at end of file diff --git a/src/modules/site-v2/base/views/data/data.py b/src/modules/site-v2/base/views/data/data.py index 2b7253d2..25e253e9 100644 --- a/src/modules/site-v2/base/views/data/data.py +++ b/src/modules/site-v2/base/views/data/data.py @@ -1,26 +1,21 @@ import yaml -import bleach import csv -from flask import render_template, Blueprint, redirect, url_for, request, flash, jsonify, abort +from flask import render_template, Blueprint, redirect, url_for, request, flash, jsonify from extensions import cache from config import config -from caendr.models.error import EnvVarError, FileUploadError -from caendr.models.datastore import TraitFile, Species -from caendr.models.status import PublishStatus -from caendr.services.cloud.storage import get_blob, upload_blob_from_file_object, check_blob_exists -from caendr.services.logger import logger -from caendr.services.validate import validate_file, StrainValidator, NumberValidator -from caendr.utils.data import unique_id -from base.utils.auth import jwt_required, get_current_user -from caendr.utils.env import get_env_var -from caendr.utils.local_files import LocalUploadFile -from base.forms import TraitSubmissionForm -from constants import TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS -from caendr.models.sql import PhenotypeMetadata, PhenotypeDatabase - - +from caendr.models.error import EnvVarError, FileUploadError +from caendr.models.datastore import Species +from caendr.services.cloud.storage import get_blob +from caendr.services.logger import logger +from caendr.services.validate import validate_file, StrainValidator, NumberValidator +from caendr.utils.env import get_env_var +from caendr.utils.local_files import LocalUploadFile +from base.utils.auth import jwt_required, get_current_user +from base.utils.trait import add_trait +from base.forms import TraitSubmissionForm +from constants import 
TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS @@ -112,111 +107,13 @@ def submit_trait_form(): flash('Please fill out all required fields.', 'warning') else: - try: - # Create a new TraitFile object - tf = TraitFile(unique_id()) - - # Create a unique filename for file upload - hashed_filename = f'{unique_id()}.tsv' - - tf.set_properties(**{ - # User submitted data - 'trait_name_user': bleach.clean(form.trait_name_user.data), - 'trait_name_display_1': bleach.clean(form.trait_name_display_1.data), - 'trait_name_display_2': bleach.clean(form.trait_name_display_2.data), - 'trait_name_display_3': bleach.clean(form.trait_name_display_2.data), - 'filename': bleach.clean(form.file.data.filename), - 'species': bleach.clean(form.species.data), - 'description_short': bleach.clean(form.description_short.data), - 'description_long': bleach.clean(form.description_long.data), - 'units': bleach.clean(form.units.data), - 'tags': [ bleach.clean(tag) for tag in form.tags.data ], - 'institution': bleach.clean(form.institution.data), - 'source_lab': bleach.clean(form.source_lab.data), - 'protocols': bleach.clean(form.protocols.data), - 'publication': bleach.clean(form.publication.data), - - # Internally used data - 'dataset': 'public', - 'publish_status': PublishStatus.UPLOADED, - 'is_bulk_file': False, - 'hashed_filename': hashed_filename, - }) - - tf.set_user(user) - - # Save the TraitFile object to Datastore - tf.save() - - # Seed to Phenotype Metadata SQL table - new_trait = PhenotypeMetadata() - new_trait.add_trait(tf) - - except Exception as ex: - logger.error(f'Failed to create a trait file {form.trait_name_user.data}: {ex}') - flash('Failed to submit a form. 
Please try again later.', 'danger') - abort(500) - - # Save file to GCP bucket - species_name = Species.get(form.species.data).name - blob_name = f'{MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH}/{species_name}/{user.name}/{hashed_filename}' - - # Check if the file already exists - if check_blob_exists(MODULE_DB_OPERATIONS_BUCKET_NAME, blob_name): - flash('File already exists.', 'danger') + # Add the trait to the database + resp, code = add_trait(form, user) + if code != 200: + flash(resp['message'], 'danger') else: - upload_blob_from_file_object(MODULE_DB_OPERATIONS_BUCKET_NAME, form.file.data, blob_name) - - # Reset the file pointer - form.file.data.seek(0) - - try: - # Seed the file data to Phenotype Database SQL table - with LocalUploadFile(form.file.data, valid_file_extensions=TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS) as file: - - # Validate the file - try: - validate_file(file, [ - StrainValidator( 'strain', species=tf['species'], force_unique=True, force_unique_msgs={} ), - NumberValidator( None, accept_float=True, accept_na=True ), - ]) - except Exception as ex: - flash(f'Failed to validate the file: {ex.msg}', 'danger') - return render_template('data/submit-trait-form.html', **{ - # Page Info - 'title': 'Phenotype Database Trait Submission', - 'tool_alt_parent_breadcrumb': {"title": "Submit Trait", "url": url_for('data.submit_trait_start')}, - - # Data - 'form': form, - }) - - # Parse the trait file - trait_data_list = [] - with open(file) as f: - for idx, row in enumerate( csv.reader(f, delimiter='\t') ): - if idx == 0: - continue - else: - trait_data = { - 'trait_name': tf['trait_name_user'], - 'strain_name': row[0], - 'trait_value': row[1], - 'metadata_id': tf.name - } - trait_data_list.append(trait_data) - - trait_data = PhenotypeDatabase() - trait_data.add_trait_data(trait_data_list) - - except FileUploadError as ex: - logger.error(f'Failed to upload a file {form.file.data.filename}: {ex}') - flash('Failed to submit a form. 
Please try again later.', 'danger') - abort(500) - - flash('Trait submitted successfully.', 'success') - # TODO: change the redirect to MTL - return redirect(url_for('data.submit_trait_start')) + flash('Trait submitted successfully.', 'success') + return redirect(url_for('data.submit_trait_start')) return render_template('data/submit-trait-form.html', **{ # Page Info @@ -260,4 +157,3 @@ def parse_trait_file(): logger.error(f'Failed to parse the file: {ex}') return jsonify({ 'message': 'Failed to parse the file. Please try again later.' }), 500 - From 4bd0126bca79574137cdf6b940ed9eab8ea514c9 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Wed, 1 May 2024 13:46:37 -0500 Subject: [PATCH 03/11] Function for deleting entries from the PhenotypeDatabase --- src/pkg/caendr/caendr/models/sql/phenotype.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/pkg/caendr/caendr/models/sql/phenotype.py b/src/pkg/caendr/caendr/models/sql/phenotype.py index e0e0053b..7dd2fdb6 100644 --- a/src/pkg/caendr/caendr/models/sql/phenotype.py +++ b/src/pkg/caendr/caendr/models/sql/phenotype.py @@ -1,4 +1,4 @@ -from caendr.services.cloud.postgresql import db +from caendr.services.cloud.postgresql import db, rollback_on_error from caendr.models.sql.dict_serializable import DictSerializable class PhenotypeDatabase(DictSerializable, db.Model): @@ -21,3 +21,14 @@ def add_trait_data(self, trait_data): """ db.session.bulk_insert_mappings(PhenotypeDatabase, trait_data) db.session.commit() + + @classmethod + @rollback_on_error + def delete_by_metadata_id(cls, metadata_id): + """ + Deletes entries from the Phenotype Database table for the given trait + """ + + del_statement = PhenotypeDatabase.__table__.delete().where(PhenotypeDatabase.metadata_id == metadata_id) + db.session.execute(del_statement) + db.session.commit() \ No newline at end of file From 3af7525cc6270abfd7fd8e04ba3245e85d490197 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Wed, 1 May 2024 
13:48:49 -0500 Subject: [PATCH 04/11] Function for deleting an entry from PhenotypeMetadata --- src/pkg/caendr/caendr/models/sql/phenotype_metadata.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py b/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py index abe5029b..db54f5f6 100644 --- a/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py +++ b/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py @@ -50,7 +50,7 @@ def to_json_with_values(self): return json_trait - def add_trait(self, trait_obj): + def add(self, trait_obj): new_trait = PhenotypeMetadata( id = trait_obj.name, trait_name_user = trait_obj['trait_name_user'], @@ -74,4 +74,8 @@ def add_trait(self, trait_obj): is_bulk_file = trait_obj['is_bulk_file'] ) db.session.add(new_trait) + db.session.commit() + + def delete(self): + db.session.delete(self) db.session.commit() \ No newline at end of file From b96595d18c0d3fd589ea8793398ff122082faad0 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Wed, 1 May 2024 13:50:01 -0500 Subject: [PATCH 05/11] Adding a rollback on trait submission error --- src/modules/site-v2/base/utils/trait.py | 69 +++++++++++++++++---- src/modules/site-v2/base/views/data/data.py | 28 ++++----- 2 files changed, 69 insertions(+), 28 deletions(-) diff --git a/src/modules/site-v2/base/utils/trait.py b/src/modules/site-v2/base/utils/trait.py index fcf3c6b8..d83ffadf 100644 --- a/src/modules/site-v2/base/utils/trait.py +++ b/src/modules/site-v2/base/utils/trait.py @@ -5,10 +5,12 @@ from caendr.models.datastore import TraitFile, Species from caendr.models.status import PublishStatus from caendr.models.sql import PhenotypeMetadata, PhenotypeDatabase -from caendr.services.cloud.storage import upload_blob_from_file_object, check_blob_exists +from caendr.services.cloud.storage import upload_blob_from_file_object, check_blob_exists, get_blob_if_exists from caendr.services.cloud.postgresql import 
rollback_on_error_handler from caendr.services.logger import logger from caendr.services.validate import validate_file, StrainValidator, NumberValidator +from caendr.services.cloud.datastore import delete_ds_entity_by_ref +from caendr.api.phenotype import get_trait from caendr.utils.data import unique_id from caendr.utils.env import get_env_var from caendr.utils.local_files import LocalUploadFile @@ -25,7 +27,7 @@ def add_trait(form_data, user): 2. Seed the trait data to Phenotype Metadata SQL table. 3. Save the file to GCP bucket. 4. Parse and seed the file data to Phenotype Database SQL table. - On the failure of any of the above operations rolls back to the initial state and return an error message. + On the failure of any of the above operations rolls back to the initial state and returns an error message. """ try: # Create a new TraitFile oject @@ -64,11 +66,16 @@ def add_trait(form_data, user): except Exception as ex: logger.error(f'Failed to create a trait file {form_data.trait_name_user.data}: {ex}') return {'message': 'Failed to submit a form. Please try again later.'}, 500 - - # Seed to Phenotype Metadata SQL table - with rollback_on_error_handler(): - new_trait = PhenotypeMetadata() - new_trait.add_trait(tf) + + try: + # Seed to Phenotype Metadata SQL table + with rollback_on_error_handler(): + new_trait = PhenotypeMetadata() + new_trait.add(tf) + except Exception as ex: + rollback_submission_on_error(tf.name) + logger.error(f'Failed to seed the trait to the database: {ex}') + return {'message': 'Failed to submit a form. 
Please try again later.'}, 500 # Save file to GCP bucket species_name = Species.get(form_data.species.data).name @@ -93,6 +100,8 @@ def add_trait(form_data, user): NumberValidator( None, accept_float=True, accept_na=True ), ]) except Exception as ex: + rollback_submission_on_error(tf.name, blob_name) + logger.error(f'Failed to validate the file: {ex.msg}') return {'message': f'Failed to validate the file: {ex.msg}'}, 400 # Parse the trait file @@ -109,13 +118,49 @@ def add_trait(form_data, user): 'metadata_id': tf.name } trait_data_list.append(trait_data) - - with rollback_on_error_handler(): - trait_data = PhenotypeDatabase() - trait_data.add_trait_data(trait_data_list) + try: + with rollback_on_error_handler(): + trait_data = PhenotypeDatabase() + trait_data.add_trait_data('hello') + except Exception as ex: + rollback_submission_on_error(tf.name, blob_name) + logger.error(f'Failed to seed the file data to the database: {ex}') except FileUploadError as ex: + rollback_submission_on_error(tf.name, blob_name) logger.error(f'Failed to upload a file {form_data.file.data.filename}: {ex}') return {'message': 'Failed to submit a form. Please try again later.'}, 500 - return {'message': 'Trait submitted successfully.'}, 200 \ No newline at end of file + return {'message': 'Trait submitted successfully.'}, 200 + + +def rollback_submission_on_error(trait_id, blob_name=None): + """ + Rollback the trait submission on error by performing the following operations: + 1. Delete the TraitFile object from Datastore. + 2. Delete the trait from Phenotype Metadata SQL table. + 3. Delete the file from GCP bucket. + 4. Delete the file data from Phenotype Database SQL table. 
+ """ + tf = TraitFile.get_ds(trait_id) + if tf is None: + logger.error(f'Failed to retrieve the trait file {trait_id}') + return + + # Delete the previously created TraitFile object + delete_ds_entity_by_ref(TraitFile.kind, tf.name) + + # Delete a new trait from Phenotype Metadata SQL table + trait = get_trait(tf.name) + if trait: + trait.delete() + + # Delete the file from GCP bucket + if blob_name: + blob = get_blob_if_exists(MODULE_DB_OPERATIONS_BUCKET_NAME, blob_name) + if blob: + blob.delete() + + # Delete the file data from Phenotype Database SQL table + PhenotypeDatabase.delete_by_metadata_id(tf.name) + diff --git a/src/modules/site-v2/base/views/data/data.py b/src/modules/site-v2/base/views/data/data.py index 25e253e9..ec947aee 100644 --- a/src/modules/site-v2/base/views/data/data.py +++ b/src/modules/site-v2/base/views/data/data.py @@ -5,22 +5,18 @@ from extensions import cache from config import config -from caendr.models.error import EnvVarError, FileUploadError -from caendr.models.datastore import Species -from caendr.services.cloud.storage import get_blob -from caendr.services.logger import logger -from caendr.services.validate import validate_file, StrainValidator, NumberValidator -from caendr.utils.env import get_env_var -from caendr.utils.local_files import LocalUploadFile -from base.utils.auth import jwt_required, get_current_user -from base.utils.trait import add_trait -from base.forms import TraitSubmissionForm -from constants import TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS - - - -MODULE_DB_OPERATIONS_BUCKET_NAME = get_env_var('MODULE_DB_OPERATIONS_BUCKET_NAME') -MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH = get_env_var('MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH') +from caendr.models.error import EnvVarError, FileUploadError +from caendr.models.datastore import Species +from caendr.services.cloud.storage import get_blob +from caendr.services.logger import logger +from caendr.services.validate import validate_file, StrainValidator,
NumberValidator +from caendr.utils.local_files import LocalUploadFile +from base.utils.auth import jwt_required, get_current_user +from base.utils.trait import add_trait +from base.forms import TraitSubmissionForm +from constants import TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS + + data_bp = Blueprint( 'data', __name__, template_folder='templates' From ac20fa11075cd7f4005ab6c76a46a3f68ad0bb5b Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Wed, 1 May 2024 13:53:11 -0500 Subject: [PATCH 06/11] Removing testing vars --- src/modules/site-v2/base/utils/trait.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/site-v2/base/utils/trait.py b/src/modules/site-v2/base/utils/trait.py index d83ffadf..8eda8eff 100644 --- a/src/modules/site-v2/base/utils/trait.py +++ b/src/modules/site-v2/base/utils/trait.py @@ -121,7 +121,7 @@ def add_trait(form_data, user): try: with rollback_on_error_handler(): trait_data = PhenotypeDatabase() - trait_data.add_trait_data('hello') + trait_data.add_trait_data(trait_data_list) except Exception as ex: rollback_submission_on_error(tf.name, blob_name) logger.error(f'Failed to seed the file data to the database: {ex}') From cfcc1cb3f94f015cf32a978d11ee26caa2409f32 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Wed, 1 May 2024 13:59:15 -0500 Subject: [PATCH 07/11] Changing 'hashed_filename' -> 'filename_hash' --- src/modules/site-v2/base/utils/trait.py | 12 ++++++------ .../models/datastore/file_record_entity.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/modules/site-v2/base/utils/trait.py b/src/modules/site-v2/base/utils/trait.py index 8eda8eff..1dd6698a 100644 --- a/src/modules/site-v2/base/utils/trait.py +++ b/src/modules/site-v2/base/utils/trait.py @@ -34,7 +34,7 @@ def add_trait(form_data, user): tf = TraitFile(unique_id()) # Create a unique filename for file uload - hashed_filename = f'{unique_id()}.tsv' + filename_hash = f'{unique_id()}.tsv' 
tf.set_properties(**{ # User submitted data 'trait_name_user': bleach.clean(form_data.trait_name_user.data), @@ -53,10 +53,10 @@ def add_trait(form_data, user): 'publication': bleach.clean(form_data.publication.data), # Internally used data - 'dataset': 'public', - 'publish_status': PublishStatus.UPLOADED, - 'is_bulk_file': False, - 'hashed_filename': hashed_filename, + 'dataset': 'public', + 'publish_status': PublishStatus.UPLOADED, + 'is_bulk_file': False, + 'filename_hash': filename_hash, }) tf.set_user(user) @@ -79,7 +79,7 @@ def add_trait(form_data, user): # Save file to GCP bucket species_name = Species.get(form_data.species.data).name - blob_name = f'{MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH}/{species_name}/{user.name}/{hashed_filename}' + blob_name = f'{MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH}/{species_name}/{user.name}/{filename_hash}' # Check if the file already exists if check_blob_exists(MODULE_DB_OPERATIONS_BUCKET_NAME, blob_name): diff --git a/src/pkg/caendr/caendr/models/datastore/file_record_entity.py b/src/pkg/caendr/caendr/models/datastore/file_record_entity.py index b08cda23..e6c37dcd 100644 --- a/src/pkg/caendr/caendr/models/datastore/file_record_entity.py +++ b/src/pkg/caendr/caendr/models/datastore/file_record_entity.py @@ -24,7 +24,7 @@ def get_props_set(cls): return { *super().get_props_set(), 'filename', - 'hashed_filename', + 'filename_hash', } @@ -82,24 +82,24 @@ def filename(self, v): return self._set_raw_prop('filename', v) @property - def hashed_filename(self) -> TokenizedString: + def filename_hash(self) -> TokenizedString: ''' The hashed name of the file. Returns as a `TokenizedString`. 
''' - if self._get_raw_prop('hashed_filename') is None: + if self._get_raw_prop('filename_hash') is None: return None - return TokenizedString( self._get_raw_prop('hashed_filename') ) + return TokenizedString( self._get_raw_prop('filename_hash') ) - @hashed_filename.setter - def hashed_filename(self, v): + @filename_hash.setter + def filename_hash(self, v): ''' Save the hashed name of the file itself. Saves internally as a raw string. ''' if isinstance(v, TokenizedString): v = v.raw_string if not (isinstance(v, str) or v is None): - raise ValueError(f'Cannot set prop "hashed_filename" to "{v}" (type {type(v)}): must be a string') - return self._set_raw_prop('hashed_filename', v) + raise ValueError(f'Cannot set prop "filename_hash" to "{v}" (type {type(v)}): must be a string') + return self._set_raw_prop('filename_hash', v) # @@ -121,7 +121,7 @@ def get_filepath_hashed(self, schema: BlobURISchema = None, check_if_exists: boo ''' if check_if_exists and not self.check_exists(**kwargs): return None - return generate_blob_uri( self.bucket, self.prefix.get_string(**kwargs), self['hashed_filename'].get_string(**kwargs), schema=schema ) + return generate_blob_uri( self.bucket, self.prefix.get_string(**kwargs), self['filename_hash'].get_string(**kwargs), schema=schema ) def get_filepath_template(self, schema: BlobURISchema = None) -> TokenizedString: From 4ef3096c658aadbd507cd252de95181cc723305d Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Wed, 1 May 2024 14:04:04 -0500 Subject: [PATCH 08/11] Adding general Exception for the file upload --- src/modules/site-v2/base/utils/trait.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/modules/site-v2/base/utils/trait.py b/src/modules/site-v2/base/utils/trait.py index 1dd6698a..e6c3ef10 100644 --- a/src/modules/site-v2/base/utils/trait.py +++ b/src/modules/site-v2/base/utils/trait.py @@ -131,6 +131,11 @@ def add_trait(form_data, user): logger.error(f'Failed to upload a file {form_data.file.data.filename}: {ex}') 
return {'message': 'Failed to submit a form. Please try again later.'}, 500 + except Exception as ex: + rollback_submission_on_error(tf.name, blob_name) + logger.error(f'Failed to upload the file data to the database: {ex}') + return {'message': 'Failed to submit a form. Please try again later.'}, 500 + return {'message': 'Trait submitted successfully.'}, 200 From d94bf6173d7cfda13e8224c7badddbdc7ae7ff9b Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Mon, 6 May 2024 15:15:08 -0500 Subject: [PATCH 09/11] Refactor trait file dataset handling and add DatasetType enum --- src/modules/site-v2/base/utils/trait.py | 4 +-- .../caendr/models/datastore/__init__.py | 1 + .../caendr/models/datastore/trait_file.py | 25 +++++++++++++++---- .../caendr/models/sql/phenotype_metadata.py | 2 +- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/modules/site-v2/base/utils/trait.py b/src/modules/site-v2/base/utils/trait.py index e6c3ef10..1375c773 100644 --- a/src/modules/site-v2/base/utils/trait.py +++ b/src/modules/site-v2/base/utils/trait.py @@ -2,7 +2,7 @@ import csv from caendr.models.error import FileUploadError -from caendr.models.datastore import TraitFile, Species +from caendr.models.datastore import TraitFile, Species, DatasetType from caendr.models.status import PublishStatus from caendr.models.sql import PhenotypeMetadata, PhenotypeDatabase from caendr.services.cloud.storage import upload_blob_from_file_object, check_blob_exists, get_blob_if_exists @@ -53,7 +53,7 @@ def add_trait(form_data, user): 'publication': bleach.clean(form_data.publication.data), # Internally used data - 'dataset': 'public', + 'dataset': DatasetType.PUBLIC, 'publish_status': PublishStatus.UPLOADED, 'is_bulk_file': False, 'filename_hash': filename_hash, diff --git a/src/pkg/caendr/caendr/models/datastore/__init__.py b/src/pkg/caendr/caendr/models/datastore/__init__.py index 3461d2f7..aebc57a4 100644 --- a/src/pkg/caendr/caendr/models/datastore/__init__.py +++ 
b/src/pkg/caendr/caendr/models/datastore/__init__.py @@ -22,6 +22,7 @@ from .browser_track import BrowserTrackDefault # Subclasses FileRecordEntity (from BrowserTrack) from .browser_track import BrowserTrackTemplate # Subclasses FileRecordEntity (from BrowserTrack) from .trait_file import TraitFile # Subclasses FileRecordEntity, PublishableEntity, SpeciesEntity, UserOwnedEntity +from .trait_file import DatasetType # Job template classes from .job_entity import JobEntity # Subclasses StatusEntity; imports Container diff --git a/src/pkg/caendr/caendr/models/datastore/trait_file.py b/src/pkg/caendr/caendr/models/datastore/trait_file.py index 477d61a5..f268a37a 100644 --- a/src/pkg/caendr/caendr/models/datastore/trait_file.py +++ b/src/pkg/caendr/caendr/models/datastore/trait_file.py @@ -1,4 +1,5 @@ from typing import Tuple, Optional +from enum import Enum from caendr.utils.env import get_env_var @@ -10,12 +11,16 @@ DB_BUCKET_NAME = get_env_var('MODULE_DB_OPERATIONS_BUCKET_NAME') +class DatasetType(Enum): + """ Identifier for trait files folder in GCP Buckets """ + CAENDR = 'caendr' + PUBLIC = 'public' + ZHANG = 'zhang' class TraitFile(FileRecordEntity, PublishableEntity, SpeciesEntity, UserOwnedEntity): kind = 'trait_file' - # # Properties # @@ -74,9 +79,9 @@ def bucket(self): @property def prefix(self): - if self.dataset == 'public': - return TokenizedString(join_path('trait_files', self['dataset'], '${SPECIES}', '${USER_ID}')) - return TokenizedString(join_path('trait_files', self['dataset'], '${SPECIES}')) + if self.dataset == DatasetType.PUBLIC: + return TokenizedString(join_path('trait_files', self['dataset'].value, '${SPECIES}', '${USER_ID}')) + return TokenizedString(join_path('trait_files', self['dataset'].value, '${SPECIES}')) # @@ -85,7 +90,7 @@ def prefix(self): # The species is always determined by this entity itself, so we fill it in instead of letting the calling function supply it def get_filepath(self, schema: BlobURISchema = None, check_if_exists: 
bool = False): - if self.dataset == 'public': + if self.dataset == DatasetType.PUBLIC: return super().get_filepath_hashed(schema=schema, check_if_exists=check_if_exists, SPECIES=self['species'].name, USER_ID=self['username']) return super().get_filepath(schema=schema, check_if_exists=check_if_exists, SPECIES=self['species'].name) @@ -111,3 +116,13 @@ def display_name(self) -> Tuple[str, Optional[str], Optional[str]]: Combines `trait_name_display_1`, `trait_name_display_2`, and `trait_name_display_3` into a single tuple. ''' return self['trait_name_display_1'], self['trait_name_display_2'], self['trait_name_display_3'] + + @property + def dataset(self): + return self._get_enum_prop(DatasetType, 'dataset', None) + + @dataset.setter + def dataset(self, val): + if isinstance(val, str): + val = val.upper() + return self._set_enum_prop(DatasetType, 'dataset', val) \ No newline at end of file diff --git a/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py b/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py index db54f5f6..ead86917 100644 --- a/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py +++ b/src/pkg/caendr/caendr/models/sql/phenotype_metadata.py @@ -70,7 +70,7 @@ def add(self, trait_obj): tags = ', '.join(trait_obj['tags']), created_on = datetime.now(timezone.utc), modified_on = datetime.now(timezone.utc), - dataset = trait_obj['dataset'], + dataset = trait_obj['dataset'].value, is_bulk_file = trait_obj['is_bulk_file'] ) db.session.add(new_trait) From 640f8f002e1e2a30d087d72f8b3e9f55d1c1d050 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Mon, 6 May 2024 15:26:02 -0500 Subject: [PATCH 10/11] Decouple file upload handling --- src/modules/site-v2/templates/data/submit-trait-form.html | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/modules/site-v2/templates/data/submit-trait-form.html b/src/modules/site-v2/templates/data/submit-trait-form.html index 03a6d04c..cb8556c9 100644 --- 
a/src/modules/site-v2/templates/data/submit-trait-form.html +++ b/src/modules/site-v2/templates/data/submit-trait-form.html @@ -124,6 +124,10 @@

Trait Information

// Upload the file and display the content $('#file-upload').on('click', function(e) { e.preventDefault() + handleFileUpload() + }) + + function handleFileUpload() { const file = $('#file')[0].files[0] const species = $('#speciesSelect').val() if (file && species) { @@ -153,8 +157,8 @@

Trait Information

} } }) - } - }) + } + } function display_file_content(file) { $('#file-content').parent().removeClass('d-none') From 15798ce7eb7b7ad089651dc5731f1e0e03546727 Mon Sep 17 00:00:00 2001 From: Orzu Tursunova Date: Mon, 6 May 2024 15:34:16 -0500 Subject: [PATCH 11/11] Change checking title names to 'request.blueprint' in the breadcrumbs --- src/modules/site-v2/templates/_includes/breadcrumb.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/site-v2/templates/_includes/breadcrumb.html b/src/modules/site-v2/templates/_includes/breadcrumb.html index 4f79e6e1..619e10a3 100644 --- a/src/modules/site-v2/templates/_includes/breadcrumb.html +++ b/src/modules/site-v2/templates/_includes/breadcrumb.html @@ -11,7 +11,7 @@ {% elif alt_parent_breadcrumb %} {% elif tool_alt_parent_breadcrumb %} - {% if tool_alt_parent_breadcrumb["title"] == 'Strain Catalog' or tool_alt_parent_breadcrumb["title"] == 'Submit Trait'%} + {% if request.blueprint == 'request_strains' or request.blueprint == 'data'%} {% endif %}