Skip to content

Commit

Permalink
Merge pull request #481 from AndersenLab/feature/submit-trait
Browse files Browse the repository at this point in the history
[Trait Submission Form] Changes for the previous PR
  • Loading branch information
r-vieira authored May 9, 2024
2 parents 643d52d + 15798ce commit 5507b0d
Show file tree
Hide file tree
Showing 9 changed files with 241 additions and 141 deletions.
171 changes: 171 additions & 0 deletions src/modules/site-v2/base/utils/trait.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import bleach
import csv

from caendr.models.error import FileUploadError
from caendr.models.datastore import TraitFile, Species, DatasetType
from caendr.models.status import PublishStatus
from caendr.models.sql import PhenotypeMetadata, PhenotypeDatabase
from caendr.services.cloud.storage import upload_blob_from_file_object, check_blob_exists, get_blob_if_exists
from caendr.services.cloud.postgresql import rollback_on_error_handler
from caendr.services.logger import logger
from caendr.services.validate import validate_file, StrainValidator, NumberValidator
from caendr.services.cloud.datastore import delete_ds_entity_by_ref
from caendr.api.phenotype import get_trait
from caendr.utils.data import unique_id
from caendr.utils.env import get_env_var
from caendr.utils.local_files import LocalUploadFile
from constants import TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS

# Name of the GCP bucket that submitted trait files are uploaded to (and deleted from on rollback).
MODULE_DB_OPERATIONS_BUCKET_NAME = get_env_var('MODULE_DB_OPERATIONS_BUCKET_NAME')
# Path prefix used when building a trait file's blob name:
#   `<prefix>/<species name>/<user name>/<hashed filename>`
MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH = get_env_var('MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH')


def add_trait(form_data, user):
    """
    Add a trait to the database by performing the following operations:
      1. Create a new TraitFile object with the user submitted data and save it to Datastore.
      2. Seed the trait data to the Phenotype Metadata SQL table.
      3. Save the file to the GCP bucket.
      4. Parse and seed the file data to the Phenotype Database SQL table.
    If any step fails, the work done by the earlier steps is rolled back
    and an error response is returned.

    Args:
      form_data: The submitted trait form. Each field is read through its `.data`
        attribute; `form_data.file.data` must be a file object with `filename` and `seek`.
      user: The submitting user. Passed to `TraitFile.set_user`, and `user.name` is
        used as part of the uploaded blob's path.

    Returns:
      A `(body, status_code)` tuple, where `body` is a dict with a user-facing `message`.
      The status code is 200 on success, 400 if the file is rejected, and 500 otherwise.
    """
    try:
        # Create a new TraitFile object
        tf = TraitFile(unique_id())

        # Create a unique filename for file upload
        filename_hash = f'{unique_id()}.tsv'
        tf.set_properties(**{
            # User submitted data
            'trait_name_user': bleach.clean(form_data.trait_name_user.data),
            'trait_name_display_1': bleach.clean(form_data.trait_name_display_1.data),
            'trait_name_display_2': bleach.clean(form_data.trait_name_display_2.data),
            # Previously read from the `trait_name_display_2` field (copy-paste slip)
            'trait_name_display_3': bleach.clean(form_data.trait_name_display_3.data),
            'filename': bleach.clean(form_data.file.data.filename),
            'species': bleach.clean(form_data.species.data),
            'description_short': bleach.clean(form_data.description_short.data),
            'description_long': bleach.clean(form_data.description_long.data),
            'units': bleach.clean(form_data.units.data),
            'tags': [bleach.clean(tag) for tag in form_data.tags.data],
            'institution': bleach.clean(form_data.institution.data),
            'source_lab': bleach.clean(form_data.source_lab.data),
            'protocols': bleach.clean(form_data.protocols.data),
            'publication': bleach.clean(form_data.publication.data),

            # Internally used data
            'dataset': DatasetType.PUBLIC,
            'publish_status': PublishStatus.UPLOADED,
            'is_bulk_file': False,
            'filename_hash': filename_hash,
        })

        tf.set_user(user)

        # Save the TraitFile object to Datastore
        tf.save()
    except Exception as ex:
        logger.error(f'Failed to create a trait file {form_data.trait_name_user.data}: {ex}')
        return {'message': 'Failed to submit a form. Please try again later.'}, 500

    try:
        # Seed to Phenotype Metadata SQL table
        with rollback_on_error_handler():
            new_trait = PhenotypeMetadata()
            new_trait.add(tf)
    except Exception as ex:
        rollback_submission_on_error(tf.name)
        logger.error(f'Failed to seed the trait to the database: {ex}')
        return {'message': 'Failed to submit a form. Please try again later.'}, 500

    # Save file to GCP bucket
    blob_name = None
    upload_attempted = False
    try:
        species_name = Species.get(form_data.species.data).name
        blob_name = f'{MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH}/{species_name}/{user.name}/{filename_hash}'

        # Check if the file already exists
        if check_blob_exists(MODULE_DB_OPERATIONS_BUCKET_NAME, blob_name):
            # Undo the records created above, but do NOT pass the blob name:
            # the existing blob was not created by this submission.
            rollback_submission_on_error(tf.name)
            return {'message': 'File already exists.'}, 400

        upload_attempted = True
        upload_blob_from_file_object(MODULE_DB_OPERATIONS_BUCKET_NAME, form_data.file.data, blob_name)

        # Reset the file pointer so the file can be read again below
        form_data.file.data.seek(0)
    except Exception as ex:
        # Only clean up the blob if this submission may have written it
        rollback_submission_on_error(tf.name, blob_name if upload_attempted else None)
        logger.error(f'Failed to upload a file {form_data.file.data.filename}: {ex}')
        return {'message': 'Failed to submit a form. Please try again later.'}, 500

    try:
        # Seed the file data to Phenotype Database SQL table
        with LocalUploadFile(form_data.file.data, valid_file_extensions=TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS) as file:
            # Validate the file
            try:
                validate_file(file, [
                    StrainValidator('strain', species=tf['species'], force_unique=True, force_unique_msgs={}),
                    NumberValidator(None, accept_float=True, accept_na=True),
                ])
            except Exception as ex:
                # Not every exception carries a `msg` attribute; fall back to its string form
                error_msg = getattr(ex, 'msg', None) or str(ex)
                rollback_submission_on_error(tf.name, blob_name)
                logger.error(f'Failed to validate the file: {error_msg}')
                return {'message': f'Failed to validate the file: {error_msg}'}, 400

            # Parse the trait file
            trait_data_list = _parse_trait_rows(file, tf['trait_name_user'], tf.name)

            try:
                with rollback_on_error_handler():
                    phenotype_db = PhenotypeDatabase()
                    phenotype_db.add_trait_data(trait_data_list)
            except Exception as ex:
                rollback_submission_on_error(tf.name, blob_name)
                logger.error(f'Failed to seed the file data to the database: {ex}')
                # Without this return the caller was told the submission succeeded
                return {'message': 'Failed to submit a form. Please try again later.'}, 500

    except FileUploadError as ex:
        rollback_submission_on_error(tf.name, blob_name)
        logger.error(f'Failed to upload a file {form_data.file.data.filename}: {ex}')
        return {'message': 'Failed to submit a form. Please try again later.'}, 500

    except Exception as ex:
        rollback_submission_on_error(tf.name, blob_name)
        logger.error(f'Failed to upload the file data to the database: {ex}')
        return {'message': 'Failed to submit a form. Please try again later.'}, 500

    return {'message': 'Trait submitted successfully.'}, 200


def _parse_trait_rows(path, trait_name, metadata_id):
    """
    Read a tab-separated trait file into a list of row dicts for the Phenotype Database.

    The first line is treated as a header and skipped, as are completely empty lines.
    Column 0 is read as the strain name and column 1 as the trait value.
    """
    # newline='' leaves newline handling to the csv module, as the csv docs recommend
    with open(path, newline='') as f:
        reader = csv.reader(f, delimiter='\t')
        next(reader, None)  # Skip the header row
        return [
            {
                'trait_name': trait_name,
                'strain_name': row[0],
                'trait_value': row[1],
                'metadata_id': metadata_id,
            }
            for row in reader
            if row
        ]


def rollback_submission_on_error(trait_id, blob_name=None):
    """
    Undo a partially completed trait submission.

    Looks up the TraitFile with the given ID. If none is found, the failure is
    logged and nothing is deleted. Otherwise the following are removed, in order:
      1. The TraitFile entity in Datastore.
      2. The trait's entry in the Phenotype Metadata SQL table, if one is found.
      3. The file in the GCP bucket, if `blob_name` is given and the blob exists.
      4. The trait's file data in the Phenotype Database SQL table.
    """
    trait_file = TraitFile.get_ds(trait_id)
    if trait_file is None:
        logger.error(f'Failed to retrieve the trait file {trait_id}')
        return

    # Datastore: drop the TraitFile entity created for this submission
    delete_ds_entity_by_ref(TraitFile.kind, trait_file.name)

    # Phenotype Metadata SQL table: drop the trait entry, if it was seeded
    metadata_entry = get_trait(trait_file.name)
    if metadata_entry:
        metadata_entry.delete()

    # GCP bucket: drop the uploaded file, but only when the caller named one
    uploaded_blob = (
        get_blob_if_exists(MODULE_DB_OPERATIONS_BUCKET_NAME, blob_name)
        if blob_name
        else None
    )
    if uploaded_blob:
        uploaded_blob.delete()

    # Phenotype Database SQL table: drop any file data seeded for this trait
    PhenotypeDatabase.delete_by_metadata_id(trait_file.name)

136 changes: 15 additions & 121 deletions src/modules/site-v2/base/views/data/data.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,23 @@
import yaml
import bleach
import csv

from flask import render_template, Blueprint, redirect, url_for, request, flash, jsonify, abort
from flask import render_template, Blueprint, redirect, url_for, request, flash, jsonify
from extensions import cache
from config import config

from caendr.models.error import EnvVarError, FileUploadError
from caendr.models.datastore import TraitFile, Species
from caendr.models.status import PublishStatus
from caendr.services.cloud.storage import get_blob, upload_blob_from_file_object, check_blob_exists
from caendr.models.datastore import Species
from caendr.services.cloud.storage import get_blob
from caendr.services.logger import logger
from caendr.services.validate import validate_file, StrainValidator, NumberValidator
from caendr.utils.data import unique_id
from base.utils.auth import jwt_required, get_current_user
from caendr.utils.env import get_env_var
from caendr.utils.local_files import LocalUploadFile
from base.utils.auth import jwt_required, get_current_user
from base.utils.trait import add_trait
from base.forms import TraitSubmissionForm
from constants import TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS
from caendr.models.sql import PhenotypeMetadata, PhenotypeDatabase





MODULE_DB_OPERATIONS_BUCKET_NAME = get_env_var('MODULE_DB_OPERATIONS_BUCKET_NAME')
MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH = get_env_var('MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH')

data_bp = Blueprint(
'data', __name__, template_folder='templates'
)
Expand Down Expand Up @@ -80,7 +71,7 @@ def protocols():
#
# Submit Trait
#
@data_bp.route('/submit-trait')
@data_bp.route('/trait/start-submit')
@jwt_required()
def submit_trait_start():
""" Submit Trait start page """
Expand All @@ -92,7 +83,7 @@ def submit_trait_start():
#
# Submit Trait Form
#
@data_bp.route('/submit-trait/new-submission', methods=['GET', 'POST'])
@data_bp.route('/trait/create', methods=['GET', 'POST'])
@jwt_required()
def submit_trait_form():
""" Trait Submission Form """
Expand All @@ -112,111 +103,13 @@ def submit_trait_form():
flash('Please fill out all required fields.', 'warning')

else:
try:
# Create a new TraitFile object
tf = TraitFile(unique_id())

# Create a unique filename for file upload
hashed_filename = f'{unique_id()}.tsv'

tf.set_properties(**{
# User submitted data
'trait_name_user': bleach.clean(form.trait_name_user.data),
'trait_name_display_1': bleach.clean(form.trait_name_display_1.data),
'trait_name_display_2': bleach.clean(form.trait_name_display_2.data),
'trait_name_display_3': bleach.clean(form.trait_name_display_2.data),
'filename': bleach.clean(form.file.data.filename),
'species': bleach.clean(form.species.data),
'description_short': bleach.clean(form.description_short.data),
'description_long': bleach.clean(form.description_long.data),
'units': bleach.clean(form.units.data),
'tags': [ bleach.clean(tag) for tag in form.tags.data ],
'institution': bleach.clean(form.institution.data),
'source_lab': bleach.clean(form.source_lab.data),
'protocols': bleach.clean(form.protocols.data),
'publication': bleach.clean(form.publication.data),

# Internally used data
'dataset': 'public',
'publish_status': PublishStatus.UPLOADED,
'is_bulk_file': False,
'hashed_filename': hashed_filename,
})

tf.set_user(user)

# Save the TraitFile object to Datastore
tf.save()

# Seed to Phenotype Metadata SQL table
new_trait = PhenotypeMetadata()
new_trait.add_trait(tf)

except Exception as ex:
logger.error(f'Failed to create a trait file {form.trait_name_user.data}: {ex}')
flash('Failed to submit a form. Please try again later.', 'danger')
abort(500)

# Save file to GCP bucket
species_name = Species.get(form.species.data).name
blob_name = f'{MODULE_DB_OPERATIONS_TRAITFILE_PUBLIC_FILEPATH}/{species_name}/{user.name}/{hashed_filename}'

# Check if the file already exists
if check_blob_exists(MODULE_DB_OPERATIONS_BUCKET_NAME, blob_name):
flash('File already exists.', 'danger')
# Add the trait to the database
resp, code = add_trait(form, user)
if code != 200:
flash(resp['message'], 'danger')
else:
upload_blob_from_file_object(MODULE_DB_OPERATIONS_BUCKET_NAME, form.file.data, blob_name)

# Reset the file pointer
form.file.data.seek(0)

try:
# Seed the file data to Phenotype Database SQL table
with LocalUploadFile(form.file.data, valid_file_extensions=TOOL_INPUT_DATA_VALID_FILE_EXTENSIONS) as file:

# Validate the file
try:
validate_file(file, [
StrainValidator( 'strain', species=tf['species'], force_unique=True, force_unique_msgs={} ),
NumberValidator( None, accept_float=True, accept_na=True ),
])
except Exception as ex:
flash(f'Failed to validate the file: {ex.msg}', 'danger')
return render_template('data/submit-trait-form.html', **{
# Page Info
'title': 'Phenotype Database Trait Submission',
'tool_alt_parent_breadcrumb': {"title": "Submit Trait", "url": url_for('data.submit_trait_start')},

# Data
'form': form,
})

# Parse the trait file
trait_data_list = []
with open(file) as f:
for idx, row in enumerate( csv.reader(f, delimiter='\t') ):
if idx == 0:
continue
else:
trait_data = {
'trait_name': tf['trait_name_user'],
'strain_name': row[0],
'trait_value': row[1],
'metadata_id': tf.name
}
trait_data_list.append(trait_data)

trait_data = PhenotypeDatabase()
trait_data.add_trait_data(trait_data_list)

except FileUploadError as ex:
logger.error(f'Failed to upload a file {form.file.data.filename}: {ex}')
flash('Failed to submit a form. Please try again later.', 'danger')
abort(500)

flash('Trait submitted successfully.', 'success')
# TODO: change the redirect to MTL
return redirect(url_for('data.submit_trait_start'))
flash('Trait submitted successfully.', 'success')
return redirect(url_for('data.submit_trait_start'))

return render_template('data/submit-trait-form.html', **{
# Page Info
Expand All @@ -230,7 +123,7 @@ def submit_trait_form():
#
# File Upload
#
@data_bp.route('/submit-trait/parse-file', methods=['POST'])
@data_bp.route('/trait/parse-file', methods=['POST'])
@jwt_required()
def parse_trait_file():
""" Parse the trait file and return the data """
Expand Down Expand Up @@ -259,3 +152,4 @@ def parse_trait_file():
except Exception as ex:
logger.error(f'Failed to parse the file: {ex}')
return jsonify({ 'message': 'Failed to parse the file. Please try again later.' }), 500

2 changes: 1 addition & 1 deletion src/modules/site-v2/templates/_includes/breadcrumb.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
{% elif alt_parent_breadcrumb %}
<li class="breadcrumb-item">{{ alt_parent_breadcrumb["title"] }}</li>
{% elif tool_alt_parent_breadcrumb %}
{% if tool_alt_parent_breadcrumb["title"] == 'Strain Catalog' or tool_alt_parent_breadcrumb["title"] == 'Submit Trait'%}
{% if request.blueprint == 'request_strains' or request.blueprint == 'data'%}
<li class="breadcrumb-item">{{request.blueprint|replace('_', ' ')|title}}</li>
{% endif %}
<li class="breadcrumb-item"><a href="{{ tool_alt_parent_breadcrumb.url }}">{{ tool_alt_parent_breadcrumb["title"] }}</a></li>
Expand Down
8 changes: 6 additions & 2 deletions src/modules/site-v2/templates/data/submit-trait-form.html
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ <h3 class="pb-3">Trait Information</h3>
// Upload the file and display the content
$('#file-upload').on('click', function(e) {
e.preventDefault()
handleFileUpload()
})

function handleFileUpload() {
const file = $('#file')[0].files[0]
const species = $('#speciesSelect').val()
if (file && species) {
Expand Down Expand Up @@ -153,8 +157,8 @@ <h3 class="pb-3">Trait Information</h3>
}
}
})
}
})
}
}

function display_file_content(file) {
$('#file-content').parent().removeClass('d-none')
Expand Down
1 change: 1 addition & 0 deletions src/pkg/caendr/caendr/models/datastore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .browser_track import BrowserTrackDefault # Subclasses FileRecordEntity (from BrowserTrack)
from .browser_track import BrowserTrackTemplate # Subclasses FileRecordEntity (from BrowserTrack)
from .trait_file import TraitFile # Subclasses FileRecordEntity, PublishableEntity, SpeciesEntity, UserOwnedEntity
from .trait_file import DatasetType

# Job template classes
from .job_entity import JobEntity # Subclasses StatusEntity; imports Container
Expand Down
Loading

0 comments on commit 5507b0d

Please sign in to comment.