Merge pull request #300 from AmpliconSuite/aggregator_integration
Aggregator integration
jluebeck authored Oct 23, 2024
2 parents 3fc4a0e + 54b1d85 commit 557b9e5
Showing 6 changed files with 324 additions and 180 deletions.
68 changes: 55 additions & 13 deletions caper/caper/aggregator_main.py
@@ -63,24 +63,33 @@ def read_name_remap(name_remap_file):

def unzip_file(fp, dest_root):
"""
unzips file based on zip type
Unzips file based on zip type.
Ensures proper extraction of all files, including nested directories.
"""
try:
if fp.endswith(".tar.gz"):
zip_name = os.path.basename(fp).replace(".tar.gz", "")
destination = f'{dest_root}/{zip_name}'
with tarfile.open(fp, 'r') as output_zip:
output_zip.extractall(destination)
output_zip.close()
destination = os.path.join(dest_root, zip_name)
os.makedirs(destination, exist_ok=True) # Ensure destination exists
# Open and extract tar.gz
with tarfile.open(fp, 'r:gz') as tar_ref:
for member in tar_ref.getmembers():
if member.isreg():
member.name = os.path.basename(member.name)
tar_ref.extract(member, destination)

elif fp.endswith(".zip"):
zip_name = os.path.basename(fp).replace(".zip", "")
destination = f'{dest_root}/{zip_name}'
destination = os.path.join(dest_root, zip_name)
os.makedirs(destination, exist_ok=True) # Ensure destination exists
# Open and extract zip
with zipfile.ZipFile(fp, 'r') as zip_ref:
zip_ref.extractall(destination)
zip_ref.close()

print(f'Just extracted: {fp} to {destination}!')

except Exception as e:
print(e)
print(f"Error occurred while extracting {fp}: {e}")


def clean_dirs(dlist):
@@ -120,16 +129,24 @@ def __init__(self, filelist, output_name, run_classifier, ref, py3_path, output_
self.py3_path = py3_path
self.name_remap = read_name_remap(name_remap_file)

self.feature_beds = defaultdict(str)

self.unzip()
if self.run_classifier == "Yes":
self.run_amp_classifier()
self.samp_AA_dct, self.samp_ckit_dct = defaultdict(str), defaultdict(str)
self.samp_mdata_dct, self.run_mdata_dct = defaultdict(str), defaultdict(str)
self.locate_dirs_and_metadata_jsons()
self.sample_to_ac_location_dct = self.aggregate_tables()
self.json_modifications()
complete = self.json_modifications()
self.cleanup()


if complete:
self.complete = True
else:
self.complete = False
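With json_modifications() now returning None on failure instead of calling sys.exit, the constructor records a completion flag that callers can branch on. A hedged sketch of the intended call pattern, with illustrative arguments mirroring the views.py usage later in this diff:

agg = Aggregator(["project1.tar.gz"], "/tmp/agg_out", "No", "", "python3",
                 output_directory="tmp_proj_id")  # illustrative arguments
if not agg.complete:
    # json_modifications() returned None, e.g. mixed reference genomes
    print("aggregation failed; prompt the user to fix the upload")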

def unzip(self):
"""
Unzips the zip files, and gets directories for the files within
@@ -217,6 +234,9 @@ def locate_dirs_and_metadata_jsons(self):
elif f.endswith("_sample_metadata.json"):
implied_sname = rchop(f, "_sample_metadata.json")
self.samp_mdata_dct[implied_sname] = fp + "/" + f
elif f.endswith("_intervals.bed"):
self.feature_beds[f] = os.path.join(fp, f)


def run_amp_classifier(self):
"""
@@ -365,15 +385,13 @@ def cleanup(self):
"""
Zips the aggregate results, and deletes files for cleanup
"""
print(self.samp_AA_dct.values())
clean_dirs(self.samp_AA_dct.values())
# self.clean_files(self.samp_ckit_dct.values())
print("Creating tar.gz...")
self.aggregated_filename = f'{self.output_name}.tar.gz'
self.tardir(f'{self.ROOT_FP}/results', f'{self.output_name}.tar.gz')
print('cleaning directories now ... ')
clean_dirs([f'{self.ROOT_FP}/results']) # ./extracted_from_zips



# def find_file(self, basename):
@@ -413,7 +431,9 @@ def json_modifications(self):
sys.stderr.write(str(ref_genomes) + "\n")
sys.stderr.write("ERROR! Multiple reference genomes detected in project.\n AmpliconRepository only "
"supports single-reference projects currently. Exiting.\n")
sys.exit(1)
# sys.exit(1)
return None


potential_str_lsts = [
'Location',
@@ -429,6 +449,7 @@
# update each path in run.json by finding them in outputs folder
# separately for feature bed file because location is different
feat_basename = os.path.basename(sample_dct['Feature BED file'])

cfiles = os.listdir(self.sample_to_ac_location_dct[sample])
cbf_hits = [x for x in cfiles if x.endswith("_classification_bed_files") and not x.startswith("._")]
if cbf_hits:
@@ -439,13 +460,19 @@
feat_file = feat_file.replace(f'{self.ROOT_FP}/results/', "")
sample_dct['Feature BED file'] = feat_file
else:

if not feat_file.endswith("/NA"):
print(f'Feature: "Feature BED file" {feat_file} does not exist for sample {sample_dct["Sample name"]}')

sample_dct['Feature BED file'] = "Not Provided"


else:
sample_dct['Feature BED file'] = "Not Provided"
fp_finding = self.feature_beds[feat_basename]
if fp_finding and os.path.exists(fp_finding):
sample_dct['Feature BED file'] = fp_finding
else:
sample_dct['Feature BED file'] = "Not Provided"

features_of_interest = [
'CNV BED file',
@@ -454,12 +481,15 @@
'AA summary file',
'Run metadata JSON',
'Sample metadata JSON',
'Feature BED file',
]

for feature in features_of_interest:
if feature in sample_dct and sample_dct[feature]:
feat_basename = os.path.basename(sample_dct[feature])
feat_file = f'{self.sample_to_ac_location_dct[sample]}/files/{feat_basename}'
if not os.path.exists(feat_file):
feat_file = f'{self.sample_to_ac_location_dct[sample]}/{feat_basename}'
if feature == "CNV BED file" and any([feat_file.endswith(x) for x in ["AA_CNV_SEEDS.bed", "CNV_CALLS_pre_filtered.bed", "Not provided", "Not Provided"]]):
cnvkit_dir = self.samp_ckit_dct[sample_dct['Sample name']]
if cnvkit_dir:
@@ -527,6 +557,8 @@ def json_modifications(self):
aggregate = pd.DataFrame.from_records(flattened_samples)
aggregate.to_csv(f'{self.ROOT_FP}/results/aggregated_results.csv')
aggregate.to_html(f'{self.ROOT_FP}/results/aggregated_results.html')

return 'Completed json mods'

def clean_by_suffix(self, suffix, dir):
if suffix and dir and not dir == "/" and not suffix == "*":
@@ -539,6 +571,16 @@ def clean_by_suffix(self, suffix, dir):
# except FileNotFoundError:
# pass

def find_file_by_basename(directory, basename):
# Walk through the directory
print(f'file to find: {basename}')
for root, dirs, files in os.walk(directory, topdown=True):
# Check if any file matches the given basename
for file in files:
if os.path.basename(file) == basename:
return os.path.join(root, file)
return None
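As written, find_file_by_basename takes the directory as its first positional argument (there is no self), so it reads as a standalone helper rather than a bound method. A short usage sketch with hypothetical paths:

hit = find_file_by_basename("/tmp/agg/results", "sample1_intervals.bed")
if hit is None:
    print("no file with that basename anywhere under the tree")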


# TODO: VALIDATE IS NEVER USED!
def validate():
10 changes: 10 additions & 0 deletions caper/caper/forms.py
@@ -48,6 +48,15 @@ class UpdateForm(forms.ModelForm):
help_text=
'Click checkbox to acknowledge and accept the terms of the license agreement.',
)

replace_project = forms.BooleanField(
label = format_html(
"Replace Project? If ticked, will replace the entire project with the file you upload."
),
required = False,
widget = forms.CheckboxInput(),
help_text = 'The default behavior is to add samples to the current project.'
)

class Meta:
model = Run
@@ -68,6 +77,7 @@ def __init__(self, *args, **kwargs):
self.fields['publication_link'].widget.attrs.update({'placeholder': 'Optional: Provide a PMID or link to a publication'})
self.fields['project_members'].required = False
self.fields['project_members'].widget.attrs.update({'placeholder': 'Optional: List of additional email addresses or AmpliconRepository usernames separated by spaces or commas'})
self.fields['replace_project'].widget.attrs.update({'id': 'custom_id_replace_project'})
# self.fields['file'].required = False
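Once the form validates, the new replace_project checkbox is also readable through cleaned_data, where an unticked box surfaces as False; the committed views.py below instead reads request.POST directly, where an unticked checkbox is simply absent (hence its try/except around the lookup). A hedged sketch of the cleaned_data route:

form = UpdateForm(request.POST, request.FILES)
if form.is_valid() and form.cleaned_data.get("replace_project"):
    # unticked checkboxes arrive here as False, not as a missing key
    ...  # skip downloading the old tarball; aggregate only the new upload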


5 changes: 3 additions & 2 deletions caper/caper/settings.py
@@ -189,7 +189,8 @@
],
'AUTH_PARAMS': {'access_type': 'online'},
'APP': {
'client_id': '789453891819-hk9q466oq5ba8i2ur4pk8d0of2f056sc.apps.googleusercontent.com',
#'client_id': '789453891819-hk9q466oq5ba8i2ur4pk8d0of2f056sc.apps.googleusercontent.com',
'client_id': '715102420712-3c11l0918iers60ca8eifnnpuihu88sm.apps.googleusercontent.com',
'secret': GOOGLE_SECRET_KEY,
}
},
@@ -204,7 +205,7 @@
'urn:globus:auth:scope:transfer.api.globus.org:all'
],
'APP': {
'client_id': '6524a9d1-a235-4c4f-9e20-c70e77f6e34b',
'client_id': '61af67c2-8697-4afd-a5df-2a46a8ef17df',
'secret': GLOBUS_SECRET_KEY,
}
}
74 changes: 52 additions & 22 deletions caper/caper/views.py
@@ -559,8 +559,6 @@ def project_page(request, project_name, message=''):
## if flag is unfinished, render a loading page:

project = validate_project(get_one_project(project_name), project_name)
print(project.keys())
print(project['private'])
if 'FINISHED?' in project and project['FINISHED?'] == False:
return render(request, "pages/loading.html", {"project_name":project_name})

@@ -974,7 +972,7 @@ def sample_download(request, project_name, sample_name):
png_id = feature['AA_PNG_file']
else:
png_id = False
if feature['AA_PNG_file'] != 'Not Provided':
if feature['AA_directory'] != 'Not Provided':
aa_directory_id = feature['AA_directory']
else:
aa_directory_id = False
@@ -1277,21 +1275,39 @@ def download_file(url, save_path):

print(f"File downloaded successfully and saved to {save_path}")



def edit_project_page(request, project_name):

if request.method == "POST":
project = get_one_project(project_name)
old_alias_name = None
if 'alias_name' in project:

old_alias_name = project['alias_name']
print(f'THE OLD ALIAS NAME SHOULD BE: {old_alias_name}')
# no edits for non-project members
if not is_user_a_project_member(project, request):
return HttpResponse("Project does not exist")

form = UpdateForm(request.POST, request.FILES)
## give the new project the old project alias.
if form.data['alias'] == '':
if old_alias_name:
mutable_data = form.data.copy() # Make a mutable copy of the form's data
mutable_data['alias'] = old_alias_name # Set the alias to the new value
form.data = mutable_data
## update old project so its alias is set to None, and the alias is set to the new project
query = {'_id': ObjectId(project_name)}
new_val = { "$set": {'alias_name' : None}}
collection_handle.update_one(query, new_val)

form_dict = form_to_dict(form)

form_dict['project_members'] = create_user_list(form_dict['project_members'], get_current_user(request))
print('UPDATED FORM ALIAS')
print(form_dict['alias'])
print(form.data)

form_dict['project_members'] = create_user_list(form_dict['project_members'], get_current_user(request))
# lets notify users (if their preferences request it) if project membership has changed
new_membership = form_dict['project_members']
old_membership = project['project_members']
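The alias carry-over relies on copying the immutable POST QueryDict before assigning to it. A minimal isolated sketch of the pattern, with illustrative field values:

from django.http import QueryDict

data = QueryDict("alias=&description=demo")  # immutable, like request.POST
if data["alias"] == "":
    patched = data.copy()             # copy() returns a mutable QueryDict
    patched["alias"] = "old-alias"    # safe to mutate the copy
    data = patched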
@@ -1316,16 +1332,27 @@ def edit_project_page(request, project_name):
## build download URL
url = f'http://127.0.0.1:8000/project/{project["linkid"]}/download'
download_path = project_data_path+'/download.tar.gz'

try:
## try to download old project file
download = download_file(url, download_path)
print(f"PREVIOUS FILE FPS LIST: {file_fps}")
file_fps.append(os.path.join('download.tar.gz'))
### if replace project, don't download old project
try:
if request.POST['replace_project'] == 'on':
print('Replacing project with new uploaded file')
except:
download = download_file(url, download_path)
file_fps.append(os.path.join('download.tar.gz'))
print(f"AFTERS FILE FPS LIST: {file_fps}")

print(f'aggregating on: {file_fps}')
agg = Aggregator(file_fps, project_data_path, 'No', "", 'python3', output_directory = f'{temp_proj_id}')
if agg.complete != True:
## redirect to edit page if aggregator fails
alert_message = "Edit project failed. Please ensure all uploaded samples have the same reference genome and are valid AmplionSuite results."
return render(request, 'pages/edit_project.html',
{'project': project,
'run': form,
'alert_message': alert_message,
'all_alias' :get_all_alias()})
## after running aggregator, replace the requests file with the aggregated file:
with open(agg.aggregated_filename, 'rb') as f:
uploaded_file = SimpleUploadedFile(
@@ -1377,7 +1404,7 @@ def edit_project_page(request, project_name):
{'project': project,
'run': form,
'alert_message': alert_message,
'all_alias' :get_all_alias()})
'all_alias' :json.dumps(get_all_alias())})
# JTL 081823 Not sure what these next 4 lines are about? An earlier plan to change the project file?
# leaving them alone for now but they smell like dead code
if 'file' in form_dict:
@@ -1393,9 +1420,15 @@
if runs != 0:
current_runs.update(runs)
query = {'_id': ObjectId(project_name)}
try:
alias_name = form_dict['alias']
print(alias_name)
except:
print('no alias to be found')

new_val = { "$set": {'project_name':new_project_name, 'runs' : current_runs, 'description': form_dict['description'], 'date': get_date(),
'private': form_dict['private'], 'project_members': form_dict['project_members'], 'publication_link': form_dict['publication_link'],
'Oncogenes': get_project_oncogenes(current_runs)}}
'Oncogenes': get_project_oncogenes(current_runs), 'alias_name' : alias_name}}
if form.is_valid():
print('im here')
collection_handle.update_one(query, new_val)
@@ -1404,16 +1437,13 @@
raise Http404()
else:
return HttpResponse("Project does not exist")


else:
project = get_one_project(project_name)
prev_versions, prev_ver_msg = previous_versions(project)
if prev_ver_msg:
messages.error(request, "Redirected to latest version, editing of old versions not allowed. ")
return redirect('project_page', project_name = prev_versions[0]['linkid'])

print(prev_ver_msg)
# split up the project members and remove the empties
members = project['project_members']
try:
@@ -1424,9 +1454,6 @@
memberString = ', '.join(members)
form = UpdateForm(initial={"project_name": project['project_name'],"description": project['description'],"private":project['private'],"project_members": memberString,"publication_link": publication_link})



print(project['alias_name'])
return render(request, "pages/edit_project.html",
{'project': project,
'run': form,
@@ -1716,7 +1743,6 @@ def extract_project_files(tarfile, file_location, project_data_path, project_id)
for feature in features:
# logging.debug(feature['Sample name'])
if len(feature) > 0:

# get paths
key_names = ['Feature BED file', 'CNV BED file', 'AA PDF file', 'AA PNG file', 'Sample metadata JSON',
'AA directory', 'cnvkit directory']
@@ -1725,10 +1751,8 @@
path_var = feature[k]
with open(f'{project_data_path}/results/{path_var}', "rb") as file_var:
id_var = fs_handle.put(file_var)

except:
id_var = "Not Provided"

feature[k] = id_var
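fs_handle here is presumably a gridfs.GridFS instance: put() streams the file into MongoDB and returns an ObjectId, which then replaces the filesystem path in the feature record. A sketch under that assumption, with an illustrative connection and path:

import gridfs
from pymongo import MongoClient

db = MongoClient()["caper"]  # illustrative database name
fs_handle = gridfs.GridFS(db)
with open("/tmp/results/sample1.bed", "rb") as fh:
    file_id = fs_handle.put(fh)  # returns a bson ObjectId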

# Now update the project with the updated runs
@@ -1929,7 +1953,13 @@ def create_project(request):
file_fps.append(file.name)
file.close()
agg = Aggregator(file_fps, project_data_path, 'No', "", 'python3', output_directory = f'{temp_proj_id}')

if agg.complete != True:
## redirect to edit page if aggregator fails
alert_message = "Create project failed. Please ensure all uploaded samples have the same reference genome and are valid AmplionSuite results."
return render(request, 'pages/create_project.html',
{'run': form,
'alert_message': alert_message,
'all_alias':json.dumps(get_all_alias())})
## after running aggregator, replace the requests file with the aggregated file:
with open(agg.aggregated_filename, 'rb') as f:
uploaded_file = SimpleUploadedFile(