Skip to content

Commit

Permalink
FIX: FIX Barcoding for the Fastq_extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
alihamraoui committed Aug 21, 2024
1 parent 4045146 commit ab76466
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 22 deletions.
2 changes: 2 additions & 0 deletions toulligqc/extractor_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,8 @@ def add_image_to_result(quiet, image_list, start_time, image):
def timeISO_to_float(iso_datetime, format):
    """
    Convert an ISO-like date/time string into a Unix timestamp.

    Any UTC offset suffix is discarded before parsing, not applied: the
    remaining text is parsed as a naive datetime, which
    ``datetime.timestamp()`` interprets in the local timezone.

    :param iso_datetime: date/time string, optionally followed by an offset
        such as ``+02:00`` or ``-04:00``
    :param format: ``strptime`` format describing the string without its offset
    :return: the Unix timestamp as a float
    :raises ValueError: if the string does not match ``format``, so callers
        can retry with another format
    """
    # Positive offset: everything from the first '+' onwards is dropped.
    if '+' in iso_datetime:
        iso_datetime = iso_datetime.split('+')[0]

    try:
        dt = datetime.strptime(iso_datetime, format)
    except ValueError:
        # A negative offset cannot be split off blindly because the date part
        # also contains '-'. Only the text after the 'T' separator is
        # inspected, and only once the plain parse has failed, so inputs that
        # already parsed keep giving exactly the same result.
        date_part, separator, time_part = iso_datetime.partition('T')
        if '-' not in time_part:
            raise
        without_offset = date_part + separator + time_part.split('-')[0]
        dt = datetime.strptime(without_offset, format)

    return dt.timestamp()
Expand Down
66 changes: 44 additions & 22 deletions toulligqc/fastq_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,32 +119,45 @@ def graph_generation(self, result_dict):

add_image_to_result(self.quiet, images, time.time(), pgg.read_count_histogram(result_dict, self.images_directory))
add_image_to_result(self.quiet, images, time.time(), pgg.read_length_scatterplot(self.dataframe_dict, self.images_directory))

if self.rich:
add_image_to_result(self.quiet, images, time.time(), pgg.yield_plot(self.dataframe_1d, self.images_directory))
add_image_to_result(self.quiet, images, time.time(), pgg.read_quality_multiboxplot(self.dataframe_dict, self.images_directory))
add_image_to_result(self.quiet, images, time.time(), pgg.allphred_score_frequency(self.dataframe_dict, self.images_directory))

if self.rich:
add_image_to_result(self.quiet, images, time.time(), pgg.plot_performance(self.dataframe_1d, self.images_directory))
add_image_to_result(self.quiet, images, time.time(), pgg.twod_density(self.dataframe_dict, self.images_directory))

if self.rich:
add_image_to_result(self.quiet, images, time.time(), pgg.sequence_length_over_time(self.dataframe_dict, self.images_directory))
add_image_to_result(self.quiet, images, time.time(), pgg.phred_score_over_time(self.dataframe_dict, result_dict, self.images_directory))
if self.is_barcode:
add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_pass(self.dataframe_dict,
self.barcode_selection,
self.images_directory))

read_fail = self.dataframe_dict["read.fail.barcoded"]
if not (len(read_fail) == 1 and read_fail["other barcodes"] == 0):
add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_fail(self.dataframe_dict,
self.barcode_selection,
self.images_directory))

add_image_to_result(self.quiet, images, time.time(), pgg.barcode_length_boxplot(self.dataframe_dict,
self.images_directory))

add_image_to_result(self.quiet, images, time.time(), pgg.barcoded_phred_score_frequency(self.dataframe_dict,
self.images_directory))
if self.is_barcode:
if "barcode_alias" in self.config_dictionary:
barcode_alias = self.config_dictionary['barcode_alias']
else:
barcode_alias = None

add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_pass(self.dataframe_dict,
self.barcode_selection,
self.images_directory,
barcode_alias))

read_fail = self.dataframe_dict["read.fail.barcoded"]
if not (len(read_fail) == 1 and read_fail["other barcodes"] == 0):
add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_fail(self.dataframe_dict,
self.barcode_selection,
self.images_directory,
barcode_alias))

add_image_to_result(self.quiet, images, time.time(), pgg.barcode_length_boxplot(self.dataframe_dict,
self.images_directory,
barcode_alias))

add_image_to_result(self.quiet, images, time.time(), pgg.barcoded_phred_score_frequency(self.dataframe_dict,
self.images_directory,
barcode_alias))
return images


Expand Down Expand Up @@ -211,7 +224,7 @@ def extract(self, result_dict):
"pass.reads.sequence.length")
describe_dict(self, result_dict, self.dataframe_dict["fail.reads.sequence.length"],
"fail.reads.sequence.length")
if self.is_barcode:
if self.rich and self.is_barcode:
extract_barcode_info(self, result_dict,
self.barcode_selection,
self.dataframe_dict,
Expand Down Expand Up @@ -258,8 +271,9 @@ def _load_fastq_data(self):
columns = ['sequence_length', 'mean_qscore', 'passes_filtering']
if self.rich:
columns.extend(['start_time', 'channel'])
if self.is_barcode:
columns.append('barcode_arrangement')

if self.is_barcode:
columns.append('barcode_arrangement')

fq_df = pd.DataFrame(fq_df, columns=columns)

Expand All @@ -271,8 +285,10 @@ def _load_fastq_data(self):
fq_df["start_time"] = fq_df["start_time"] - fq_df["start_time"].min()
fq_df['start_time'] = fq_df['start_time'].astype(np.float64)
fq_df['channel'] = fq_df['channel'].astype(np.int16)
if self.is_barcode:
fq_df['barcode_arrangement'] = fq_df['barcode_arrangement'].astype("category")

if self.is_barcode:
fq_df['barcode_arrangement'] = fq_df['barcode_arrangement'].astype("category")

return fq_df


Expand Down Expand Up @@ -346,8 +362,11 @@ def check_fastq(self):
self.is_barcode = False
if 'model_version_id' not in metadata:
metadata['model_version_id'] = 'Unknow'
run_info = []
try:
return metadata['runid'] , metadata['sampleid'] , metadata['model_version_id']
sample_id = 'sample_id' if 'sample_id' in metadata else 'sampleid'
run_id = 'run_id' if 'run_id' in metadata else 'runid'
return metadata[run_id] , metadata[sample_id] , metadata['model_version_id']
except:
return None

Expand All @@ -356,7 +375,10 @@ def _extract_info_from_name(self, name):
"""
"""
metadata = dict(x.split("=") for x in name.split(" ")[1:])
start_time = timeISO_to_float(metadata['start_time'], '%Y-%m-%dT%H:%M:%SZ')
try:
start_time = timeISO_to_float(metadata['start_time'], '%Y-%m-%dT%H:%M:%SZ')
except:
start_time = timeISO_to_float(metadata['start_time'], '%Y-%m-%dT%H:%M:%S.%f')
if self.is_barcode:
return start_time, metadata['ch'], metadata['barcode']
return start_time, metadata['ch']

0 comments on commit ab76466

Please sign in to comment.