Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pVACview percentile plots to include percentiles from elution and immunogenicity algorithms #1149

Open
wants to merge 10 commits into
base: aggregate_incl
Choose a base branch
from
Binary file modified docs/images/screenshots/pvacview-additional_info_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 5 additions & 4 deletions docs/pvacview/pvacseq_module/pvacseq_features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ There are five different tabs in this section of the app, providing peptide-leve

- :bold:`IC50 Plot:`

Shown in this tab are violin plots of the individual IC50-based binding affinity predictions of the MT and WT peptides for HLA alleles that the MT binds well to.
These peptides each have up to 8 binding algorithm scores for Class I alleles or up to 4 algorithm scores for Class II alleles.
Shown in this tab are violin plots of the individual IC50-based binding affinity predictions of the MT and WT peptides for HLA alleles that have predictions
included in the aggregate report. These peptides each have up to 8 binding algorithm scores for Class I alleles or up to 4 algorithm scores for Class II alleles.

.. figure:: ../../images/screenshots/pvacview-additional_info_1.png
:width: 1000px
Expand All @@ -157,8 +157,9 @@ There are five different tabs in this section of the app, providing peptide-leve

- :bold:`%ile Plot:`

Shown in this tab are violin plots of the individual percentile-based binding affinity predictions of the MT and WT peptides for HLA alleles that the MT binds well to.
These peptides each have up to 8 binding algorithm scores for Class I alleles or up to 4 algorithm scores for Class II alleles.
Shown in this tab are violin plots of the individual percentile-based binding affinity and elution predictions
of the MT and WT peptides for HLA alleles that have predictions included in the aggregate report. These peptides
each have up to 10 binding and elution algorithm scores for Class I alleles or up to 4 algorithm scores for Class II alleles.

.. figure:: ../../images/screenshots/pvacview-additional_info_2.png
:width: 1000px
Expand Down
32 changes: 23 additions & 9 deletions pvactools/lib/aggregate_all_epitopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_unique_peptide_hla_counts(self, included_df):
raise Exception("Must implement method in child class")

@abstractmethod
def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms):
def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms, percentile_algorithms):
raise Exception("Must implement method in child class")

@abstractmethod
Expand Down Expand Up @@ -92,7 +92,7 @@ def sort_table(self, df):
def copy_pvacview_r_files(self):
raise Exception("Must implement method in child class")

def get_best_mut_line(self, df, key, prediction_algorithms, el_algorithms, vaf_clonal):
def get_best_mut_line(self, df, key, prediction_algorithms, el_algorithms, percentile_algorithms, vaf_clonal):
#order by best median score and get best ic50 peptide
best = self.get_best_binder(df)

Expand All @@ -104,7 +104,7 @@ def get_best_mut_line(self, df, key, prediction_algorithms, el_algorithms, vaf_c
hla = dict(map(lambda x : (x, hla_counts[x]) if x in hla_counts else (x, ""), self.hla_types))
#get a list of all unique gene/transcript/aa_change combinations
#store a count of all unique peptides that passed
(peptides, anno_count) = self.get_included_df_metrics(included_df, prediction_algorithms, el_algorithms)
(peptides, anno_count) = self.get_included_df_metrics(included_df, prediction_algorithms, el_algorithms, percentile_algorithms)
included_peptide_count = self.calculate_unique_peptide_count(included_df)
good_binder_count = self.calculate_good_binder_count(included_df)
else:
Expand Down Expand Up @@ -155,6 +155,14 @@ def determine_used_el_algorithms(self):
prediction_algorithms.append(algorithm)
return prediction_algorithms

def determine_used_percentile_algorithms(self, prediction_algorithms, el_algorithms):
    """Return the algorithms that have a percentile column in the input file.

    Only the header row of the tab-delimited ``self.input_file`` is read.
    An algorithm is kept when the header contains either
    ``"<algorithm> MT Percentile"`` or ``"<algorithm> Percentile"``.

    Args:
        prediction_algorithms: list of algorithm names, checked first.
        el_algorithms: list of algorithm names, checked after
            ``prediction_algorithms``.

    Returns:
        A list of the matching algorithm names, in the order they appear
        in ``prediction_algorithms + el_algorithms``.
    """
    # nrows=0 parses the header line only, so no data rows are loaded.
    header_frame = pd.read_csv(self.input_file, delimiter="\t", nrows=0)
    column_names = header_frame.columns.tolist()
    return [
        name
        for name in prediction_algorithms + el_algorithms
        if "{} MT Percentile".format(name) in column_names
        or "{} Percentile".format(name) in column_names
    ]

def determine_columns_used_for_aggregation(self, prediction_algorithms, el_algorithms):
used_columns = [
"Chromosome", "Start", "Stop", "Reference", "Variant",
Expand Down Expand Up @@ -204,6 +212,7 @@ def execute(self):
prediction_algorithms = self.determine_used_prediction_algorithms()
epitope_lengths = self.determine_used_epitope_lengths()
el_algorithms = self.determine_used_el_algorithms()
percentile_algorithms = self.determine_used_percentile_algorithms(prediction_algorithms, el_algorithms)
used_columns = self.determine_columns_used_for_aggregation(prediction_algorithms, el_algorithms)
dtypes = self.set_column_types(prediction_algorithms)

Expand Down Expand Up @@ -243,7 +252,7 @@ def execute(self):

for key in keys:
(df, key_str) = self.get_sub_df(all_epitopes_df, key)
(best_mut_line, metrics_for_key) = self.get_best_mut_line(df, key_str, prediction_algorithms, el_algorithms, vaf_clonal)
(best_mut_line, metrics_for_key) = self.get_best_mut_line(df, key_str, prediction_algorithms, el_algorithms, percentile_algorithms, vaf_clonal)
data.append(best_mut_line)
metrics[key_str] = metrics_for_key
peptide_table = pd.DataFrame(data=data)
Expand Down Expand Up @@ -573,7 +582,7 @@ def replace_nas(self, items):
def round_to_ints(self, items):
    """Round every non-missing float in ``items`` to the nearest integer.

    Elements that are not floats (strings, ints, ...) and floats that
    ``pd.isna`` reports as missing are passed through unchanged.

    Args:
        items: iterable of values.

    Returns:
        A new list of the same length, with float entries replaced by
        ``round(x)``.
    """
    # isinstance (rather than an exact type comparison) also matches float
    # subclasses such as numpy.float64, which type(x) == float would skip.
    return [
        round(x) if (isinstance(x, float) and not pd.isna(x)) else x
        for x in items
    ]

def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms):
def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms, percentile_algorithms):
peptides = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
included_peptides = included_df["MT Epitope Seq"].unique()
included_transcripts = included_df['annotation'].unique()
Expand All @@ -596,9 +605,10 @@ def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorit
included_df_peptide_annotation = included_df_annotation[included_df_annotation['MT Epitope Seq'] == peptide]
if len(included_df_peptide_annotation) > 0:
individual_ic50_calls = { 'algorithms': prediction_algorithms }
individual_percentile_calls = { 'algorithms': prediction_algorithms }
individual_ic50_percentile_calls = { 'algorithms': prediction_algorithms }
individual_el_calls = { 'algorithms': el_algorithms }
individual_el_percentile_calls = { 'algorithms': el_algorithms }
individual_percentile_calls = { 'algorithms': percentile_algorithms }
anchor_fails = []
for peptide_type, top_score_metric in zip(['MT', 'WT'], [self.mt_top_score_metric, self.wt_top_score_metric]):
ic50s = {}
Expand All @@ -607,13 +617,15 @@ def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorit
percentile_calls = {}
el_calls = {}
el_percentile_calls = {}
all_percentile_calls = {}
for index, line in included_df_peptide_annotation.to_dict(orient='index').items():
ic50s[line['HLA Allele']] = line['{} {} IC50 Score'.format(top_score_metric, peptide_type)]
percentiles[line['HLA Allele']] = line['{} {} Percentile'.format(top_score_metric, peptide_type)]
ic50_calls[line['HLA Allele']] = self.replace_nas([line["{} {} IC50 Score".format(algorithm, peptide_type)] for algorithm in prediction_algorithms])
percentile_calls[line['HLA Allele']] = self.replace_nas([line["{} {} Percentile".format(algorithm, peptide_type)] for algorithm in prediction_algorithms])
el_calls[line['HLA Allele']] = self.replace_nas([line["{} {} Score".format(algorithm, peptide_type)] for algorithm in el_algorithms])
el_percentile_calls[line['HLA Allele']] = self.replace_nas(['NA' if algorithm in ['MHCflurryEL Processing', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno'] else line["{} {} Percentile".format(algorithm, peptide_type)] for algorithm in el_algorithms])
all_percentile_calls[line['HLA Allele']] = self.replace_nas([line["{} {} Percentile".format(algorithm, peptide_type)] for algorithm in percentile_algorithms])
if peptide_type == 'MT' and not self.is_anchor_residue_pass(line):
anchor_fails.append(line['HLA Allele'])
sorted_ic50s = []
Expand All @@ -630,9 +642,10 @@ def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorit
results[peptide]['ic50s_{}'.format(peptide_type)] = self.replace_nas(sorted_ic50s)
results[peptide]['percentiles_{}'.format(peptide_type)] = self.replace_nas(sorted_percentiles)
individual_ic50_calls[peptide_type] = ic50_calls
individual_percentile_calls[peptide_type] = percentile_calls
individual_ic50_percentile_calls[peptide_type] = percentile_calls
individual_el_calls[peptide_type] = el_calls
individual_el_percentile_calls[peptide_type] = el_percentile_calls
individual_percentile_calls[peptide_type] = all_percentile_calls
results[peptide]['hla_types'] = sorted(self.hla_types)
results[peptide]['mutation_position'] = "NA" if pd.isna(included_df_peptide_annotation.iloc[0]['Mutation Position']) else str(included_df_peptide_annotation.iloc[0]['Mutation Position'])
results[peptide]['problematic_positions'] = str(included_df_peptide_annotation.iloc[0]['Problematic Positions']) if 'Problematic Positions' in included_df_peptide_annotation.iloc[0] else 'None'
Expand All @@ -641,9 +654,10 @@ def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorit
else:
results[peptide]['anchor_fails'] = 'None'
results[peptide]['individual_ic50_calls'] = individual_ic50_calls
results[peptide]['individual_percentile_calls'] = individual_percentile_calls
results[peptide]['individual_ic50_percentile_calls'] = individual_ic50_percentile_calls
results[peptide]['individual_el_calls'] = individual_el_calls
results[peptide]['individual_el_percentile_calls'] = individual_el_percentile_calls
results[peptide]['individual_percentile_calls'] = individual_percentile_calls
wt_peptide = included_df_peptide_annotation.iloc[0]['WT Epitope Seq']
if pd.isna(wt_peptide):
variant_type = included_df_peptide_annotation.iloc[0]['Variant Type']
Expand Down Expand Up @@ -891,7 +905,7 @@ def sort_included_df(self, df):
def get_unique_peptide_hla_counts(self, included_df):
    """Count rows of ``included_df`` per (HLA Allele, Epitope Seq) pair.

    Args:
        included_df: DataFrame with 'HLA Allele' and 'Epitope Seq' columns.

    Returns:
        A DataFrame with one row per distinct pair; the group sizes are in
        the column produced by ``reset_index`` on the size Series.
    """
    pair_sizes = included_df.groupby(['HLA Allele', 'Epitope Seq']).size()
    flattened = pair_sizes.reset_index()
    return pd.DataFrame(flattened)

def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms):
def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms, percentile_algorithms):
return (None, "NA")

def calculate_unique_peptide_count(self, included_df):
Expand Down
Loading