From cc5932f8975010cac24cd9b62fc74ebf752c6b6c Mon Sep 17 00:00:00 2001 From: Goosang Yu Date: Thu, 25 Jan 2024 14:15:01 +0900 Subject: [PATCH] =?UTF-8?q?=E2=9C=85=20SynonymousPE=20update?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- genet/design/functional.py | 128 +++++++++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 35 deletions(-) diff --git a/genet/design/functional.py b/genet/design/functional.py index a32d633..741f8b2 100644 --- a/genet/design/functional.py +++ b/genet/design/functional.py @@ -144,11 +144,13 @@ def __init__(self, # step 2: pegRNA의 strand 방향에 따라 synonymous Mut 생성 함수 결정 if self.wt_seq in self.ref_seq: self.strand = '+' - self.rtt_frame = (frame - self.edit_pos + 1) % 3 # rtt 시작점의 frame, LHA 길이를 이용한 계산 + #self.rtt_frame = (frame - self.edit_pos + 1) % 3 # rtt 시작점의 frame, LHA 길이를 이용한 계산 + self.rtt_frame = (frame - (self.edit_pos - 1) % 3) % 3 #frame - (self.edit_pos - 1)가 음수가 되었을 때 결과 값이 다름. elif reverse_complement(self.wt_seq) in self.ref_seq: self.strand = '-' - self.rtt_frame = (self.edit_pos + frame) % 3 # revcom_rtt_dna의 3' end가 위치하는 지점의 frame. 시작점이 거기이기 때문. + #self.rtt_frame = (self.edit_pos + frame) % 3 # revcom_rtt_dna의 3' end가 위치하는 지점의 frame. 시작점이 거기이기 때문. + self.rtt_frame = (self.edit_pos + frame - 1) % 3 else: raise ValueError('Reference sequence is not matched with pegRNA information!\nPlease chech your ref_seq') @@ -228,19 +230,21 @@ def generate(self, rtt_frame:int, strand:str) -> pd.DataFrame: if strand == '+': codon_le = rtt_frame - codon_re = (3 - (rtt_frame + self.rtt_len)) % 3 + #codon_re = (3 - (rtt_frame + self.rtt_len)) % 3 + codon_re = -(rtt_frame + self.rtt_len) % 3 codon_RTT = self.ed_seq[21-codon_le:21+self.rtt_len+codon_re] dict_codon_Pos = {codon_RTT: [i for i in range(codon_le, len(codon_RTT)-codon_re)]} else: - codon_re = abs(self.frame - ep) % 3 # added abs() + #codon_re = (self.frame - ep) % 3 + codon_re = 2 - rtt_frame codon_le = -(codon_re + self.rtt_len) % 3 codon_RTT = self.ed_seq[21-codon_re:21+self.rtt_len+codon_le] codon_RTT = reverse_complement(codon_RTT) - + dict_codon_Pos = {codon_RTT: [i for i in range(codon_le, len(codon_RTT) - codon_re)]} #due to revcom sequence ### 결과가 안 맞으면, 여기까지 (codon_RTT)를 다시 한번 살펴보기, 특히 (-) strand! ####### @@ -281,10 +285,13 @@ def generate(self, rtt_frame:int, strand:str) -> pd.DataFrame: # priority 결정하는 부분 ########################################## # 1/ Edit class에 따라서 분류하고, 각 class에 따라 값을 할당 - if mut_pos in [5, 6]: edit_class = 'PAM_edit'; priority = 1 + if (mut_pos in [5, 6]) and (rtt_dna_mut[4:6] not in ['GG', 'GA', 'AG']): + if mut_pos < ep : edit_class = 'PAM_edit_LHA'; priority = 1 + else : edit_class = 'PAM_edit_RHA'; priority = 1 + elif mut_pos < ep : edit_class = 'LHA_edit'; priority = 2 + ep - mut_pos elif mut_pos > ep : edit_class = 'RHA_edit'; priority = 3 + ep + mut_pos - + # 2/ GC contents가 변화하면 값 증가 if gc_fraction(codon) != gc_fraction(mut_codon): priority += 1 @@ -300,11 +307,20 @@ def generate(self, rtt_frame:int, strand:str) -> pd.DataFrame: aa_mut_codon = mut_codon if codon_intron_5 > 0: - aa_wt_codon = codon[(codon_intron_5 // 3) * 3:] + aa_wt_codon = codon[((codon_intron_5 // 3) - 1) * 3:] aa_mut_codon = mut_codon[(codon_intron_5 // 3) * 3:] + + # partial codon 부분의 mutation filtering (+ strand) + partial_len = len(codon) - len(aa_wt_codon) + if snv_pos in [partial_len, partial_len-1]: continue + if codon_intron_3 > 0: - aa_wt_codon = codon[:-(codon_intron_3 // 3) * 3] - aa_mut_codon = mut_codon[:-(codon_intron_3 // 3) * 3] + aa_wt_codon = codon[:-((codon_intron_3 // 3) + 1) * 3] + aa_mut_codon = mut_codon[:-((codon_intron_3 // 3) + 1) * 3] + + # partial codon 부분의 mutation filtering (- strand) + partial_len = len(aa_wt_codon) + if snv_pos in [partial_len, partial_len+1]: continue aa_wt = translate(aa_wt_codon) aa_mut = translate(aa_mut_codon) @@ -338,13 +354,14 @@ def generate(self, rtt_frame:int, strand:str) -> pd.DataFrame: # def End: generate - def stack(self, num:int): + def stack(self, num:int, max_rha_edit:int = 2): """만들 수 있는 Synonymous Mut 들의 조합을 추가로 만들어서, 그 중에서도 synonymous mut이 존재하는지 확인하고, 가능한 조합을 return 하는 method. Args: num (int): Synonymous Mutation을 쌓을 최대 제한 수 + max_rha_edit (int) : 허용할 RHA edit의 최대 제한 수. (지정 안하면 제한 안둠.) Returns: _type_: str, list @@ -369,43 +386,84 @@ def stack(self, num:int): selected_mut_codon = best_syn.Codon_Mut synMut_cnt = 1 - stacked_pos = [] + synMut_RHA_cnt = 1 if best_syn.Edit_class == 'RHA_edit' else 0 + stacked_pos = [best_syn.Mut_pos] for i in range(1, len(syn_dedup)): temp_syn = syn_dedup.iloc[i] + + if best_syn.Edit_class != 'PAM_edit_RHA': + mut_pos = temp_syn.Codon_MutPos - 1 + mut_nt = temp_syn.Codon_Mut[mut_pos] - mut_pos = temp_syn.Codon_MutPos - 1 - mut_nt = temp_syn.Codon_Mut[mut_pos] + stacked_mut_codon = list(selected_mut_codon) + stacked_mut_codon[mut_pos] = mut_nt + stacked_mut_codon = ''.join(stacked_mut_codon) - stacked_mut_codon = list(selected_mut_codon) - stacked_mut_codon[mut_pos] = mut_nt - stacked_mut_codon = ''.join(stacked_mut_codon) + # Silent Mut check + aa_mut_codon = temp_syn.Codon_Mut - # Silent Mut check - aa_mut_codon = temp_syn.Codon_Mut + if codon_intron_5 > 0: + aa_mut_codon = stacked_mut_codon[(codon_intron_5 // 3) * 3:] + if codon_intron_3 > 0: + aa_mut_codon = stacked_mut_codon[:-(codon_intron_3 // 3) * 3] - if codon_intron_5 > 0: - aa_mut_codon = stacked_mut_codon[(codon_intron_5 // 3) * 3:] - if codon_intron_3 > 0: - aa_mut_codon = stacked_mut_codon[:-(codon_intron_3 // 3) * 3] + aa_origin = translate(aa_origin_codon) + aa_mut = translate(aa_mut_codon) - aa_origin = translate(aa_origin_codon) - aa_mut = translate(aa_mut_codon) + if aa_origin == aa_mut: + + # 만약 조건에 맞는 mutation을 찾으면, origin_codon을 업데이트 + selected_mut_codon = stacked_mut_codon + aa_origin_codon = stacked_mut_codon - if aa_origin == aa_mut: - - # 만약 조건에 맞는 mutation을 찾으면, origin_codon을 업데이트 - selected_mut_codon = stacked_mut_codon - aa_origin_codon = stacked_mut_codon + if codon_intron_5 > 0: aa_origin_codon = stacked_mut_codon[(codon_intron_5 // 3) * 3:] + if codon_intron_3 > 0: aa_origin_codon = stacked_mut_codon[:-(codon_intron_3 // 3) * 3] + + synMut_cnt += 1 + synMut_RHA_cnt += 1 if temp_syn.Edit_class == 'RHA_edit' else 0 + stacked_pos.append(temp_syn.Mut_pos) + + if (synMut_cnt == num) or (synMut_RHA_cnt == max_rha_edit): break + + elif best_syn.Edit_class == 'PAM_edit_RHA': + if temp_syn.Edit_class in 'PAM_edit_RHA': continue + elif temp_syn.Edit_class not in ['PAM_edit_RHA', 'RHA_edit']: + + mut_pos = temp_syn.Codon_MutPos - 1 + mut_nt = temp_syn.Codon_Mut[mut_pos] + + stacked_mut_codon = list(selected_mut_codon) + stacked_mut_codon[mut_pos] = mut_nt + stacked_mut_codon = ''.join(stacked_mut_codon) + + # Silent Mut check + aa_mut_codon = temp_syn.Codon_Mut + + if codon_intron_5 > 0: + aa_mut_codon = stacked_mut_codon[(codon_intron_5 // 3) * 3:] + if codon_intron_3 > 0: + aa_mut_codon = stacked_mut_codon[:-(codon_intron_3 // 3) * 3] + + aa_origin = translate(aa_origin_codon) + aa_mut = translate(aa_mut_codon) + + if aa_origin == aa_mut: + + # 만약 조건에 맞는 mutation을 찾으면, origin_codon을 업데이트 + selected_mut_codon = stacked_mut_codon + aa_origin_codon = stacked_mut_codon - if codon_intron_5 > 0: aa_origin_codon = stacked_mut_codon[(codon_intron_5 // 3) * 3:] - if codon_intron_3 > 0: aa_origin_codon = stacked_mut_codon[:-(codon_intron_3 // 3) * 3] + if codon_intron_5 > 0: aa_origin_codon = stacked_mut_codon[(codon_intron_5 // 3) * 3:] + if codon_intron_3 > 0: aa_origin_codon = stacked_mut_codon[:-(codon_intron_3 // 3) * 3] - synMut_cnt += 1 - stacked_pos.append(temp_syn.Mut_pos) + synMut_cnt += 1 + synMut_RHA_cnt += 1 if temp_syn.Edit_class == 'RHA_edit' else 0 + stacked_pos.append(temp_syn.Mut_pos) - if synMut_cnt == num: break + if (synMut_cnt == num) or (synMut_RHA_cnt == max_rha_edit): break + else : break if strand == '+': rtt_dna_mut = selected_mut_codon[self.codon_le:len(selected_mut_codon)-self.codon_re]