From cd34a305e96572ee54ba47c69436981647ed25ea Mon Sep 17 00:00:00 2001 From: xfengnefx <61363437+xfengnefx@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:00:28 -0400 Subject: [PATCH] fix vcf parsing Phased variants are read from the vcf file by finding a "1|0" or "0|1" substring in each vcf record. This should be done only on the last column. This fix still assumes the input is a single-sample vcf. --- meth_phaser_post_processing | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/meth_phaser_post_processing b/meth_phaser_post_processing index 02bc943..39c58ad 100755 --- a/meth_phaser_post_processing +++ b/meth_phaser_post_processing @@ -312,26 +312,23 @@ def get_altered_vcf(original_vcf, output_vcf, block_relationship_dfs): if i[1] == 1: altered_vcf_file.writelines(rec) else: - if "1|0" in rec: - if "PS" in rec.split("\t")[-2]: - split_rec = rec.split("\t") + split_rec = rec.split("\t") + if "1|0" in split_rec[-1]: + if "PS" in split_rec[-2]: ps_tag_location = split_rec[-2].split(":").index("PS") start_loc = split_rec[-1].split(":")[ps_tag_location] # print(split_rec, start_loc) split_rec[-1] = split_rec[-1].replace(start_loc, get_altered_block_start_loc(current_chrom_final_block, int(start_loc))) # type: ignore - rec = "\t".join(split_rec) - rec = f"{rec}\n" - altered_vcf_file.writelines(rec.replace("1|0", "0|1")) - elif "0|1" in rec: - if "PS" in rec.split("\t")[-2]: - split_rec = rec.split("\t") + split_rec[-1] = split_rec[-1].replace("1|0", "0|1") + altered_vcf_file.writelines("\t".join(split_rec)+"\n") + elif "0|1" in split_rec[-1]: + if "PS" in split_rec[-2]: ps_tag_location = split_rec[-2].split(":").index("PS") start_loc = split_rec[-1].split(":")[ps_tag_location] # print(split_rec, start_loc) split_rec[-1] = split_rec[-1].replace(start_loc, get_altered_block_start_loc(current_chrom_final_block, int(start_loc))) # type: ignore - rec = "\t".join(split_rec) - rec = f"{rec}\n" - altered_vcf_file.writelines(rec.replace("0|1", 
"1|0")) + split_rec[-1] = split_rec[-1].replace("0|1", "1|0") + altered_vcf_file.writelines("\t".join(split_rec)+"\n") else: altered_vcf_file.writelines(rec) flpl = [x[0] for x in flipping_list]