From ea48d6fe09ce2760c04eee3a61670f591aa8d189 Mon Sep 17 00:00:00 2001 From: tcezard Date: Mon, 15 Apr 2024 09:17:52 +0100 Subject: [PATCH] Separate RS from multiple lines --- .../gather_clustering_counts/bash/count_rs_for_all_files.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eva-accession-release-automation/gather_clustering_counts/bash/count_rs_for_all_files.sh b/eva-accession-release-automation/gather_clustering_counts/bash/count_rs_for_all_files.sh index 4f8ee1b82..9ed94b2f8 100755 --- a/eva-accession-release-automation/gather_clustering_counts/bash/count_rs_for_all_files.sh +++ b/eva-accession-release-automation/gather_clustering_counts/bash/count_rs_for_all_files.sh @@ -19,7 +19,8 @@ do OUTPUT=tmp_${SC_NAME}_${ASSEMBLY}_${TYPE}.txt if [[ ${INPUT} == *.vcf.gz ]] then - zcat "${INPUT}" | grep -v '^#' | awk -v annotation="${ASSEMBLY}-${SC_NAME}-${TYPE}" '{print $3" "annotation}' > ${OUTPUT} + # There are sometimes multiple rs (separated by ;) in one line that need to be split across multiple lines + zcat "${INPUT}" | grep -v '^#' | awk -v annotation="${ASSEMBLY}-${SC_NAME}-${TYPE}" '{gsub(";","\n",$3); print $3" "annotation}' > ${OUTPUT} elif [[ ${INPUT} == *_unmapped_ids.txt.gz ]] then SC_NAME=$(basename $(dirname ${INPUT}));