Skip to content

Commit

Permalink
Added missing reference genome parameters of ngs-bits tools
Browse files Browse the repository at this point in the history
  • Loading branch information
marc-sturm committed Sep 10, 2024
1 parent 5eb4c2a commit 69b4ffb
Show file tree
Hide file tree
Showing 11 changed files with 25 additions and 21 deletions.
2 changes: 1 addition & 1 deletion src/NGS/an_spliceai.php
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ function annotate_spliceai_scores($in, $vcf_filtered, $out)
$splice_env = get_path("splice_env", true);
exec2("cut -f 2,4,5 -d'\t' {$splice_env}/lib/python3.10/site-packages/spliceai/annotations/".strtolower($build).".txt | sed 's/^/chr/' | sed '1d' > {$spliceai_regions}");
$tmp2 = $parser->tempFile("_spliceai_filtered_regions.vcf");
$parser->exec(get_path("ngs-bits")."/VcfFilter", "-reg {$spliceai_regions} -in {$tmp1} -out {$tmp2}", true);
$parser->exec(get_path("ngs-bits")."/VcfFilter", "-reg {$spliceai_regions} -in {$tmp1} -out {$tmp2} -ref ".genome_fasta($build), true);
$var_count = vcf_variant_count($tmp2);
$parser->log("Variants after SpliceAI transcript regions filter: {$var_count}");

Expand Down
11 changes: 6 additions & 5 deletions src/NGS/an_vep.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,14 @@ function annotation_file_path($rel_path, $is_optional=false)
$local_data = get_path("local_data");
$vep_data_path = "{$local_data}/".basename(get_path("vep_data"))."/"; //the data is copied to the local data folder by 'data_setup' to speed up annotations (and prevent hanging annotation jobs)
$data_folder = get_path("data_folder");
$genome = genome_fasta($build);

$args = array();
$args[] = "-i $in --format vcf"; //input
$args[] = "-o $vep_output --vcf --no_stats --force_overwrite"; //output
$args[] = "--species homo_sapiens --assembly {$build}"; //species
$args[] = "--fork {$threads}"; //speed (--buffer_size did not change run time when between 1000 and 20000)
$args[] = "--offline --cache --dir_cache {$vep_data_path}/ --fasta ".genome_fasta($build); //paths to data
$args[] = "--offline --cache --dir_cache {$vep_data_path}/ --fasta {$genome}"; //paths to data
$args[] = "--transcript_version --domains --failed 1"; //annotation options
$args[] = "--regulatory"; //regulatory features
$fields[] = "BIOTYPE";
Expand Down Expand Up @@ -101,14 +102,14 @@ function annotation_file_path($rel_path, $is_optional=false)
//add consequences (Ensembl)
$gff = get_path("data_folder")."/dbs/Ensembl/Homo_sapiens.GRCh38.112.gff3";
$vcf_output_consequence = $parser->tempFile("_consequence.vcf");
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vep_output} -out {$vcf_output_consequence} -threads {$threads} -tag CSQ2 -gff {$gff}", true);
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vep_output} -out {$vcf_output_consequence} -threads {$threads} -tag CSQ2 -gff {$gff} -ref {$genome}", true);

//add consequences (RefSeq)
if($annotate_refseq_consequences)
{
$gff2 = get_path("data_folder")."/dbs/RefSeq/Homo_sapiens.GRCh38.p14.gff3";
$vcf_output_refseq = $parser->tempFile("_refseq.vcf");
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vcf_output_consequence} -out {$vcf_output_refseq} -threads {$threads} -tag CSQ_REFSEQ -gff {$gff2} -source refseq", true);
$parser->exec(get_path("ngs-bits")."/VcfAnnotateConsequence", " -in {$vcf_output_consequence} -out {$vcf_output_refseq} -threads {$threads} -tag CSQ_REFSEQ -gff {$gff2} -source refseq -ref {$genome}", true);
$vcf_output_consequence = $vcf_output_refseq;
}

Expand All @@ -118,7 +119,7 @@ function annotation_file_path($rel_path, $is_optional=false)

//add MaxEntScan annotation
$vcf_output_mes = $parser->tempFile("_mes.vcf");
$parser->exec(get_path("ngs-bits")."/VcfAnnotateMaxEntScan", "-gff {$gff} -in {$vcf_output_phylop} -out {$vcf_output_mes} -ref ".genome_fasta($build)." -swa -threads {$threads} -min_score 0.0 -decimals 1", true);
$parser->exec(get_path("ngs-bits")."/VcfAnnotateMaxEntScan", "-gff {$gff} -in {$vcf_output_phylop} -out {$vcf_output_mes} -ref {$genome} -swa -threads {$threads} -min_score 0.0 -decimals 1", true);

// create config file
$config_file_path = $parser->tempFile(".config");
Expand Down Expand Up @@ -327,7 +328,7 @@ function annotation_file_path($rel_path, $is_optional=false)
//check vcf file
if($check_lines >= 0)
{
$parser->exec(get_path("ngs-bits")."VcfCheck", "-in $out -lines $check_lines -ref ".genome_fasta($build), true);
$parser->exec(get_path("ngs-bits")."VcfCheck", "-in $out -lines $check_lines -ref {$genome}", true);
}

?>
3 changes: 2 additions & 1 deletion src/NGS/db_check_gender.php
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@
}

//determine gender from BAM
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-in {$in} -method {$method} {$args} -build ".ngsbits_build($build), true);
$genome = genome_fasta($build);
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-in {$in} -method {$method} {$args} -build ".ngsbits_build($build)." -ref {$genome}", true);
$gender2 = explode("\t", $stdout[1])[1];
if (starts_with($gender2, "unknown"))
{
Expand Down
4 changes: 2 additions & 2 deletions src/NGS/merge_gvcf.php
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@
$pipeline[] = array("zcat", $tmp_vcf);

//filter variants according to variant quality>5
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5");
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -ref $genome");

//split complex variants to primitives
//this step has to be performed before vcfbreakmulti - otherwise mulitallelic variants that contain both 'hom' and 'het' genotypes fail - see NA12878 amplicon test chr2:215632236-215632276
Expand All @@ -193,7 +193,7 @@
$pipeline[] = array(get_path("vcflib")."vcfbreakmulti", "");

//remove invalid variants
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid");
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid -ref $genome");

//normalize all variants and align INDELs to the left
$pipeline[] = array(get_path("ngs-bits")."VcfLeftNormalize", "-stream -ref $genome");
Expand Down
4 changes: 2 additions & 2 deletions src/NGS/vc_clair.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
$pipeline[] = array("zcat", "{$clair_vcf}");

//filter variants according to variant quality>5
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid");
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid -ref $genome");

//split complex variants to primitives
//this step has to be performed before vcfbreakmulti - otherwise mulitallelic variants that contain both 'hom' and 'het' genotypes fail - see NA12878 amplicon test chr2:215632236-215632276
Expand Down Expand Up @@ -143,7 +143,7 @@
//create/copy gvcf:
$pipeline = array();
$pipeline[] = array("zcat", $clair_gvcf);
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid");
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-remove_invalid -ref $genome");
$pipeline[] = array("bgzip", "-c > {$out_gvcf}");
$parser->execPipeline($pipeline, "gVCF post processing");

Expand Down
2 changes: 1 addition & 1 deletion src/NGS/vc_clair_trio.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
$pipeline[] = array("zcat", "{$clair_vcf}");

//filter variants according to variant quality>5
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid");
$pipeline[] = array(get_path("ngs-bits")."VcfFilter", "-qual 5 -remove_invalid -ref $genome");

//split complex variants to primitives
//this step has to be performed before vcfbreakmulti - otherwise mulitallelic variants that contain both 'hom' and 'het' genotypes fail - see NA12878 amplicon test chr2:215632236-215632276
Expand Down
2 changes: 1 addition & 1 deletion src/NGS/vc_viral_load.php
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@

// mismatches
$region = $fields[0] . ":" . ($fields[1]+1) . "-" . $fields[2];
list($stdout) = $parser->exec(get_path("ngs-bits")."VcfFilter", "-in {$viral_vcf} -reg '{$region}'", false);
list($stdout) = $parser->exec(get_path("ngs-bits")."VcfFilter", "-in {$viral_vcf} -reg '{$region}' -ref $genome", false);
$variant_count = 0;
foreach($stdout as $line)
{
Expand Down
4 changes: 2 additions & 2 deletions src/Pipelines/analyze.php
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@
//filter by target region (extended by 200) and quality 5
$target = $parser->tempFile("_roi_extended.bed");
$parser->exec($ngsbits."BedExtend"," -in ".$sys['target_file']." -n 200 -out $target -fai ".$genome.".fai", true);
$pipeline[] = array($ngsbits."VcfFilter", "-reg {$target} -qual 5 -filter_clear");
$pipeline[] = array($ngsbits."VcfFilter", "-reg {$target} -qual 5 -filter_clear -ref $genome");

//split multi-allelic variants
$pipeline[] = array(get_path("vcflib")."vcfbreakmulti", "");
Expand Down Expand Up @@ -480,7 +480,7 @@
$pipeline[] = array("zcat", $dragen_output_vcf);

//filter by target region and quality 5
$pipeline[] = array($ngsbits."VcfFilter", "-reg chrMT:1-16569 -qual 5 -filter_clear");
$pipeline[] = array($ngsbits."VcfFilter", "-reg chrMT:1-16569 -qual 5 -filter_clear -ref $genome");

//split multi-allelic variants
$pipeline[] = array(get_path("vcflib")."vcfbreakmulti", "");
Expand Down
6 changes: 3 additions & 3 deletions src/Pipelines/somatic_dna.php
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ function($str) { return explode("\t", $str); },
}
else
{
$out = $parser->exec(get_path("ngs-bits") . "/SampleGender", "-in $t_bam -build ".ngsbits_build($sys['build'])." -method sry", true);
$out = $parser->exec(get_path("ngs-bits") . "/SampleGender", "-in $t_bam -build ".ngsbits_build($sys['build'])." -method sry -ref {$ref_genome}", true);
list(,,$cov_sry) = explode("\t", $out[0][1]);

if(is_numeric($cov_sry) && (float)$cov_sry >= 30)
Expand Down Expand Up @@ -739,7 +739,7 @@ function($str) { return explode("\t", $str); },
{
$ngsbits = get_path("ngs-bits");
$pipeline = [
["{$ngsbits}BedAnnotateGC", "-in ".$sys['target_file']." -clear -ref ".genome_fasta($sys['build'])],
["{$ngsbits}BedAnnotateGC", "-in ".$sys['target_file']." -clear -ref {$ref_genome}"],
["{$ngsbits}BedAnnotateGenes", "-out {$target_bed}"],
];
$parser->execPipeline($pipeline, "creating annotated BED file for ClinCNV");
Expand All @@ -755,7 +755,7 @@ function($str) { return explode("\t", $str); },
{
$pipeline = [
[get_path("ngs-bits")."BedChunk", "-in ".$sys['target_file']." -n {$bin_size}"],
[get_path("ngs-bits")."BedAnnotateGC", "-clear -ref ".genome_fasta($sys['build'])],
[get_path("ngs-bits")."BedAnnotateGC", "-clear -ref {$ref_genome}"],
[get_path("ngs-bits")."BedAnnotateGenes", "-out {$target_bed}"]
];
$parser->execPipeline($pipeline, "creating annotated BED file for ClinCNV");
Expand Down
5 changes: 3 additions & 2 deletions src/Pipelines/trio.php
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)

//prepare multi-sample paramters
$sys = load_system($system, $sample_c); //required in case the the system is unset
$genome = genome_fasta($sys['build']);
$args_multisample = [
"-bams $c $f $m",
"-status affected control control",
Expand Down Expand Up @@ -286,7 +287,7 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)
print "Medelian errors: ".number_format(100.0*$vars_mendelian_error/$vars_high_depth, 2)."% (of {$vars_high_depth} high-depth, autosomal variants)\n";

//determine gender of child
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($sys['build']), true);
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($sys['build'])." -ref {$genome}", true);
$gender_data = explode("\t", $stdout[1])[1];
if ($gender_data!="male" && $gender_data!="female") $gender_data = "n/a";
print "Gender of child (from data): {$gender_data}\n";
Expand Down Expand Up @@ -336,7 +337,7 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)
if ($is_wgs_shallow)
{
//determine gender of child
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method xy -in $c -build ".ngsbits_build($sys['build']), true);
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method xy -in $c -build ".ngsbits_build($sys['build'])." -ref {$genome}", true);
$gender_data = explode("\t", $stdout[1])[1];
if ($gender_data!="male" && $gender_data!="female") $gender_data = "n/a";
print "Gender of child (from data): {$gender_data}\n";
Expand Down
3 changes: 2 additions & 1 deletion src/Pipelines/trio_longread.php
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ function fix_gsvar_file($gsvar, $sample_c, $sample_f, $sample_m, $gender_data)
print "Medelian errors: ".number_format(100.0*$vars_mendelian_error/$vars_high_depth, 2)."% (of {$vars_high_depth} high-depth, autosomal variants)\n";

//determine gender of child
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($build), true);
$genome = genome_fasta($build);
list($stdout, $stderr) = $parser->exec(get_path("ngs-bits")."SampleGender", "-method hetx -in $c -build ".ngsbits_build($build)." -ref {$genome}", true);
$gender_data = explode("\t", $stdout[1])[1];
if ($gender_data!="male" && $gender_data!="female") $gender_data = "n/a";
print "Gender of child (from data): {$gender_data}\n";
Expand Down

0 comments on commit 69b4ffb

Please sign in to comment.