diff --git a/.circleci/config.yml b/.circleci/config.yml index 6de20c2..b4aaa09 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,6 +10,16 @@ version: 2.1 jobs: + license_chk: + machine: + # need machine as want to mount a volume + image: ubuntu-2004:202107-02 + steps: + - checkout + - run: + name: Execute skywalking-eyes check of licenses + command: | + docker run --rm -v $(pwd):/github/workspace apache/skywalking-eyes header check build: environment: IMAGE_NAME: quay.io/wtsicgp/cgppindel @@ -55,7 +65,13 @@ workflows: version: 2.1 build_test: jobs: + - license_chk: + filters: + tags: + only: /.+/ - build: + requires: + - license_chk context: - dockerhub-casmservice - quayio-casmservice diff --git a/.github/ISSUE_TEMPLATE/issue-bug-question.md b/.github/ISSUE_TEMPLATE/issue-bug-question.md index 91a1173..2e62079 100644 --- a/.github/ISSUE_TEMPLATE/issue-bug-question.md +++ b/.github/ISSUE_TEMPLATE/issue-bug-question.md @@ -1,16 +1,17 @@ ---- +______________________________________________________________________ + name: Issue/Bug/Question about: Includes checks title: '' labels: '' assignees: '' ---- +______________________________________________________________________ Please ensure that you have check the following before raising a new issue: -* [README](https://github.com/cancerit/cgpPindel/blob/dev/README.md) -* [Wiki](https://github.com/cancerit/cgpPindel/wiki) -* Existing issues (closed too) before raising a new item +- [README](https://github.com/cancerit/cgpPindel/blob/dev/README.md) +- [Wiki](https://github.com/cancerit/cgpPindel/wiki) +- Existing issues (closed too) before raising a new item Please provide as much information as possible. 
diff --git a/.licenserc.yaml b/.licenserc.yaml new file mode 100644 index 0000000..12af0b6 --- /dev/null +++ b/.licenserc.yaml @@ -0,0 +1,93 @@ +header: + license: + spdx-id: AGPL-3.0-or-later + copyright-owner: Genome Research Ltd + content: | + Copyright (c) 2014-2021 Genome Research Ltd + + Author: CASM/Cancer IT + + This file is part of cgpPindel. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + 1. The usage of a range of years within a copyright statement contained within + this distribution should be interpreted as being equivalent to a list of years + including the first and last year specified and all consecutive years between + them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- + 2009, 2011-2012’ should be interpreted as being identical to a statement that + reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright + statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being + identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, + 2009, 2010, 2011, 2012’. + + pattern: | + Copyright \(c\) [-0-9]+ .+ + + Author: .+ + + This file is part of .+ + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or \(at your option\) any later version. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + + 1. The usage of a range of years within a copyright statement contained within + this distribution should be interpreted as being equivalent to a list of years + including the first and last year specified and all consecutive years between + them. For example, a copyright statement that reads ‘Copyright \(c\) 2005, 2007- + 2009, 2011-2012’ should be interpreted as being identical to a statement that + reads ‘Copyright \(c\) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright + statement that reads ‘Copyright \(c\) 2005-2012’ should be interpreted as being + identical to a statement that reads ‘Copyright \(c\) 2005, 2006, 2007, 2008, + 2009, 2010, 2011, 2012’. + + paths: + - '**' + + paths-ignore: + - '.circleci' + - '.coveragerc' + - '.dockerignore' + - '.gitignore' + - '.pre-commit-config.yaml' + - 'CHANGES.md' + - 'Dockerfile' + - 'LICENSE' + - 'pyproject.toml' + - 'README.md' + - '**/*.yaml' + - 'tests/data/**/*' + - '**/*.egg-info/PKG-INFO' + - '.pytest_cache/' + - 'build/' + - 'MANIFEST.in' + - 'tests/htmlcov/' + - '.eggs/' + - 'c++/*.cpp' # not ours, distributed via agreement + - 'perl/t/' + - 'perl/rules/' + - 'perl/pm_to_blib' + - 'perl/blib/' + - 'perl/MANIFEST*' + - 'perl/MYMETA*' + - '.github/' + - 'INSTALL' + - 'Makefile' + + comment: on-failure diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3f4f01d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-added-large-files + args: ['--maxkb=3000'] + - id: check-ast + - id: check-executables-have-shebangs + - id: check-shebang-scripts-are-executable + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: detect-aws-credentials + args: [--allow-missing-credentials] + - id: detect-private-key + - id: 
end-of-file-fixer + - id: name-tests-test + - id: requirements-txt-fixer + - id: trailing-whitespace +- repo: https://github.com/executablebooks/mdformat + rev: 0.7.6 + hooks: + - id: mdformat +- repo: https://github.com/hadolint/hadolint + rev: v2.4.1 + hooks: + - id: hadolint-docker diff --git a/CHANGES.md b/CHANGES.md index 3e9c762..bd42643 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,103 +1,109 @@ # CHANGES +## 3.6.0 +- Addition of `FF019` and `FF020` flags +- New flag rule set `pulldownFfpeRulesFragment.lst` including FF019 and FF020 made + +## 3.5.0 + +- Update to core pindel algorithm to allow complex DI events to have longer inserted sequence than deleted + - Masking real events + ## 3.4.1 -* Updated Dockerfile to use pcap-core 5.4.0 - htslib/samtools 1.11 +- Updated Dockerfile to use pcap-core 5.4.0 - htslib/samtools 1.11 ## 3.4.0 -* Updated Dockerfile to use pcap-core 5.2.2 -* Modified setup script to use build/*.sh +- Updated Dockerfile to use pcap-core 5.2.2 +- Modified setup script to use build/\*.sh ## 3.3.0 -* I/O hardening, see [milestone 3](https://github.com/cancerit/cgpPindel/milestone/3) +- I/O hardening, see [milestone 3](https://github.com/cancerit/cgpPindel/milestone/3) ## 3.2.2 -* Handle Input files that may have no reads at all, specifically an issue when generating a normal panel. +- Handle Input files that may have no reads at all, specifically an issue when generating a normal panel. 
## 3.2.1 -* Added Dockerfile and docker documentation +- Added Dockerfile and docker documentation ## 3.2.0 -* Tabix search for high depth/excluded regions now performed in memory using IntervalTrees - * Reduces runtime of input step by ~50% - * Improved disk access profile - * Zero impact on results +- Tabix search for high depth/excluded regions now performed in memory using IntervalTrees + - Reduces runtime of input step by ~50% + - Improved disk access profile + - Zero impact on results ## 3.1.2 -* 3.0.5 introduced species parsing bug causing single word species names to be invalid. +- 3.0.5 introduced species parsing bug causing single word species names to be invalid. ## 3.1.1 -* Fix regression - ability to cope with chromosomes with no events. +- Fix regression - ability to cope with chromosomes with no events. ## 3.1.0 -* Incorporates updated pindel which improves sensitivity -* Internally interpret QCFAIL to determine if whole pair fails +- Incorporates updated pindel which improves sensitivity +- Internally interpret QCFAIL to determine if whole pair fails ## 3.0.6 -* Fixed version tag +- Fixed version tag ## 3.0.5 -* Handles species names with spaces in it -* modified checks for species,assembly and checksum +- Handles species names with spaces in it +- modified checks for species,assembly and checksum ## 3.0.4 -* Output bug for pindel BAM/CRAM corrected. When more than 1 chr in output files had no reads. +- Output bug for pindel BAM/CRAM corrected. When more than 1 chr in output files had no reads. ## 3.0.3 -* Changes to how germline filter determined resulted in dummy germline bed file not being generated as previously. -* This release reinstates the old behaviour. +- Changes to how germline filter determined resulted in dummy germline bed file not being generated as previously. +- This release reinstates the old behaviour. 
## 3.0.2 -* Correct example rule files for *Fragment.lst files to use FFnnn filter types +- Correct example rule files for \*Fragment.lst files to use FFnnn filter types ## 3.0.1 -* Update tabix calls to directly use query_full (solves GRCh38 contig name issues). +- Update tabix calls to directly use query_full (solves GRCh38 contig name issues). ## 3.0.0 -* Germline bed file is now merged for adjacent regions (#31) -* More compressed intermediate files (#55) -* Change to `Const::Fast` where appropriate (#41) -* Removed TG VG from genotype. - * Readgroups are always variable, often 1 in data from last few years - * Not used by our filters. -* Supports BAM/CRAM inputs -* Output will be aligned with inputs - * bam vs cram - * bai vs csi -* Although ground work for csi input/output has been done `Bio::DB::HTS` doesn't support csi indexed input yet. - * Created our own fork at [`cancerit/Bio::DB::HTS`][cancerit-biodbhts] so that this could be enabled. - * You will need to install this manually or use one of our images for this functionallity. - * [dockstore-cgpwxs][ds-cgpwxs-git] - * [dockstore-cgpwxs][ds-cgpwgs-git] +- Germline bed file is now merged for adjacent regions (#31) +- More compressed intermediate files (#55) +- Change to `Const::Fast` where appropriate (#41) +- Removed TG VG from genotype. + - Readgroups are always variable, often 1 in data from last few years + - Not used by our filters. +- Supports BAM/CRAM inputs +- Output will be aligned with inputs + - bam vs cram + - bai vs csi +- Although ground work for csi input/output has been done `Bio::DB::HTS` doesn't support csi indexed input yet. + - Created our own fork at [`cancerit/Bio::DB::HTS`][cancerit-biodbhts] so that this could be enabled. + - You will need to install this manually or use one of our images for this functionality. 
+ - [dockstore-cgpwxs][ds-cgpwxs-git] + - [dockstore-cgpwxs][ds-cgpwgs-git] -[cancerit-biodbhts]: https://github.com/cancerit/Bio-DB-HTS/releases/tag/v2.10-rc1 -[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs -[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs ## 2.2.5 -* Update tabix->query to tabix->query_full +- Update tabix->query to tabix->query_full ## 2.2.4 -* Force sorting of FILTER field to make records easier to diff. -* Fix sorting of final VCF to handle events with same start better when using comparison tools +- Force sorting of FILTER field to make records easier to diff. +- Fix sorting of final VCF to handle events with same start better when using comparison tools ## 2.2.3 @@ -175,3 +181,8 @@ Found 15321 SNPs common to both files. Found 0 SNPs only in main file. Found 0 SNPs only in second file. After +``` + +[cancerit-biodbhts]: https://github.com/cancerit/Bio-DB-HTS/releases/tag/v2.10-rc1 +[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs +[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs diff --git a/Dockerfile b/Dockerfile index 341fba9..2dee6db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,35 +1,37 @@ FROM quay.io/wtsicgp/pcap-core:5.6.1 as builder +# hadolint ignore=DL3002 USER root # ALL tool versions used by opt-build.sh -ENV VER_CGPVCF="v2.2.1" -ENV VER_VCFTOOLS="0.1.16" +# need to keep in sync with setup.sh +ENV VER_CGPVCF="v2.2.1"\ + VER_VCFTOOLS="0.1.16" -RUN apt-get -yq update -RUN apt-get install -yq --no-install-recommends locales -RUN apt-get install -yq --no-install-recommends g++ -RUN apt-get install -yq --no-install-recommends make -RUN apt-get install -yq --no-install-recommends gcc -RUN apt-get install -yq --no-install-recommends pkg-config -RUN apt-get install -yq --no-install-recommends zlib1g-dev - -RUN locale-gen en_US.UTF-8 -RUN update-locale LANG=en_US.UTF-8 +# hadolint ignore=DL3008 +RUN apt-get -yq update \ +&& apt-get install -yq --no-install-recommends locales g++ make 
gcc pkg-config zlib1g-dev \ +&& locale-gen en_US.UTF-8 \ +&& update-locale LANG=en_US.UTF-8 ENV OPT /opt/wtsi-cgp -ENV PATH $OPT/bin:$OPT/biobambam2/bin:$PATH -ENV PERL5LIB $OPT/lib/perl5 -ENV LD_LIBRARY_PATH $OPT/lib -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 +ENV PATH=$OPT/bin:$OPT/biobambam2/bin:$PATH \ + PERL5LIB=$OPT/lib/perl5 \ + LD_LIBRARY_PATH=$OPT/lib \ + LC_ALL=en_US.UTF-8 \ + LANG=en_US.UTF-8 + +WORKDIR /tmp/build # build tools from other repos -ADD build/opt-build.sh build/ +COPY build/opt-build.sh build/ RUN bash build/opt-build.sh $OPT +COPY build/opt-build-local.sh build/ +COPY c++ c++ +COPY perl perl + # build the tools in this repo, separate to reduce build time on errors -COPY . . RUN bash build/opt-build-local.sh $OPT FROM ubuntu:20.04 @@ -38,8 +40,9 @@ LABEL maintainer="cgphelp@sanger.ac.uk" \ uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Trust Sanger Institute" \ description="cgpPindel docker" -RUN apt-get -yq update -RUN apt-get install -yq --no-install-recommends \ +# hadolint ignore=DL3008 +RUN apt-get -yq update \ +&& apt-get install -yq --no-install-recommends \ apt-transport-https \ locales \ curl \ @@ -59,19 +62,18 @@ google-perftools \ unattended-upgrades && \ unattended-upgrade -d -v && \ apt-get remove -yq unattended-upgrades && \ -apt-get autoremove -yq - -RUN locale-gen en_US.UTF-8 -RUN update-locale LANG=en_US.UTF-8 +apt-get autoremove -yq \ +&& rm -rf /var/lib/apt/lists/* \ +&& locale-gen en_US.UTF-8 \ +&& update-locale LANG=en_US.UTF-8 ENV OPT /opt/wtsi-cgp -ENV PATH $OPT/bin:$OPT/biobambam2/bin:$PATH -ENV PERL5LIB $OPT/lib/perl5 -ENV LD_LIBRARY_PATH $OPT/lib -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 +ENV PATH=$OPT/bin:$OPT/biobambam2/bin:$PATH \ + PERL5LIB=$OPT/lib/perl5 \ + LD_LIBRARY_PATH=$OPT/lib \ + LC_ALL=en_US.UTF-8 \ + LANG=en_US.UTF-8 -RUN mkdir -p $OPT COPY --from=builder $OPT $OPT ## USER CONFIGURATION diff --git a/LICENCE b/LICENSE similarity index 99% rename from LICENCE rename to LICENSE 
index dba13ed..636867c 100644 --- a/LICENCE +++ b/LICENSE @@ -1,7 +1,7 @@ GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (c) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -630,7 +630,7 @@ state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. - Copyright (C) + Copyright (c) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by diff --git a/README.md b/README.md index 129d63c..1fb4456 100644 --- a/README.md +++ b/README.md @@ -2,41 +2,46 @@ cgpPindel contains the Cancer Genome Projects workflow for [Pindel][pindel-core]. -[![cancerit](https://circleci.com/gh/cancerit/cgpPindel.svg?style=svg)](https://circleci.com/gh/cancerit/cgpPindel) +| Master | Develop | +| --------------------------------------------- | ----------------------------------------------- | +| [![Master Badge][circle-master]][circle-base] | [![Develop Badge][circle-develop]][circle-base] | + +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) The is a lightly modified version of pindel v2.0 with CGP specific processing for: -* Input file generation -* Conversion from pindel text output to: - * tumour and normal BAM alignment files - * VCF - * Application of VCF filters. +- Input file generation +- Conversion from pindel text output to: + - tumour and normal BAM alignment files + - VCF + - Application of VCF filters. 
+ +Details of execution and referencing can be found in the [wiki][cgppindel-wiki] Contents: -* [Docker, Singularity and Dockstore](#docker-singularity-and-dockstore) -* [Dependencies/Install](#dependenciesinstall) -* [Creating a release](#creating-a-release) - * [Preparation](#preparation) - * [Release process](#release-process) - * [Code changes](#code-changes) - * [Testing](#testing) - * [Regression CI](#regression-ci) - * [Public CI](#public-ci) - * [Cutting the release](#cutting-the-release) -* [LICENCE](#licence) +- [Docker, Singularity and Dockstore](#docker-singularity-and-dockstore) +- [Dependencies/Install](#dependenciesinstall) +- [Developers](#developers) + - [Updating licence headers](#updating-licence-headers) + - [Code changes](#code-changes) + - [Testing](#testing) + - [Regression CI](#regression-ci) + - [Public CI](#public-ci) + - [Cutting the release](#cutting-the-release) +- [LICENCE](#licence) ## Docker, Singularity and Dockstore There are pre-built images containing this codebase on quay.io. When pulling an image you must specify the version there is no `latest`. -* [cgpPindel quay.io][quay-repo]: Contained within this repository - * Smallest build required to use cgpPindel - * Not linked to Dockstore (yet) - * Updated most frequently -* [dockstore-cgpwxs][ds-cgpwxs-git]: Contains tools specific to WXS analysis. -* [dockstore-cgpwgs][ds-cgpwgs-git]: Contains additional tools for WGS analysis. +- [cgpPindel quay.io][quay-repo]: Contained within this repository + - Smallest build required to use cgpPindel + - Not linked to Dockstore (yet) + - Updated most frequently +- [dockstore-cgpwxs][ds-cgpwxs-git]: Contains tools specific to WXS analysis. +- [dockstore-cgpwgs][ds-cgpwgs-git]: Contains additional tools for WGS analysis. These were primarily designed for use with dockstore.org but can be used as normal containers. 
@@ -46,8 +51,8 @@ The docker images are known to work correctly after import into a singularity im When doing a native install please install the following first: -* [PCAP-core v2.0+][pcap-core-rel] -* [cgpVcf v2.0+][cgpvcf-rel] +- [PCAP-core v2.0+][pcap-core-rel] +- [cgpVcf v2.0+][cgpvcf-rel] Please see these for any child dependencies. @@ -63,52 +68,84 @@ are installed into the target area. Please be aware that this expects basic C compilation libraries and tools to be available. -## Creating a release +## Developers -### Preparation +Please use `pre-commit` on this project. You can install to `$HOME/bin` via: -* Commit/push all relevant changes. -* Pull a clean version of the repo and use this for the following steps. +```bash +curl https://pre-commit.com/install-local.py | python - +``` -### Release process +In your checkout please run: -This project is maintained using the [HubFlow][hubflow-docs] methodology. +```bash +pre-commit install +``` + +### Updating licence headers + +Please use [skywalking-eyes](https://github.com/apache/skywalking-eyes). + +Expected workflow: + +```bash +# recent build, change to apache/skywalking-eyes:0.2.0 once released +export DOCKER_IMG=ghcr.io/apache/skywalking-eyes/license-eye +``` + +1. Check state before modifying `.licenserc.yaml`: + - `docker run -it --rm -v $(pwd):/github/workspace $DOCKER_IMG header check` + - You should get some 'valid' here, those without a header as 'invalid' +1. Modify `.licenserc.yaml` +1. Apply the changes: + - `docker run -it --rm -v $(pwd):/github/workspace $DOCKER_IMG header fix` +1. Add/commit changes + +This is executed in the CI pipeline. + +*DO NOT* edit the header in the files; please modify the date component of `content` in `.licenserc.yaml`. The only exception being: + +- `README.md` + +If you need to make more extensive changes to the license, carefully test that the pattern is still functional. 
+ +### Code changes -#### Code changes +This project is maintained using the [HubFlow][hubflow-docs] methodology. 1. Make appropriate changes -2. Update `perl/lib/Sanger/CGP/Pindel.pm` to the correct version (adding rc/beta to end if applicable). -3. Update `CHANGES.md` to show major items. -4. Commit the updated docs and updated module/version. -5. Push commits. +1. Update `perl/lib/Sanger/CGP/Pindel.pm` to the correct version (adding rc/beta to end if applicable). +1. Update `CHANGES.md` to show major items. +1. Commit the updated docs and updated module/version. +1. Push commits. -#### Testing +### Testing -##### Regression CI +#### Regression CI An internal CI system is used to validate each release using real, large scale datasets. -##### Public CI +#### Public CI Circleci is used to: -* Build Docker image (unit tests are part of build) -* Validate expected tools exist -* For tags only: push image to quay.io +- Build Docker image (unit tests are part of build) +- Validate expected tools exist +- For tags only: push image to quay.io CI only runs for: -* Branches with pull-requests -* Default branch (`dev`) -* Tags +- Branches with pull-requests +- Default branch (`dev`) +- Tags #### Cutting the release Internal regression CI processes must be completed prior to this. 1. Check state on [Circleci][circle-repo] -2. Generate the release (add notes to GitHub) -3. Confirm that image has been built on [quay.io][quay-builds] +1. Generate the release (add notes to GitHub) +1. 
Confirm that image has been pushed to [quay.io][quay-tags] ## LICENCE @@ -144,19 +181,21 @@ identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, ``` -[cgpvcf-rel]: https://github.com/cancerit/cgpVcf/releases -[pcap-core-rel]: https://github.com/cancerit/PCAP-core/releases -[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs -[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs -[pindel-core]: http://gmt.genome.wustl.edu/pindel/current -[hubflow-docs]: https://datasift.github.io/gitflow/ - -[circle-repo]: https://app.circleci.com/pipelines/github/cancerit/cgpPindel -[circle-badge-svg]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=svg - -[circle-badge-link]: https://travis-ci.org/cancerit/cgpPindel.svg?branch=dev + + +[cgppindel-wiki]: https://github.com/cancerit/cgpPindel/wiki +[cgpvcf-rel]: https://github.com/cancerit/cgpVcf/releases +[circle-base]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=shield +[circle-develop]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=shield&branch=dev%3B +[circle-master]: https://circleci.com/gh/cancerit/cgpPindel.svg?style=shield&branch=master%3B +[circle-repo]: https://app.circleci.com/pipelines/github/cancerit/cgpPindel +[ds-cgpwgs-git]: https://github.com/cancerit/dockstore-cgpwgs +[ds-cgpwxs-git]: https://github.com/cancerit/dockstore-cgpwxs +[hubflow-docs]: https://datasift.github.io/gitflow/ +[pcap-core-rel]: https://github.com/cancerit/PCAP-core/releases +[pindel-core]: http://gmt.genome.wustl.edu/pindel/current [quay-repo]: https://quay.io/repository/wtsicgp/cgppindel -[quay-builds]: https://quay.io/repository/wtsicgp/cgppindel?tab=builds +[quay-tags]: https://quay.io/repository/wtsicgp/cgppindel?tab=tags diff --git a/c++/filter_pindel_reads.cpp b/c++/filter_pindel_reads.cpp index 690eba1..6d12aaa 100644 --- a/c++/filter_pindel_reads.cpp +++ b/c++/filter_pindel_reads.cpp @@ -937,4 +937,3 @@ void CleanUniquePoints (vector & Input_UP) { Input_UP.clear(); Input_UP = 
TempUP; } - diff --git a/perl/Makefile.PL b/perl/Makefile.PL index 11411d0..aa083b4 100755 --- a/perl/Makefile.PL +++ b/perl/Makefile.PL @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. 
For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# use ExtUtils::MakeMaker; diff --git a/perl/bin/FlagVcf.pl b/perl/bin/FlagVcf.pl index e07c5fb..f4d3b2f 100755 --- a/perl/bin/FlagVcf.pl +++ b/perl/bin/FlagVcf.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# my $config_path; BEGIN { diff --git a/perl/bin/pindel.pl b/perl/bin/pindel.pl index 2a3bdc8..fe5f617 100755 --- a/perl/bin/pindel.pl +++ b/perl/bin/pindel.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { diff --git a/perl/bin/pindel_2_combined_vcf.pl b/perl/bin/pindel_2_combined_vcf.pl index fb64c6b..0e6a625 100755 --- a/perl/bin/pindel_2_combined_vcf.pl +++ b/perl/bin/pindel_2_combined_vcf.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. 
+# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# use strict; diff --git a/perl/bin/pindel_germ_bed.pl b/perl/bin/pindel_germ_bed.pl index 814f77b..eb4f771 100755 --- a/perl/bin/pindel_germ_bed.pl +++ b/perl/bin/pindel_germ_bed.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. 
+# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# BEGIN { diff --git a/perl/bin/pindel_input_gen.pl b/perl/bin/pindel_input_gen.pl index 3fc82ca..d1c8781 100755 --- a/perl/bin/pindel_input_gen.pl +++ b/perl/bin/pindel_input_gen.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. 
For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { diff --git a/perl/bin/pindel_merge_vcf_bam.pl b/perl/bin/pindel_merge_vcf_bam.pl index 2d755b0..e69ef40 100755 --- a/perl/bin/pindel_merge_vcf_bam.pl +++ b/perl/bin/pindel_merge_vcf_bam.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { use Cwd qw(abs_path); diff --git a/perl/bin/pindel_np_from_vcf.pl b/perl/bin/pindel_np_from_vcf.pl index 832258a..c777467 100755 --- a/perl/bin/pindel_np_from_vcf.pl +++ b/perl/bin/pindel_np_from_vcf.pl @@ -1,25 +1,33 @@ #!/usr/bin/perl - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# BEGIN { diff --git a/perl/bin/pindel_np_remsample.pl b/perl/bin/pindel_np_remsample.pl index feb39b6..f517944 100755 --- a/perl/bin/pindel_np_remsample.pl +++ b/perl/bin/pindel_np_remsample.pl @@ -1,4 +1,33 @@ #!/usr/bin/perl +# Copyright (c) 2014-2021 Genome Research Ltd +# +# Author: CASM/Cancer IT +# +# This file is part of cgpPindel. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# use strict; use warnings FATAL=>'all'; diff --git a/perl/bin/prep_np_release.pl b/perl/bin/prep_np_release.pl index 8342c6c..8ee44b9 100755 --- a/perl/bin/prep_np_release.pl +++ b/perl/bin/prep_np_release.pl @@ -1,4 +1,33 @@ #!/usr/bin/perl +# Copyright (c) 2014-2021 Genome Research Ltd +# +# Author: CASM/Cancer IT +# +# This file is part of cgpPindel. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# 1. 
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# use strict; use warnings FATAL=>'all'; diff --git a/perl/lib/Sanger/CGP/Pindel.pm b/perl/lib/Sanger/CGP/Pindel.pm index 2ccc297..af28e50 100644 --- a/perl/lib/Sanger/CGP/Pindel.pm +++ b/perl/lib/Sanger/CGP/Pindel.pm @@ -1,32 +1,39 @@ -package Sanger::CGP::Pindel; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel; use strict; use Const::Fast qw(const); use base 'Exporter'; -our $VERSION = '3.5.0'; +our $VERSION = '3.6.0'; our @EXPORT = qw($VERSION); 1; diff --git a/perl/lib/Sanger/CGP/Pindel/Implement.pm b/perl/lib/Sanger/CGP/Pindel/Implement.pm index d765ee5..cccd3c0 100644 --- a/perl/lib/Sanger/CGP/Pindel/Implement.pm +++ b/perl/lib/Sanger/CGP/Pindel/Implement.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::Implement; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. 
# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# +package Sanger::CGP::Pindel::Implement; use strict; use warnings FATAL => 'all'; diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen.pm b/perl/lib/Sanger/CGP/Pindel/InputGen.pm index 1cb7b87..191b8f2 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. 
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::InputGen; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm index 25746b8..cff68ca 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/Pair.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::Pair; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. 
See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::InputGen::Pair; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm index 06699e4..7c8a36d 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/PairToPindel.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::PairToPindel; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. 
# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# +package Sanger::CGP::Pindel::InputGen::PairToPindel; use strict; use English qw( -no_match_vars ); @@ -135,4 +142,3 @@ sub _self_anchored_to_pindel { 1; __DATA__ - diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm index 89cc621..58e3d73 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/Read.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::Read; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. 
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::InputGen::Read; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm b/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm index 3048d8d..bfae6b7 100644 --- a/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm +++ b/perl/lib/Sanger/CGP/Pindel/InputGen/SamHeader.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::InputGen::SamHeader; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. 
See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::InputGen::SamHeader; use strict; use English qw( -no_match_vars ); diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm index f20abdc..8584de3 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/BamUtil.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::BamUtil; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. 
# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# +package Sanger::CGP::Pindel::OutputGen::BamUtil; use Sanger::CGP::Pindel; use Sanger::CGP::Pindel::Implement; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm index e140a67..a72f4cd 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecord.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::CombinedRecord; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. 
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::CombinedRecord; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm index 79dbb1f..55aa479 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/CombinedRecordGenerator.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::CombinedRecordGenerator; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::CombinedRecordGenerator; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm index 5000ed1..c085646 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecord.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::PindelRecord; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. 
+# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. 
For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::PindelRecord; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm index f3e9298..49c5dd3 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/PindelRecordParser.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::PindelRecordParser; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::Pindel::OutputGen::PindelRecordParser; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm b/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm index cba97a4..4b545bc 100644 --- a/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm +++ b/perl/lib/Sanger/CGP/Pindel/OutputGen/VcfConverter.pm @@ -1,26 +1,33 @@ -package Sanger::CGP::Pindel::OutputGen::VcfConverter; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. 
+# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# +package Sanger::CGP::Pindel::OutputGen::VcfConverter; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm index 64044d0..7a641ab 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/AbstractExe.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::AbstractExe; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. 
The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::PindelPostProcessing::AbstractExe; use FindBin; use Sanger::CGP::Pindel; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm index 13dbed9..42bd264 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FilterRules.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::FilterRules; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::PindelPostProcessing::FilterRules; use strict; use Bio::DB::HTS::Tabix; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm index 5b2fdec..f2ee1ae 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/FragmentFilterRules.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::FragmentFilterRules; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. 
+# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. 
+# +package Sanger::CGP::PindelPostProcessing::FragmentFilterRules; use strict; use Bio::DB::HTS::Tabix; @@ -87,6 +95,14 @@ my %RULE_DESCS = ('FF001' => { 'tag' =>'INFO/LEN', 'name' => 'FF018', 'desc' => 'Sufficient Depth: Pass if depth > 10', 'test' => \&flag_018}, + 'FF019' => { 'tag' => 'INFO/LEN', + 'name' => 'FF019', + 'desc' => 'Fail when tumour supporting fragments < 3 or tumour fraction of supporting fragments < 0.05', + 'test' => \&flag_019}, + 'FF020' => { 'tag' => 'INFO/LEN', + 'name' => 'FF020', + 'desc' => 'Allow some contamination in matched normal due to FFPR block acquired samples and allow for low level sequencing/PCR artefacts', + 'test' => \&flag_020}, ); our $previous_format_hash; @@ -450,4 +466,58 @@ sub flag_018 { return $FAIL; } +sub flag_019 { + my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; + use_prev($$RECORD[8]); + + my @tum_geno = split(':',$$RECORD[10]); + + if($tum_geno[$previous_format_hash->{'FC'}] < 3){ + return $FAIL; + } + # previous test confirms FC/FD can't be 0, so no div0 check required + if ($tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}] < 0.05){ + return $FAIL; + } + + return $PASS; +} + +sub flag_020 { + my ($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF) = @_; + use_prev($$RECORD[8]); + + my @nor_geno = split(':',$$RECORD[9]); + my @tum_geno = split(':',$$RECORD[10]); + + my $nor_fd = $nor_geno[$previous_format_hash->{'FD'}]; + + if($nor_fd < 200 && + $nor_geno[$previous_format_hash->{'FC'}] <= 1 && + $nor_geno[$previous_format_hash->{'FD'}] >= 10 && + $nor_geno[$previous_format_hash->{'FC'}] <= ($tum_geno[$previous_format_hash->{'FC'}] * 0.1) + ){ + return $PASS; + } + + my $tumfc_over_tumfd = $tum_geno[$previous_format_hash->{'FD'}] > 0 ? $tum_geno[$previous_format_hash->{'FC'}] / $tum_geno[$previous_format_hash->{'FD'}] : undef; + my $norfc_over_norfd = $nor_geno[$previous_format_hash->{'FD'}] > 0 ? 
$nor_geno[$previous_format_hash->{'FC'}] / $nor_geno[$previous_format_hash->{'FD'}] : undef; + + if($nor_fd < 200){ + if(($nor_geno[$previous_format_hash->{'FC'}] == 1 || $nor_geno[$previous_format_hash->{'FC'}] == 2) && + $norfc_over_norfd <= 0.05 && + $tumfc_over_tumfd >= 0.2 + ){ + return $PASS; + } + + }else{ + if($norfc_over_norfd <= 0.02 && $tumfc_over_tumfd >= 0.1){ + return $PASS; + } + } + + return $FAIL; +} + 1; diff --git a/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm b/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm index a5a111e..a6d47ab 100644 --- a/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm +++ b/perl/lib/Sanger/CGP/PindelPostProcessing/VcfSoftFlagger.pm @@ -1,25 +1,33 @@ -package Sanger::CGP::PindelPostProcessing::VcfSoftFlagger; - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. # -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. +# +package Sanger::CGP::PindelPostProcessing::VcfSoftFlagger; use strict; use Carp; diff --git a/perl/rules/pulldownFfpeRulesFragment.lst b/perl/rules/pulldownFfpeRulesFragment.lst new file mode 100644 index 0000000..cf50e63 --- /dev/null +++ b/perl/rules/pulldownFfpeRulesFragment.lst @@ -0,0 +1,10 @@ +Sanger::CGP::PindelPostProcessing::FragmentFilterRules +FF001 +FF002 +FF003 +FF004 +FF005 +FF006 +FF007 +FF019 +FF020 diff --git a/perl/t/inputGen.t b/perl/t/inputGen.t index 0c6123f..a540e9e 100644 --- a/perl/t/inputGen.t +++ b/perl/t/inputGen.t @@ -76,4 +76,3 @@ subtest 'reads_to_disk checks' => sub{ }; done_testing(); - diff --git a/perl/t/inputGenRead.t b/perl/t/inputGenRead.t index 357c7ac..62309e8 100644 --- a/perl/t/inputGenRead.t +++ b/perl/t/inputGenRead.t @@ -45,4 +45,3 @@ $obj = new_ok($MODULE, [\$cleaned, 2]); is($obj->frac_pbq_poor, $EXP_FRAC_PBQ, 'Check poor qual PBQ fraction'); done_testing(); - diff --git a/perl/t/vcfPindelFragmentFlagger.t b/perl/t/vcfPindelFragmentFlagger.t new file mode 100644 index 0000000..72c4dcc --- /dev/null +++ 
b/perl/t/vcfPindelFragmentFlagger.t @@ -0,0 +1,735 @@ +#################################################### +# Copyright (c) 2014-2021 Genome Research Ltd. +# Author: CASM/Cancer IT, cgphelp@sanger.ac.uk +# See LICENCE for details +#################################################### + +use strict; +use warnings; +use Cwd 'abs_path'; +use Test::More; +use Data::Dumper; +use Const::Fast qw(const); + +const my @AVAILABLE_RULES => qw(FF001 FF002 FF003 FF004 FF005 FF006 FF007 FF008 FF009 FF010 FF012 FF015 FF016 FF017 FF018 FF019 FF020); + +my %rule_test_dispatch = ('FF001' => \&_test_FF001, + 'FF002' => \&_test_FF002, + 'FF003' => \&_test_FF003, + 'FF004' => \&_test_FF004, + 'FF005' => \&_test_FF005, + 'FF006' => \&_test_FF006, + 'FF007' => \&_test_FF007, + 'FF008' => \&_test_FF008, + 'FF009' => \&_test_FF009, + 'FF010' => \&_test_FF010, + 'FF012' => \&_test_FF012, + 'FF015' => \&_test_FF015, + 'FF016' => \&_test_FF016, + 'FF017' => \&_test_FF017, + 'FF018' => \&_test_FF018, + 'FF019' => \&_test_FF019, + 'FF020' => \&_test_FF020, + ); + +use_ok('Sanger::CGP::PindelPostProcessing::VcfSoftFlagger'); +use_ok('Sanger::CGP::PindelPostProcessing::FragmentFilterRules'); + +my @rules_found = Sanger::CGP::PindelPostProcessing::FragmentFilterRules::available_rules(); +is_deeply(\@rules_found, \@AVAILABLE_RULES, 'Expected set of rules are implemented'); +for my $flag(@AVAILABLE_RULES) { + $rule_test_dispatch{$flag}(Sanger::CGP::PindelPostProcessing::FragmentFilterRules->rule($flag)); +} + +done_testing(); + +sub _test_FF001{ + my ($filter_hash) = @_; + subtest "Test rule FF001" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 5 5'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 6 4'; + my $test3 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 4 6'; + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF001 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF001 pWt == pMt"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF001 pWt > pMt"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF001 pWt < pMt"); + }; +} + + +sub _test_FF002{ + my ($filter_hash) = @_; + subtest "Test rule FF002" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=18 FC 1 0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=18 FC 0 0'; + + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=18 FC 1 0'; + my $test4 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=18 FC 0 0'; + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF002 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF002 length == 4 pWt == 1"); + $RECORD = [split("\t",$test2)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF002 length == 4 pWt == 0"); + + $RECORD = [split("\t",$test3)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF002 length == 5 pWt == 1"); + $RECORD = [split("\t",$test4)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF002 length == 5 pWt == 0"); + } +} + +sub _test_FF003{ + my ($filter_hash) = @_; + subtest "Test rule FF003" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 3:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:3'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 4:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:4'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 3:4'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 2:2'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 2:1'; + my $test8 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 1:2'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 2:0'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:2'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PU:NU 0:0 0:0'; + + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF003 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 3 pMtNeg == 0"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 0 pMtNeg == 3"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 4 pMtNeg == 0"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 0 pMtNeg == 4"); + + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 3 pMtNeg == 4"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF003 pMtPos == 2 pMtNeg == 2"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 2 pMtNeg == 1"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 1 pMtNeg == 2"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 2 
pMtNeg == 0"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 0 pMtNeg == 2"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF003 pMtPos == 0 pMtNeg == 0"); + }; +} + + +sub _test_FF004{ + my ($filter_hash) = @_; + subtest "Test rule FF004" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 9:0:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 10:0:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 199:0:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:0:0'; + + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:2:3'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:2:2'; + + + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:100:4:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:8:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:7:0'; + + + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 100:0:0:4'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:100:0:8'; + my $test12 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:100:0:7'; + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF004 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF004 "); + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF004 "); + }; +} + +sub _test_FF005{ + my ($filter_hash) = @_; + subtest "Test rule FF005" => sub { + my $test1 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 199:0:1:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:199:1:0'; + + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:4:4'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:4:4'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 199:1:4:4'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 1:199:4:4'; + + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:1:1'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:3:4'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:4:4'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:4:5'; + + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:1:0'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:7:0'; + my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:8:0'; + + my $test14 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 200:0:0:1'; + my $test15 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:0:7'; + my $test16 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR:PU:NU 0:0:0:0 0:200:0:8'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF005 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth < 200"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth < 200 with different pos/neg ratios"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 with different pos/neg ratios"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 with different pos/neg ratios"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depth > 200 pMtPos == 1 pMtNeg == 1"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depth > 200 pMtPos == 4 pMtNeg == 3"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 pMtPos == 4 pMtNeg == 4"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depth > 200 pMtPos == 4 pMtNeg == 
5"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthNeg > 200 pMtPos == 1"); + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depthPos > 200 pMtPos == 7"); + $RECORD = [split("\t",$test13)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthPos > 200 pMtPos == 8"); + $RECORD = [split("\t",$test14)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthPos > 200 pMtNeg == 1"); + $RECORD = [split("\t",$test15)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF005 depthNeg > 200 pMtNeg == 7"); + $RECORD = [split("\t",$test16)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF005 depthNeg > 200 pMtNeg == 8"); + }; +} + +sub _test_FF006{ + my ($filter_hash) = @_; + subtest "Test rule FF006" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=9 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=10 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=9 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=10 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test6 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=5;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=4;SM=138;S1=10;S2=203.791;REP=11 PP:NP:PB:NB:PD:ND:PR:NR:PU:NU 0:0:0:0:0:0:0:0:0:0 0:0:0:0:0:0:0:0:0:0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF006 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 4 rep == 9"); + $RECORD = [split("\t",$test2)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF006 length == 4 rep == 10"); + $RECORD = [split("\t",$test3)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 4 rep == 0"); + $RECORD = [split("\t",$test7)]; + $MATCH = 4; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF006 length == 4 rep == 11"); + $RECORD = [split("\t",$test4)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 5 rep == 9"); + $RECORD = [split("\t",$test5)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 5 rep == 10"); + $RECORD = [split("\t",$test6)]; + $MATCH = 5; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF006 length == 5 rep == 0"); + }; +} + +sub _test_FF007{ + my ($filter_hash) = @_; + subtest "Test rule FF007" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 0 5'; + my $test2 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 1 5'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 0 6'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 1 6'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 7 100'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 8 100'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FD 9 100'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF007 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt < 6"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt < 6 and rdWt > 8pc"); + + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt == 6 and rdWt < 8pc"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF007 rdMt == 6 and rdWt > 8pc"); + + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF007 rdMt == 100 and rdWt == 7"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF007 rdMt == 100 and rdWt == 8"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF007 rdMt == 100 and rdWt > 8"); + }; +} + +sub _test_FF008{ + my ($filter_hash) = @_; + subtest "Test 
rule FF008" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 6 100'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 5 100'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 4 100'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 0 100'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/REP',"_test_FF008 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF008 pWt > pMt5%"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF008 pWt == pMt5%"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF008 pWt < pMt5% "); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF008 pWt == 0 "); + }; +} + +sub _test_FF009{ + #fail; #is coding filter +} + +sub _test_FF010{ + #fail; #normal panel filter +} + +sub _test_FF011{ + #fail; #microsatelite filter +} + +sub _test_FF012{ + my ($filter_hash) = @_; + subtest "Test rule FF0012" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:9:0 0:0:9:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:0:9 0:0:0:9'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:10:0 0:0:10:0'; + my $test4 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:0:10 0:0:0:10'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:9:0 0:0:10:0'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 0:0:10:0 0:0:9:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 2:10:10:0 2:10:10:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 1:10:10:0 1:10:10:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 3:10:10:0 3:10:10:0'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 2:10:10:0 1:10:10:0'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD:PD:ND 1:10:10:0 2:10:10:0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $MATCH = 1; + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_F012 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 9 dMt == 9"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 9 dMt == 9 with different pos/neg ratios"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 dWt == 10 dMt == 10"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 dWt == 10 dMt == 10 with different pos/neg ratios"); + $RECORD = 
[split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 9 dMt == 10"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 dWt == 10 dMt == 9"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 WtFC == 2 WtFD == 10 MtFC == 2 MtFD == 10"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 WtFC == 1 WtFD == 10 MtFC == 1 MtFD == 10"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_F012 WtFC == 3 WtFD == 10 MtFC == 3 MtFD == 10"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 WtFC == 2 WtFD == 10 MtFC == 1 MtFD == 10"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_F012 WtFC == 1 WtFD == 10 MtFC == 2 MtFD == 10"); + } +} + +sub _test_FF015{ + my ($filter_hash) = @_; + subtest "Test rule FF015" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 0 0'; + my $test2 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC 1 0'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF015 check the correct info tag has been set for the rule"); + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF0015 no wild type at all"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF0015 pWtFC == 1"); + }; +} + +sub _test_FF016{ + my ($filter_hash) = @_; + subtest "Test rule FF016" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 2:3:1:0'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 5:0:1:0'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 0:5:1:0'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 6:0:1:0'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 4:0:0:0'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 0:4:0:0'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 5:1:1:0'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 0:5:0:0'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 4:1:0:0'; + my $test10 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB 0:0:0:0 0:5:0:0'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB 0:0:0:0 4:1:0:0'; + my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=2 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; + + my $test14 = '22 16404839 . G GA . . PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=0 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; + my $test15 = '22 16404839 . G GA . . PC=I;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=1 PP:NP:PB:NB 0:0:0:0 1:4:0:0'; + + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/REP',"_test_FF016 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 with different pos/neg ratios"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 with different pos/neg ratios"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 6"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 4"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 4 with different pos/neg ratios"); + $RECORD = [split("\t",$test7)]; + 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0"); + + $MATCH = 1; + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 1"); + $MATCH = 1; + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0 rep == 1 pMtPos > 1 pMtPos == 0"); + $MATCH = 1; + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0 rep == 1 pMtPos == 0 pMtNeg > 1"); + + $MATCH = 2; + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 2: pindel calls in only one direction"); + $MATCH = 2; + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 2: pindel calls in both directions"); + $MATCH = 2; + $RECORD = [split("\t",$test13)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 2: pindel calls in both direction"); + + $MATCH = 0; + $RECORD = [split("\t",$test14)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF016 pMt == 5 bMt > 0 rep == 0: insertion pindel calls in both directions"); + $MATCH = 1; + $RECORD = [split("\t",$test15)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF016 pMt == 5 bMt > 0 rep == 1: insertion pindel calls in both directions"); + }; +} + +sub _test_FF017{ + #fail; #simple repeat filter +} + +sub _test_FF018{ + my ($filter_hash) = @_; + subtest "Test rule FF018" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 5:5'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 6:4 5:5'; + my $test3 = '22 16404839 . GA G . . 
PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 6:4'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:4 5:5'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 5:4'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:6 5:5'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 PR:NR 5:5 5:6'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF018 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 10"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 10 with different pos/neg ratios"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 10 with different pos/neg ratios"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF014 pWt == 10 pMt == 9"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF014 pWt == 9 pMt == 10"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 11 pMt == 10"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF014 pWt == 10 pMt == 11"); + }; +} + +sub _test_FF019{ + my ($filter_hash) = @_; + subtest "Test rule FF019" => sub { + my $test1 = '22 16404839 . 
GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 5:20'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 3:60'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 3:59'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 1:20'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 3:100'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:59 3:60'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF019 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 5 pMtFD == 20 pMtFC/pMtFD > 0.05"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 3 pMtFD == 60 pMtFC/pMtFD == 0.05"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC == 0 pMtFC == 3 pMtFD == 59 pMtFC/pMtFD > 0.05"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF019 pWtFC == 0 pMtFC == 1 pMtFD == 20 pMtFC/pMtFD == 0.05"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF019 pWtFC == 0 pMtFC == 3 pMtFD == 61 pMtFC/pMtFC < 0.05"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF019 pWtFC / PWtFD < 0.05 pMtFC / 
pMtFD == 0.05"); + }; +} + +sub _test_FF020{ + my ($filter_hash) = @_; + subtest "Test rule FF020" => sub { + my $test1 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 9:100'; + my $test2 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:200 9:100'; + my $test3 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:200 11:100'; + my $test4 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:10 20:99'; + my $test5 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:10 20:101'; + my $test6 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 20:111'; + my $test7 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:9 20:91'; + my $test8 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:40 20:100'; + my $test9 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:40 19:100'; + my $test10 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:41 20:100'; + my $test11 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:41 20:99'; + my $test12 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:40 20:99'; + my $test13 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:39 20:99'; + my $test14 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 1:11 9:111'; + my $test15 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 2:100 20:101'; + my $test16 = '22 16404839 . 
GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:10 10:100'; + my $test17 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 0:0 0:0'; + my $test18 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 4:201 1:100'; + my $test19 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 3:201 10:100'; + my $test20 = '22 16404839 . GA G . . PC=D;RS=16404838;RE=16404857;LEN=1;SM=138;S1=10;S2=203.791;REP=18 FC:FD 5:201 9:100'; + + my $sub = $filter_hash->{test}; + + my($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF); + $CHROM = [split("\t",$test1)]->[0]; + $POS = [split("\t",$test1)]->[1]; + $FAIL = 1; + $PASS = 0; + #$VCF = $$opts{vcf}; + + is($filter_hash->{tag}, 'INFO/LEN',"_test_FF019 check the correct info tag has been set for the rule"); + + $RECORD = [split("\t",$test1)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD > 0.02 tumFC / tumFD < 0.2"); + $RECORD = [split("\t",$test2)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD < 0.02 tumFC / tumFD < 0.2"); + $RECORD = [split("\t",$test3)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD == 200 WtFC / WtFD > 0.02 tumFC / tumFD > 0.2"); + $RECORD = [split("\t",$test4)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 10 WtFC < MtFC * 0.1);"); + $RECORD = [split("\t",$test5)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 0 WtFD == 10 WtFC < MtFC * 0.1);"); + $RECORD = [split("\t",$test6)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 11 WtFC < MtFC * 0.1);"); + $RECORD = [split("\t",$test7)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), 
$FAIL,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 9 WtFC / WtFD > 0.05);"); + $RECORD = [split("\t",$test8)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD == 0.2);"); + $RECORD = [split("\t",$test9)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD < 0.2);"); + $RECORD = [split("\t",$test10)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD == 0.2);"); + $RECORD = [split("\t",$test11)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD < 0.05 MtFC / MtFD > 0.2);"); + $RECORD = [split("\t",$test12)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD == 0.05 MtFC / MtFD > 0.2);"); + $RECORD = [split("\t",$test13)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 2 WtFC / WtFD > 0.05 MtFC / MtFD > 0.2);"); + $RECORD = [split("\t",$test14)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 11 WtFC > MtFC * 0.1 MtFC / MtFD < 0.2);"); + $RECORD = [split("\t",$test15)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 1 WtFD == 100 WtFC == MtFC * 0.1 MtFC / MtFD < 0.2);"); + $RECORD = [split("\t",$test16)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD < 200 WtFC == 3"); + $RECORD = [split("\t",$test17)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 divid by 0"); + $RECORD = [split("\t",$test18)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD > 200 WtFC / WtFD == 0.02 tumFC / tumFD < 0.1"); + $RECORD = [split("\t",$test19)]; + 
is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $PASS,"_test_FF020 WtFD > 200 WtFC / WtFD < 0.02 tumFC / tumFD == 0.1"); + $RECORD = [split("\t",$test20)]; + is($sub->($MATCH,$CHROM,$POS,$FAIL,$PASS,$RECORD,$VCF), $FAIL,"_test_FF020 WtFD > 200 WtFC / WtFD > 0.02 tumFC / tumFD < 0.1"); + }; +} diff --git a/prerelease.sh b/prerelease.sh deleted file mode 100755 index fb6967a..0000000 --- a/prerelease.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. -# -# Author: CASM/Cancer IT -# -# This file is part of cgpPindel. -# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. -# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## - -set -eu # exit on first error or undefined value in subtitution - -# get current directory -INIT_DIR=`pwd` - -rm -rf blib - -# get location of this file -MY_PATH="`dirname \"$0\"`" # relative -MY_PATH="`( cd \"$MY_PATH\" && pwd )`" # absolutized and normalized -if [ -z "$MY_PATH" ] ; then - # error; for some reason, the path is not accessible - # to the script (e.g. 
permissions re-evaled after suid) - echo Failed to determine location of script >2 - exit 1 # fail -fi -# change into the location of the script -cd $MY_PATH/perl - -echo '### Running perl tests ###' -rm -rf reports docs pm_to_blib blib -cover -delete -export HARNESS_PERL_SWITCHES=-MDevel::Cover=-db,reports,-select='^lib/*\.pm$',-ignore,'^t/' -rm -rf docs -mkdir -p docs/reports_text -prove -w -I lib t - -echo '### Generating test/pod coverage reports ###' -# removed 'condition' from coverage as '||' 'or' doesn't work properly -cover -coverage branch,subroutine,pod -report_c0 50 -report_c1 85 -report_c2 100 -report html_basic reports -silent |& grep -v '^Perltidy' | grep -v '^##' | grep -v '^1:' -# grep on last command to cleanup an oddity in perltidy -cover -coverage branch,subroutine,pod -report text reports -silent > docs/reports_text/coverage.txt -rm -rf reports/structure perl.reports/digests reports/cover.13 reports/runs -cp reports/coverage.html reports/index.html -mv reports docs/reports_html -unset HARNESS_PERL_SWITCHES - -echo '### Generating POD ###' -mkdir -p docs/pod_html -perl -MPod::Simple::HTMLBatch -e 'Pod::Simple::HTMLBatch::go' lib:bin docs/pod_html > /dev/null - -echo '### Archiving docs folder ###' -tar cz -C $MY_PATH/perl -f docs.tar.gz docs - -# generate manifest, and cleanup -echo '### Generating MANIFEST ###' -# delete incase any files are moved, the make target just adds stuff -rm -f MANIFEST -# cleanup things which could break the manifest -rm -rf install_tmp -perl Makefile.PL > /dev/null -make manifest >& /dev/null -rm -f Makefile MANIFEST.bak pm_to_blib - -# change back to original dir -cd $INIT_DIR diff --git a/setup.sh b/setup.sh index 48c34d9..f744386 100755 --- a/setup.sh +++ b/setup.sh @@ -1,25 +1,33 @@ #!/bin/bash - -########## LICENCE ########## -# Copyright (c) 2014-2021 Genome Research Ltd. +# +# Copyright (c) 2014-2021 Genome Research Ltd # # Author: CASM/Cancer IT # # This file is part of cgpPindel. 
# -# cgpPindel is free software: you can redistribute it and/or modify it under -# the terms of the GNU Affero General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your option) any -# later version. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more -# details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -########## LICENCE ########## +# along with this program. If not, see . +# +# 1. The usage of a range of years within a copyright statement contained within +# this distribution should be interpreted as being equivalent to a list of years +# including the first and last year specified and all consecutive years between +# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +# 2009, 2011-2012’ should be interpreted as being identical to a statement that +# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +# 2009, 2010, 2011, 2012’. # ALL tool versions used by opt-build.sh # need to keep in sync with Dockerfile