-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3f339ac
commit be46825
Showing
1 changed file
with
263 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,263 @@ | ||
#!/usr/bin/env bash | ||
|
||
|
||
############################################################## | ||
# Author Guilhem ROYER [email protected] | 06/04/2018 # | ||
############################################################## | ||
|
||
set -e | ||
set -u | ||
set -o pipefail | ||
|
||
|
||
############# | ||
# Help # | ||
############# | ||
|
||
usage() | ||
{ | ||
cat << EOF | ||
usage: PlaScope.sh [OPTIONS] [ARGUMENTS] | ||
-h, --help display this message | ||
-t number of threads[OPTIONAL] [default : 8] | ||
-i fastq name (assumed to be fastq_name_1.fastq.gz / fastq_name_2.fastq.gz) [MANDATORY] | ||
--fastq_dir path to fastq directory [MANDATORY] | ||
-o output directory [OPTIONAL] [default : current directory] | ||
--db_dir path to centrifuge database [MANDATORY] | ||
--db_name centrifuge database name [MANDATORY] | ||
Wrapper to launch PlaScope (SPAdes + Centrifuge-based plasmidic sequences classification) | ||
EOF | ||
} | ||
|
||
|
||
################################ | ||
# Just a little bit of art # | ||
################################ | ||
|
||
|
||
plascope() | ||
{ | ||
cat << "EOF" | ||
# MNXXXXXXXXXXXXNWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWXXXXXXXXXXXXXXXXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM | ||
No'''''''''''''cOWMMWKOOOXWMMMMMMMMMMMMMMWKO0OO0000O0XMMMMNk;''''''''''''',:dXMWX00000000OO0O0NMMMN0O000000OOKNMMWX0000000000OKWMMMN0O000000000000O0NM | ||
N: .:0W0, cNMMMMMMMMMMMMWO:. 'dXM0; ,xKNWKl. .,xNMNx,. .;kNNc .:0WMO' .'dXM | ||
N: .:lllll; ,00' :NMMMMMMMMMMMWd. ;0d ,llllllllokNMMMO' 'dXMMX: oX: .lXk. .lKWMM | ||
N: ,KMWWKl. .c0W0' :NMMMMMMMMMMMN: .okkkkkk; .kd .xMMMMMMMMMMMMMMd. :kkkkkkOXMMMM0' ,xkkkkkd. ;K: .okd; ,kNk. ;kkkkkkkKWMMMM | ||
N: ,KWKl. .c0WMM0' :NMMMMMMMMMMMN: 'OOxxxxx; .kd cOkkkkkkOkOXMMMd. .xMMMMMMMMMMMM0' cWMMMMMX; ;K: ,Ox, ,xNMMk. ;xxxxxxONMMMMM | ||
N: 'ko. .c0WMMMM0' :NMMMMMMMMMMMN: .:. .kO. 'dOXd .xMMMMMMMMMMMM0' cWMMMMMX; ;K: .:. ,xNMMMMk. .OMMMMM | ||
N: .:. .:OWMMMMMM0' :NMMMMMMMMMMMN: 'kOxxxxx; .kWKl. :l. .xMMMMMMMMMMMM0' cWMMMMMX; ;K: ,kd,,xNMMMMMMk. ,xxxxxxONMMMMM | ||
N: ,0O:cOWMMMMMMMM0' ,k0O0000OO0OKN: '0MMMMMMo .kMMWXOOOOOOOOOk, ':. c0OO000000O0X0' ,k0O0OOx' ;K: ,KWNNMMMMMMMMk. :O000000O00OXM | ||
N: ,KMWWMMMMMMMMMM0' :Kc '0MMMMMMo .kMMMMMMMMMMMMMNc 'o' .dK; lK: ,KMMMMMMMMMMMk. cN | ||
N: ,KMMMMMMMMMMMMM0, ;Kc ,0MMMMMWo .k0lcccccccccccc. 'O0c. oWXo. ,xNN: ,KMMMMMMMMMMMk. cN | ||
N: ,KMMMMMMMMMMMMMN0kkkkkkkkkkkkkkk0WKkkk0WMMMMMMXOkkONd .;dNMWKkkkkkkkkkkkkkXMMMXOkkkkkkkkkONMMWKkkk0WMMMMMMMMMMMNOkkkkkkkkkkkkkkkKW | ||
No',,lXMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMO;,,,',,,,,,,,,,lKWWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM | ||
MWNWNWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWWWWWWWWWWWWWWWWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM | ||
EOF | ||
} | ||
|
||
|
||
#print on stderr | ||
echoerr() { printf "%s\n" "$*" >&2; } | ||
|
||
|
||
######################## | ||
### Contig sorting ## | ||
######################## | ||
|
||
|
||
contig_sorting() | ||
{ | ||
|
||
local plascopeextendres="$1" | ||
|
||
local contigcov=${CONTIGCOV} | ||
local contiglength=${CONTIGLENGTH} | ||
local hitlength=${HITLENGTH} | ||
|
||
awk -F'\t' -v contigcov=${contigcov} -v contiglength=${contiglength} -v hitlength=${hitlength} ' | ||
BEGIN { | ||
TPLASCOPERES[0]="unclassified" | ||
TPLASCOPERES[1]="unclassified" | ||
TPLASCOPERES[2]="chromosome" | ||
TPLASCOPERES[3]="plasmid" | ||
OFS="\t" | ||
#skip first line | ||
getline | ||
} | ||
{clab=$1; split(clab,T,"_") ; ccov=T[6]; | ||
if ( $7>=contiglength && $6>=hitlength && ccov>contigcov ) print $1,TPLASCOPERES[$3] | ||
else print $1,TPLASCOPERES[0] | ||
}' $plascopeextendres | ||
|
||
} | ||
|
||
|
||
|
||
######################################### | ||
###### Awk fasta extraction ####### | ||
######################################### | ||
|
||
contig_extraction() | ||
{ | ||
|
||
local contigfile="$1" | ||
local contigsortingfile="$2" | ||
local contigfileprefix="$3" | ||
|
||
awk -F'\t' -v contigfileprefix=${contigfileprefix} ' | ||
NR==FNR{Tcontig[">"$1]=$2;next} | ||
/^>/ { | ||
if ($1 in Tcontig) output=contigfileprefix"_"Tcontig[$1]".fasta" | ||
else | ||
{ | ||
print "Warning:", $1, "not classified." > "/dev/stderr" | ||
output="" | ||
} | ||
} | ||
output { print > output }' $contigsortingfile $contigfile | ||
|
||
} | ||
|
||
|
||
#################################### | ||
#### Get argument with getopts ##### | ||
#################################### | ||
|
||
while getopts ":i:o:d:t:-:h" optchar; do | ||
case "${optchar}" in | ||
-) | ||
case "${OPTARG}" in | ||
help) | ||
usage | ||
exit | ||
;; | ||
db_dir) | ||
CENTRI_DIR="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 )) | ||
;; | ||
db_name) | ||
DB_NAME="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 )) | ||
;; | ||
fastq_dir) | ||
FQ_DIR="${!OPTIND}"; OPTIND=$(( $OPTIND + 1 )) | ||
;; | ||
esac;; | ||
h) | ||
usage | ||
exit | ||
;; | ||
i) | ||
INPUT=${OPTARG} | ||
;; | ||
t) | ||
THREADS=${OPTARG} | ||
;; | ||
o) | ||
O_DIR=${OPTARG} | ||
;; | ||
?) | ||
usage | ||
exit | ||
;; | ||
esac | ||
done | ||
|
||
|
||
|
||
|
||
if [[ -z "${INPUT:-}" ]] || [[ -z "${CENTRI_DIR:-}" ]] || [[ -z "${DB_NAME:-}" ]] | ||
then | ||
usage | ||
exit | ||
fi | ||
|
||
# Set default values of optional parameters | ||
if [[ -z "${THREADS:-}" ]] | ||
then | ||
# default value: 8 | ||
THREADS=8 | ||
fi | ||
|
||
if [[ -z "${FQ_DIR:-}" ]] | ||
then | ||
# default value: current directory | ||
FQ_DIR="." | ||
fi | ||
|
||
if [[ -z "${O_DIR:-}" ]] | ||
then | ||
# default value: current directory | ||
O_DIR="." | ||
fi | ||
#Remove trailing slash of paths if exist | ||
FASTQ_DIR=${FQ_DIR%/} | ||
DB_DIR=${CENTRI_DIR%/} | ||
OUTPUT=${O_DIR%/} | ||
|
||
######################## | ||
#### Run functions ##### | ||
######################## | ||
|
||
|
||
if [[ -d "${OUTPUT}/${INPUT}_PlaScope/PlaScope_predictions" ]] | ||
then | ||
echoerr "${OUTPUT}/${INPUT}_PlaScope/PlaScope_predictions already exists" | ||
exit 1 | ||
else | ||
mkdir -p ${OUTPUT}/${INPUT}_PlaScope/PlaScope_predictions | ||
fi | ||
|
||
if [[ -d "${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results" ]] | ||
then | ||
echoerr "${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results already exists" | ||
exit 1 | ||
else | ||
mkdir -p ${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results | ||
fi | ||
|
||
|
||
#plascope variables for contig evaluation | ||
readonly CONTIGCOV=2 | ||
readonly CONTIGLENGTH=500 | ||
readonly HITLENGTH=100 | ||
|
||
|
||
plascope | ||
|
||
#Step 1/3 | ||
|
||
echo "Step 1/3: Running assembly with SPAdes 3.10.1" | ||
|
||
spades.py --careful -t ${THREADS} -1 ${FASTQ_DIR}/${INPUT}_1.fastq.gz -2 ${FASTQ_DIR}/${INPUT}_2.fastq.gz -o ${OUTPUT}/${INPUT}_PlaScope/SPAdes | ||
|
||
#Step 2/3 | ||
|
||
echo "Step 2/3: Contigs classification with Centrigue and custom database" | ||
|
||
export CENTRIFUGE_INDEXES=${DB_DIR} | ||
centrifuge -f --threads ${THREADS} -x ${DB_NAME} -U ${OUTPUT}/${INPUT}_PlaScope/SPAdes/contigs.fasta -k 1 --report-file ${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results/${INPUT}_summary -S ${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results/${INPUT}_extendedresult | ||
|
||
#Step 3/3 | ||
|
||
echo "Step 3/3: Extraction of plasmid, chromosome and unclassified predictions" | ||
|
||
contig_sorting ${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results/${INPUT}_extendedresult > ${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results/${INPUT}_list | ||
|
||
contig_extraction ${OUTPUT}/${INPUT}_PlaScope/SPAdes/contigs.fasta ${OUTPUT}/${INPUT}_PlaScope/Centrifuge_results/${INPUT}_list ${OUTPUT}/${INPUT}_PlaScope/PlaScope_predictions/${INPUT} | ||
|
||
echo "If you use PlaScope please cite : ..." | ||
|
||
exit 0 |