generated from noaa-nwfsc/NWFSC-Template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding the scripts and snakefile used when testing Trinity on Sedna.
- Loading branch information
1 parent
73be658
commit db06a6b
Showing
5 changed files
with
211 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import glob
import re

# Paired-end inputs found in raw/. glob.glob() makes no ordering guarantee
# (the order depends on the file system), so both lists are sorted; for files
# that differ only in the R1/R2 token this keeps the i-th R1 file aligned with
# the i-th R2 file in the comma-separated lists handed to Trinity below.
files_R1 = sorted(glob.glob('raw/*.R1.fq.gz'))
files_R2 = sorted(glob.glob('raw/*.R2.fq.gz'))
files_R1_str = ','.join(files_R1)
files_R2_str = ','.join(files_R2)

# Trimmomatic settings passed to Trinity through --quality_trimming_params.
adapter_file = "/share/nwfsc/ggoetz/202310-terra_gcp-trinity/TruSeq3-PE-2.fa"
trim_string = ("ILLUMINACLIP:" + adapter_file +
               ":2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50")


# Single Trinity run over every read pair collected above, with Trinity's
# built-in Trimmomatic step enabled. stdout and stderr both go to the rule log.
rule trinity:
    conda:
        "trinity-2.15.1"
    output:
        "trinity/trinity.all.Trinity.fasta",
        "trinity/trinity.all.Trinity.fasta.gene_trans_map"
    threads: 20
    log:
        "logs/trinity.log"
    benchmark:
        "benchmarks/trinity.txt"
    shell:
        """
        Trinity \
            --seqType fq \
            --trimmomatic \
            --quality_trimming_params "{trim_string}" \
            --CPU {threads} \
            --output trinity/trinity.all \
            --max_memory 500G \
            --left {files_R1_str} \
            --right {files_R2_str} \
            &> {log}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/bin/bash

#SBATCH --job-name=FASTQC
#SBATCH -c 20
#SBATCH -t 0
# NOTE(review): the next directive looks redacted (presumably it was
# --mail-user=<address>); restore the real option before submitting.
#SBATCH [email protected]
#SBATCH --mail-type=ALL

# Fix for using conda with the scheduler: load the shell setup from ~/.bashrc
source ~/.bashrc

# Load the FastQC environment
mamba activate fastqc-0.12.1

# Setup base variables
BASE=/share/nwfsc/ggoetz/202310-terra_gcp-trinity
IN="${BASE}/raw"
OUT="${BASE}/fastqc/raw"

# Follow the CPU count granted by Slurm (-c above) so the two cannot drift
# apart; fall back to 20 when the variable is not set.
THREADS="${SLURM_CPUS_PER_TASK:-20}"

# Create output folder if it doesn't exist (mkdir -p is a no-op when it does)
mkdir -p "${OUT}"

cd "${BASE}" || exit 1

# Running FastQC on every gzipped file in the input folder
fastqc \
    -t "${THREADS}" \
    -o "${OUT}" \
    "${IN}"/*.gz \
    &> "${OUT}/fastqc.log"

# Switch to MultiQC environment
mamba activate multiqc-1.17

# Stop here if the folder is unreachable, otherwise MultiQC would scan
# whatever directory the job happens to be in.
cd "${OUT}" || exit 1

# Run MultiQC
multiqc .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/bin/bash

#SBATCH --job-name=FASTQCTrimmed
#SBATCH -c 20
#SBATCH -t 0
# NOTE(review): the next directive looks redacted (presumably it was
# --mail-user=<address>); restore the real option before submitting.
#SBATCH [email protected]
#SBATCH --mail-type=ALL

# Fix for using conda with the scheduler: load the shell setup from ~/.bashrc
source ~/.bashrc

# Load the FastQC environment
mamba activate fastqc-0.12.1

# Setup base variables
BASE=/share/nwfsc/ggoetz/202310-terra_gcp-trinity
IN="${BASE}/trimmed"
OUT="${BASE}/fastqc/trimmed"

# Follow the CPU count granted by Slurm (-c above) so the two cannot drift
# apart; fall back to 20 when the variable is not set.
THREADS="${SLURM_CPUS_PER_TASK:-20}"

# Create output folder if it doesn't exist (mkdir -p is a no-op when it does)
mkdir -p "${OUT}"

cd "${BASE}" || exit 1

# Running FastQC on the paired trimmed reads only
fastqc \
    -t "${THREADS}" \
    -o "${OUT}" \
    "${IN}"/*.trimmed.paired.*.fq.gz \
    &> "${OUT}/fastqc.log"

# Switch to MultiQC environment
mamba activate multiqc-1.17

# Stop here if the folder is unreachable, otherwise MultiQC would scan
# whatever directory the job happens to be in.
cd "${OUT}" || exit 1

# Run MultiQC
multiqc .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/bash

#SBATCH --job-name=TrimmomaticNose
#SBATCH -c 20
#SBATCH -t 0
# NOTE(review): the next directive looks redacted (presumably it was
# --mail-user=<address>); restore the real option before submitting.
#SBATCH [email protected]
#SBATCH --mail-type=ALL

# Fix for using conda with the scheduler: load the shell setup from ~/.bashrc
source ~/.bashrc

# Load Trimmomatic environment
mamba activate trimmomatic-0.39

# Setup
BASE=/share/nwfsc/ggoetz/202310-terra_gcp-trinity
IN="${BASE}/raw"
OUT="${BASE}/trimmed"

# Trimmomatic Adapter Folder
ADAPTER_FOLDER=/opt/bioinformatics/mambaforge/envs/trimmomatic-0.39/share/trimmomatic/adapters

# Follow the CPU count granted by Slurm (-c above) so the two cannot drift
# apart; fall back to 20 when the variable is not set.
THREADS="${SLURM_CPUS_PER_TASK:-20}"

# Create output folder if it doesn't exist (mkdir -p is a no-op when it does)
mkdir -p "${OUT}"

cd "${BASE}" || exit 1

# Loop through the samples. Iterating over the glob directly avoids parsing
# `ls` output, which breaks on unusual file names.
for R1_FILE in "${IN}"/*R1*.gz
do
    # An unmatched glob is left as the literal pattern; skip it
    [ -e "${R1_FILE}" ] || continue

    # Sample name = file name (without folder) up to its first "."
    SAMPLE=$(basename "${R1_FILE}")
    SAMPLE="${SAMPLE%%.*}"

    # Run Trimmomatic
    trimmomatic \
        PE \
        -threads "${THREADS}" \
        "${IN}/${SAMPLE}.R1.fq.gz" \
        "${IN}/${SAMPLE}.R2.fq.gz" \
        "${OUT}/${SAMPLE}.trimmed.paired.R1.fq.gz" \
        "${OUT}/${SAMPLE}.trimmed.unpaired.R1.fq.gz" \
        "${OUT}/${SAMPLE}.trimmed.paired.R2.fq.gz" \
        "${OUT}/${SAMPLE}.trimmed.unpaired.R2.fq.gz" \
        "ILLUMINACLIP:${ADAPTER_FOLDER}/TruSeq3-PE-2.fa:2:30:10" \
        LEADING:3 \
        TRAILING:3 \
        SLIDINGWINDOW:4:20 \
        MINLEN:50 \
        &> "${OUT}/${SAMPLE}.trim.log"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/bin/bash

#SBATCH --job-name=Trinity
#SBATCH -c 24
#SBATCH -t 0
# NOTE(review): the next directive looks redacted (presumably it was
# --mail-user=<address>); restore the real option before submitting.
#SBATCH [email protected]
#SBATCH --mail-type=ALL

# Using a himem node since we probably need more than 200 GB
#SBATCH -p himem

# so we can use conda with the scheduler
source ~/.bashrc

# load the trinity environment
mamba activate trinity-2.15.1

VER=1

# Setup
BASE=/share/nwfsc/ggoetz/202310-terra_gcp-trinity
IN="${BASE}/trimmed"
OUT="${BASE}/trinity/trinity.all.v${VER}"

# Follow the CPU count granted by Slurm (-c above) so the two cannot drift
# apart; fall back to 24 when the variable is not set.
THREADS="${SLURM_CPUS_PER_TASK:-24}"

# Create output folder if it doesn't exist (mkdir -p is a no-op when it does)
mkdir -p "${OUT}"

# Get a list of the files, only using the paired trimmed ones.
# Globs expand in sorted order, so R1 and R2 entries line up by sample.
R1_LIST=("${IN}"/*.paired.R1.*.gz)
R2_LIST=("${IN}"/*.paired.R2.*.gz)

# An unmatched glob stays as the literal pattern, so test the first entry
if [ ! -e "${R1_LIST[0]}" ] || [ ! -e "${R2_LIST[0]}" ]; then
    echo "No paired trimmed reads found in ${IN}" >&2
    exit 1
fi

# Every left read file needs a matching right read file
if [ "${#R1_LIST[@]}" -ne "${#R2_LIST[@]}" ]; then
    echo "R1/R2 file counts differ: ${#R1_LIST[@]} vs ${#R2_LIST[@]}" >&2
    exit 1
fi

# Join each list with commas, the format Trinity takes for --left/--right
FILES_R1=$(IFS=,; echo "${R1_LIST[*]}")
FILES_R2=$(IFS=,; echo "${R2_LIST[*]}")

# Running Trinity, setting a really high memory limit since
# we don't know how much it is going to use.
Trinity \
    --seqType fq \
    --CPU "${THREADS}" \
    --output "${OUT}" \
    --max_memory 1200G \
    --left "${FILES_R1}" \
    --right "${FILES_R2}" \
    &> "${OUT}/trinity.log"