-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Shixiang Wang (王诗翔)
committed
Apr 22, 2024
1 parent
1f1dbd2
commit cdfb1c7
Showing
5 changed files
with
107 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash | ||
# Download the FASTQ files of the run accessions listed in the loop below | ||
# into /data3/wsx/share/gcap_debug with parallel-fastq-dump. | ||
mkdir -p /data3/wsx/share/gcap_debug | ||
# Stop if the share directory cannot be entered; otherwise the relative | ||
# output path gcap_debug/ would resolve against the wrong directory. | ||
cd /data3/wsx/share || exit 1 | ||
|
||
# Prepend the local sratoolkit bin directory to PATH. | ||
export PATH=$HOME/soft/sratoolkit/bin:$PATH | ||
|
||
for i in ERR5242993 ERR5243012 | ||
do | ||
    echo handling "$i" | ||
    # 20 threads, gzip-compressed output written to gcap_debug/. | ||
    parallel-fastq-dump -t 20 -O gcap_debug/ --split-3 --gzip -s "$i" | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/bin/bash | ||
# For every sample with <id>_1.fastq.gz / <id>_2.fastq.gz in the data | ||
# directory: trim with fastp, align with bwa mem, then sort and index with | ||
# samtools. Results go to bam/<id>.bam; samples that already have a BAM are | ||
# skipped, and an existing bam/<id>.sam is reused instead of re-aligning. | ||
source activate circlemap | ||
# Abort if the data directory is missing so nothing runs in the wrong place. | ||
cd /data3/wsx/share/gcap_debug || exit 1 | ||
|
||
cores=24 | ||
|
||
# -p keeps a rerun from failing because bam/ already exists. | ||
mkdir -p bam | ||
# Sample names: strip the _1/_2 read suffix from every *.fastq.gz file. | ||
sn=$(ls *.fastq.gz | tr ' ' '\n' | sed 's/_[12]\.fastq\.gz$//' | sort | uniq) | ||
#sn=ERR5243012 | ||
INDEX=/data1/database/human/hg38/bwa_index/hg38_p7 | ||
|
||
for id in ${sn}; do | ||
|
||
    fastp -i ${id}_1.fastq.gz -I ${id}_2.fastq.gz -o ${id}_1.fq.gz -O ${id}_2.fq.gz -h ${id}.html -j ${id}.json --thread 16 --dont_overwrite | ||
|
||
    if [ ! -f "bam/${id}.bam" ] | ||
    then | ||
        # Exit status of the alignment step; stays 0 when a SAM is reused. | ||
        bwa_status=0 | ||
        if [ ! -f "bam/${id}.sam" ] | ||
        then | ||
            fq1=${id}_1.fq.gz | ||
            fq2=${id}_2.fq.gz | ||
            echo "Start aligning for ${id}" | ||
            # Align into a temporary name and rename only on success, so a | ||
            # failed or interrupted run never leaves a truncated .sam that a | ||
            # later run would mistake for a finished alignment. | ||
            bwa mem -M -t $cores -R "@RG\tID:${id}\tSM:${id}\tLB:WXS\tPL:Illumina" ${INDEX} ${fq1} ${fq2} \ | ||
                > bam/${id}.sam.partial 2>bam/${id}_bwa.log | ||
            bwa_status=$? | ||
            if [ $bwa_status -eq 0 ] | ||
            then | ||
                mv bam/${id}.sam.partial bam/${id}.sam | ||
            else | ||
                rm -f bam/${id}.sam.partial | ||
            fi | ||
        else | ||
            echo "BWA align for ${id} is done before, directly go to sam > bam step" | ||
        fi | ||
|
||
        if [ $bwa_status -eq 0 ] | ||
        then | ||
            # Both sort and index must succeed before the SAM is removed. | ||
            if samtools sort -@ $cores bam/${id}.sam -o bam/${id}.bam 2>bam/${id}_bam.log \ | ||
                && samtools index bam/${id}.bam | ||
            then | ||
                echo "Removing sam files" | ||
                rm bam/${id}.sam | ||
            else | ||
                echo "Failed when using samtools sort, please check" | ||
                # Drop the incomplete BAM/index so a rerun does not skip | ||
                # this sample as already done. | ||
                rm -f bam/${id}.bam bam/${id}.bam.bai | ||
                exit 1 | ||
            fi | ||
            echo "Done for ${id}." `date` | ||
        else | ||
            echo "Failed for ${id} in bwa." `date` | ||
        fi | ||
|
||
    fi | ||
|
||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Set up conda env | ||
# mamba create -n cancerit -c bioconda cancerit-allelecount | ||
|
||
# Download the hg38 1000G loci, GC-correction and RT-correction files from | ||
# Zenodo record 6524005 into the reference directory and unpack them. | ||
# Create the directory first and stop if it cannot be entered, so the | ||
# downloads never land in whatever directory the script was started from. | ||
mkdir -p /data3/wsx/share/gcap_reference | ||
cd /data3/wsx/share/gcap_reference || exit 1 | ||
# -c resumes a partially downloaded file instead of starting over. | ||
wget -c https://zenodo.org/records/6524005/files/1000G_loci_hg38.tar.gz | ||
wget -c https://zenodo.org/records/6524005/files/GC_correction_hg38.txt.gz | ||
wget -c https://zenodo.org/records/6524005/files/RT_correction_hg38.txt.gz | ||
|
||
tar zxvf 1000G_loci_hg38.tar.gz | ||
gunzip GC_correction_hg38.txt.gz | ||
gunzip RT_correction_hg38.txt.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# remotes::install_github("ShixiangWang/ascat@v3-for-gcap-v1", subdir = "ASCAT") | ||
# remotes::install_github("ShixiangWang/gcap") | ||
# install.packages("https://cran.r-project.org/src/contrib/Archive/xgboost/xgboost_1.5.2.1.tar.gz", repos = NULL) | ||
|
||
library(gcap) | ||
|
||
# Project id is PRJEB42904; wes_395LC is the tumor (id: ERR5242993) and | ||
# wes_395N is the normal (id: ERR5243012). | ||
|
||
# hg38 ---------------- | ||
# Directories shared by several arguments below. They carry no trailing | ||
# slash because file.path() inserts the separator itself; a trailing slash | ||
# would put "//" into the resulting paths. | ||
debug_dir <- "~/share/gcap_debug" | ||
ref_dir <- "~/share/gcap_reference" | ||
loci_dir <- file.path(ref_dir, "1000G_loci_hg38") | ||
|
||
gcap.workflow( | ||
  tumourseqfile = file.path(debug_dir, "bam", "ERR5242993.bam"), | ||
  normalseqfile = file.path(debug_dir, "bam", "ERR5243012.bam"), | ||
  tumourname = "wes_395LC", | ||
  normalname = "wes_395N", | ||
  jobname = "wes_395", | ||
  outdir = file.path(debug_dir, "gcap_result"), | ||
  allelecounter_exe = "~/miniconda3/envs/cancerit/bin/alleleCounter", | ||
  g1000allelesprefix = file.path( | ||
    loci_dir, | ||
    "1kg.phase3.v5a_GRCh38nounref_allele_index_chr" | ||
  ), | ||
  g1000lociprefix = file.path( | ||
    loci_dir, | ||
    "1kg.phase3.v5a_GRCh38nounref_loci_chrstring_chr" | ||
  ), | ||
  GCcontentfile = file.path(ref_dir, "GC_correction_hg38.txt"), | ||
  replictimingfile = file.path(ref_dir, "RT_correction_hg38.txt"), | ||
  skip_finished_ASCAT = TRUE, | ||
  skip_ascat_call = FALSE, | ||
  result_file_prefix = "wes_395", | ||
  genome_build = "hg38", | ||
  model = "XGB11" | ||
) | ||
|