forked from karel-brinda/NanoSim-H
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.sh
executable file
·64 lines (46 loc) · 2.3 KB
/
example.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/bash
#####
# This script shows how the simulated reads are generated in the paper.
# All training datasets are downloaded from ENA and processed with poretools.
# Only 2D pass reads are extracted and stored on the bcgsc ftp server.
#
# Layout: a working directory is created, then for each of the four datasets
# the 2D reads, the reference genome and a pre-built profile archive are
# downloaded. Training and simulation are run for the first dataset only.
#####

# Trace every command as it runs. `set -e` is deliberately NOT enabled: a
# failed download or the placeholder simulation step below must not prevent
# the remaining downloads from being attempted.
set -x

##### Inside NanoSim, create a working directory
# -p keeps a re-run from erroring when the directory already exists.
mkdir -p ecoli_simulation
# Abort if the directory cannot be entered; otherwise every download below
# would land in whatever directory the script was started from.
cd ecoli_simulation || exit 1

# 1. E. coli R7 dataset
# Origin: ftp://climb.genomics.cn/pub/10.5524/100001_101000/100102/Ecoli_R7_CombinedFasta.tgz

# Get the 2D reads
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R7_2D.fasta

# Get the reference genome
# (-nc: this same file is requested again in sections 2 and 3; skip the
# download when it is already present instead of saving ".1"/".2" copies.)
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_K12_MG1655_ref.fa

# Profiling stage: train a profile with prefix "ecoli" from the 2D reads and
# the reference genome.
# NOTE(review): the previous comment here referred to the file mode of
# read_analysis.py; this script invokes nanosimh_train instead - confirm
# whether any permission note still applies.
nanosimh_train -i ecoli_R7_2D.fasta -r ecoli_K12_MG1655_ref.fa -p ecoli

# Simulation stage. test.fasta is a placeholder for the genome to be simulated:
# this script neither downloads nor creates it, so provide the correct path.
# The -p prefix is the same one passed to nanosimh_train above.
# NOTE(review): the previous comment said "the -c option has to be the same as
# -o in read_analysis.py, or both use default parameter"; neither option is
# used by the commands in this script - presumably -p replaced them, confirm.
nanosimh_simulate --circular -r test.fasta -p ecoli

# To get the profile directly:
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R7_profile.zip

# 2. E. coli R7.3 dataset
# Origin: http://www.ebi.ac.uk/ena/data/view/ERX708228, ERX708229, ERX708230, ERX708231

# Get the 2D reads
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R73_2D.fasta

# Get the reference genome (same file as in section 1; skipped if present)
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_K12_MG1655_ref.fa

# To get the profile directly:
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_R73_profile.zip

# 3. E. coli UCSC phase1b dataset
# Origin: http://www.ebi.ac.uk/ena/data/view/ERP010368

# Get the 2D reads
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_UCSC_phase1b_2D.fasta

# Get the reference genome (same file as in section 1; skipped if present)
wget -nc ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_K12_MG1655_ref.fa

# To get the profile directly:
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/ecoli_UCSC1b_profile.zip

# 4. S. cerevisiae dataset
# Origin: http://labshare.cshl.edu/shares/schatzlab/www-data/nanocorr/2015.07.07/W303_ONT_Raw_reads.fa.gz

# Get the 2D reads
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/yeast_2D.fasta

# Get the reference genome
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/yeast_S288C_ref.fa

# To get the profile directly:
wget ftp://ftp.bcgsc.ca/supplementary/NanoSim/yeast_profile.zip