Skip to content

Commit

Permalink
added config and docs for Google Cloud
Browse files Browse the repository at this point in the history
  • Loading branch information
kyleoconnell committed Jun 28, 2024
1 parent 0abc4b7 commit 46ee92b
Show file tree
Hide file tree
Showing 24 changed files with 1,862 additions and 0 deletions.
Binary file added GoogleCloud/.DS_Store
Binary file not shown.
425 changes: 425 additions & 0 deletions GoogleCloud/amethyst/Snakefile

Large diffs are not rendered by default.

File renamed without changes.
32 changes: 32 additions & 0 deletions GoogleCloud/mega-non-model-wgs-snakeflow/config/chromosomes.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
chrom num_bases
NC_051269.1 44063945
NC_051270.1 41642849
NC_051271.1 41172918
NW_023618430.1 101937
NC_051272.1 39310580
NC_051273.1 38607003
NC_051274.1 36325703
NC_051275.1 36146424
NC_051276.1 36006835
NC_051277.1 35235349
NC_051278.1 34964250
NC_051279.1 34912176
NC_051280.1 34196205
NC_051281.1 33154565
NW_023618431.1 250492
NW_023618432.1 178401
NC_051282.1 32625488
NW_023618433.1 278450
NW_023618434.1 173117
NC_051283.1 32625529
NC_051284.1 32153643
NC_051285.1 30648620
NC_051286.1 29458520
NC_051287.1 28767290
NC_051288.1 27725676
NW_023618435.1 104377
NC_051289.1 27312080
NC_051290.1 24788685
NW_023618436.1 113387
NC_051291.1 23426545
NC_051292.1 16640398
127 changes: 127 additions & 0 deletions GoogleCloud/mega-non-model-wgs-snakeflow/config/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Input tables used by the workflow.
units: config/units.tsv
chromosomes: config/chromosomes.tsv
scaffold_groups: config/scaffold_groups.tsv
scatter_intervals_file: config/scatters_5000000.tsv

# this file is only needed when treating different samples
# as different species, for indel realignment, etc.
indel_grps: config/igrps-species.tsv


# NOTE(review): presumably toggles rclone-based transfer of data; confirm
# against the workflow rules that read this key.
rclone_data: false

# This will typically be left at 0 unless you want to do BQSR.
bqsr_rounds: 0

# to specify some downsampling levels to do you can put them in the
# depths list here.
downsample_bams:
depths: []
# NOTE(review): `bqsr_round` (singular) is a different key from `bqsr_rounds`
# above; presumably it belongs to the downsampling settings -- confirm.
bqsr_round: 0

# leave these as one of your maf_cutoffs so that we can do BQSR still
# as before, though we probably won't.
bqsr_maf: 0.01

# these following ones are irrelevant if you are not doing BQSR, but they
# have to be in the config still
bqsr_qual: 37
bqsr_qd: 15


# the following must be a list, even if it is just one element
maf_cutoffs: [0.01]



# where to send the results
rclone_base: "gdrive-rclone:Bioinformatic-Project-Archives/rockfish-genomics/rockfish-lanes-1-and-2"


# this is the default value for the first GenomicsDBImport run.
# If you want to change it, you should typically change it
# on the command line.
genomics_db_import_num: 0


# Eric modified this to be able to easily handle genomes of non-model
# organisms that are not yet on Ensembl, etc.
ref:
# name of the species. (will simply be put in the SnpEff config).
# Put underscores for spaces
species_name: Sebastes_umbrosus
# name you want to use for the genome version
genome_version_name: fSebUmb1.pri
# the URL where this can be downloaded, if public and easy to
# get with wget. If not, then you should hand-download
# genome.fasta and genome.gff or genome.gtf
genome_url: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/015/220/745/GCF_015220745.1_fSebUmb1.pri/GCF_015220745.1_fSebUmb1.pri_genomic.fna.gz
# if there is a GFF or GTF file, it has to have a .gff or
# .gtf extension. This is where you put the URL for it
gff_or_gtf_url: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/015/220/745/GCF_015220745.1_fSebUmb1.pri/GCF_015220745.1_fSebUmb1.pri_genomic.gtf.gz


filtering:
# When true, machine-learning-based recalibration of quality scores (VQSR)
# is applied instead of the hard filters listed below.
vqsr: false
hard:
# Hard-filter expressions, as outlined in the GATK docs:
# https://gatkforums.broadinstitute.org/gatk/discussion/2806/howto-apply-hard-filters-to-a-call-set
snvs: "QD < 2.0 || FS > 60.0 || MQ < 40.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0"
indels: "QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0"

processing:
remove-duplicates: true
# Points to a bed file with, e.g., captured regions; comment this key out if
# regions should not be restricted. See
# https://gatkforums.broadinstitute.org/gatk/discussion/4133/when-should-i-use-l-to-pass-in-a-list-of-intervals.
# NOTE(review): the key is active in this config -- confirm that restricting
# to chr24_26_28.bed is intended and that the path resolves at run time.
restrict-regions: chr24_26_28.bed
# If regions are restricted, uncomment this to enlarge them by the given value in order to include
# flanking areas.
# region-padding: 100

# Extra command-line arguments for the individual tools, keyed by tool name.
params:
gatk:
# HaplotypeCaller is set to require high base quality scores by default
# because BQSR on non-model organisms performs poorly, and Nina's group
# found it better to just require high base quality scores.
HaplotypeCaller: " --min-base-quality-score 33 --minimum-mapping-quality 20 "
BaseRecalibrator: ""
GenotypeGVCFs: ""
VariantRecalibrator: ""
picard:
MarkDuplicates: " --TAGGING_POLICY All --CREATE_INDEX "
fastp:
pe:
trimmer:
# See fastp manual for adding additional options, e.g. for adapter trimming.
# The entries below give explicit adapter sequences for read 1 and read 2,
# then enable paired-end adapter detection and right-end quality cutting
# (window size 4, mean quality 20).
- " --adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
- " --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
- " --detect_adapter_for_pe --cut_right --cut_right_window_size 4 --cut_right_mean_quality 20 "
# single-end is currently broken
trimmomatic:
pe:
trimmer:
# See trimmomatic manual for adding additional options, e.g. for adapter trimming.
- "ILLUMINACLIP:resources/adapters/TruSeq3-PE-2.fa:2:30:10"
- "LEADING:3"
- "TRAILING:3"
- "SLIDINGWINDOW:4:20"
- "MINLEN:36"
se:
trimmer:
# See trimmomatic manual for adding additional options, e.g. for adapter trimming.
- "LEADING:3"
- "TRAILING:3"
- "SLIDINGWINDOW:4:20"
- "MINLEN:36"
vep:
plugins:
# Add any plugin from https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
# Plugin args can be passed as well, e.g. "LoFtool,path/to/custom/scores.txt".
- LoFtool
# extra command line arguments (e.g. --sift, see docs)
extra: ""
44 changes: 44 additions & 0 deletions GoogleCloud/mega-non-model-wgs-snakeflow/config/igrps-species.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
sample sample_id indel_grp
111_31 111_31 rosenblatti
111_33 111_33 rosenblatti
111_34 111_34 rosenblatti
111_35 111_35 rosenblatti
111_55 111_55 rosenblatti
111_56 111_56 rosenblatti
111_57 111_57 rosenblatti
18_37 18_37 rosenblatti
18_38 18_38 rosenblatti
221_49 221_49 rosenblatti
221_50 221_50 rosenblatti
238_55 238_55 rosenblatti
240_12 240_12 rosenblatti
240_16 240_16 rosenblatti
240_17 240_17 rosenblatti
336_15 336_15 rosenblatti
336_16 336_16 rosenblatti
336_31 336_31 rosenblatti
336_36 336_36 rosenblatti
336_38 336_38 rosenblatti
336_55 336_55 rosenblatti
336_56 336_56 rosenblatti
336_70 336_70 rosenblatti
51_71 51_71 rosenblatti
51_72 51_72 rosenblatti
51_73 51_73 rosenblatti
51_74 51_74 rosenblatti
51_75 51_75 rosenblatti
51_76 51_76 rosenblatti
8_35 8_35 rosenblatti
8_41 8_41 rosenblatti
8_43 8_43 rosenblatti
8_44 8_44 rosenblatti
8_45 8_45 rosenblatti
8_47 8_47 rosenblatti
8_49 8_49 rosenblatti
8_74 8_74 rosenblatti
8_75 8_75 rosenblatti
8_76 8_76 rosenblatti
8_77 8_77 rosenblatti
8_78 8_78 rosenblatti
8_80 8_80 rosenblatti
8_82 8_82 rosenblatti
43 changes: 43 additions & 0 deletions GoogleCloud/mega-non-model-wgs-snakeflow/config/meta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
NMFS_DNA_ID,BOX_ID,BOX_POSITION,SAMPLE_ID,BATCH_ID,PROJECT_NAME,GENUS,SPECIES,Cluster_Morph,LENGTH,WEIGHT,SEX,AGE,REPORTED_LIFE_STAGE,PHENOTYPE,HATCHERY_MARK,TAG_NUMBER,COLLECTION_DATE,ESTIMATED_DATE,PICKER,PICK_DATE,LEFTOVER_SAMPLE,SAMPLE_COMMENTS,SPECIES_CODE,COMMON_NAME,LANDFALL_PORT,CRUISE,HAUL,SITE,STATE_M,COUNTY_M,LATITUDE_M,LONGITUDE_M,LOCATION_COMMENTS_M
,,,8_41,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Palos Verdes,CA,,33.81383,-118.439,
,,,8_43,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Palos Verdes,CA,,33.81383,-118.439,
,,,8_44,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Palos Verdes,CA,,33.81383,-118.439,
,,,8_45,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Palos Verdes,CA,,33.81383,-118.439,
,,,8_47,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Palos Verdes,CA,,33.81383,-118.439,
,,,8_49,x,,Sebastes,eos,,,,,,,,,,,1996,,,,From J. Hyde,pink rockfish,,,,,Palos Verdes,CA,,33.81383,-118.439,
,,,8_74,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Guadalupe Island,MX,,29.15917,-118.27,
,,,8_76,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Guadalupe Island,MX,,29.15917,-118.27,
,,,8_77,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Guadalupe Island,MX,,29.15917,-118.27,
,,,8_78,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Guadalupe Island,MX,,29.15917,-118.27,
,,,8_80,x,,Sebastes,rosenblatti,,,,,,,,,,,1996,,,,From J. Hyde,greenblotched rockfish,,,,,Guadalupe Island,MX,,29.15917,-118.27,
,,,8_82,x,,Sebastes,chlorostictus,,,,,,,,,,,1996,,,,From J. Hyde,greenspotted rockfish,,,,,Guadalupe Island,MX,,29.15917,-118.27,
,,,111_31,x,,Sebastes,rosenblatti,,,,,,,,,,,1994,,,,From J. Hyde,greenblotched rockfish,,,,,60 Mile Bank,CA,,32.10517,-118.237,
,,,111_33,x,,Sebastes,rosenblatti,,,,,,,,,,,1994,,,,From J. Hyde,greenblotched rockfish,,,,,60 Mile Bank,CA,,32.10517,-118.237,
,,,111_34,x,,Sebastes,rosenblatti,,,,,,,,,,,1994,,,,From J. Hyde,greenblotched rockfish,,,,,La Jolla,CA,,32.87333,-117.312,
,,,111_35,x,,Sebastes,rosenblatti,,,,,,,,,,,1994,,,,From J. Hyde,greenblotched rockfish,,,,,La Jolla,CA,,32.87333,-117.312,
,,,111_55,x,,Sebastes,rosenblatti,,,,,,,,,,,1994,,,,From J. Hyde,greenblotched rockfish,,,,,San Nicholas Island,CA,,32.87333,-117.312,
,,,111_56,x,,Sebastes,rosenblatti,,,,,,,,,,,1994,,,,From J. Hyde,greenblotched rockfish,,,,,San Nicholas Island,CA,,33.20083,-119.512,
,,,111_57,x,,Sebastes,chlorostictus,,,,,,,,,,,1994,,,,From J. Hyde,greenspotted rockfish,,,,,San Nicholas Island,CA,,33.20083,-119.512,
,,,51_71,x,,Sebastes,chlorostictus,,,,,,,,,,,1998,,,,From J. Hyde,greenspotted rockfish,,,,,Point Reyes,CA,,38.075,-123.527,
,,,51_72,x,,Sebastes,chlorostictus,,,,,,,,,,,1998,,,,From J. Hyde,greenspotted rockfish,,,,,Point Reyes,CA,,38.075,-123.527,
,,,51_73,x,,Sebastes,chlorostictus,,,,,,,,,,,1998,,,,From J. Hyde,greenspotted rockfish,,,,,Point Reyes,CA,,38.075,-123.527,
,,,51_74,x,,Sebastes,chlorostictus,,,,,,,,,,,1998,,,,From J. Hyde,greenspotted rockfish,,,,,Point Reyes,CA,,38.075,-123.527,
,,,51_75,x,,Sebastes,chlorostictus,,,,,,,,,,,1998,,,,From J. Hyde,greenspotted rockfish,,,,,Point Reyes,CA,,38.075,-123.527,
,,,51_76,x,,Sebastes,chlorostictus,,,,,,,,,,,1998,,,,From J. Hyde,greenspotted rockfish,,,,,Point Reyes,CA,,38.075,-123.527,
,,,240_16,x,,Sebastes,chlorostictus,,,,,,,,,,,2005,,,,From J. Hyde,greenspotted rockfish,,,,,Osborne Bank,CA,,33.36,-119.03,
,,,240_17,x,,Sebastes,chlorostictus,,,,,,,,,,,2005,,,,From J. Hyde,greenspotted rockfish,,,,,Osborne Bank,CA,,33.36,-119.03,
,,,336_15,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,Tanner Bank,CA,,32.7,-119.06,
,,,336_31,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,San Clemente Island,CA,,32.78,-118.36,
,,,336_36,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,San Clemente Island,CA,,32.78,-118.4,
,,,336_38,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,San Clemente Island,CA,,32.78,-118.4,
,,,336_55,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,San Nicholas Island,CA,,33.28,-119.51,
,,,336_70,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,San Nicholas Island,CA,,33.28,-119.51,
,,,18_37,x,,Sebastes,rosenblatti,,,,,,,,,,,2018,,,,Collected by Aguilar,greenblotched rockfish,,,,,Palos Verdes,CA,,33.6834349,-118.320934,
,,,18_38,x,,Sebastes,eos,,,,,,,,,,,2018,,,,Collected by Aguilar,pink rockfish,,,,,Palos Verdes,CA,,33.6834349,-118.320934,
,,,8_35,x,,Sebastes,chlorostictus,,,,,,,,,,,1996,,,,From J. Hyde,greenspotted rockfish,,,,,Palos Verdes,CA,,33.81383,-118.439,
,,,240_12,x,,Sebastes,chlorostictus,,,,,,,,,,,2005,,,,From J. Hyde,greenspotted rockfish,,,,,Osborne Bank,CA,,33.36,-119.03,
,,,336_16,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,Tanner Bank,CA,,32.7,-119.06,
,,,336_56,x,,Sebastes,chlorostictus,,,,,,,,,,,2007,,,,From J. Hyde,greenspotted rockfish,,,,,San Nicholas Island,CA,,33.28,-119.51,
,,,238_55,x,,Sebastes,eos,,,,,,,,,,,2007,,,,From J. Hyde,pink rockfish,,,,,Nine Mile Bank,CA,,32.83333,-117.25,
,,,221_49,x,,Sebastes,eos,,,,,,,,,,,2005,,,,From J. Hyde,pink rockfish,,,,,Santa Rosa Flats,CA,,33.67993,-120,
,,,221_50,x,,Sebastes,eos,,,,,,,,,,,2005,,,,From J. Hyde,pink rockfish,,,,,Santa Rosa Flats,CA,,33.67993,-120,
108 changes: 108 additions & 0 deletions GoogleCloud/mega-non-model-wgs-snakeflow/config/scaffold_groups.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
id chrom len cumul
scaff_group001 NW_023618437.1 204126 204126
scaff_group001 NW_023618438.1 114053 318179
scaff_group001 NW_023618439.1 246245 564424
scaff_group001 NW_023618440.1 422125 986549
scaff_group001 NW_023618441.1 251291 1237840
scaff_group001 NW_023618442.1 161087 1398927
scaff_group001 NW_023618443.1 107603 1506530
scaff_group001 NW_023618444.1 105840 1612370
scaff_group001 NW_023618445.1 104370 1716740
scaff_group001 NW_023618446.1 45793 1762533
scaff_group001 NW_023618447.1 99780 1862313
scaff_group001 NW_023618448.1 98809 1961122
scaff_group001 NW_023618449.1 98658 2059780
scaff_group001 NW_023618450.1 90733 2150513
scaff_group001 NW_023618451.1 86800 2237313
scaff_group001 NW_023618452.1 87547 2324860
scaff_group001 NW_023618453.1 84445 2409305
scaff_group001 NW_023618454.1 77169 2486474
scaff_group001 NW_023618455.1 76524 2562998
scaff_group001 NW_023618456.1 75864 2638862
scaff_group001 NW_023618457.1 75517 2714379
scaff_group001 NW_023618458.1 74501 2788880
scaff_group001 NW_023618459.1 74818 2863698
scaff_group001 NW_023618460.1 74196 2937894
scaff_group001 NW_023618461.1 73345 3011239
scaff_group001 NW_023618462.1 72699 3083938
scaff_group001 NW_023618463.1 71536 3155474
scaff_group001 NW_023618464.1 71225 3226699
scaff_group001 NW_023618465.1 64522 3291221
scaff_group001 NW_023618466.1 62348 3353569
scaff_group001 NW_023618467.1 60741 3414310
scaff_group001 NW_023618468.1 59442 3473752
scaff_group001 NW_023618469.1 10174 3483926
scaff_group001 NW_023618470.1 58209 3542135
scaff_group001 NW_023618471.1 57847 3599982
scaff_group001 NW_023618472.1 57411 3657393
scaff_group001 NW_023618473.1 57104 3714497
scaff_group001 NW_023618474.1 56904 3771401
scaff_group001 NW_023618475.1 54570 3825971
scaff_group001 NW_023618476.1 54391 3880362
scaff_group001 NW_023618477.1 32169 3912531
scaff_group001 NW_023618478.1 52788 3965319
scaff_group001 NW_023618479.1 52159 4017478
scaff_group001 NW_023618480.1 52063 4069541
scaff_group001 NW_023618481.1 51365 4120906
scaff_group001 NW_023618482.1 50260 4171166
scaff_group001 NW_023618483.1 49436 4220602
scaff_group001 NW_023618484.1 64152 4284754
scaff_group001 NW_023618485.1 49333 4334087
scaff_group001 NW_023618486.1 48336 4382423
scaff_group001 NW_023618487.1 48680 4431103
scaff_group001 NW_023618488.1 48018 4479121
scaff_group001 NW_023618489.1 47477 4526598
scaff_group001 NW_023618490.1 46746 4573344
scaff_group001 NW_023618491.1 46882 4620226
scaff_group001 NW_023618492.1 46310 4666536
scaff_group001 NW_023618493.1 43696 4710232
scaff_group001 NW_023618494.1 43652 4753884
scaff_group001 NW_023618495.1 42768 4796652
scaff_group001 NW_023618496.1 42464 4839116
scaff_group001 NW_023618497.1 42129 4881245
scaff_group001 NW_023618498.1 41579 4922824
scaff_group001 NW_023618499.1 40765 4963589
scaff_group001 NW_023618500.1 40549 5004138
scaff_group001 NW_023618501.1 39165 5043303
scaff_group001 NW_023618502.1 39231 5082534
scaff_group001 NW_023618503.1 38418 5120952
scaff_group001 NW_023618504.1 38339 5159291
scaff_group001 NW_023618505.1 38270 5197561
scaff_group001 NW_023618506.1 38104 5235665
scaff_group001 NW_023618507.1 36413 5272078
scaff_group001 NW_023618508.1 35189 5307267
scaff_group001 NW_023618509.1 34840 5342107
scaff_group001 NW_023618510.1 34679 5376786
scaff_group001 NW_023618511.1 33985 5410771
scaff_group001 NW_023618512.1 33753 5444524
scaff_group001 NW_023618513.1 32839 5477363
scaff_group001 NW_023618514.1 32835 5510198
scaff_group001 NW_023618515.1 31384 5541582
scaff_group001 NW_023618516.1 31237 5572819
scaff_group001 NW_023618517.1 31102 5603921
scaff_group001 NW_023618518.1 29418 5633339
scaff_group001 NW_023618519.1 27743 5661082
scaff_group001 NW_023618520.1 27412 5688494
scaff_group001 NW_023618521.1 26008 5714502
scaff_group001 NW_023618522.1 25196 5739698
scaff_group001 NW_023618523.1 24512 5764210
scaff_group001 NW_023618524.1 22772 5786982
scaff_group001 NW_023618525.1 22257 5809239
scaff_group001 NW_023618526.1 21389 5830628
scaff_group001 NW_023618527.1 21196 5851824
scaff_group001 NW_023618528.1 20509 5872333
scaff_group001 NW_023618529.1 17830 5890163
scaff_group001 NW_023618530.1 7215 5897378
scaff_group001 NW_023618531.1 3314 5900692
scaff_group001 NW_023618532.1 173666 6074358
scaff_group001 NW_023618533.1 217730 6292088
scaff_group001 NW_023618534.1 182171 6474259
scaff_group001 NW_023618535.1 167199 6641458
scaff_group001 NW_023618536.1 162409 6803867
scaff_group001 NW_023618537.1 155123 6958990
scaff_group001 NW_023618538.1 151542 7110532
scaff_group001 NW_023618539.1 146466 7256998
scaff_group001 NW_023618540.1 143925 7400923
scaff_group001 NW_023618541.1 137484 7538407
scaff_group001 NW_023618542.1 137558 7675965
scaff_group001 NW_023618543.1 116618 7792583
Loading

0 comments on commit 46ee92b

Please sign in to comment.