diff --git a/example_params b/example_params index 79d1cee..e69d699 100644 --- a/example_params +++ b/example_params @@ -1,187 +1,186 @@ -# It is a parameters file for GADMA software. +# This is a parameter file for GADMA software. -# Lines started from # are ignored. -# Also comments at the end of a line are ignored too. -# Every line contains: Identificator of parameter : value. +# Lines that begin with # are ignored. +# Comments at the end of a line are also ignored. +# Each line contains: Parameter identifier : value. -#!!! means pay attention to this parameter, they are primary. +#!!! Indicates parameters that require special attention. #!!! -# Output directory to write all GADMA out. -# One need to set it to a missing or empty directory. -# If it is resumed from other directory and output directory -# isn't set, GADMA will add '_resumed' for previous output -# directory. -Output directory: my_example_run_2 - +# Output directory for all GADMA outputs. +# This should be set to a non-existent or empty directory. +# If the process is resumed from another directory and the output +# directory is not specified, GADMA will append '_resumed' to the +# previous output directory. +Output directory: my_example_run #!!! -# Input data can be sfs file (should end with .fs) or -# file of SNP's in dadi format (should end with .txt) or -# VCF file + popmap file (sample population map). +# Input data can be in the form of an SFS file (should end with .fs), +# a SNP file in Dadi format (should end with .txt), or a +# VCF file along with a popmap file (sample population map). Input data: tests/test_data/DATA/sfs/YRI_CEU.fs -# 'Population labels' is sequence of population names (the same -# names as in input file) -# If .fs file is in old format then it would rename population +# 'Population labels' consist of a sequence of population names +# (these must match the names in the input file). +# If the .fs file is in an old format, it will rename population # labels that are absent. 
-# It is necessary to put them in order from most ancient to less. -# (In case of more than two populations) -# It is important, because the last of formed populations take -# part in the next split. -# For example, if we have YRI - African population, -# CEU - European population and CHB - Chinese population, -# then we can write YRI, CEU, CHB or YRI, CHB, CEU -# (YRI must be at the first place) +# They must be ordered from the most ancient to the least ancient +# (if there are more than two populations). +# This is important because the last formed population will be +# involved in the next split. +# For example, if we have YRI (African population), +# CEU (European population), and CHB (Chinese population), +# we can write YRI, CEU, CHB or YRI, CHB, CEU +# (YRI must be in the first position). # Default: from input file Population labels: [YRI, CEU] -# Also one can project spectrum to less size. -# For example, we have 80 individuals in each of three -# populations, then spectrum will be 81x81x81 and one can -# project it to 21x21x21 by set 'Projections' parameter -# to 20, 20, 20. +# SFS projections: number of chromosomes for each population. +# It is also possible to downproject the spectrum to a smaller size. +# For example, if we have 40 diploid samples in each of three +# populations, then the full projections will be 80x80x80. We can +# project it by setting the 'Projections' parameter to [20, 20, 20]. # Default: from input file Projections: [20, 20] -# To indicate if outgroup information is included in SFS data -# one can set Outgroup option. -# If outgroup is False then SFS will be folded. +# To indicate if outgroup information is included in the SFS data, +# set the Outgroup option. +# If outgroup is False, then the SFS will be folded. # Default: from input file Outgroup: True -# Effective length of sequence that was used to build SFS data. -# Should be used together with Mutation rate and can be replaced -# be Thera0 setting. 
+# Effective length of the sequence used to build the SFS data. +# This should be used together with the Mutation rate and can be replaced +# by the Theta0 setting. # Default: None Sequence length: 4040000 #!!! -# Are SNP's linked or unlinked? -# If they are linked, then Composite Likelihood Akaike +# Are the SNPs linked or unlinked? +# If they are linked, then the Composite Likelihood Akaike # Information Criterion (CLAIC) will be used to compare models. -# If they are unlinked, then usual Akaike Information Criterion +# If they are unlinked, then the standard Akaike Information Criterion # (AIC) will be used. # Default: True Linked SNP's: True #!!! -# If SNP's are linked in order to calculate CLAIC, please, set -# the directory with bootstrapped data. -# Bootstrap should be done over the regions of the genome. +# If SNPs are linked, please set the directory containing +# the bootstrapped data for CLAIC calculation. +# Bootstrapping should be done over regions of the genome. # Default: None Directory with bootstrap: Null - #!!! -# Now all main parameters: +# Now for the main parameters: # -# Engine for the demographic inference. +# Engine for demographic inference. # Default: moments Engine: moments -# If you choose to use dadi, please set pts parameter - number -# of points in the grid. Otherwise this pts would be used in dadi's code. +# If you choose to use Dadi, please set the 'pts' parameter - the number +# of points in the grid. Otherwise, you can still specify it: it will be +# used in Dadi's code. # Default: Let n = max number of individuals in one population, # then pts = n, n+10, n+20 Pts: [20, 30, 40] #!!! -# Print parameters of model in units of N_ref = N_A. -# N_A will be placed in brackets at the end of string. +# Print parameters of the model in units of N_ref = N_A. +# N_A will be placed in brackets at the end of the string. # Default: False Relative parameters: False # Total mutation flux - theta. 
# It is equal to: # theta = 4 * μ * L -# where μ - mutation rate per site per generation and -# L - effective sequenced length, which accounts for losses +# where μ is the mutation rate per site per generation and +# L is the effective sequenced length, which accounts for losses # in alignment and missed calls. -# Note: one should estimate μ based on generation time. +# Note: μ should be estimated based on generation time. # Default: 1.0 Theta0: Null -# Instead of Theta0 mutation rate can be set independently. -# Should be used together with Sequence length option. +# Instead of Theta0, the mutation rate can be set independently. +# This should be used together with the Sequence length option. # Default: None Mutation rate: 2.35e-08 -# Recombination rate +# Recombination rate. Required for momentsLD engine. # Default: None Recombination rate: Null -# Time (years) for one generation. Can be float. -# Is important for drawing models. If one don't want to draw, -# one can pass it. +# Time (in years) for one generation. Can be a float. +# This is important for drawing models. If you do not wish to draw, +# you may skip this parameter. # Default: 1.0 Time for generation: Null - #!!! -# One should choose the demographic history to infer. -# It can be custom or setted up with structure. +# You must choose the demographic history to infer. +# It can be custom or set up with structure. # 1. Using a custom demographic model. -# Please specify a file with a function named 'model_func' in it. -# So file should contain: -# def model_func(params, ns, pts) in case of dadi +# Please specify a file containing a function named 'model_func'. +# The file should contain: +# def model_func(params, ns, pts) for Dadi # or -# def model_func(params, ns) in case of moments +# def model_func(params, ns) for moments # Default: None Custom filename: Null -# Now one should specify either bounds or identifications -# of custom model's parameters. All values are in Nref units. 
-# Lower and upper bounds - lists of numbers. -# List of usual bounds: +# You must now specify either bounds or identifiers for the +# parameters of the custom model. All values are in genetic units. +# Lower and upper bounds are lists of numbers. +# Common bounds: # N: 1e-2 - 100 # T: 0 - 5 # m: 0 - 10 # s: 0 - 1 -# These bounds will be taken automatically if identifications are set. +# These bounds will be applied automatically if identifiers are set. # Default: None Lower bound: Null Upper bound: Null + # An identifier list: # T - time # N - size of population # m - migration -# s - split event, proportion in which population size -# is divided to form two new populations. +# s - split event, representing the proportion of population size +# divided to form two new populations. # Default: None Parameter identifiers: Null -# 2. Structure is for not custom models! -# Structure of model for one population - number of time periods -# (e.g. 5). -# Structure of model for two populations - number of time periods -# before the split of the ancestral population and after it (e.g. 2,2). -# Structure of model for three populations - number of time periods -# before first split, between first and second splits and after -# second split (e.g. 2,1,2). +# 2. Structure is for non-custom models! +# Structure of the model for one population - number of time periods +# (e.g., 5). +# Structure of the model for two populations - number of time periods +# before the split of the ancestral population and after it (e.g., 2,2). +# Structure of the model for three populations - number of time periods +# before the first split, between the first and second splits, and after +# the second split (e.g., 2,1,2). 
# -# Structure of initial model: -# Default: all is ones - 1 or 1,1 or 1,1,1 +# Initial model structure: +# Default: all ones - 1 or 1,1 or 1,1,1 Initial structure: [1, 1] -# Structure of final model: -# Default: equals to initial structure +# Final model structure: +# Default: equals the initial structure Final structure: [2, 1] #!!! -# Additional settings for demographic model with structure. +# Additional settings for demographic models with structure. # -# Use sudden changes of population sizes only. Decreases +# Use sudden changes in population sizes only. This reduces # the number of parameters. # Default: False Only sudden: False -# The set of available size dynamics could be set to any subset. -# Sud is for sudden size change (constant during the next time epoch), -# Lin - linear size change, -# Exp - exponential size change. -# If `Only sudden` is True then this setting will be [Sud]. +# The set of available size dynamics can be any subset. +# 'Sud' stands for sudden size change (constant during the next time epoch), +# 'Lin' for linear size change, +# 'Exp' for exponential size change. +# If 'Only sudden' is True, then this setting will be [Sud]. # Default: [Sud, Lin, Exp] Dynamics: [Sud, Lin, Exp] @@ -189,51 +188,51 @@ Dynamics: [Sud, Lin, Exp] # Default: False No migrations: False -# Makes all migrations symmetrical. +# Make all migrations symmetrical. # Default: False Symmetric migrations: False -# Enable/disable migrations selectively. +# Enable or disable migrations selectively. # Default: None Migration masks: Null -# Enable/disable inference of selection coefficients. -# Is supported by moments and dadi engines only. +# Enable or disable inference of selection coefficients. +# Supported only by moments and Dadi engines. # Default: False Selection: False -# Enables/disables inference of dominance coefficient. -# If True then setting `Selection` must be aldo set to True. -# Is supported by moments and dadi engines only. 
+# Enable or disable inference of the dominance coefficient. +# If True, then the 'Selection' setting must also be True. +# Supported only by moments and Dadi engines. # Default: False -Dominance: True +Dominance: False -# Estimate fraction of ancestral population as parameter of split. -# If False then population splits and each of new populations -# has its own size as parameter of the model. +# Estimate the fraction of the ancestral population as a parameter of the +# split. If False, then the population splits and each of the new +# populations has its own size as a parameter of the model. # Default: False Split fractions: True # Estimate inbreeding coefficients as model parameters. -# Can be used only for dadi engine. +# Can be used only with the Dadi engine. # Default: False Inbreeding: False -# If False then multinomial approach is used in dadi and moments. -# In multinomial approach ancestral size is inferred implicitly. +# If False, then a multinomial approach is used in Dadi and moments. +# In the multinomial approach, the ancestral size is inferred implicitly. # Default: False Ancestral size as parameter: False -# It is possible to limit the time of splits by bounds' specification. +# It is possible to limit the time of splits by specifying bounds. # Split 1 is the most ancient split. -# !Note that time is in generations: -# e.g. we want to limit by 150 kya, time for one generation is -# 25 years, then bound will be 150000 / 25 = 6000. +# !Note that time is measured in generations: +# e.g., to limit by 150 kya, if time for one generation is +# 25 years, then the bound will be 150000 / 25 = 6000. # -# Lower bound for split 1 (in case of 2 or 3 populations). +# Lower bound for split 1 (for 2 or 3 populations). # Default: None Lower bound of first split: Null -# + # Upper bound for split 1 (in case of 2 or 3 populations). 
# Default: None Upper bound of first split: Null @@ -241,37 +240,34 @@ Upper bound of first split: Null # Lower bound for split 2 (in case of 3 populations). # Default: None Lower bound of second split: Null -# + # Upper bound for split 2 (in case of 3 populations). # Default: None Upper bound of second split: Null - - - #!!! # Local optimization. # -# Choice of local optimization, that is launched after +# Choice of local optimization that is launched after # each genetic algorithm. # Choices: # # * optimize (BFGS method) # -# * optimiza_log (BFGS method) +# * optimize_log (BFGS method) # # * optimize_powell (Powell’s conjugate direction method) -# (Note: is implemented in moments: one need to have moments +# (Note: implemented in moments; one needs to have moments # installed.) # -# (If optimizations are often hitting the parameter bounds, +# (If optimizations often hit the parameter bounds, # try using these methods:) # * optimize_lbfgsb # * optimize_log_lbfgsb -# (Note that it is probably best to start with the vanilla BFGS -# methods, because the L-BFGS-B methods will always try parameter -# values at the bounds during the search. -# This can dramatically slow model fitting.) +# (Note that it is generally best to start with the vanilla BFGS +# methods, as the L-BFGS-B methods will always test parameter +# values at the bounds during the search. This can dramatically +# slow down model fitting.) # # * optimize_log_fmin (simplex (a.k.a. amoeba) method) # @@ -280,69 +276,64 @@ Upper bound of second split: Null # Default: optimize_powell Local optimizer: BFGS_log - - -# Parameters of pipeline +# Parameters of the pipeline. # # One can automatically generate dadi and moments code for models. -# If 0 then only current best model will be printed in GA's -# working directory. -# Also the result model will be saved there. -# If specified (not 0) then every N iteration model will be saved -# in python code folder. 
+# If set to 0, only the current best model will be printed in the GA's +# working directory. The resulting model will also be saved there. +# If specified (not 0), then every N iterations, the model will be saved +# in the Python code folder. # Default: 0 Print models' code every N iteration: 100 -# Engine that will draw demographic model plots. -# Could be moments or demes. -# Default: moments +# Engine that will draw demographic model plots. +# Can be moments or demes. +# Default: moments Model plot engine: moments -# One can automatically draw models every N iteration. -# If 0 then never. -# Pictures are saved in GA's directory in the picture folder. +# One can automatically draw models every N iterations. +# If set to 0, models will never be drawn. +# Pictures are saved in the GA's directory in the picture folder. # Default: 0 Draw models every N iteration: 100 -# One can choose time units in models' plots: years or thousand -# years (kya, KYA). If time for one generation isn't specified -# then time is in genetic units. +# One can choose time units in model plots: years or thousands +# of years (kya, KYA). If time for one generation is not specified, +# time will be represented in genetic units. # Default: years Units of time in drawing: generations -# Minimum value that will be drawn in SFS plots. -# Default: 1 +# Minimum value to be drawn in SFS plots. +# Default: 1 Vmin: 1 -# No std output. +# Suppresses standard output. # Default: False Silence: False -# Verbosity of optimizations output. -# Default: 1 +# Verbosity level for optimization output. +# Default: 1 Verbose: 1 -# How many times launch GADMA with this parameters. +# How many times to launch GADMA with these parameters. # Default: 1 Number of repeats: 3 -# How many processes to use for this repeats. -# Note that one repeat isn't parallelized, so increasing number +# How many processes to use for these repeats. 
+# Note that one repeat is not parallelized, so increasing the number # of processes will not affect the time of one repeat. -# It is desirable that the number of repeats is a multiple of +# It is advisable that the number of repeats is a multiple of # the number of processes. # Default: 1 Number of processes: 3 - - -# One can resume from some other launch of GADMA by setting -# output directory of it to 'Resume from' parameter. -# You can set again new parameters of resumed launch. +# One can resume from a previous GADMA run by setting +# the output directory of that run in the 'Resume from' parameter. +# New parameters for the resumed run can be set again. Resume from: Null -# -# If you want to take only models from previous run set this -# flag. Then iterations of GA will start from 0 and values of -# mutation rate and strength will be initial. + +# If you want to only take models from the previous run, set this +# flag. Iterations of the GA will then start from 0, and values for +# the mutation rate and strength will be initial. # Default: None Only models: False diff --git a/gadma/cli/example_params b/gadma/cli/example_params index e3cbb9f..e69d699 100644 --- a/gadma/cli/example_params +++ b/gadma/cli/example_params @@ -1,320 +1,339 @@ -# It is a parameter file for GADMA software. - -# Lines started from # are ignored. -# Also comments at the end of a line are ignored too. -# Every line contains: Identifier of parameter : value. -# If one wants to change some default parameters, one needs to -# remove # at the beginning of a line and change corresponding -# parameter. - -Directory with bootstrap: ../dadi/examples/YRI_CEU/bootstraps - -# Output directory to write all GADMA out. -# One needs to set it to not existing or empty directory. -# If it is resumed from other directory and output directory -# isn't set, GADMA will add '_resumed' for previous output -# directory. 
-Output directory : my_example_run - - -# One can resume from some other launch of GADMA by setting -# output directory of it to 'Resume from' parameter. -# You can set again new parameters of resumed launch. -#Resume from : another_output_dir - - -# If one wants to take only models from previous run set this -# flag. Then iterations of GA will start from 0 and values of -# mutation rate and strength will be initial. -# Default : None -Only models : None - - -# Input file can be sfs file (should end with .fs) or -# file of SNP's in dadi format (should end with .txt). -Input file : fs_examples/YRI_CEU.fs - - -# 'Population labels' is sequence of population names (the same -# names as in input file) -# If .fs file is in old format then it would rename population -# labels that are absent. -# It is necessary to put them in order from most ancient to less. -# (In case of more than two populations) -# It is important, because the last of formed populations take -# part in next split. -# For example, if we have YRI - African population, -# CEU - European population and CHB - Chinese population, -# then we can write YRI, CEU, CHB or YRI, CHB, CEU -# (YRI must be at the first place) +# This is a parameter file for GADMA software. + +# Lines that begin with # are ignored. +# Comments at the end of a line are also ignored. +# Each line contains: Parameter identifier : value. + +#!!! Indicates parameters that require special attention. + +#!!! +# Output directory for all GADMA outputs. +# This should be set to a non-existent or empty directory. +# If the process is resumed from another directory and the output +# directory is not specified, GADMA will append '_resumed' to the +# previous output directory. +Output directory: my_example_run + +#!!! +# Input data can be in the form of an SFS file (should end with .fs), +# a SNP file in Dadi format (should end with .txt), or a +# VCF file along with a popmap file (sample population map). 
+Input data: tests/test_data/DATA/sfs/YRI_CEU.fs + +# 'Population labels' consist of a sequence of population names +# (these must match the names in the input file). +# If the .fs file is in an old format, it will rename population +# labels that are absent. +# They must be ordered from the most ancient to the least ancient +# (if there are more than two populations). +# This is important because the last formed population will be +# involved in the next split. +# For example, if we have YRI (African population), +# CEU (European population), and CHB (Chinese population), +# we can write YRI, CEU, CHB or YRI, CHB, CEU +# (YRI must be in the first position). +# Default: from input file +Population labels: [YRI, CEU] + +# SFS projections: number of chromosomes for each population. +# It is also possible to downproject the spectrum to a smaller size. +# For example, if we have 40 diploid samples in each of three +# populations, then the full projections will be 80x80x80. We can +# project it by setting the 'Projections' parameter to [20, 20, 20]. +# Default: from input file +Projections: [20, 20] + +# To indicate if outgroup information is included in the SFS data, +# set the Outgroup option. +# If outgroup is False, then the SFS will be folded. # Default: from input file -Population labels : CEU, YRI # we change populations order - # (in input file YRI is first) +Outgroup: True - -# Also one can project your spectrum to less size. -# For example, we have 80 individuals in each of three -# populations, then spectrum will be 81x81x81 and one can -# project it to 21x21x21 by set 'Projections' parameter -# to 20, 20, 20. -# Default: from input file -Projections : None # will be 20, 20 - -# Are SNP's linked or unlinked? -# If they are linked, then Composite Likelihood Akaike -# Information Criterion (CLAIC) will be used to compare models. -# If they are unlinked, then usual Akaike Information Criterion -# (AIC) will be used. 
-# Default: True -Linked SNP's : True - - - - -# Now all main parameters: -# -# Total mutation flux - theta. -# It is equal to: -# theta = 4 * mu * L -# where mu - mutation rate per site per generation and -# L - effective sequenced length, which accounts for losses -# in alignment and missed calls. -# Note: one should estimate mu based on generation time. -# Default: 1.0 -Theta0 : 0.37976 # the same as in Gutenkunst et al 2009 - - -# Time (years) for one generation. Can be float. -# It is important for drawing models. If one doesn't want to draw, -# one can pass it. -# Default: 1.0 -Time for generation : 25 # the same as in Gutenkunst et al 2009 - - -# Parameters for demographic models: -# -# Use moments or dadi -# Default: moments -Use moments or dadi : moments - - -# Use multinom scheme: N_A is not parameter for search, -# it is calculated through optimal_sfs_scaling. -# Multinom scheme decrease number of parameter by one and -# is usually faster, however non multinom scheme usually -# finds better solutions. -# Default: False -Multinom : False - - -# If you choose to use dadi, please set pts parameter - number -# of points in grid -# Default: Let n = max number of individuals in one population, -# then pts = n, n+10, n+20 -#Pts : 20, 30, 40 - - -# Using a custom demographic model. -# Please, specify file with function named 'model_func' in it. -# So file should contain: -# def model_func(params, ns, pts) in case of dadi -# or -# def model_func(params, ns) in case of moments +# Effective length of the sequence used to build the SFS data. +# This should be used together with the Mutation rate and can be replaced +# by the Theta0 setting. # Default: None -Custom filename : None - -# Now one should specify either bounds or identifications -# of custom model's parameters. All values are in Nref units. -# Lower and upper bounds - lists of numbers. 
-# List of usual bounds: -# N: 1e-2 - 100 -# T: 0 - 5 -# m: 0 - 10 -# s: 0 - 1 -# This bounds will be taken automatically if identifications are set. -# Default: None -Lower bounds : None -Upper bounds : None -# An identifier list: -# T - time -# N - size of population -# m - migration -# s - split event, proportion in which population size -# is divided to form two new populations. +Sequence length: 4040000 + +#!!! +# Are the SNPs linked or unlinked? +# If they are linked, then the Composite Likelihood Akaike +# Information Criterion (CLAIC) will be used to compare models. +# If they are unlinked, then the standard Akaike Information Criterion +# (AIC) will be used. +# Default: True +Linked SNP's: True + +#!!! +# If SNPs are linked, please set the directory containing +# the bootstrapped data for CLAIC calculation. +# Bootstrapping should be done over regions of the genome. +# Default: None +Directory with bootstrap: Null + +#!!! +# Now for the main parameters: +# +# Engine for demographic inference. +# Default: moments +Engine: moments + +# If you choose to use Dadi, please set the 'pts' parameter - the number +# of points in the grid. Otherwise, you can still specify it: it will be +# used in Dadi's code. +# Default: Let n = max number of individuals in one population, +# then pts = n, n+10, n+20 +Pts: [20, 30, 40] + +#!!! +# Print parameters of the model in units of N_ref = N_A. +# N_A will be placed in brackets at the end of the string. +# Default: False +Relative parameters: False + +# Total mutation flux - theta. +# It is equal to: +# theta = 4 * μ * L +# where μ is the mutation rate per site per generation and +# L is the effective sequenced length, which accounts for losses +# in alignment and missed calls. +# Note: μ should be estimated based on generation time. +# Default: 1.0 +Theta0: Null + +# Instead of Theta0, the mutation rate can be set independently. +# This should be used together with the Sequence length option. 
# Default: None -Parameter identifiers : None - - -# Structure of model for one population - number of time periods -# (e.g. 5). -# Structure of model for two populations - number of time periods -# before split of ancestral population and after it (e.g. 2,2). -# Structure of model for three populations - number of time periods -# before first split, between first and second splits and after -# second split (e.g. 2,1,2). +Mutation rate: 2.35e-08 + +# Recombination rate. Required for momentsLD engine. +# Default: None +Recombination rate: Null + +# Time (in years) for one generation. Can be a float. +# This is important for drawing models. If you do not wish to draw, +# you may skip this parameter. +# Default: 1.0 +Time for generation: Null + +#!!! +# You must choose the demographic history to infer. +# It can be custom or set up with structure. + +# 1. Using a custom demographic model. +# Please specify a file containing a function named 'model_func'. +# The file should contain: +# def model_func(params, ns, pts) for Dadi +# or +# def model_func(params, ns) for moments +# Default: None +Custom filename: Null + +# You must now specify either bounds or identifiers for the +# parameters of the custom model. All values are in genetic units. +# Lower and upper bounds are lists of numbers. +# Common bounds: +# N: 1e-2 - 100 +# T: 0 - 5 +# m: 0 - 10 +# s: 0 - 1 +# These bounds will be applied automatically if identifiers are set. +# Default: None +Lower bound: Null +Upper bound: Null + +# An identifier list: +# T - time +# N - size of population +# m - migration +# s - split event, representing the proportion of population size +# divided to form two new populations. +# Default: None +Parameter identifiers: Null + +# 2. Structure is for non-custom models! +# Structure of the model for one population - number of time periods +# (e.g., 5). 
+# Structure of the model for two populations - number of time periods +# before the split of the ancestral population and after it (e.g., 2,2). +# Structure of the model for three populations - number of time periods +# before the first split, between the first and second splits, and after +# the second split (e.g., 2,1,2). # -# Structure of initial model: -# Default: all is ones - 1 or 1,1 or 1,1,1 -Initial structure : 1,1 - - -# Structure of final model: -# Default: equals to initial structure -Final structure : 2,2 +# Initial model structure: +# Default: all ones - 1 or 1,1 or 1,1,1 +Initial structure: [1, 1] +# Final model structure: +# Default: equals the initial structure +Final structure: [2, 1] -# It is possible to limit time of splits. -# Split 1 is the most ancient split. -# !Note that time is in genetic units (2 * time for 1 generation): -# e.g. we want to limit by 150 kya, time for one generation is -# 25 years, then bound will be 150000 / (2*25) = 3000. +#!!! +# Additional settings for demographic models with structure. # -# Upper bound for split 1 (in case of 2 or 3 populations). -# Default: None -#Upper bound of first split : None - +# Use sudden changes in population sizes only. This reduces +# the number of parameters. +# Default: False +Only sudden: False + +# The set of available size dynamics can be any subset. +# 'Sud' stands for sudden size change (constant during the next time epoch), +# 'Lin' for linear size change, +# 'Exp' for exponential size change. +# If 'Only sudden' is True, then this setting will be [Sud]. +# Default: [Sud, Lin, Exp] +Dynamics: [Sud, Lin, Exp] + +# Disable migrations in demographic models. +# Default: False +No migrations: False + +# Make all migrations symmetrical. +# Default: False +Symmetric migrations: False -# Upper bound for split 2 (in case of 3 populations). +# Enable or disable migrations selectively. 
# Default: None -#Upper bound of second split : None - - -# Print parameters of model in units of N_ref = N_A. -# N_A will be placed in brackets at the end of string. +Migration masks: Null + +# Enable or disable inference of selection coefficients. +# Supported only by moments and Dadi engines. +# Default: False +Selection: False + +# Enable or disable inference of the dominance coefficient. +# If True, then the 'Selection' setting must also be True. +# Supported only by moments and Dadi engines. +# Default: False +Dominance: False + +# Estimate the fraction of the ancestral population as a parameter of the +# split. If False, then the population splits and each of the new +# populations has its own size as a parameter of the model. # Default: False -Relative parameters : False - +Split fractions: True -# Disable migrations in demographic models. +# Estimate inbreeding coefficients as model parameters. +# Can be used only with the Dadi engine. # Default: False -No migrations : false - - - - -# Parameters for Genetic Algorithm. -# -# Size of population of demographic models in GA: -# Default: 10 -Size of population in GA : 10 - - -# Fractions of current models, mutated models and crossed models -# to be taken to new population. -# Sum of fractions should be <= 1, the remaining fraction is -# fraction of random models. -# Default: 0.2,0.3,0.3 -#Fractions in GA : 0.2,0.3,0.3 +Inbreeding: False +# If False, then a multinomial approach is used in Dadi and moments. +# In the multinomial approach, the ancestral size is inferred implicitly. +# Default: False +Ancestral size as parameter: False -# Mutation strength - fraction of parameters in model to mutate -# during global mutation process of model. -# Number of parameters to mutate is sampled from binomial -# distribution, so we need to set mean. -# Default: 0.2 -Mean mutation strength : 0.3 -# -# Mutation strength can be adaptive: if mutation is good, -# i.e. 
has the best fitness function (log likelihood), -# then mutation strength is increased multiplying by const -# otherwise it decreases dividing by (1/4)^const. -# When const is 1.0 it is not adaptive. -# Default: 1.0 -Const for mutation strength : 1.05 - - -# Mutation rate - fraction of any parameter to change during -# its mutation. -# Mutation rate is sampled from truncated normal distribution, -# so we need mean (std can be specified in extra_params). -# Default 0.2 -Mean mutation rate : 0.1 +# It is possible to limit the time of splits by specifying bounds. +# Split 1 is the most ancient split. +# !Note that time is measured in generations: +# e.g., to limit by 150 kya, if time for one generation is +# 25 years, then the bound will be 150000 / 25 = 6000. # -# Mutation rate also can be adaptive as mutation strength. -# Default: 1.02 -Const for mutation rate : 1.01 #very small changes +# Lower bound for split 1 (for 2 or 3 populations). +# Default: None +Lower bound of first split: Null +# Upper bound for split 1 (in case of 2 or 3 populations). +# Default: None +Upper bound of first split: Null -# Genetic algorithm stops when it couldn't improve model by -# more that epsilon in logLL -# Default: 1e-2 -Epsilon : 1e-2 -# -# and it happens during N iterations: -# Default: 100 -Stop iteration : 50 +# Lower bound for split 2 (in case of 3 populations). +# Default: None +Lower bound of second split: Null +# Upper bound for split 2 (in case of 3 populations). +# Default: None +Upper bound of second split: Null -# Local optimization. +#!!! +# Local optimization. # -# Choice of local optimization, that is launched after -# each genetic algorithm. -# Choices: +# Choice of local optimization that is launched after +# each genetic algorithm. +# Choices: # -# * optimize (BFGS method) -# -# * optimiza_log (BFGS method) -# -# * optimize_powell (Powell's conjugate direction method) -# (Note: is implemented in moments: one need to have moments -# installed.) 
+# * optimize (BFGS method) +# +# * optimize_log (BFGS method) +# +# * optimize_powell (Powell’s conjugate direction method) +# (Note: implemented in moments; one needs to have moments +# installed.) # -# (If optimizations are often hitting the parameter bounds, -# try using these methods:) -# * optimize_lbfgsb -# * optimize_log_lbfgsb -# (Note that it is probably best to start with the vanilla BFGS -# methods, because the L-BFGS-B methods will always try parameter -# values at the bounds during the search. -# This can dramatically slow model fitting.) +# (If optimizations often hit the parameter bounds, +# try using these methods:) +# * optimize_lbfgsb +# * optimize_log_lbfgsb +# (Note that it is generally best to start with the vanilla BFGS +# methods, as the L-BFGS-B methods will always test parameter +# values at the bounds during the search. This can dramatically +# slow down model fitting.) # -# * optimize_log_fmin (simplex (a.k.a. amoeba) method) -# -# * hill_climbing -# -# Default: optimize_powell -Name of local optimization : optimize_log - -# Parameters of pipeline +# * optimize_log_fmin (simplex (a.k.a. amoeba) method) +# +# * hill_climbing +# +# Default: optimize_powell +Local optimizer: BFGS_log + +# Parameters of the pipeline. # -# One can automatically draw models every N iteration. -# If 0 then never. -# Pictures are saved in GA's directory in picture folder. -# Default: 0 -Draw models every N iteration : 100 - - -# One can automatically generate dadi and moments code for models. -# If 0 then only current best model will be printed in GA's -# working directory. -# Also result model will be saved there. -# If specified (not 0) then every N iteration model will be saved -# in python code folder. -# Default: 0 -Print models' code every N iteration : 100 - - -# One can choose time units in models' plots: years or thousand -# years (kya, KYA). If time for one generation isn't specified -# then time is in genetic units. 
-# Default: years -Units of time in drawing : thousand years - - -# No std output. -# Default: False -Silence : False - - -# How many times launch GADMA with this parameters. -# Default: 1 -Number of repeats : 3 - - -# How many processes to use for this repeats. -# Note that one repeat isn't parallelized, so increasing number -# of processes doesn't effect on time of one repeat. -# It is desirable that the number of repeats is a multiple of -# the number of processes. -# Default: 1 -Number of processes : 3 +# One can automatically generate dadi and moments code for models. +# If set to 0, only the current best model will be printed in the GA's +# working directory. The resulting model will also be saved there. +# If specified (not 0), then every N iterations, the model will be saved +# in the Python code folder. +# Default: 0 +Print models' code every N iteration: 100 + +# Engine that will draw demographic model plots. +# Can be moments or demes. +# Default: moments +Model plot engine: moments + +# One can automatically draw models every N iterations. +# If set to 0, models will never be drawn. +# Pictures are saved in the GA's directory in the picture folder. +# Default: 0 +Draw models every N iteration: 100 + +# One can choose time units in model plots: years or thousands +# of years (kya, KYA). If time for one generation is not specified, +# time will be represented in genetic units. +# Default: years +Units of time in drawing: generations + +# Minimum value to be drawn in SFS plots. +# Default: 1 +Vmin: 1 + +# Suppresses standard output. +# Default: False +Silence: False + +# Verbosity level for optimization output. +# Default: 1 +Verbose: 1 + +# How many times to launch GADMA with these parameters. +# Default: 1 +Number of repeats: 3 + +# How many processes to use for these repeats. +# Note that one repeat is not parallelized, so increasing the number +# of processes will not affect the time of one repeat. 
+# It is advisable that the number of repeats is a multiple of +# the number of processes. +# Default: 1 +Number of processes: 3 + +# One can resume from a previous GADMA run by setting +# the output directory of that run in the 'Resume from' parameter. +# New parameters for the resumed run can be set again. +Resume from: Null + +# If you want to only take models from the previous run, set this +# flag. Iterations of the GA will then start from 0, and values for +# the mutation rate and strength will be initial. +# Default: None +Only models: False diff --git a/gadma/cli/extra_params_template b/gadma/cli/extra_params_template index 048a7ae..cb4d4a7 100644 --- a/gadma/cli/extra_params_template +++ b/gadma/cli/extra_params_template @@ -1,19 +1,15 @@ -# It is extra parameters file for GADMA software. - -# Lines started from # are ignored. -# Also comments at the end of a line are ignored too. -# Every line contains: Identificator of parameter : value. -# If you want to change some default parameters, you need to -# remove # at the begining of line and change corresponding -# parameter. - - -# Lower and upper bounds on N - sizes of populations, -# T - time of time periods, M - migration rates. -# All bounds are relative to N_A: N is in N_A units, -# T in 2 * N_A units and M in 1 / (2 * N_A). -# Default: min_N = 0.01, max_N = 100.0, min_T = 0.0, -# max_T = 5.0, min_M = 0.0, max_M = 10.0 +# This is the extra parameters file for the GADMA software. + +# Lines that begin with # are ignored. +# Comments at the end of a line are also ignored. +# Each line contains: Parameter identifier : value. + +# Lower and upper bounds on N - population sizes, +# T - time of time periods, M - migration rates. +# All bounds are relative to N_A: N is in N_A units, +# T in 2 * N_A units, and M in 1 / (2 * N_A). 
+# Default: min_N = 0.01, max_N = 100.0, min_T = 0.0, +# max_T = 5.0, min_M = 0.0, max_M = 10.0 min_N : max_N : min_T : @@ -22,134 +18,134 @@ min_M : max_M : -# Parameters for Genetic Algorithm. +# Parameters for the Genetic Algorithm. # -# Constant for initial design. Number of solutions in initial -# design is equal to number of parameters multiplied by this constant. -# Default: 10 +# Constant for initial design. The number of solutions in the initial +# design is equal to the number of parameters multiplied by this constant. +# Default: 10 Num init const : -# Size of one generation of solutions in GA: -# Default: 10 +# Size of one generation of solutions in the GA: +# Default: 10 Size of generation : -# Fractions of current models, mutated models and crossed models -# to be taken to new population. -# Sum of fractions should be <= 1, the remaining fraction is -# fraction of random models. -# Default: 0.3,0.2,0.3 +# Fractions of current models, mutated models, and crossed models +# to be taken to the new population. +# The sum of the fractions should be <= 1, and the remaining fraction is +# for random models. +# Default: 0.3, 0.2, 0.3 Fractions : -# The fractions could be set independently here. -# Number of best solutions taken from the previous generation. -# Default: 3 +# The fractions can be set independently here. +# Number of the best solutions taken from the previous generation. +# Default: 3 N elitism : -# Fraction of mutated solutions in new generation. -# Default: 0.2 +# Fraction of mutated solutions in the new generation. +# Default: 0.2 P mutation : -# Fraction of crossed solutions in new generation. -# Default: 0.3 +# Fraction of crossed solutions in the new generation. +# Default: 0.3 P crossover : -# Fraction of randomly generated soluations in new generation. -# Default: 0.2 +# Fraction of randomly generated solutions in the new generation. 
+# Default: 0.2 P random : -# Mutation strength - fraction of parameters in model to mutate -# during global mutation process of model. -# Number of parameters to mutate is sampled from binomial -# distribution, so we need to set mean. -# Default: 0.775539 +# Mutation strength - the fraction of parameters in a model to mutate +# during the global mutation process. +# The number of parameters to mutate is sampled from a binomial +# distribution, so we need to set the mean. +# Default: 0.775539 Mean mutation strength : # -# Mutation strength can be adaptive: if mutation is good, -# i.e. has the best fitness function (log likelihood), -# then mutation strength is increased multiplying by const -# otherwise it decreases dividing by (1/4)^const. -# When const is 1.0 it is not adaptive. -# Default: 1.302280 +# Mutation strength can be adaptive: if the mutation is successful (i.e., +# it yields the best fitness value, log likelihood), the mutation strength +# is increased by multiplying it by a constant; otherwise, it is decreased by +# dividing it by constant^(1/4). +# When the constant is 1.0, it is not adaptive. +# Default: 1.302280 Const for mutation strength : -# Mutation rate - fraction of any parameter to change during -# its mutation. -# Mutation rate is sampled from truncated normal distribution, -# so we need mean (std can be specified in extra params). -# Default 0.273263 +# Mutation rate - the fraction of any parameter to change during +# its mutation. +# The mutation rate is sampled from a truncated normal distribution, +# so we need the mean (standard deviation can be specified in extra params). +# Default: 0.273263 Mean mutation rate : # -# Mutation rate also can be adaptive as mutation strength. -# Default: 1.475288 +# Mutation rate can also be adaptive, like mutation strength. 
+# Default: 1.475288 Const for mutation rate : -# Genetic algorithm stops when it couldn't improve model by -# more that epsilon in logLL -# Default: 1e-2 +# The genetic algorithm stops when it is unable to improve the model +# by more than epsilon in log likelihood. +# Default: 1e-2 Eps : -# -# and it happens during N iterations: -# Default: 100 +# +# Number of iterations (N) without such an improvement before stopping: +# Default: 100 Stuck generation number : -# Parameters for output of optimizations algorithms +# Parameters for output from optimization algorithms. -# If > 0, print optimization status every steps. -# Default: 1 +# If set to N > 0, print the optimization status every N steps. +# Default: 1 Verbose : -# Parameter for drawn sfs plots. vmin - minimum value plotted for sfs. -# Default: 1 or None if all values of fs are <= 1.0 +# Parameter for drawn SFS plots. vmin - minimum value plotted for SFS. +# Default: 1 or None if all values of SFS are <= 1.0 Vmin : -# Options of mutation, crossing and random generating. +# Options for mutation, crossing, and random generation. -# To get random model we sample number between bounds (min_N, -# max_N, min_T, etc.) and then scale them with -# optimal_sfs_scaling. We can choose N_A random between min_N -# and max_N or we can always take it as 1.0. -# Default: True +# To get a random model, we sample numbers between bounds (min_N, +# max_N, min_T, etc.) and then scale them with optimal_sfs_scaling. +# We can choose N_A randomly between min_N and max_N or always take +# it as 1.0. +# Default: True Random N_A : -# Options of printing summary information about repeats -# in parent process. -# How often print all best by likelihood and by BIC models. -# Default: 1 (minute) +# Options for printing summary information about repeats +# in the parent process. +# How often to print all the best models by likelihood and by BIC. +# Default: 1 (minute) Time to print summary : -# Global parameters for optimizations (global and local). 
+# Global parameters for optimization (both global and local). # -# Global optimizer name -# Default: Genetic_algorithm +# Global optimizer name. +# Default: Genetic_algorithm Global optimizer : -# If True then x will be transformed by logarithm. -# In our case that means that population sizes and times -# will be inferred in log-scale. +# If True, x will be transformed by the logarithm. +# In our case, that means population sizes and times will be inferred +# in log-scale. # -# For global optimization -# Default: True +# For global optimization: +# Default: True Global log transform : -# For local optimization -# Default: True +# For local optimization: +# Default: True Local log transform : -# Maximum number of log-likelihood evalutions -# In global optimization -# Default: None +# Maximum number of log-likelihood evaluations for +# global optimization. +# Default: None Global maxeval : -# In local optimization -# Default: None +# For local optimization: +# Default: None Local maxeval : -# Maximum number of algorithm iterations -# In global optimization -# Default: None +# Maximum number of algorithm iterations for +# global optimization. +# Default: None Global maxiter : -# In local optimization -# Default: None +# For local optimization: +# Default: None Local maxiter : diff --git a/gadma/cli/params_template b/gadma/cli/params_template index 67091d5..1ccbf1e 100644 --- a/gadma/cli/params_template +++ b/gadma/cli/params_template @@ -1,187 +1,186 @@ -# It is a parameters file for GADMA software. +# This is a parameters file for GADMA software. -# Lines started from # are ignored. -# Also comments at the end of a line are ignored too. -# Every line contains: Identificator of parameter : value. +# Lines that begin with # are ignored. +# Comments at the end of a line are also ignored. +# Each line contains: Parameter identifier : value. -#!!! means pay attention to this parameter, they are primary. +#!!! Indicates parameters that require special attention. #!!! 
-# Output directory to write all GADMA out. -# One need to set it to a missing or empty directory. -# If it is resumed from other directory and output directory -# isn't set, GADMA will add '_resumed' for previous output -# directory. +# Output directory for all GADMA outputs. +# This should be set to a missing or empty directory. +# If the process is resumed from another directory and the output +# directory is not specified, GADMA will append '_resumed' to the +# previous output directory. Output directory : - #!!! -# Input data can be sfs file (should end with .fs) or -# file of SNP's in dadi format (should end with .txt) or -# VCF file + popmap file (sample population map). +# Input data can be in the form of an SFS file (should end with .fs), +# a SNP file in Dadi format (should end with .txt), or a +# VCF file along with a popmap file (sample population map). Input data : -# 'Population labels' is sequence of population names (the same -# names as in input file) -# If .fs file is in old format then it would rename population +# 'Population labels' consist of a sequence of population names +# (these must match the names in the input file). +# If the .fs file is in an old format, it will rename population # labels that are absent. -# It is necessary to put them in order from most ancient to less. -# (In case of more than two populations) -# It is important, because the last of formed populations take -# part in the next split. -# For example, if we have YRI - African population, -# CEU - European population and CHB - Chinese population, -# then we can write YRI, CEU, CHB or YRI, CHB, CEU -# (YRI must be at the first place) +# They must be ordered from the most ancient to the least ancient +# (if there are more than two populations). +# This is important because the last formed population will be +# involved in the next split. 
+# For example, if we have YRI (African population), +# CEU (European population), and CHB (Chinese population), +# we can write YRI, CEU, CHB or YRI, CHB, CEU +# (YRI must be in the first position). # Default: from input file Population labels : -# Also one can project spectrum to less size. -# For example, we have 80 individuals in each of three -# populations, then spectrum will be 81x81x81 and one can -# project it to 21x21x21 by set 'Projections' parameter -# to 20, 20, 20. +# SFS projections: number of chromosomes for each population. +# It is also possible to downproject the spectrum to a smaller size. +# For example, if we have 40 diploid samples in each of three +# populations, then the full projections will be 80x80x80. We can +# project it by setting the 'Projections' parameter to [20, 20, 20]. # Default: from input file Projections : -# To indicate if outgroup information is included in SFS data -# one can set Outgroup option. -# If outgroup is False then SFS will be folded. +# To indicate if outgroup information is included in the SFS data, +# set the Outgroup option. +# If outgroup is False, then the SFS will be folded. # Default: from input file Outgroup : -# Effective length of sequence that was used to build SFS data. -# Should be used together with Mutation rate and can be replaced -# be Thera0 setting. +# Effective length of the sequence used to build the SFS data. +# This should be used together with the Mutation rate and can be replaced +# by the Theta0 setting. # Default: None Sequence length : #!!! -# Are SNP's linked or unlinked? -# If they are linked, then Composite Likelihood Akaike +# Are the SNPs linked or unlinked? +# If they are linked, then the Composite Likelihood Akaike # Information Criterion (CLAIC) will be used to compare models. -# If they are unlinked, then usual Akaike Information Criterion +# If they are unlinked, then the standard Akaike Information Criterion # (AIC) will be used. # Default: True Linked SNP's : #!!! 
-# If SNP's are linked in order to calculate CLAIC, please, set -# the directory with bootstrapped data. -# Bootstrap should be done over the regions of the genome. +# If SNPs are linked, please set the directory containing +# the bootstrapped data for CLAIC calculation. +# Bootstrapping should be done over regions of the genome. # Default: None Directory with bootstrap: - #!!! -# Now all main parameters: +# Now for the main parameters: # -# Engine for the demographic inference. +# Engine for demographic inference. # Default: moments Engine : -# If you choose to use dadi, please set pts parameter - number -# of points in the grid. Otherwise this pts would be used in dadi's code. +# If you choose to use Dadi, please set the 'pts' parameter - the number +# of points in the grid. Otherwise, you can still specify it: it will be +# used in Dadi's code. # Default: Let n = max number of individuals in one population, # then pts = n, n+10, n+20 Pts : #!!! -# Print parameters of model in units of N_ref = N_A. -# N_A will be placed in brackets at the end of string. +# Print parameters of the model in units of N_ref = N_A. +# N_A will be placed in brackets at the end of the string. # Default: False Relative parameters : # Total mutation flux - theta. # It is equal to: # theta = 4 * μ * L -# where μ - mutation rate per site per generation and -# L - effective sequenced length, which accounts for losses +# where μ is the mutation rate per site per generation and +# L is the effective sequenced length, which accounts for losses # in alignment and missed calls. -# Note: one should estimate μ based on generation time. +# Note: μ should be estimated based on generation time. # Default: 1.0 Theta0 : -# Instead of Theta0 mutation rate can be set independently. -# Should be used together with Sequence length option. +# Instead of Theta0, the mutation rate can be set independently. +# This should be used together with the Sequence length option. 
# Default: None Mutation rate : -# Recombination rate +# Recombination rate. Required for momentsLD engine. # Default: None Recombination rate : -# Time (years) for one generation. Can be float. -# Is important for drawing models. If one don't want to draw, -# one can pass it. +# Time (in years) for one generation. Can be a float. +# This is important for drawing models. If you do not wish to draw, +# you may skip this parameter. # Default: 1.0 Time for generation : - #!!! -# One should choose the demographic history to infer. -# It can be custom or setted up with structure. +# You must choose the demographic history to infer. +# It can be custom or set up with structure. # 1. Using a custom demographic model. -# Please specify a file with a function named 'model_func' in it. -# So file should contain: -# def model_func(params, ns, pts) in case of dadi +# Please specify a file containing a function named 'model_func'. +# The file should contain: +# def model_func(params, ns, pts) for Dadi # or -# def model_func(params, ns) in case of moments +# def model_func(params, ns) for moments # Default: None Custom filename : -# Now one should specify either bounds or identifications -# of custom model's parameters. All values are in Nref units. -# Lower and upper bounds - lists of numbers. -# List of usual bounds: +# You must now specify either bounds or identifiers for the +# parameters of the custom model. All values are in genetic units. +# Lower and upper bounds are lists of numbers. +# Common bounds: # N: 1e-2 - 100 # T: 0 - 5 # m: 0 - 10 # s: 0 - 1 -# These bounds will be taken automatically if identifications are set. +# These bounds will be applied automatically if identifiers are set. # Default: None Lower bound : Upper bound : + # An identifier list: # T - time # N - size of population # m - migration -# s - split event, proportion in which population size -# is divided to form two new populations. 
+# s - split event, representing the proportion of population size +# divided to form two new populations. # Default: None Parameter identifiers : -# 2. Structure is for not custom models! -# Structure of model for one population - number of time periods -# (e.g. 5). -# Structure of model for two populations - number of time periods -# before the split of the ancestral population and after it (e.g. 2,2). -# Structure of model for three populations - number of time periods -# before first split, between first and second splits and after -# second split (e.g. 2,1,2). +# 2. Structure is for non-custom models! +# Structure of the model for one population - number of time periods +# (e.g., 5). +# Structure of the model for two populations - number of time periods +# before the split of the ancestral population and after it (e.g., 2,2). +# Structure of the model for three populations - number of time periods +# before the first split, between the first and second splits, and after +# the second split (e.g., 2,1,2). # -# Structure of initial model: -# Default: all is ones - 1 or 1,1 or 1,1,1 +# Initial model structure: +# Default: all ones - 1 or 1,1 or 1,1,1 Initial structure : -# Structure of final model: -# Default: equals to initial structure +# Final model structure: +# Default: equals the initial structure Final structure : #!!! -# Additional settings for demographic model with structure. +# Additional settings for demographic models with structure. # -# Use sudden changes of population sizes only. Decreases +# Use sudden changes in population sizes only. This reduces # the number of parameters. # Default: False Only sudden : -# The set of available size dynamics could be set to any subset. -# Sud is for sudden size change (constant during the next time epoch), -# Lin - linear size change, -# Exp - exponential size change. -# If `Only sudden` is True then this setting will be [Sud]. +# The set of available size dynamics can be any subset. 
+# 'Sud' stands for sudden size change (constant during the next time epoch), +# 'Lin' for linear size change, +# 'Exp' for exponential size change. +# If 'Only sudden' is True, then this setting will be [Sud]. # Default: [Sud, Lin, Exp] Dynamics: @@ -189,51 +188,51 @@ Dynamics: # Default: False No migrations : -# Makes all migrations symmetrical. +# Make all migrations symmetrical. # Default: False Symmetric migrations : -# Enable/disable migrations selectively. +# Enable or disable migrations selectively. # Default: None Migration masks : -# Enable/disable inference of selection coefficients. -# Is supported by moments and dadi engines only. +# Enable or disable inference of selection coefficients. +# Supported only by moments and Dadi engines. # Default: False Selection: -# Enables/disables inference of dominance coefficient. -# If True then setting `Selection` must be aldo set to True. -# Is supported by moments and dadi engines only. +# Enable or disable inference of the dominance coefficient. +# If True, then the 'Selection' setting must also be True. +# Supported only by moments and Dadi engines. # Default: False Dominance: -# Estimate fraction of ancestral population as parameter of split. -# If False then population splits and each of new populations -# has its own size as parameter of the model. +# Estimate the fraction of the ancestral population as a parameter of the +# split. If False, then the population splits and each of the new +# populations has its own size as a parameter of the model. # Default: False Split fractions : # Estimate inbreeding coefficients as model parameters. -# Can be used only for dadi engine. +# Can be used only with the Dadi engine. # Default: False Inbreeding : -# If False then multinomial approach is used in dadi and moments. -# In multinomial approach ancestral size is inferred implicitly. +# If False, then a multinomial approach is used in Dadi and moments. 
+# In the multinomial approach, the ancestral size is inferred implicitly. # Default: False Ancestral size as parameter : -# It is possible to limit the time of splits by bounds' specification. +# It is possible to limit the time of splits by specifying bounds. # Split 1 is the most ancient split. -# !Note that time is in generations: -# e.g. we want to limit by 150 kya, time for one generation is -# 25 years, then bound will be 150000 / 25 = 6000. +# !Note that time is measured in generations: +# e.g., to limit by 150 kya, if time for one generation is +# 25 years, then the bound will be 150000 / 25 = 6000. # -# Lower bound for split 1 (in case of 2 or 3 populations). +# Lower bound for split 1 (for 2 or 3 populations). # Default: None Lower bound of first split : -# + # Upper bound for split 1 (in case of 2 or 3 populations). # Default: None Upper bound of first split : @@ -241,37 +240,34 @@ Upper bound of first split : # Lower bound for split 2 (in case of 3 populations). # Default: None Lower bound of second split : -# + # Upper bound for split 2 (in case of 3 populations). # Default: None Upper bound of second split : - - - #!!! # Local optimization. # -# Choice of local optimization, that is launched after +# Choice of local optimization that is launched after # each genetic algorithm. # Choices: # # * optimize (BFGS method) # -# * optimiza_log (BFGS method) +# * optimize_log (BFGS method) # # * optimize_powell (Powell’s conjugate direction method) -# (Note: is implemented in moments: one need to have moments +# (Note: implemented in moments; one needs to have moments # installed.) # -# (If optimizations are often hitting the parameter bounds, +# (If optimizations often hit the parameter bounds, # try using these methods:) # * optimize_lbfgsb # * optimize_log_lbfgsb -# (Note that it is probably best to start with the vanilla BFGS -# methods, because the L-BFGS-B methods will always try parameter -# values at the bounds during the search. 
-# This can dramatically slow model fitting.) +# (Note that it is generally best to start with the vanilla BFGS +# methods, as the L-BFGS-B methods will always test parameter +# values at the bounds during the search. This can dramatically +# slow down model fitting.) # # * optimize_log_fmin (simplex (a.k.a. amoeba) method) # @@ -280,69 +276,64 @@ Upper bound of second split : # Default: optimize_powell Local optimizer : - - -# Parameters of pipeline +# Parameters of the pipeline. # # One can automatically generate dadi and moments code for models. -# If 0 then only current best model will be printed in GA's -# working directory. -# Also the result model will be saved there. -# If specified (not 0) then every N iteration model will be saved -# in python code folder. +# If set to 0, only the current best model will be printed in the GA's +# working directory. The resulting model will also be saved there. +# If specified (not 0), then every N iterations, the model will be saved +# in the Python code folder. # Default: 0 Print models' code every N iteration : -# Engine that will draw demographic model plots. -# Could be moments or demes. -# Default: moments +# Engine that will draw demographic model plots. +# Can be moments or demes. +# Default: moments Model plot engine : -# One can automatically draw models every N iteration. -# If 0 then never. -# Pictures are saved in GA's directory in the picture folder. +# One can automatically draw models every N iterations. +# If set to 0, models will never be drawn. +# Pictures are saved in the GA's directory in the picture folder. # Default: 0 Draw models every N iteration : -# One can choose time units in models' plots: years or thousand -# years (kya, KYA). If time for one generation isn't specified -# then time is in genetic units. +# One can choose time units in model plots: years or thousands +# of years (kya, KYA). If time for one generation is not specified, +# time will be represented in genetic units. 
# Default: years Units of time in drawing : -# Minimum value that will be drawn in SFS plots. -# Default: 1 +# Minimum value to be drawn in SFS plots. +# Default: 1 Vmin : -# No std output. +# Suppresses standard output. # Default: False Silence : -# Verbosity of optimizations output. -# Default: 1 +# Verbosity level for optimization output. +# Default: 1 Verbose : -# How many times launch GADMA with this parameters. +# How many times to launch GADMA with these parameters. # Default: 1 Number of repeats : -# How many processes to use for this repeats. -# Note that one repeat isn't parallelized, so increasing number +# How many processes to use for these repeats. +# Note that one repeat is not parallelized, so increasing the number # of processes will not affect the time of one repeat. -# It is desirable that the number of repeats is a multiple of +# It is advisable that the number of repeats is a multiple of # the number of processes. # Default: 1 Number of processes : - - -# One can resume from some other launch of GADMA by setting -# output directory of it to 'Resume from' parameter. -# You can set again new parameters of resumed launch. +# One can resume from a previous GADMA run by setting +# the output directory of that run in the 'Resume from' parameter. +# New parameters for the resumed run can be set again. Resume from : -# -# If you want to take only models from previous run set this -# flag. Then iterations of GA will start from 0 and values of -# mutation rate and strength will be initial. + +# If you want to only take models from the previous run, set this +# flag. Iterations of the GA will then start from 0, and values for +# the mutation rate and strength will be initial. # Default: None Only models :