diff --git a/docs/output files.rst b/docs/output files.rst index 07d541c5..4be43dea 100644 --- a/docs/output files.rst +++ b/docs/output files.rst @@ -240,7 +240,7 @@ The output includes the estimated intercept and covariate coefficients for the r effect estimates. This file has three columns: the first gives the name of the regression coefficient -(i.e. direct_effect, population_effect, etc.), the second gives the corresponding regression coefficient, +(i.e. proband, for proband PGS; parental, for parental PGS; etc.), the second gives the corresponding regression coefficient, and the third gives the standard error. For example, in the two-generation analysis, if your PGS file has columns proband, paternal, maternal, then the effects file contains the estimate of the direct effect diff --git a/docs/simulation.rst b/docs/simulation.rst index 0342011c..c5ac3715 100644 --- a/docs/simulation.rst +++ b/docs/simulation.rst @@ -68,8 +68,12 @@ To estimate direct effect and average NTC of the PGS, use the following command: ``pgs.py direct --pgs direct.pgs.txt --phenofile phenotype.txt`` This will output a population effect estimate (1 generation model) to direct.1.effects.txt, and -direct effect and average NTC estimates to (2 generation model) to direct.2.effects.txt. The -sampling variance-covariance matrix of the estimates is output to direct.1.vcov.txt (for the 1 generation model) and +direct effect and average NTC estimates to (2 generation model) to direct.2.effects.txt. The +population and direct effect estimates are the coefficients on the proband PGS in the 1 and 2 +generation models, so are indicated by the 'proband' row. The average NTC estimate is the +coefficient on the parental PGS in the two-generation model. The first column gives the name +of the covariate/PGS column, the second column gives the estimated regression coefficient, +and the third column gives the standard error of the estimate. The sampling variance-covariance matrix of the estimates is output to direct.1.vcov.txt (for the 1 generation model) and direct.2.vcov.txt (for the 2 generation model). As we are using the true direct effects as weights, the PGS captures all of the heritability, diff --git a/snipar/pgs.py b/snipar/pgs.py index ffab25ef..6d18d972 100644 --- a/snipar/pgs.py +++ b/snipar/pgs.py @@ -585,15 +585,15 @@ def fit_pgs_model(y, pg, ngen, ibdrel_path=None, covariates=None, fit_sib=False, ## Fit model if ngen==1: print('Fitting 1 generation model (proband only)') - alpha, alpha_cols = make_and_fit_model(y, pg, ['population_effect'], ibdrel_path=ibdrel_path, covariates=covariates, sparse_thresh=sparse_thresh) + alpha, alpha_cols = make_and_fit_model(y, pg, ['proband'], ibdrel_path=ibdrel_path, covariates=covariates, sparse_thresh=sparse_thresh) elif ngen==2 or ngen==3: if fit_sib: if 'sib' in pg.sid: - pg_cols = ['direct_effect','sib_indirect'] + pg_cols = ['proband','sibling'] else: raise(ValueError('Sibling PGS not found (use --fit_sib when calculating PGS)')) else: - pg_cols = ['direct_effect'] + pg_cols = ['proband'] if parsum: if 'maternal' in pg.sid and 'paternal' in pg.sid: parcols = np.sort(np.array([np.where(pg.sid=='maternal')[0][0],np.where(pg.sid=='paternal')[0][0]])) @@ -607,9 +607,9 @@ def fit_pgs_model(y, pg, ngen, ibdrel_path=None, covariates=None, fit_sib=False, pass else: raise(ValueError('Maternal and paternal PGS not found so cannot sum (--parsum option given)')) - pg_cols.append('avg_parental_NTC') + pg_cols.append('parental') else: - pg_cols += ['paternal_NTC','maternal_NTC'] + pg_cols += ['paternal','maternal'] if ngen==2: print('Fitting 2 generation model (proband and observed/imputed parents)') alpha, alpha_cols = make_and_fit_model(y, pg, pg_cols, ibdrel_path=ibdrel_path, covariates=covariates, sparse_thresh=sparse_thresh) diff --git a/snipar/scripts/simulate.py b/snipar/scripts/simulate.py index 7a2ebfc1..49d6f462 100644 --- a/snipar/scripts/simulate.py +++ b/snipar/scripts/simulate.py @@ -1,4 +1,14 @@ #!/usr/bin/env python +"""Simulates genotype-phenotype data using forward simulation. Phenotypes can be affected +by direct genetic effects, indirect genetic effects (vertical transmission), and assortative mating. + +Args: +@parser@ + +Results: + genotype data in .bed format; full pedigree including phenotype and genetic components for all generations + +""" import numpy as np import h5py, argparse from snipar.ibd import write_segs_from_matrix @@ -30,7 +40,7 @@ parser.add_argument('--save_par_gts',action='store_true',help='Save the genotypes of the parents of the final generation',default=False) parser.add_argument('--impute',action='store_true',help='Impute parental genotypes from phased sibling genotypes & IBD',default=False) parser.add_argument('--unphased_impute',action='store_true',help='Impute parental genotypes from unphased sibling genotypes & IBD',default=False) - +__doc__ = __doc__.replace("@parser@", get_parser_doc(parser)) def main(args): """"Calling this function with args is equivalent to running this script from commandline with the same arguments. Args: