Skip to content

Commit

Permalink
Add code for conditioinal gaussian imputation; add note on paper resu…
Browse files Browse the repository at this point in the history
…lts in readme
  • Loading branch information
junmingguan committed Dec 3, 2024
1 parent 0164286 commit 8bb7398
Show file tree
Hide file tree
Showing 3 changed files with 405 additions and 1 deletion.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,7 @@ it in the container. To install snipar, follow these steps:
# Running tests
To check that the code is working properly and that the C modules have been compiled, you can run the tests using this command:

python -m unittest snipar.tests
python -m unittest snipar.tests

# Code for reproducing results
See paper_results/ for code to reproduce results in https://www.biorxiv.org/content/10.1101/2022.10.24.513611v1
36 changes: 36 additions & 0 deletions paper_results/simulations/analyze_imputation_from_cousins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline
from sklearn.model_selection import LeaveOneOut
import pandas as pd


def plot(alpha, alpha_cov, non_sampling=False):
s = np.zeros(alpha.shape[0])
loo = LeaveOneOut()
loo.get_n_splits(alpha)
for i, (index, _) in enumerate(loo.split(alpha)):
if alpha_cov.ndim == 1:
if not non_sampling:
s[i] = np.var(alpha[index, 0] / np.sqrt(alpha_cov)[index]) #/ (np.var(alpha_max[index, 0] / np.sqrt(alpha_cov_max)[index,0,0]))
else:
s[i] = np.mean(alpha[index,0] ** 2 - alpha_cov[index]) #/ (np.mean(alpha_max[index,0] ** 2) - np.mean(alpha_cov_max[index,0,0]))
else:
if not non_sampling:
s[i] = np.var(alpha[index, 0] / np.sqrt(np.diagonal(alpha_cov, axis1=1, axis2=2))[index,0]) #/ (np.var(alpha_max[index, 0] / np.sqrt(alpha_cov_max)[index,0,0]))
else:
s[i] = (np.mean(alpha[index,0] ** 2) - np.mean(alpha_cov[index,0,0])) #/(np.mean(alpha_max[index,0] ** 2) - np.mean(alpha_cov_max[index,0,0]))
mean = np.mean(s)
std = np.sqrt(np.sum(np.power(mean - s, 2)) * (s.shape[0] - 1) / s.shape[0])
return mean, std

df = []
for F in [0, 0.001, 0.01, 0.1]:
x = np.load(f'impute_cond_gau_results/3000SNPs_5000cousinpairs_0.5h2pop_{F}Fst.npz')
m, std, = plot(x['alpha'], x['cov'])
df.append(pd.DataFrame({'Fst': [F],
'method': ['cond. Gaussian'],
'non_sampling_var': m, 'non_sampling_var_std': std}))
plot(x['alpha'], x['cov'], True)

pd.concat(df).to_csv('impute_cond_gau_results/sp_non_sampling_var.csv', index=False, sep='\t')
Loading

0 comments on commit 8bb7398

Please sign in to comment.