Skip to content

Commit

Permalink
documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
zatkins2 committed Dec 6, 2023
1 parent a944ac5 commit ce0374d
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
"Connected to cmb-della8 (Python 3.8.3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We start by evaluating a KS test for the raw PTE distributions. We expect that by not accounting for correlations induced by the linear dependence of null tests, these KS test may deliver low PTE values."
]
},
{
"cell_type": "code",
"execution_count": 9,
Expand All @@ -23,6 +30,27 @@
"from random import sample, seed\n",
"\n",
"def get_null_list(surveys, arrays, spectra):\n",
" \"\"\"For a given set of surveys, arrays, and polarization cross spectra,\n",
" form all the possible null tests. A null test is a combination of a \n",
" pol cross spectra identifier, the two maps labeling the first leg of \n",
" the null test, and the two maps labeling the second leg of the null test.\n",
"\n",
" Parameters\n",
" ----------\n",
" surveys : list\n",
" All the surveys.\n",
" arrays : list\n",
" All the arrays for each survey\n",
" spectra : list\n",
" All the pol cross spectra for each array of each survey.\n",
"\n",
" Returns\n",
" -------\n",
" list of list\n",
" All the possible null tests, e.g one of which may be:\n",
" \n",
" ['TE', ('pa4_f220_el1', 'pa4_f220_el1'), ('pa4_f220_el1', 'pa4_f220_el2')]\n",
" \"\"\"\n",
" map_set_list = []\n",
" for sv in surveys:\n",
" for ar in arrays:\n",
Expand All @@ -37,6 +65,18 @@
" return null_list\n",
"\n",
"def pte_histo(pte_list, null_test, n_bins):\n",
" \"\"\"Plot the PTE histogram for null test PTEs from a list of\n",
" PTEs. \n",
"\n",
" Parameters\n",
" ----------\n",
" pte_list : list\n",
" PTE values of all the null tests.\n",
" null_test : str\n",
" Description of the null tests (will be plot title).\n",
" n_bins : int\n",
" Number of bins in the histogram.\n",
" \"\"\"\n",
" n_samples = len(pte_list)\n",
"\n",
" bins = np.linspace(0, 1, n_bins + 1)\n",
Expand Down Expand Up @@ -79,6 +119,10 @@
}
],
"source": [
"# First let's get the \"naive\" null test results: this is where we take all the raw results,\n",
"# and get their joint histogram. We will evaluate a single KS test on this single set of PTE\n",
"# values.\n",
"\n",
"null_test = \"pwv\"\n",
"spectra = [\"TT\", \"TE\", \"TB\", \"ET\", \"BT\", \"EE\", \"EB\", \"BE\", \"BB\"]\n",
"\n",
Expand All @@ -102,8 +146,12 @@
" n_bins = 20\n",
"\n",
"pte_list = []\n",
"\n",
"# We look at arrays individually\n",
"for t_ar in test_arrays:\n",
" null_list = get_null_list(surveys, t_ar, spectra)\n",
"\n",
" # For a given null, we evaluate whether to include it\n",
" for null in null_list:\n",
" mode, (ms1, ms2), (ms3, ms4) = null\n",
"\n",
Expand All @@ -112,13 +160,17 @@
" else:\n",
" my_ar = t_ar[0][:8]\n",
"\n",
" # Skip if exactly equal to another null test\n",
" if (ms1 == ms2) & (ms3 == ms4) & (mode in [\"ET\", \"BT\", \"BE\"]) :\n",
" # print(f\"skip {ms1}x{ms2}-{ms3}x{ms4} {mode} since it's a doublon of {mode[::-1]}\")\n",
" continue\n",
"\n",
" # Skip pa4 pol\n",
" if (my_ar == \"pa4_f220\" ) & (mode != \"TT\"):\n",
" continue\n",
"\n",
" res_dict = pickle.load(open(f\"/scratch/gpfs/zatkins/data/simonsobs/PSpipe/project/dr6v4_unblinding/nulls/{null_test}/null_test_{my_ar}/diff_{mode}_{ms1}x{ms2}_{ms3}x{ms4}.pkl\", \"rb\"))\n",
" # Otherwise, grab its PTE and plot PTEs of all the valid nulls\n",
" res_dict = pickle.load(open(f\"/global/cfs/cdirs/act/data/tlouis/dr6v4/nulls/{null_test}/null_test_{my_ar}/diff_{mode}_{ms1}x{ms2}_{ms3}x{ms4}.pkl\", \"rb\"))\n",
"\n",
" name = res_dict[\"fname\"]\n",
" chi2 = res_dict[\"chi2\"]\n",
Expand Down Expand Up @@ -148,6 +200,8 @@
}
],
"source": [
"# Repeat above but for inout\n",
"\n",
"null_test = \"inout\"\n",
"spectra = [\"TT\", \"TE\", \"TB\", \"ET\", \"BT\", \"EE\", \"EB\", \"BE\", \"BB\"]\n",
"\n",
Expand Down Expand Up @@ -217,6 +271,8 @@
}
],
"source": [
"# Repeat above but for elevation\n",
"\n",
"null_test = \"elevation\"\n",
"spectra = [\"TT\", \"TE\", \"TB\", \"ET\", \"BT\", \"EE\", \"EB\", \"BE\", \"BB\"]\n",
"\n",
Expand Down Expand Up @@ -267,6 +323,15 @@
"pte_histo(pte_list, null_test, n_bins)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we repeat the above test, but rather than including all null tests in a single PTE distribution, we break up the null tests into separate sets linearly independent subsamples, and evaluate the PTE distribution of each subsample. This is motivated by the following observation (for example):\n",
"\n",
"Given 2 PWV splits, we can form 3 cross-pwv spectra: pwv1pwv1, pwv1pwv2, and pwv2pwv2. From 3 cross-pwv spectra, we can form 3 total null tests: pwv1pwv1 - pwv1pwv2, pwv1pwv2 - pwv2pwv2, pwv2pwv2 - pwv1pwv1. But when we include all 3 null tests, we have repeated information, since given any 2 of these null tests, the 3rd is a simple sum of the other 2. Thus, we can ask what happens when we only pick 2 null tests, which is the smallest number that contain the full information in the maps. There are 3 ways to do form these linearly independent subsamples, so we rerun the PTE distribution and KS test for each subsample."
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand All @@ -281,6 +346,10 @@
"from random import sample, seed\n",
"\n",
"def get_null_list(surveys, arrays, spectra):\n",
" \"\"\"Like the function before, but this time also return a list\n",
" of the possible cross-spectra that will be differenced to form\n",
" null tests.\n",
" \"\"\"\n",
" map_set_list = []\n",
" for sv in surveys:\n",
" for ar in arrays:\n",
Expand All @@ -297,6 +366,21 @@
" return null_list, crosspecs\n",
"\n",
"def get_best_null_sets(specs):\n",
" \"\"\"Given a list of N cross-spectra, form all the subsamples of N-1\n",
" pairs, i.e. null tests. In particular, return N-1 pairs such that\n",
" no leg of the pairs is repeated more than twice.\n",
"\n",
" Parameters\n",
" ----------\n",
" specs : list\n",
" List of spectra identifiers.\n",
"\n",
" Returns\n",
" -------\n",
" List of two-tuples\n",
" Each tuple contains two elements. Each element is a \n",
" pair of spectra, i.e. a null test.\n",
" \"\"\"\n",
" perms = list(permutations(specs, len(specs)))\n",
"\n",
" for s in perms:\n",
Expand All @@ -311,7 +395,10 @@
"\n",
" return best_sets\n",
"\n",
"def pte_histo(null_test, list_of_pte_lists, list_of_pte_titles, n_bins): \n",
"def pte_histo(null_test, list_of_pte_lists, list_of_pte_titles, n_bins):\n",
" \"\"\"Like before, but now we will make a plot for each subsample of null tests.\n",
" So, list_of_pte_lists contains the pte_list for each subsample, and list_of_pte_titles\n",
" contains the title for each subsample plot.\"\"\" \n",
" ks_test_list = []\n",
" for i, pte_list in enumerate(list_of_pte_lists):\n",
" plt.figure(figsize=(8,6))\n",
Expand Down Expand Up @@ -423,14 +510,19 @@
}
],
"source": [
"# Now let's get the \"subsampled\" null test results: for pwv, there are 3 total \n",
"# subsamples of null tests that contain all the map information. Each subsample\n",
"# contains 2 out of the 3 possible null tests. We will just plot the PTE distributions\n",
"# for all 3 subsamples.\n",
"\n",
"null_test = \"pwv\"\n",
"spectra = [\"TT\", \"TE\", \"TB\", \"ET\", \"BT\", \"EE\", \"EB\", \"BE\", \"BB\"]\n",
"\n",
"if null_test == \"elevation\":\n",
" surveys = [\"el1\", \"el2\", \"el3\"]\n",
" test_arrays = [[\"pa4_f220\"], [\"pa5_f090\"], [\"pa5_f150\"], [\"pa6_f090\"], [\"pa6_f150\"]]\n",
" n_bins = 60\n",
" n_subsamp = 10\n",
" n_subsamp = 100\n",
"\n",
"if null_test == \"pwv\":\n",
" surveys = [\"pwv1\", \"pwv2\"]\n",
Expand All @@ -448,18 +540,29 @@
" n_bins = 20\n",
" n_subsamp = 3\n",
"\n",
"# important to initialize list_of_pte_lists this way so that each subsample pte_list\n",
"# has a different pointer!\n",
"list_of_pte_lists = [[] for i in range(n_subsamp)]\n",
"for t_ar in test_arrays:\n",
" null_list, crosspecs = get_null_list(surveys, t_ar, spectra)\n",
"\n",
" # get the minimal subsamples that do not repeat a leg more than twice\n",
" best_crosspecs = get_best_null_sets(crosspecs)\n",
" \n",
" seed(2) # same sample of best_crosspecs for each array\n",
" # if there are a lot of possible subsamples, choose only a few of them\n",
" seed(2) # same subsamples chosen for each array\n",
" crosspecs_samples = sample(best_crosspecs, n_subsamp)\n",
" print(crosspecs_samples)\n",
" print(crosspecs_samples) \n",
"\n",
" # do the following for each subsample of null tests\n",
" for i, crosspecs in enumerate(crosspecs_samples):\n",
"\n",
" # check each null if we want to include it\n",
" for null in null_list:\n",
" mode, (ms1, ms2), (ms3, ms4) = null\n",
"\n",
" # skip if the null is not in our subsample! note we also need to check the \n",
" # \"backwards\" version of the null too\n",
" if [(ms1, ms2), (ms3, ms4)] not in crosspecs and [(ms3, ms4), (ms1, ms2)] not in crosspecs:\n",
" # print([(ms1, ms2), (ms3, ms4)], 'not in', crosspecs)\n",
" continue\n",
Expand All @@ -469,6 +572,7 @@
" else:\n",
" my_ar = t_ar[0][:8]\n",
"\n",
" # same checks as before\n",
" if (ms1 == ms2) & (ms3 == ms4) & (mode in [\"ET\", \"BT\", \"BE\"]) :\n",
" # print(f\"skip {ms1}x{ms2} - {ms3}x{ms4} {mode} since it's a doublon of {mode[::-1]}\")\n",
" continue\n",
Expand Down Expand Up @@ -552,14 +656,16 @@
}
],
"source": [
"# repeat for inout\n",
"\n",
"null_test = \"inout\"\n",
"spectra = [\"TT\", \"TE\", \"TB\", \"ET\", \"BT\", \"EE\", \"EB\", \"BE\", \"BB\"]\n",
"\n",
"if null_test == \"elevation\":\n",
" surveys = [\"el1\", \"el2\", \"el3\"]\n",
" test_arrays = [[\"pa4_f220\"], [\"pa5_f090\"], [\"pa5_f150\"], [\"pa6_f090\"], [\"pa6_f150\"]]\n",
" n_bins = 60\n",
" n_subsamp = 10\n",
" n_subsamp = 100\n",
"\n",
"if null_test == \"pwv\":\n",
" surveys = [\"pwv1\", \"pwv2\"]\n",
Expand Down Expand Up @@ -1845,6 +1951,9 @@
}
],
"source": [
"# repeat for elevation. note, unlike pwv and inout which had only 3 possible subsamples,\n",
"# elevation has a very large number. thus the \"n_subsamp\" variable becomes more important!\n",
"\n",
"null_test = \"elevation\"\n",
"spectra = [\"TT\", \"TE\", \"TB\", \"ET\", \"BT\", \"EE\", \"EB\", \"BE\", \"BB\"]\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,54 @@
from scipy import stats
from scipy.interpolate import interp1d

nbin = 39
# Our simulation setup is as follows. A given simulation involves 99 null tests,
# matching the number of pwv and inout null tests. This 99 is assumed to be
# composed of 33 independent test sets (a test set could be a separate array
# or polarization cross), each of which is composed of 3 pwv cross differences:
# pwv1pwv1 - pwv1pwv2, pwv1pwv2 - pwv2pwv2, pwv1pwv1 - pwv2pwv2. Each "spectrum"
# has 39 "ell bins.""
nsim = 20_000
ntest = 33
nbin = 39

sim_ptes = np.zeros((nsim, ntest, 3))

for sidx in range(nsim):
if sidx % 100 == 0: print(sidx)
if sidx % 100 == 0:
print(sidx)

for tidx in range(ntest):
p11 = np.random.randn(nbin)
p22 = np.random.randn(nbin)
p12 = np.random.randn(nbin)

# simulate the 3 pwv cross spectra as Gaussian r.v's
p11 = np.random.randn(nbin)
p22 = np.random.randn(nbin)
p12 = np.random.randn(nbin)

# form the 3 null test residuals. this step introduces correlations
# between null tests
res_a = p11 - p22
res_b = p12 - p22
res_c = p11 - p12

# get the chi2 of each null test, assuming diagonal over null tests
# and ell bins. The variance of a null test residual in a given ell
# bin is 2.
chi2_a = np.sum(res_a ** 2 / 2)
chi2_b = np.sum(res_b ** 2 / 2)
chi2_c = np.sum(res_c ** 2 / 2)

# Get the ptes of the 3 null tests
pte_a = 1 - stats.chi2.cdf(chi2_a, nbin)
pte_b = 1 - stats.chi2.cdf(chi2_b, nbin)
pte_c = 1 - stats.chi2.cdf(chi2_c, nbin)

# For each sim, we have 99 null tests; they are correlated along
# the last axis which holds the pwv residuals
sim_ptes[sidx,tidx,0] = pte_a
sim_ptes[sidx,tidx,1] = pte_b
sim_ptes[sidx,tidx,2] = pte_c

# Evaluate the kstest of the 99 null tests for each of our sims
ks_tests = np.zeros(nsim)
for sidx in range(nsim):
ks_tests[sidx] = stats.kstest(
Expand All @@ -38,5 +63,5 @@
ks_test_pwv = 0.018
ks_test_inout = 0.039

print(1 - cdf_interp(ks_test_pwv))
print(1 - cdf_interp(ks_test_pwv))
print(cdf_interp(ks_test_pwv))
print(cdf_interp(ks_test_pwv))

0 comments on commit ce0374d

Please sign in to comment.