From ddba704d6068816eaf382b0a961877093c431d4d Mon Sep 17 00:00:00 2001
From: Thibaut Louis
Date: Mon, 4 Dec 2023 07:23:52 +0100
Subject: [PATCH] new v

---
 .../python/null_tests/compute_null_tests.py  | 267 ++++++++++++++++++
 .../python/null_tests/null_infos.py          |  27 +-
 .../null_tests/{ => old}/array_null_tests.py |   0
 3 files changed, 292 insertions(+), 2 deletions(-)
 create mode 100644 project/data_analysis/python/null_tests/compute_null_tests.py
 rename project/data_analysis/python/null_tests/{ => old}/array_null_tests.py (100%)

diff --git a/project/data_analysis/python/null_tests/compute_null_tests.py b/project/data_analysis/python/null_tests/compute_null_tests.py
new file mode 100644
index 00000000..7a853901
--- /dev/null
+++ b/project/data_analysis/python/null_tests/compute_null_tests.py
@@ -0,0 +1,267 @@
+"""
+This script performs null tests, plots the residual power spectra,
+and produces a summary PTE distribution.
+"""
+
+from pspy import so_dict, pspy_utils, so_cov, so_spectra
+from pspipe_utils import consistency, best_fits, covariance, pspipe_list
+import pickle
+import scipy.stats as ss
+import matplotlib.pyplot as plt
+import numpy as np
+import sys
+from null_infos import *
+
+def get_lmin_lmax(null, multipole_range):
+    """
+    Compute the lmin and lmax associated with a given null test.
+    """
+    m, ar1, ar2, ar3, ar4 = null
+    m0, m1 = m[0], m[1]
+
+    lmin0, lmax0 = multipole_range[ar1][m0]
+    lmin1, lmax1 = multipole_range[ar2][m1]
+    ps12_lmin = max(lmin0, lmin1)
+    ps12_lmax = min(lmax0, lmax1)
+
+    lmin2, lmax2 = multipole_range[ar3][m0]
+    lmin3, lmax3 = multipole_range[ar4][m1]
+    ps34_lmin = max(lmin2, lmin3)
+    ps34_lmax = min(lmax2, lmax3)
+
+    lmin = max(ps12_lmin, ps34_lmin)
+    lmax = min(ps12_lmax, ps34_lmax)
+
+    return lmin, lmax
+
+def pte_histo(pte_list, file_name, n_bins):
+    n_samples = len(pte_list)
+    bins = np.linspace(0, 1, n_bins + 1)
+
+    # avoid shadowing the built-in min/max
+    pte_min, pte_max = np.min(pte_list), np.max(pte_list)
+
+    plt.figure(figsize=(8, 6))
+    plt.xlabel(r"Probability to exceed (PTE)")
+    plt.hist(pte_list, bins=bins, label=f"n tests: {n_samples}, min: {pte_min:.4f}, max: {pte_max:.4f}")
+    plt.axhline(n_samples / n_bins, color="k", ls="--")
+    plt.tight_layout()
+    plt.legend()
+    plt.savefig(file_name, dpi=300)
+    plt.clf()
+    plt.close()
+
+d = so_dict.so_dict()
+d.read_from_file(sys.argv[1])
+
+pte_threshold = 0.01
+remove_first_bin = True
+
+cov_dir = "covariances"
+null_test_dir = "null_test"
+
+spectra = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
+
+pspy_utils.create_directory(null_test_dir)
+pspy_utils.create_directory(plot_dir)
+
+test_list = [{"name": "spectra_corrected+mc_cov+beam_cov+leakage_cov",
+              "spec_dir": "spectra_corrected",
+              "cov_correction": ["mc_cov", "beam_cov", "leakage_cov"]}]
+
+#test_list += [{"name": "spectra_with_analytic_cov",
+#               "spec_dir": "spectra",
+#               "cov_correction": []}]
+
+# we start by making a list of all the elements we need to load in memory;
+# for this we loop over the different tests and identify which elements each of them requires
+spec_dir_list = []
+cov_type_list = ["analytic_cov"]
+label_list = []
+for test in test_list:
+    spec_dir_list = np.append(spec_dir_list, test["spec_dir"])
+    cov_type_list = np.append(cov_type_list, test["cov_correction"])
+    label_list += [test["name"]]
+
+cov_type_list = list(dict.fromkeys(cov_type_list))  # remove duplicates
+spec_dir_list = list(dict.fromkeys(spec_dir_list))  # remove duplicates
+map_set_list = pspipe_list.get_map_set_list(d)
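A note on reading the PTE histograms produced by pte_histo above: if the null tests are consistent with statistical fluctuations, the PTEs should be uniformly distributed between 0 and 1, so the dashed line at n_samples / n_bins marks the expected count per bin. A minimal sketch of the per-bin scatter expected around that line (expected_bin_scatter is a hypothetical helper, not part of the script):

import numpy as np

def expected_bin_scatter(n_samples, n_bins):
    # for uniform PTEs each bin count is binomial(n_samples, p=1/n_bins):
    # mean n_samples / n_bins, standard deviation sqrt(n_samples * p * (1 - p))
    p = 1.0 / n_bins
    return n_samples * p, np.sqrt(n_samples * p * (1 - p))

print(expected_bin_scatter(200, 10))  # e.g. 200 tests in 10 bins -> (20.0, ~4.2)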
+
+# we now read all the covariances; note that the function call is a bit awkward
+# because it is designed to load spectra and covariances at the same time
+all_cov = {}
+_ps_temp = "spectra/Dl_{}x{}_cross.dat"
+for cov in cov_type_list:
+    cov_template = f"{cov_dir}/{cov}" + "_{}x{}_{}x{}.npy"
+    _, all_cov[cov] = consistency.get_ps_and_cov_dict(map_set_list, _ps_temp, cov_template, spectra_order=spectra)
+
+# now read all the spectra (the covariance returned here is discarded;
+# the last cov_template is only passed to satisfy the function signature)
+all_ps = {}
+for spec_dir in spec_dir_list:
+    ps_template = spec_dir + "/Dl_{}x{}_cross.dat"
+    all_ps[spec_dir], _ = consistency.get_ps_and_cov_dict(map_set_list, ps_template, cov_template, spectra_order=spectra)
+    lb = all_ps[spec_dir]["ell"]
+
+# Load foreground best fits
+fg_file_name = "best_fits/fg_{}x{}.dat"
+l_fg, fg_dict = best_fits.fg_dict_from_files(fg_file_name, map_set_list, d["lmax"], spectra=spectra)
+
+# Define the PTE dict
+pte_dict = {}
+for label in label_list:
+    pte_dict[label, "all"] = []
+    for spec in tested_spectra:
+        pte_dict[label, spec] = []
+
+operations = {"diff": "ab-cd"}
+
+null_list = pspipe_list.get_null_list(d, spectra=tested_spectra)  # tested_spectra already excludes EB/BE when skip_EB is set
+
+print(label_list)
+
+for null in null_list:
+
+    mode, ms1, ms2, ms3, ms4 = null
+    lmin, lmax = get_lmin_lmax(null, multipole_range)
+
+    res_ps, res_cov = {}, {}
+
+    # same comment as before: in the current implementation the residual ps and cov
+    # are read at the same time, while here we prefer to read them separately, hence the two calls
+    for spec_dir in spec_dir_list:
+        lb, res_ps[spec_dir], _ = consistency.compare_spectra([ms1, ms2, ms3, ms4],
+                                                              "ab-cd",
+                                                              all_ps[spec_dir],
+                                                              all_cov["analytic_cov"],
+                                                              mode=mode,
+                                                              return_chi2=False)
+        if remove_first_bin:
+            res_ps[spec_dir] = res_ps[spec_dir][1:]
+
+    for cov in cov_type_list:
+        lb, _, res_cov[cov] = consistency.compare_spectra([ms1, ms2, ms3, ms4],
+                                                          "ab-cd",
+                                                          all_ps[spec_dir_list[0]],
+                                                          all_cov[cov],
+                                                          mode=mode,
+                                                          return_chi2=False)
+        if remove_first_bin:
+            lb, res_cov[cov] = lb[1:], res_cov[cov][1:, 1:]
+
+    res_fg = fg_dict[ms1, ms2][mode] - fg_dict[ms3, ms4][mode]
+    lb_fg, res_fg_b = pspy_utils.naive_binning(l_fg, res_fg, d["binning_file"], d["lmax"])
+
+    if remove_first_bin:
+        lb_fg, res_fg_b = lb_fg[1:], res_fg_b[1:]
+
+    res_ps_dict = {}
+    res_cov_dict = {}
+
+    fname = f"diff_{mode}_{ms1}x{ms2}_{ms3}x{ms4}"
+
+    for test in test_list:
+        spec_dir = test["spec_dir"]
+        cov_correction = test["cov_correction"]
+        label = test["name"]
+
+        res_ps_dict[label] = res_ps[spec_dir]
+        # copy so the in-place corrections below do not modify the shared analytic cov
+        res_cov_dict[label] = res_cov["analytic_cov"].copy()
+
+        if "mc_cov" in cov_correction:
+            res_cov_dict[label] = covariance.correct_analytical_cov(res_cov_dict[label],
+                                                                    res_cov["mc_cov"],
+                                                                    only_diag_corrections=True)
+        if "beam_cov" in cov_correction:
+            res_cov_dict[label] += res_cov["beam_cov"]
+
+        if "leakage_cov" in cov_correction:
+            res_cov_dict[label] += res_cov["leakage_cov"]
+
+        if fudge:
+            ind = np.diag_indices_from(res_cov_dict[label])
+            print("WARNING: applying a fudge factor to the covariance diagonal")
+            fudge_error = 1.02
+            res_cov_dict[label][ind] *= fudge_error ** 2
+
+        sigma = np.sqrt(np.diagonal(res_cov_dict[label]))
+
+        # save the covariance as a .npy array and the residual spectrum as a text table
+        np.save(f"{null_test_dir}/cov_{label}_{fname}.npy", res_cov_dict[label])
+        np.savetxt(f"{null_test_dir}/ps_{label}_{fname}.dat", np.transpose([lb, res_ps_dict[label], sigma]))
+
+    # Plot the residual and get the chi2
+    lrange = np.where((lb >= lmin) & (lb <= lmax))[0]
+    plot_title = f"{ms1}x{ms2} - {ms3}x{ms4}"
+    expected_res = 0.
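The chi2_dict returned by consistency.plot_residual below is what feeds the PTE computation further down. Assuming it forms a standard Gaussian chi-square from the binned residual and its covariance restricted to lrange (with expected_res = 0), it corresponds to the following sketch; residual_chi2 is a hypothetical stand-alone helper, not part of the pspipe_utils API:

import numpy as np
import scipy.stats as ss

def residual_chi2(lb, res_ps, res_cov, lmin, lmax):
    # keep only the bins inside the trusted multipole window of the null test
    keep = np.where((lb >= lmin) & (lb <= lmax))[0]
    r = res_ps[keep]
    cov = res_cov[np.ix_(keep, keep)]
    chi2 = float(r @ np.linalg.solve(cov, r))
    ndof = len(keep)
    pte = 1 - ss.chi2(ndof).cdf(chi2)
    return chi2, ndof, pte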
+
+    ylims = y_lims[mode]
+    if "pa4" in fname:
+        ylims = (y_lims[mode][0] * 10, y_lims[mode][1] * 10)
+
+    chi2_dict = consistency.plot_residual(lb, res_ps_dict, res_cov_dict, mode=mode,
+                                          title=plot_title.replace("dr6_", ""),
+                                          file_name=f"{plot_dir}/{fname}",
+                                          expected_res=expected_res,
+                                          lrange=lrange,
+                                          overplot_theory_lines=(lb_fg, res_fg_b),
+                                          l_pow=l_pows[mode],
+                                          return_chi2=True,
+                                          ylims=ylims)
+
+    if skip_pa4_pol:
+        if ("pa4" in fname) and (mode != "TT"):
+            continue
+
+    if (ms1 == ms2) and (ms3 == ms4) and (mode in ["ET", "BT", "BE"]):
+        print("")
+        print(f"skip {ms1}x{ms2} - {ms3}x{ms4} {mode} since it is a duplicate of {mode[::-1]}")
+        print("")
+        continue
+    else:
+        # Fill pte_dict
+        for label in label_list:
+            pte = 1 - ss.chi2(chi2_dict[label]["ndof"]).cdf(chi2_dict[label]["chi2"])
+            pte_dict[label, mode].append(pte)
+            pte_dict[label, "all"].append(pte)
+
+            if (pte <= pte_threshold) or (pte >= 1 - pte_threshold):
+                print(f"[{label}] [{plot_title} {mode}] PTE = {pte:.04f}")
+
+# Save the PTEs to a pickle
+pickle.dump(pte_dict, open(f"{plot_dir}/pte_dict.pkl", "wb"))
+
+# Plot the PTE histograms for each test configuration
+for label in label_list:
+    pte_list = pte_dict[label, "all"]
+    print(f"{label} most extreme PTEs: min {np.min(pte_list)}, max {np.max(pte_list)}")
+    n_bins = 10
+    file_name = f"{plot_dir}/pte_hist_all_{label}{hist_label}.png"
+    pte_histo(pte_list, file_name, n_bins)
+    for mode in tested_spectra:
+        pte_list = pte_dict[label, mode]
+        print(f"{len(pte_list)} tests for mode {mode}")
+        n_bins = 8
+        file_name = f"{plot_dir}/pte_hist_{mode}_{label}{hist_label}.png"
+        pte_histo(pte_list, file_name, n_bins)
+
diff --git a/project/data_analysis/python/null_tests/null_infos.py b/project/data_analysis/python/null_tests/null_infos.py
index 4cb7d22f..5726b4de 100644
--- a/project/data_analysis/python/null_tests/null_infos.py
+++ b/project/data_analysis/python/null_tests/null_infos.py
@@ -1,5 +1,28 @@
 # Define the multipole range
+
+skip_EB = True
+fudge = False
+skip_pa4_pol = True
+plot_dir = "plots/array_nulls"
+
+spectra = ["TT", "TE", "TB", "ET", "BT", "EE", "EB", "BE", "BB"]
+
+if skip_EB:
+    tested_spectra = ["TT", "TE", "ET", "TB", "BT", "EE", "BB"]
+    plot_dir = "plots/array_nulls_skip_EB"
+else:
+    tested_spectra = spectra
+    plot_dir = "plots/array_nulls"
+
+
+hist_label = ""
+
+if skip_pa4_pol:
+    hist_label = "_skip_pa4pol"
+
+
 multipole_range = {
     "dr6_pa4_f150": {
         "T": [1250, 8500],
         "E": [500, 8500],
         "B": [500, 8500]
     },
@@ -12,7 +35,7 @@
         "B": [500, 8500]
     },
     "dr6_pa5_f090": {
-        "T": [800, 8500],
+        "T": [1000, 8500],
         "E": [500, 8500],
         "B": [500, 8500]
     },
@@ -22,7 +45,7 @@
         "B": [500, 8500]
     },
     "dr6_pa6_f090": {
-        "T": [600, 8500],
+        "T": [1000, 8500],
         "E": [500, 8500],
         "B": [500, 8500]
     },
diff --git a/project/data_analysis/python/null_tests/array_null_tests.py b/project/data_analysis/python/null_tests/old/array_null_tests.py
similarity index 100%
rename from project/data_analysis/python/null_tests/array_null_tests.py
rename to project/data_analysis/python/null_tests/old/array_null_tests.py
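For reference, a small worked example of the multipole window logic implemented in get_lmin_lmax, using T ranges taken from null_infos.py above; the array combination is only an illustration:

# a TT null comparing pa4_f150 x pa5_f090 with pa5_f090 x pa5_f090 is only
# evaluated where all four input spectra are trusted
t_range = {"dr6_pa4_f150": (1250, 8500), "dr6_pa5_f090": (1000, 8500)}
arrays = ("dr6_pa4_f150", "dr6_pa5_f090", "dr6_pa5_f090", "dr6_pa5_f090")
lmin = max(t_range[ar][0] for ar in arrays)
lmax = min(t_range[ar][1] for ar in arrays)
print(lmin, lmax)  # -> 1250 8500, so only bins with 1250 <= lb <= 8500 enter the chi2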