-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
36 lines (30 loc) · 1.78 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import numpy as np
import pandas as pd
from scipy.stats import genextreme
from pykelihood.profiler import Profiler
logrange = np.logspace(np.log10(1 + 1e-2), np.log10(10000), 100)
def bootstrap_confidence_interval(y: pd.Series, evt_distri=genextreme, confidence=0.05,
metric=lambda x: x.ppf(1 - 1 / logrange)):
n_points = len(y)
shuffled_samples = np.array([y.sample(n_points, replace=True).values.flatten() for _ in range(500)])
met_values = np.array([metric(evt_distri(*evt_distri.fit(n))) for n in shuffled_samples])
lower_bound = np.quantile(met_values, confidence, axis=0)
upper_bound = np.quantile(met_values, 1 - confidence, axis=0)
return lower_bound, upper_bound
def parametric_confidence_interval(pykelihood_distribution_type, y, range_x, string_metric, x0):
string_ci = 'r' if string_metric == 'p' else 'p' # the one that we fix is not the one we compute CI for
print("Building distributions")
distributions = [pykelihood_distribution_type.fit_instance(y, x0=tuple(x), **{string_metric: n}) for n, x in
zip(range_x, x0)]
valid_distributions_and_indices = np.array(
[[1 / i, d] for i, d in zip(range_x, distributions) if np.isfinite(d.logpdf(y).sum())])
profilers = [Profiler(d, y, inference_confidence=0.95) for d in valid_distributions_and_indices[:, 1]]
print("Searching for interval bounds")
cis = []
for i, p in enumerate(profilers):
print(f"Confidence interval {i + 1}/{len(profilers)}")
cis.append(p.confidence_interval_bs(param=string_ci))
cis = np.array(cis)
lower, upper = cis[:, 0], cis[:, 1]
indices = valid_distributions_and_indices[:, 0]
return pd.Series(lower, index=indices).sort_values(), pd.Series(upper, index=indices).sort_values()