# experiment_1.py — drift-detector comparison experiment.
# NOTE: this header replaces GitHub file-view scraper residue (page chrome
# and the rendered line-number gutter 1-81) that was not part of the source.
from sklearn.naive_bayes import GaussianNB
from config import *
from tqdm import tqdm
import numpy as np
from detectors.DDM import DDM
from detectors.EDDM import EDDM
from detectors.ADWIN import ADWIN
from detectors.HDDM_AA import HDDM_AA
from detectors.HDDM_WW import HDDM_WW
from detectors.CDDE import CDDE
from detectors.meta import Meta
import problexity as px
# --- Establish base stream info ---------------------------------------------
# `static`, `number_of_clusters`, `replications`, `dimensionalities` and
# `drift_types` are star-imported from `config` above.
n_chunks = static['n_chunks']
chunk_size = static['chunk_size']
n_clusters_per_class = number_of_clusters[0]
# Must match the number of *active* entries in the detector list built per
# stream in the main loop below.
n_detectors = 4

# Collect problexity's classification complexity measures, dropping the one
# at index 4.
measures = np.array([getattr(px.classification, name)
                     for name in px.classification.__all__])
# Direct boolean mask; the original `np.ones_like(measures).astype(bool)`
# first built an object-dtype array of 1s and then cast it.
metric_mask = np.ones(len(measures), dtype=bool)
metric_mask[4] = False  # NOTE(review): magic index — confirm which measure this excludes
measures = measures[metric_mask]

# Storage: one drift-flag vector (length n_chunks-1) per
# (replication, dimensionality, drift type, detector) combination.
detection_results = np.zeros((len(replications), len(dimensionalities),
                              len(drift_types), n_detectors, n_chunks - 1))
# --- Main experimental loop --------------------------------------------------
stream_counter = 0
for replication, random_state in enumerate(replications):
    for n_features_id, n_features in enumerate(dimensionalities):
        for drift_type_id, drift_type in enumerate(drift_types):
            # Filename matches the stream generator's naming scheme.
            filename = '%s_f%i_c%i_r%i' % (
                drift_type, n_features,
                n_clusters_per_class, replication
            )
            # Announce progress.
            print('stream %i - %s' % (stream_counter, filename))
            stream_counter += 1

            # Load the pre-generated stream. The original bare `except:`
            # swallowed every exception (including KeyboardInterrupt) and
            # exited with status 0; catch only file-access errors and exit
            # non-zero so the failure is visible to callers.
            try:
                data = np.load('/Volumes/T7/ComplexityStreams/%s.npz' % filename)
            except (FileNotFoundError, OSError):
                print('NO FILE YET')
                raise SystemExit(1)
            X, y = data['X'], data['y']
            print(X.shape, y.shape)

            # Fresh detectors for every stream. The commented-out entries are
            # deliberately excluded; `n_detectors` above must match the number
            # of active entries here.
            detectors = [
                Meta(base_clf=GaussianNB(), detector=DDM()),
                # Meta(base_clf=GaussianNB(), detector=EDDM()),
                Meta(base_clf=GaussianNB(), detector=ADWIN()),
                Meta(base_clf=GaussianNB(), detector=HDDM_AA()),
                Meta(base_clf=GaussianNB(), detector=HDDM_WW()),
                # Meta(base_clf=GaussianNB(), detector=CDDE(measures=measures))
            ]

            classes = [0, 1]  # loop-invariant class list, hoisted
            # Test-then-train over consecutive, non-overlapping chunks.
            for chunk_id in tqdm(range(n_chunks)):
                start, stop = chunk_id * chunk_size, (chunk_id + 1) * chunk_size
                _X, _y = X[start:stop], y[start:stop]
                for detector in detectors:
                    # Best-effort predict: fails on the first chunk before any
                    # fit. Narrowed from a bare `except:` so Ctrl-C and
                    # SystemExit still propagate.
                    try:
                        detector.predict(_X)
                    except Exception:
                        pass
                    detector.partial_fit(_X, _y, classes)

            # Collect each detector's per-chunk drift flags.
            for detector_id, detector in enumerate(detectors):
                detection_results[replication, n_features_id, drift_type_id,
                                  detector_id] = detector.detector.drift

            # Store results after every stream so partial progress survives.
            np.save('results/exp_comparison', detection_results)