-
Notifications
You must be signed in to change notification settings - Fork 0
/
enviroment_generation.py
173 lines (117 loc) · 6.63 KB
/
enviroment_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from consts import state_variables
def quantiles_discretization(vector):
    """
    Discretize a dosage vector into 5 levels (0-4) using the quartiles of its positive values.

    Input:  vector - pandas Series of numeric values; NaN is treated as 0 (no drug)
    Outputs: levels - Series (object dtype, same index as the input) holding the level of each entry:
                 0 - value is 0 or NaN
                 1 - 0 < value < q1
                 2 - q1 <= value < q2
                 3 - q2 <= value < q3
                 4 - value >= q3
             [q1, q2, q3] - the quartiles, calculated only on the values greater than 0

    Note:
    - The input vector is not modified.
    - Negative values are not expected; they match no level and are left as they are.
    - If there are no positive values the quartiles are NaN, and every entry gets level 0.
    """
    # Change NaN values to 0 (no drug); fillna returns a new Series, so the input stays untouched
    filled = vector.fillna(0)

    # Calculate quantiles (only for the non-zero values!)
    positive_values = filled[filled > 0]
    q1 = positive_values.quantile(0.25)
    q2 = positive_values.quantile(0.50)
    q3 = positive_values.quantile(0.75)

    # All the masks are built on the filled values (and not on the raw input),
    # so the entries that were NaN get the same integer label 0 as the explicit zeros
    levels = filled.astype("object")
    levels[filled == 0] = 0
    levels[(filled > 0) & (filled < q1)] = 1
    levels[(filled >= q1) & (filled < q2)] = 2
    levels[(filled >= q2) & (filled < q3)] = 3
    levels[filled >= q3] = 4

    return levels, [q1, q2, q3]
def clustering_states(data, state_variables, num_clusters, random_state=None):
    """
    Cluster the rows of the data into discrete states, using k-means (k-means++ initialization).

    Input:  data - data frame that contains the state variables columns
            state_variables - the columns that the clustering is based on
            num_clusters - number of clusters (discrete states)
            random_state - optional seed that is passed to KMeans. The default (None) keeps
                           the clustering unseeded; pass an int to get reproducible states
    Outputs: labels - the cluster index of each row of the data
             centroids - the cluster centers (one row per cluster)
    """
    features = data[state_variables].to_numpy()
    kmeans = KMeans(
        n_clusters=num_clusters,
        init='k-means++',
        n_init='auto',
        random_state=random_state,
    ).fit(features)
    return kmeans.labels_, kmeans.cluster_centers_
def add_states_and_rewards(data, state_clusters, num_states=None):
    """
    Returns a copy of the given dataframe, including state, next_state and reward columns
    In addition, drop patients with a trajectory of size 1

    Input:  data - data frame with the columns 'patientunitstayid', 'timestep_1h' and 'unitdischargestatus'
            state_clusters - the discrete state of each row of data (same length and order as data)
            num_states - optional number of non-terminal states. When it is None, it is taken as
                         (highest label in state_clusters) + 1
    Outputs: data frame sorted by patient and time, where for the last row of each patient:
                 next_state = num_states     and reward = 100   if unitdischargestatus is 'Alive'
                 next_state = num_states + 1 and reward = -100  if unitdischargestatus is 'Expired'
             and in all the other rows next_state is the state of the following row and reward = 0

    Note:
    - The given dataframe is not modified.
    - If the last row of a patient is neither 'Alive' nor 'Expired', its next_state stays NaN
      and the conversion to int64 fails.
    """
    # The terminal states ids are based on the labels BEFORE the filtering below.
    # Counting the distinct states that are left after the filtering is not safe: if a cluster
    # disappears together with the dropped patients, the id of the 'alive' terminal state
    # collides with the id of a real cluster.
    if num_states is None:
        labels = np.asarray(state_clusters)
        num_states = int(labels.max()) + 1 if labels.size else 0
    alive_state = num_states
    expired_state = num_states + 1

    frame = data.copy(deep=True)

    # Add states to the dataframe
    frame["state"] = state_clusters

    # Drop patients that have only one row (meaning a patient with a trajectory of size 1),
    # and make sure the dataframe is sorted by patient and time
    rows_per_patient = frame['patientunitstayid'].value_counts()
    has_trajectory = frame['patientunitstayid'].map(rows_per_patient) > 1
    frame = (
        frame[has_trajectory]
        .sort_values(by=["patientunitstayid", "timestep_1h"])
        .reset_index(drop=True)
    )

    # Add next state column: group by patient and shift the 'state' column one row up.
    # The last row of each patient gets NaN, because there is no state after it
    frame['next_state'] = frame.groupby('patientunitstayid')['state'].shift(-1)

    # The last row of each patient moves to one of the two terminal states
    is_last_row = frame['next_state'].isna()
    ended_alive = is_last_row & (frame['unitdischargestatus'] == 'Alive')
    ended_expired = is_last_row & (frame['unitdischargestatus'] == 'Expired')

    frame['next_state'] = np.where(ended_alive, alive_state, frame['next_state'])
    frame['next_state'] = np.where(ended_expired, expired_state, frame['next_state'])
    frame['next_state'] = frame['next_state'].astype("int64")

    # Add reward column (non-zero only on the transition into a terminal state)
    frame['reward'] = np.where(ended_alive, 100, np.where(ended_expired, -100, 0))
    frame['reward'] = frame['reward'].astype("int64")

    return frame
def generate_test_set_states(test_set, state_centroids):
    """
    Assign each test sample to the discrete state of its closest centroid.

    Input:  test_set: data frame of shape (n, features), must contain the columns
                      listed in state_variables (imported from consts)
            state_centroids: array of shape (num_clusters, state_dim)
    Outputs: a vector of length n, with the index of the closest centroid for each test sample
    """
    test_features = test_set[state_variables]
    # The function returns (indices, distances); only the indices are needed here
    closest_centroid, _distances = pairwise_distances_argmin_min(test_features, state_centroids)
    return closest_centroid
def create_transition_table(data, state_dim, action_dim):
    """
    Estimate the transition probabilities T[a, s, s'] from the observed transitions.

    Input:  data - data frame with integer columns 'action', 'state' and 'next_state'
            state_dim - number of states, where the last two states (state_dim-2, state_dim-1)
                        are the terminal states
            action_dim - number of actions
    Outputs: T - array of shape (action_dim, state_dim, state_dim), where T[a, s, s'] is the fraction
                 of the rows with (action=a, state=s) that moved to next_state=s'

    Note:
    - Each action defines a 2D matrix T[a, :, :] which describes the probability to move from each state to another by action a.
    - T[a, :, :] should be a stochastic matrix, but some transitions do not exist in the data, so there are some rows which contain only 0's (and hence don't sum up to 1).
    """
    actions = data['action'].to_numpy(dtype=np.int64)
    states = data['state'].to_numpy(dtype=np.int64)
    next_states = data['next_state'].to_numpy(dtype=np.int64)

    # Count every observed (action, state, next_state) triple in a single pass over the data.
    # np.add.at is used (and not counts[...] += 1) because it accumulates repeated indices
    counts = np.zeros((action_dim, state_dim, state_dim))
    np.add.at(counts, (actions, states, next_states), 1)

    # Normalize each (action, state) row by its total number of transitions.
    # Rows without any observed transition are left as zeros
    totals = counts.sum(axis=2, keepdims=True)
    T = np.divide(counts, totals, out=np.zeros_like(counts), where=totals > 0)

    # Add probability 1 to move from terminal state to itself
    terminal_state_1 = state_dim - 2
    terminal_state_2 = state_dim - 1
    T[:, terminal_state_1, terminal_state_1] = 1
    T[:, terminal_state_2, terminal_state_2] = 1

    return T
def enviroment_generation(data, num_clusters):
    """
    Generate enviroment components.

    Input:  data - data frame with the columns 'drugrate_vaso' and 'drugrate_iv_fluids', the state variables
                   columns, and the columns that are used by add_states_and_rewards
            num_clusters - number of discrete (non-terminal) states
    Outputs: env_df - copy of the data with action, state, next_state and reward columns
             T - transition table of shape (25, num_clusters + 2, num_clusters + 2)
             R - reward vector of length num_clusters + 2 (100 / -100 on the two terminal states)
             state_centroids - the centroids of the state clusters
             vaso_quantiles, iv_quantiles - the quartiles that were used to discretize each drug rate
    """
    env_df = data.copy(deep=True)

    # Discretize each drug rate into a dosage level (0-4)
    vaso_levels, vaso_quantiles = quantiles_discretization(data["drugrate_vaso"])
    env_df["action_vaso"] = vaso_levels
    iv_levels, iv_quantiles = quantiles_discretization(data["drugrate_iv_fluids"])
    env_df["action_iv_fluids"] = iv_levels

    # Map every (vaso level, iv_fluids level) pair to a single action number:
    # (0, 0) -> 0, (0, 1) -> 1, ..., (4, 4) -> 24
    actions_map = {}
    for vaso_level in range(5):
        for iv_level in range(5):
            actions_map[(vaso_level, iv_level)] = len(actions_map)

    env_df['action'] = env_df.apply(
        lambda row: actions_map[(row["action_vaso"], row["action_iv_fluids"])],
        axis=1,
    )

    # Discretize the states, then add the state, next_state and reward columns
    state_clusters, state_centroids = clustering_states(env_df, state_variables, num_clusters)
    env_df = add_states_and_rewards(env_df, state_clusters)

    # Transition table: the clusters + 2 terminal states, and one action per levels pair (25 in total)
    state_dim = num_clusters + 2
    action_dim = len(actions_map)
    T = create_transition_table(env_df, state_dim, action_dim)

    # Reward vector: non-zero only on the terminal states (alive, and then expired)
    alive_state, expired_state = num_clusters, num_clusters + 1
    R = np.zeros(state_dim)
    R[alive_state] = 100
    R[expired_state] = -100

    return env_df, T, R, state_centroids, vaso_quantiles, iv_quantiles