-
Notifications
You must be signed in to change notification settings - Fork 0
/
RKHS_SHAP_preprocess.py
120 lines (108 loc) · 4.77 KB
/
RKHS_SHAP_preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import shap
import pandas as pd
import os
import numpy as np
# Columns of the merged LOL table that are converted to integer category codes.
LOL_CATEGORICAL_COLUMNS = ["role", "position", "version", "platformid"]

# Columns of the merged LOL table that are rescaled to per-minute rates.
LOL_RATE_FEATURES = [
    "kills", "deaths", "assists", "killingsprees", "doublekills",
    "triplekills", "quadrakills", "pentakills", "legendarykills",
    "totdmgdealt", "magicdmgdealt", "physicaldmgdealt", "truedmgdealt",
    "totdmgtochamp", "magicdmgtochamp", "physdmgtochamp", "truedmgtochamp",
    "totheal", "totunitshealed", "dmgtoobj", "timecc", "totdmgtaken",
    "magicdmgtaken", "physdmgtaken", "truedmgtaken", "goldearned", "goldspent",
    "totminionskilled", "neutralminionskilled", "ownjunglekills",
    "enemyjunglekills", "totcctimedealt", "pinksbought", "wardsbought",
    "wardsplaced", "wardskilled",
]

# Friendly display names; columns that are not listed keep their raw name.
LOL_FULL_NAMES = {
    "kills": "Kills per min.",
    "deaths": "Deaths per min.",
    "assists": "Assists per min.",
    "killingsprees": "Killing sprees per min.",
    "longesttimespentliving": "Longest time living as % of game",
    "doublekills": "Double kills per min.",
    "triplekills": "Triple kills per min.",
    "quadrakills": "Quadra kills per min.",
    "pentakills": "Penta kills per min.",
    "legendarykills": "Legendary kills per min.",
    "totdmgdealt": "Total damage dealt per min.",
    "magicdmgdealt": "Magic damage dealt per min.",
    "physicaldmgdealt": "Physical damage dealt per min.",
    "truedmgdealt": "True damage dealt per min.",
    "totdmgtochamp": "Total damage to champions per min.",
    "magicdmgtochamp": "Magic damage to champions per min.",
    "physdmgtochamp": "Physical damage to champions per min.",
    "truedmgtochamp": "True damage to champions per min.",
    "totheal": "Total healing per min.",
    "totunitshealed": "Total units healed per min.",
    "dmgtoobj": "Damage to objects per min.",
    "timecc": "Time spent with crowd control per min.",
    "totdmgtaken": "Total damage taken per min.",
    "magicdmgtaken": "Magic damage taken per min.",
    "physdmgtaken": "Physical damage taken per min.",
    "truedmgtaken": "True damage taken per min.",
    "goldearned": "Gold earned per min.",
    "goldspent": "Gold spent per min.",
    "totminionskilled": "Total minions killed per min.",
    "neutralminionskilled": "Neutral minions killed per min.",
    "ownjunglekills": "Own jungle kills per min.",
    "enemyjunglekills": "Enemy jungle kills per min.",
    "totcctimedealt": "Total crowd control time dealt per min.",
    "pinksbought": "Pink wards bought per min.",
    "wardsbought": "Wards bought per min.",
    "wardsplaced": "Wards placed per min.",
    "turretkills": "# of turret kills",
    "inhibkills": "# of inhibitor kills",
    "dmgtoturrets": "Damage to turrets",
}


def to_signed_labels(y):
    """Recode a label vector to -1/+1.

    Entries that compare equal to zero (including ``False``) become -1;
    every other entry becomes +1.
    """
    return np.where(np.asarray(y) * 1.0 == 0, -1, 1)


def save_dataset(directory, X, y):
    """Write ``X`` and ``y`` to ``<directory>/X.npy`` and ``<directory>/y.npy``.

    The directory is created when missing; an existing directory is reused
    and existing files are overwritten.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(directory, exist_ok=True)
    with open(f'{directory}/X.npy', 'wb') as f:
        np.save(f, X)
    with open(f'{directory}/y.npy', 'wb') as f:
        np.save(f, y)


def load_lol_tables(prefix="archive/"):
    """Read the match CSVs found under ``prefix`` and merge them into one frame.

    Reads ``matches.csv``, ``participants.csv``, ``stats1.csv`` and
    ``stats2.csv``; the two stats files are concatenated before merging.
    """
    matches = pd.read_csv(prefix + "matches.csv")
    participants = pd.read_csv(prefix + "participants.csv")
    stats = pd.concat([
        pd.read_csv(prefix + "stats1.csv", low_memory=False),
        pd.read_csv(prefix + "stats2.csv", low_memory=False),
    ])
    merged = pd.merge(participants, matches, left_on="matchid", right_on="id")
    # NOTE(review): this joins participants.matchid against stats.id. If
    # stats.id is a per-participant key rather than a match key the join is
    # semantically off -- confirm against the CSV schema before relying on it.
    return pd.merge(merged, stats, left_on="matchid", right_on="id")


def prepare_lol_features(allstats, min_duration_s=10 * 60):
    """Turn the merged match table into a feature frame and a target series.

    Steps, in order: keep rows whose ``duration`` is at least
    ``min_duration_s``; replace the string category columns with integer
    codes; cast ``wardsbought`` to int32; split off ``win`` as the target;
    divide the rate features by the game length in minutes; divide
    ``longesttimespentliving`` by ``duration``; apply the friendly column
    names. The input frame is not modified.

    Returns:
        ``(X, y)`` -- the feature DataFrame and the raw ``win`` Series.
    """
    # Copy *after* filtering: the assignments below then act on an independent
    # frame instead of a slice of the caller's data (which is what makes
    # pandas emit its chained-assignment warning).
    games = allstats.loc[allstats["duration"] >= min_duration_s, :].copy()

    for column in LOL_CATEGORICAL_COLUMNS:
        games[column] = games[column].astype('category').cat.codes
    games["wardsbought"] = games["wardsbought"].astype(np.int32)

    X = games.drop(["win"], axis=1)
    y = games["win"]

    # "duration" is not itself a rate feature, so the divisor is loop-invariant.
    minutes = X["duration"] / 60
    for feature_name in LOL_RATE_FEATURES:
        X[feature_name] = X[feature_name] / minutes  # per minute rate

    # convert to fraction of game
    X["longesttimespentliving"] = X["longesttimespentliving"] / X["duration"]

    X.columns = [LOL_FULL_NAMES.get(name, name) for name in X.columns]
    return X, y


def preprocess_census(out_dir="census"):
    """Export ``shap.datasets.adult()`` as ``X.npy`` / ``y.npy`` in ``out_dir``."""
    X, y = shap.datasets.adult()
    print(X.columns)
    save_dataset(out_dir, X.values, to_signed_labels(y))


def preprocess_lol(prefix="archive/", out_dir="LOL"):
    """Export the match data under ``prefix`` as ``X.npy`` / ``y.npy`` in ``out_dir``."""
    X, y = prepare_lol_features(load_lol_tables(prefix))
    print(list(X.columns))
    save_dataset(out_dir, X.values, to_signed_labels(y.values))


if __name__ == '__main__':
    preprocess_census()
    ##############
    preprocess_lol()