# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.16.4
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---
# %% [markdown]
# # Algorithm Metrics
#
# This notebook shows algorithm metrics over the release branches.
# %%
from pathlib import Path

from packaging.version import parse as parse_version
# %%
import pandas as pd
import seaborn as sns
# %% [markdown]
# **Note:** LensKit 0.14 and earlier defaulted to using rating values to compute
# the gain for nDCG. This change in defaults is the primary cause of the overall
# shifts in nDCG in LensKit 2025.1 and later.
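# %% [markdown]
# To make the difference concrete, here is a minimal sketch of DCG under both
# gain conventions. This is an illustration with made-up ratings, not
# LensKit's actual implementation:
# %%
import numpy as np

def dcg(gains):
    """Discounted cumulative gain with log2 position discounting."""
    ranks = np.arange(1, len(gains) + 1)
    return np.sum(gains / np.log2(ranks + 1))

# hypothetical ratings for the top 5 recommended items, in rank order
# (0 means the user did not rate the item)
ratings = np.array([4.0, 0.0, 5.0, 3.0, 0.0])

# old default: the rating value itself is the gain
rating_dcg = dcg(ratings)
# newer default: binary gain, 1 for any rated (relevant) item
binary_dcg = dcg((ratings > 0).astype(float))

rating_dcg, binary_dcg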
# %% [markdown]
# ## Load DVC metrics
#
# Let's load all the DVC metrics:
# %%
run_dir = Path('runs')
# %%
metrics = {}
for file in run_dir.glob('*/metrics.csv'):
    # each run directory is named for the version it tests (or 'main')
    ver = file.parent.name
    metrics[ver] = pd.read_csv(file)
list(metrics.keys())
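# %% [markdown]
# Peek at one of the loaded frames (assuming at least one run directory was
# found) to see the per-run metric columns:
# %%
next(iter(metrics.values())).head()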
# %%
# order the release versions numerically, with the development 'main' run last
versions = [k for k in metrics.keys() if k != 'main']
versions.sort(key=parse_version)
versions.append('main')
versions
# %% [markdown]
# Now let's collect all these metrics into a frame:
# %%
mdf = pd.concat(metrics, names=['version'])
# pull version out of index
mdf.reset_index('version', inplace=True)
# drop remaining index
mdf.reset_index(drop=True, inplace=True)
# set up category and ordering
mdf = mdf.astype({'version': 'category'})
mdf['version'] = mdf['version'].cat.reorder_categories(versions)
mdf
# %% [markdown]
# And get data sets & algorithms from run keys:
# %%
mdf['data'] = mdf['run'].str.replace(r'^(\w+)-.*', r'\1', regex=True)
mdf['algo'] = mdf['run'].str.replace(r'^\w+-(.*)', r'\1', regex=True)
mdf
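# %% [markdown]
# The same split can also be done in one pass with `str.extract`; like the
# regexes above, this assumes the run keys follow a `<data>-<algo>` pattern:
# %%
mdf['run'].str.extract(r'^(?P<data>\w+)-(?P<algo>.*)$').head()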
# %% [markdown]
# ## ALS Results
#
# Let's first look at biased MF from ALS:
# %%
als = mdf[mdf['algo'] == 'ALS']
sns.lineplot(x='version', y='GRMSE', hue='data', data=als)
# %%
als = mdf[mdf['algo'] == 'ALS']
sns.lineplot(x='version', y='nDCG', hue='data', data=als)
# %% [markdown]
# ## Item-Item Results
#
# Now the item-item results:
# %%
ii_exp = mdf[mdf['algo'] == 'II']
sns.lineplot(x='version', y='GRMSE', hue='data', data=ii_exp)
# %%
ii_exp = mdf[mdf['algo'] == 'II']
sns.lineplot(x='version', y='nDCG', hue='data', data=ii_exp)
# %% [markdown]
# ## User-User Results
#
# %%
uu_exp = mdf[mdf['algo'] == 'UU']
sns.lineplot(x='version', y='GRMSE', hue='data', data=uu_exp)
# %%
uu_exp = mdf[mdf['algo'] == 'UU']
sns.lineplot(x='version', y='nDCG', hue='data', data=uu_exp)
# %% [markdown]
# ## IALS Results
#
# Now let's look at matrix factorization for implicit feedback, trained with ALS:
# %%
ials = mdf[mdf['algo'] == 'IALS']
sns.lineplot(x='version', y='nDCG', hue='data', data=ials)
# %% [markdown]
# ## Implicit BPR Results
#
# We also test the BPR implementation from the Implicit package.
# %%
bpr = mdf[mdf['algo'] == 'impBPR']
sns.lineplot(x='version', y='nDCG', hue='data', data=bpr)
# %% [markdown]
# ## Combined Algorithm Results
#
# Let's look at all the algorithms together as a point plot:
# %%
grid = sns.catplot(x='version', y='nDCG', hue='data', col='algo', data=mdf, kind='point')
grid
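# %% [markdown]
# `catplot` returns a seaborn `FacetGrid`; if we want to keep this figure, we
# can write it to disk (the file name here is just an example):
# %%
grid.savefig('algo-metrics.png')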