-
Notifications
You must be signed in to change notification settings - Fork 24
/
utils.py
111 lines (94 loc) · 3.58 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
import tensorflow as tf
def calc_auc(raw_arr):
    """Compute the ROC AUC from labelled prediction records.

    Args:
        raw_arr: iterable of records indexable as ``[noclick, click, score]``:
            ``record[0]`` is added to the negative (noclick) total,
            ``record[1]`` to the positive (click) total, and ``record[2]`` is
            the predicted score used for ranking.

    Returns:
        The AUC as a float when both classes are present.
        ``-0.5`` when the click total or the noclick total exceeds
        ``len(raw_arr) - 1e-3`` (every record belongs to a single class, which
        also covers empty input), signalling that the result should be
        discarded.
        ``None`` when the product of the two totals is not positive.
    """
    # Sort by predicted score, from small to big.
    arr = sorted(raw_arr, key=lambda d: d[2])
    total = len(arr)

    area = 0.0
    fp_prev, tp_prev = 0.0, 0.0
    fp_cum, tp_cum = 0.0, 0.0
    for pos, record in enumerate(arr):
        fp_cum += record[0]  # noclick
        tp_cum += record[1]  # click
        # Records sharing one score form a single point on the curve, so the
        # trapezoid is only closed once the whole tie group is accumulated.
        # Closing it per record would make the result depend on the arbitrary
        # order of tied records.
        closes_group = pos + 1 == total or arr[pos + 1][2] != record[2]
        if closes_group:
            area += (fp_cum - fp_prev) * (tp_cum + tp_prev)
            fp_prev, tp_prev = fp_cum, tp_cum

    # If all records are noclick or all are click, discard.
    threshold = total - 1e-3
    if tp_cum > threshold or fp_cum > threshold:
        return -0.5

    if tp_cum * fp_cum > 0.0:  # normal auc
        # `area` is twice the area accumulated while walking scores upwards,
        # which is the complement of the AUC once normalised.
        return 1.0 - area / (2.0 * tp_cum * fp_cum)
    return None
def auc_arr(score_p, score_n):
    """Label two batches of scores as ``[noclick, click, score]`` records.

    Args:
        score_p: object whose ``.numpy()`` result is iterated; each element
            ``s`` becomes the record ``[0, 1, s]``.
        score_n: object whose ``.numpy()`` result is iterated; each element
            ``s`` becomes the record ``[1, 0, s]``.

    Returns:
        A list holding every ``score_p`` record followed by every
        ``score_n`` record.
    """
    click_records = [[0, 1, score] for score in score_p.numpy()]
    noclick_records = [[1, 0, score] for score in score_n.numpy()]
    return click_records + noclick_records
def eval(model, test_data):
    """Evaluate ``model`` over ``test_data`` and return ``(test_gauc, auc)``.

    Each item of ``test_data`` is unpacked as ``(u, i, j, hist_i, sl)``. The
    model is called once with ``i`` and once with ``j``; each call returns an
    ``(out, logit)`` pair.

    Args:
        model: callable invoked as ``model(u, item, hist_i, sl)``.
        test_data: iterable of 5-tuples that also supports ``len()``.

    Returns:
        test_gauc: the number of positions where the ``i`` output exceeds the
            ``j`` output, summed over all items and divided by
            ``len(test_data)``.
        auc: ``calc_auc`` applied to the logits of both calls, labelled by
            ``auc_arr`` (``i`` logits as click, ``j`` logits as noclick).
    """
    pairwise_wins = 0.0
    labelled_scores = []
    for u, i, j, hist_i, sl in test_data:
        pos_out, pos_logit = model(u, i, hist_i, sl)
        neg_out, neg_logit = model(u, j, hist_i, sl)

        # 1.0 wherever the `i` output beats the `j` output.
        pos_beats_neg = tf.cast(pos_out > neg_out, dtype=tf.float32)
        pairwise_wins += tf.reduce_sum(pos_beats_neg)

        labelled_scores.extend(auc_arr(pos_logit, neg_logit))

    return pairwise_wins / len(test_data), calc_auc(labelled_scores)
def sequence_mask(lengths, maxlen=None, dtype=tf.bool):
    """Return a mask tensor marking the first N positions of each cell.

    If `lengths` has shape `[d_1, d_2, ..., d_n]` the resulting tensor `mask`
    has dtype `dtype` and shape `[d_1, d_2, ..., d_n, maxlen]`, with

    ```
    mask[i_1, i_2, ..., i_n, j] = (j < lengths[i_1, i_2, ..., i_n])
    ```

    Examples:

    ```python
    sequence_mask([1, 3, 2], 5)  # [[True, False, False, False, False],
                                 #  [True, True, True, False, False],
                                 #  [True, True, False, False, False]]

    sequence_mask([[1, 3], [2, 0]])  # [[[True, False, False],
                                     #   [True, True, True]],
                                     #  [[True, True, False],
                                     #   [False, False, False]]]
    ```

    Args:
        lengths: integer array-like of any rank.
        maxlen: scalar integer, size of the last dimension of the returned
            tensor. Defaults to the maximum value in `lengths` (0 when
            `lengths` is empty).
        dtype: output type of the resulting tensor; `None` leaves the
            boolean mask uncast.

    Returns:
        A mask tensor of shape `lengths.shape + (maxlen,)`, cast to the
        specified dtype.

    Raises:
        ValueError: if `maxlen` is not a scalar.
    """
    lengths = np.asarray(lengths)

    if maxlen is None:
        # Reduce over every axis: the builtin max() only yields a scalar for
        # 1-D input, so nested `lengths` would otherwise break range/arange.
        maxlen = int(lengths.max()) if lengths.size else 0
    elif np.ndim(maxlen) != 0:
        raise ValueError("maxlen must be scalar for sequence_mask")
    else:
        maxlen = int(maxlen)

    # Compare a row vector of positions, e.g. [0, 1, 2, 3, 4], against the
    # lengths with a trailing axis of size 1, e.g. [[1], [3], [2]].
    # Broadcasting yields a mask of shape lengths.shape + (maxlen,).
    positions = np.arange(maxlen)
    mask = positions < np.expand_dims(lengths, -1)

    mask_tensor = tf.convert_to_tensor(mask)
    if dtype is None:
        return mask_tensor
    return tf.cast(mask_tensor, dtype)