-
Notifications
You must be signed in to change notification settings - Fork 0
/
featurefunctions.py
51 lines (34 loc) · 1.4 KB
/
featurefunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from collections import Counter
from itertools import product, combinations
def cross_product_features(x, y):
    """Count one feature per (element of x, element of y) pairing.

    Every pairing produced by ``itertools.product(x, y)`` yields a feature
    named ``'<a>|<b>'``; repeated pairings accumulate in the count.

    Returns:
        A ``Counter`` mapping feature names to occurrence counts.
    """
    counts = Counter()
    for left, right in product(x, y):
        counts['%s|%s' % (left, right)] += 1
    return counts
def null_features(x, y):
    """Return an empty feature vector, ignoring both arguments.

    Per the original author's note, using this alone is equivalent to
    random guessing because of the way SGD breaks ties.
    """
    return Counter()
def attribute_type_features(x, y):
    """Count the attribute types appearing in ``y``.

    The type of an element is the text before its first ``':'`` (the whole
    element when there is no colon); each occurrence adds one to the
    feature ``'<type>:*'``. The ``x`` argument is not used.

    Returns:
        A ``Counter`` mapping ``'<type>:*'`` to the number of elements of
        ``y`` having that type.
    """
    counts = Counter()
    for attribute in y:
        attr_type = attribute.split(':')[0]
        counts['%s:*' % attr_type] += 1
    return counts
def attribute_pair_features(x, y):
    """Build pairwise features over mentioned / not-mentioned attribute types.

    "Mentioned" types are the attribute types found in ``y``; "not
    mentioned" types are those found in ``x`` but absent from ``y`` (both
    obtained through ``attribute_type_features``, which only inspects its
    second argument). Three families of features are emitted, each with
    count 1:

    * ``'<a>+<b>'``       for every pair of distinct mentioned types,
    * ``'<a>+NO_<b>'``    for every (mentioned, not-mentioned) pairing,
    * ``'NO_<a>+NO_<b>'`` for every pair of distinct not-mentioned types.

    Returns:
        A ``Counter`` mapping feature names to counts.
    """
    mentioned_types = set(attribute_type_features(x, y))
    # Passing x as the second argument yields the types present in x.
    available_types = set(attribute_type_features(x, x))

    # Sort before pairing: set iteration order depends on string hashing,
    # which is randomised per process, so unsorted pairs could come out as
    # 'A+B' in one run and 'B+A' in another. Sorting gives every unordered
    # pair a single canonical feature name.
    mentioned = sorted(mentioned_types)
    not_mentioned = sorted(available_types - mentioned_types)

    both_mentioned = ['%s+%s' % (a, b)
                      for a, b in combinations(mentioned, 2)]
    mixed = ['%s+NO_%s' % (a, b)
             for a, b in product(mentioned, not_mentioned)]
    neither_mentioned = ['NO_%s+NO_%s' % (a, b)
                         for a, b in combinations(not_mentioned, 2)]
    return Counter(both_mentioned + mixed + neither_mentioned)
def attribute_count_features(x, y):
    """Emit a single feature encoding how many elements ``y`` contains.

    The feature is named ``'#:<n>'`` where ``n`` is ``len(y)``, and has
    count 1. The ``x`` argument is not used.
    """
    size_feature = '#:%d' % len(y)
    counts = Counter()
    counts[size_feature] += 1
    return counts
def falsehood_features(x, y):
    """Count the elements of ``y`` that do not occur in ``x``.

    Each such element adds one to the single feature ``'[FALSEHOOD]'``.
    When every element of ``y`` is in ``x`` (or ``y`` is empty) the result
    is an empty ``Counter`` with no ``'[FALSEHOOD]'`` key at all.
    """
    counts = Counter()
    for attribute in y:
        if attribute in x:
            continue
        counts['[FALSEHOOD]'] += 1
    return counts
def phi(feat_names):
    """Create a feature function that combines the named extractors.

    Args:
        feat_names: iterable of keys into the module-level ``FEATURES``
            registry. It is iterated on every call of the returned
            function, not when ``phi`` itself is called.

    Returns:
        A function ``features(x, y)`` that looks up each named extractor
        in ``FEATURES``, calls it with ``(x, y)``, and sums all resulting
        counts into one ``Counter``. An unknown name raises ``KeyError``
        when the returned function is called.
    """
    def features(x, y):
        combined = Counter()
        # Lazy lookup keeps the original lookup -> call -> merge order
        # for each name in turn.
        for extractor in (FEATURES[key] for key in feat_names):
            combined.update(extractor(x, y))
        return combined

    return features
# Registry of the available feature extractors, keyed by the short names
# that ``phi`` accepts. Every value is a function taking ``(x, y)`` and
# returning a ``Counter`` of feature counts.
FEATURES = {
    'null': null_features,
    'cross_product': cross_product_features,
    'attr_type': attribute_type_features,
    'attr_count': attribute_count_features,
    'attr_pair': attribute_pair_features,
    'falsehood': falsehood_features,
}