-
Notifications
You must be signed in to change notification settings - Fork 0
/
TF-IDF.py
36 lines (32 loc) · 829 Bytes
/
TF-IDF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from collections import defaultdict
import math
def get_tfidf(d, w, t, articles):
    """Return 1 if any word's TF-IDF score is strictly greater than ``t``, else 0.

    For every distinct word found in the first ``d`` articles:

    * TF  = (occurrences of the word across those articles) / (d * w)
    * IDF = log(d / (number of those articles containing the word + 1))

    Args:
        d: Number of articles to examine; ``articles[0]`` .. ``articles[d-1]``
            are read.
        w: Per-article word count used in the TF denominator ``d * w``.
        t: Threshold the product TF * IDF is compared against.
        articles: Indexable collection of articles, each an iterable of
            hashable words.

    Returns:
        1 if at least one word satisfies ``TF * IDF > t``, otherwise 0
        (including when no words are present).

    Raises:
        IndexError: If ``articles`` holds fewer than ``d`` entries.
        ZeroDivisionError: If ``d * w`` is zero while at least one word exists.
    """
    term_counts = defaultdict(int)  # word -> total occurrences
    doc_counts = defaultdict(int)   # word -> number of articles containing it

    for index in range(d):
        seen_here = set()
        for word in articles[index]:
            term_counts[word] += 1
            # Count each article at most once per word for document frequency.
            if word not in seen_here:
                seen_here.add(word)
                doc_counts[word] += 1

    total_words = d * w  # loop-invariant TF denominator
    for word, count in term_counts.items():
        tf = count / total_words
        # The "+ 1" smooths the document frequency, so a word present in
        # every article gets a negative IDF rather than exactly zero.
        if tf * math.log(d / (doc_counts[word] + 1)) > t:
            return 1
    return 0
# Driver: read every test case from stdin, then print one 0/1 result per case.
# Input layout: the first line is the number of cases; each case starts with a
# "d w t" header line followed by d lines, one article per line.
n = int(input())
output = []
for _ in range(n):
    # split() with no argument tolerates trailing or repeated whitespace;
    # split(' ') would produce empty tokens, making float('') raise ValueError
    # here and counting empty strings as words in the articles below.
    d, w, t = map(float, input().split())
    d = int(d)  # parsed as float above; range() and indexing need an int
    articles = [input().split() for _ in range(d)]
    output.append(get_tfidf(d, w, t, articles))
# Results are collected first so all input is consumed before anything prints.
for o in output:
    print(o)