-
Notifications
You must be signed in to change notification settings - Fork 0
/
spacy_rouge_no.py
106 lines (85 loc) · 3.31 KB
/
spacy_rouge_no.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from collections import OrderedDict
import nlp
import os
import numpy as np
import spacy
import networkx as nx
from evaluate_rouge import make_sentences_into_summary, total_rouge, make_random_comparison
import matplotlib.pyplot as plt
from spacy.lang.nb.stop_words import STOP_WORDS
# ROUGE metric instance used to score the generated summaries.
rouge = nlp.load_metric('rouge')
# Directory of input documents (placeholder — set to the real corpus path).
path = "FILE_PATH"
# doc index -> article body text
text_dict = {}
# doc index -> pre-written reference ("gold") summary for that article
predone_summary_dict = {}
# Parallel accumulators, one entry per document:
# reference summaries, TextRank-generated summaries, random-baseline summaries.
list_of_highlights = []
list_of_summaries = []
list_of_random_summaries = []
def make_spacy_gen_indexes_into_list(spacy):
    """Materialize an iterable of sentences (e.g. spaCy's ``doc.sents``
    generator) into a list so it can be indexed repeatedly.

    NOTE(review): the parameter name shadows the imported ``spacy`` module
    inside this function body; kept unchanged for caller compatibility.
    """
    # list() consumes the generator in one pass — equivalent to the
    # original append loop, without the manual bookkeeping.
    return list(spacy)
# Read every file under `path`. Each file is expected to hold the reference
# summary and the article body separated by "===" (summary first).
for doc_id, entry in enumerate(os.listdir(path)):
    filename = os.path.join(path, entry)
    # Explicit encoding so the split/parse does not depend on the
    # platform's default locale encoding.
    with open(filename, "r", encoding="utf-8") as myfile:
        text = myfile.read()
    new_text = text.split("===")
    # NOTE(review): assumes "===" occurs in every file; a file without it
    # raises IndexError on new_text[1] — confirm the input format.
    predone_summary_dict[doc_id] = new_text[0]
    text_dict[doc_id] = new_text[1]
from operator import itemgetter

# Load the Norwegian spaCy model ONCE. The original reloaded it inside the
# per-document loop, which is loop-invariant and very expensive.
nlp_nb = spacy.load('nb_core_news_lg')

# TextRank-style extractive summarization: build a sentence-similarity
# graph per document, rank sentences with PageRank, and collect the
# generated / random-baseline / reference summaries into the module-level
# accumulator lists.
for key, document_text in text_dict.items():
    doc = nlp_nb(document_text)
    list_of_sentences = make_spacy_gen_indexes_into_list(doc.sents)

    # Compute each unordered pair's similarity exactly once
    # (the original recomputed it up to three times per pair and tracked
    # visited pairs with an O(n^2)-membership list).
    similarity_and_tuples = []
    largest_sim = 0
    for i in range(len(list_of_sentences)):
        for j in range(i + 1, len(list_of_sentences)):
            sim = list_of_sentences[i].similarity(list_of_sentences[j])
            if sim > largest_sim:
                largest_sim = sim
            similarity_and_tuples.append((i, j, sim))

    # Keep only edges whose similarity exceeds a fraction of the strongest
    # similarity seen in this document.
    G = nx.Graph()
    threshold = 0.6
    for src_node, dest_node, weight in similarity_and_tuples:
        if weight > threshold * largest_sim:
            G.add_edge(src_node, dest_node, weight=weight)

    # Visualize the similarity graph (blocks until the window is closed).
    pos = nx.spring_layout(G)                      # compute graph layout
    nx.draw(G, pos, node_size=700)                 # draw nodes and edges
    nx.draw_networkx_labels(G, pos)                # draw node labels/names
    labels = nx.get_edge_attributes(G, 'weight')   # draw edge weights
    nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
    plt.show()

    # Rank sentences by PageRank score, most important first.
    # (The original called plt.show() a second time here with nothing new
    # drawn, and computed max_key / list_of_order without using them.)
    pr = nx.pagerank(G, max_iter=1000, alpha=0.9)
    list_of_result_tuples = sorted(pr.items(), key=itemgetter(1), reverse=True)

    # Assemble summaries in order of importance, plus a random baseline
    # and the reference highlight for later ROUGE scoring.
    list_of_summaries.append(make_sentences_into_summary(list_of_result_tuples, list_of_sentences))
    list_of_random_summaries.append(make_random_comparison(list_of_sentences))
    list_of_highlights.append(predone_summary_dict[key])
# Score the graph-ranked summaries against the reference highlights.
print("actual rouge:")
total_rouge(list_of_summaries, list_of_highlights, rouge)
# Random-sentence baseline, scored with a FRESH metric instance so any
# predictions already added to `rouge` above do not leak into this run.
print("random rouge")
rouge_rand = nlp.load_metric('rouge')
total_rouge(list_of_random_summaries, list_of_highlights, rouge_rand)
print("DONE")