-
Notifications
You must be signed in to change notification settings - Fork 1
/
Main3.py
75 lines (60 loc) · 2.32 KB
/
Main3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# you may need to pip install python-docx
# you may need to pip install gingerit
import ParseDocx
from gingerit.gingerit import GingerIt
import os
import sys
def Main():
    """Generate question/answer flashcards for each section of a Word document.

    The document path is the first command-line argument when one is given;
    otherwise the hard-coded ``word_doc`` value below is used.

    Returns:
        dict: one entry per key of the mapping returned by
        ``ParseDocx.ParseDocx``; each value is a list of
        ``{"question": ..., "answer": ...}`` dicts, one for every sentence
        for which the question generator produced a flashcard.
    """
    # Speedup: bash runStanfordParserServer.sh, bash runSSTServer.sh
    # The NLP dependencies are imported once here, at function scope, so a
    # missing package is reported before any document parsing starts.
    # NOTE: do not add `import sys` here -- it would make `sys` local to
    # Main and break the `sys.argv` lookup below.
    import time

    import lexnlp.nlp.en.segments.sentences as lex_sentences
    import neuralcoref
    import question_generator as gen
    import spacy

    # Put direct path to word doc here, or pass as command line argument
    word_doc = ''
    if len(sys.argv) > 1:
        word_doc = sys.argv[1]

    # Parse and Process
    # ParseDocx returns dictionary {section: 'section text', section: ''}
    parsed = ParseDocx.ParseDocx(word_doc)
    if not parsed:
        # Nothing to process: skip the (slow) model load entirely.
        return {}

    grammar = GingerIt()

    # Build the coreference pipeline once and reuse it for every section;
    # loading the spaCy model per section repeats identical work.
    pronouns = spacy.load('en')
    neuralcoref.add_to_pipe(
        pronouns, greedyness=0.5, max_dist=100, blacklist=False
    )

    # Flashcard Generation
    def process(section_text):
        """Return the list of flashcard dicts generated from one section."""
        # Load
        start_time = time.perf_counter()
        brief = section_text
        print("--- %s seconds to Load ---" % (time.perf_counter() - start_time))

        # Replace pronouns with the mentions they refer to. The timing no
        # longer includes the model load, which now happens once in Main.
        start_time = time.perf_counter()
        brief = pronouns(brief)._.coref_resolved
        print("--- %s seconds to Pronoun Fix ---" % (time.perf_counter() - start_time))

        # Tokenize
        start_time = time.perf_counter()
        # Materialised so the sentence splitting is counted in this step's
        # timing rather than in the question-generation loop.
        sentences = list(lex_sentences.get_sentence_list(brief))
        # NOTE(review): kept per-section because it is not visible here
        # whether QuestionGenerator carries state between calls.
        questions = gen.QuestionGenerator()
        print("--- %s seconds to Tokenize ---" % (time.perf_counter() - start_time))

        # Generate questions
        start_time = time.perf_counter()
        cards = []
        for sentence in sentences:
            flashcard = questions.generate_question(sentence)
            if not flashcard:
                continue
            # Only the first generated question per sentence is used.
            first = flashcard[0]
            cards.append({
                "question": grammar.parse(first['Q'])['result'],
                "answer": grammar.parse(first['A'])['result'],
            })
        print("--- %s seconds to Generate Questions ---" % (time.perf_counter() - start_time))
        return cards

    result = {}
    for section, text in parsed.items():
        result[section] = process(text)
    # print(result)
    return result
# Script entry point: run the flashcard pipeline when executed directly.
# The dictionary returned by Main() is not used here.
if __name__ == "__main__":
    Main()