# workflow.py
# -*- coding: utf-8 -*-
from multiprocessing import Pool, TimeoutError, Process, freeze_support
from searchWikipedia import getWikipediaLink
from searchGoogleimage import doImageSearch
from importantWords import getWordCloud
from textSummarize import summarize
from searchNews import doArticleSearch
from operator import itemgetter
from db import db_getAllCounter
import time
import os
from searchGoogletext import (getTextTopic, doSentimentAnalysis,
                              getMostRelevantEvent, getMostRelevantEntity,
                              getMostRelevantLocation)

upperBound = 5  # maximum number of words enriched with Wikipedia links and image results


def getBounds(category):
    # Debug helper: fetch and print the stored counters for the given category.
    result = db_getAllCounter(category)
    print result


def generateContent(originalContent):
    pool = Pool(processes=16)  # worker pool, only used by the parallel variant commented out below
    count = 0
    # mockData = {'Hackathon': 0.03434,
    #             'SAP': 0.111,
    #             'Jan Böhmermann': 0.001}
    # decode() returns a new unicode object, so the result has to be reassigned
    originalContent = originalContent.decode("utf-8")
    wordlist = getWordCloud(originalContent)
    # wordlist = mockData
    # Sort the words by importance, most important first.
    wordlist = sorted(wordlist.iteritems(), key=itemgetter(1), reverse=True)
    result = {}
    result['baseText'] = originalContent
    result['importantWords'] = []
    result['helpfulLinks'] = []
    result['imageLinks'] = []
    result['summary'] = summarize(originalContent)
    result['meta'] = {}
    mostImportantWord = wordlist[0][0]
    print mostImportantWord
    # Parallel variant, kept for reference: each analysis step runs in its own
    # worker process and its result is fetched with a timeout.
    # ressentiment = pool.apply_async(doSentimentAnalysis, (originalContent,))
    # resMostRelevantEntity = pool.apply_async(
    #     getMostRelevantEntity, (originalContent,))
    # res3 = pool.apply_async(getMostRelevantLocation, (originalContent,))
    # res4 = pool.apply_async(getMostRelevantEvent, (originalContent,))
    # res5 = pool.apply_async(getTextTopic, (originalContent,))
    # res6 = pool.apply_async(doArticleSearch, (mostImportantWord,))
    # try:
    #     result['meta']['sentiment'] = ressentiment.get(timeout=30)
    # except TimeoutError:
    #     print "We lacked patience and got a multiprocessing.TimeoutError"
    #     result['meta']['sentiment'] = []
    # try:
    #     result['meta']['person'] = resMostRelevantEntity.get(timeout=0)
    # except TimeoutError:
    #     print "We lacked patience and got a multiprocessing.TimeoutError"
    #     result['meta']['person'] = []
    # try:
    #     result['meta']['location'] = res3.get(timeout=0)
    # except TimeoutError:
    #     print "We lacked patience and got a multiprocessing.TimeoutError"
    #     result['meta']['location'] = []
    # try:
    #     result['meta']['event'] = res4.get(timeout=0)
    # except TimeoutError:
    #     print "We lacked patience and got a multiprocessing.TimeoutError"
    #     result['meta']['event'] = []
    # try:
    #     result['meta']['topic'] = res5.get(timeout=0)
    # except TimeoutError:
    #     print "We lacked patience and got a multiprocessing.TimeoutError"
    #     result['meta']['topic'] = []
    # try:
    #     result['meta']['news'] = res6.get(timeout=0)
    # except TimeoutError:
    #     print "We lacked patience and got a multiprocessing.TimeoutError"
    #     result['meta']['news'] = []
    # Synchronous variant: run each analysis step in this process, one after another.
    result['meta']['sentiment'] = doSentimentAnalysis(originalContent)
    result['meta']['person'] = getMostRelevantEntity(originalContent)
    result['meta']['location'] = getMostRelevantLocation(originalContent)
    result['meta']['event'] = getMostRelevantEvent(originalContent)
    result['meta']['topic'] = getTextTopic(originalContent)
    result['meta']['news'] = doArticleSearch(mostImportantWord)
    # Record every word with its importance; only the top `upperBound` words
    # additionally get a Wikipedia link and an image search result.
    for key, value in wordlist:
        result['importantWords'].append({
            'word': key,
            'importance': value})
        if count < upperBound:
            result['helpfulLinks'].append({
                'word': key,
                'url': getWikipediaLink(key)})
            result['imageLinks'].append({
                'word': key,
                'url': doImageSearch(key)})
            count += 1
    return result
# data = generateContent("afja fsfjasfd ü adsfadsf")
# Convert the message (unicode -> output encoding) and
# pass it to standard output with print.
# print data
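

# Minimal usage sketch, not part of the original pipeline: the sample text and
# the printed fields are illustrative assumptions, and the imported search and
# analysis modules must be configured (API credentials etc.) for this to run.
if __name__ == '__main__':
    freeze_support()  # needed on Windows if the multiprocessing pool is re-enabled
    sampleText = "SAP hosted a hackathon in Berlin last weekend."
    data = generateContent(sampleText)
    print data['summary']
    print data['meta']['topic']
    print data['importantWords']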