forked from edwardhuahan/twitter-covid-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph_1.py
101 lines (74 loc) · 3.23 KB
/
graph_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
""" CSC110 Final Project
This file is Copyright (c) 2021 Edward Han, Zekun Liu, Arvin Gingoyon
"""
import plotly.express as px
import pandas as pd
from tweet import Tweet
def categorize(
        list_of_topics: list[str],
        tweet_data: list[tuple[Tweet, dict[str, float], set[str]]]
) -> dict[str, list[dict]]:
    """ Return a dictionary mapping each topic in list_of_topics to a list of the
    score dictionaries of every tweet in tweet_data that is tagged with that topic.

    Each element of tweet_data is a tuple whose index 1 is the tweet's score dictionary
    and whose index 2 is the set of topics attached to the tweet. The score dictionaries
    are stored as-is (not copied), in the order the tweets appear in tweet_data.

    The keys of the result are exactly the topics in list_of_topics: a topic with no
    matching tweet maps to an empty list, and a topic that appears on a tweet but not in
    list_of_topics is ignored.

    >>> categorize(['a', 'b'], [(None, {'neg': 0.5}, {'a', 'c'})])
    {'a': [{'neg': 0.5}], 'b': []}
    """
    score_data_so_far = {topic: [] for topic in list_of_topics}

    for tweet in tweet_data:
        scores = tweet[1]
        for topic in tweet[2]:
            # Skip topics that were not requested instead of failing with a KeyError.
            if topic in score_data_so_far:
                score_data_so_far[topic].append(scores)

    return score_data_so_far
# categorize returns {topic1: [{neg, pos, comp}, ...], topic2: [{neg, pos, comp}, ...]}
# average_list_dict is a helper for average_scores below
def average_list_dict(lst: list[dict[str, float]]) -> dict[str, float]:
    """ Return a dictionary mapping each sentiment key found in the score dictionaries of
    lst to the sum of its values divided by the number of dictionaries in lst.

    A key missing from some dictionaries is still divided by len(lst).
    An empty lst gives an empty dictionary.
    """
    totals = {}
    for score_dict in lst:
        for sentiment, value in score_dict.items():
            totals[sentiment] = totals.get(sentiment, 0) + value

    count = len(lst)
    # totals is empty whenever lst is empty, so this never divides by zero.
    return {sentiment: total / count for sentiment, total in totals.items()}
def average_scores(topic_dict: dict[str, list[dict[str, float]]]) -> dict[str, dict[str, float]]:
    """ Return a dictionary mapping each topic in topic_dict to the averaged score
    dictionary that average_list_dict computes from that topic's list of scores."""
    return {
        topic: average_list_dict(score_dicts)
        for topic, score_dicts in topic_dict.items()
    }
# average_scores returns {topic1: {neg, pos, comp}, topic2: {neg, pos, comp}}
def graph_1(data: dict[str, dict[str, float]]) -> None:
    """ Create and show a bar graph of the sentiment scores (negative, positive, compound;
    selected with the animation slider) of each covid related topic in data.

    data maps each topic to its dictionary of scores. Every score except the one stored
    under the key 'neu' is plotted by its absolute value. The plotted scores are labelled
    Negative, Positive, Compound in the order they appear in the topic's dictionary, so
    each dictionary is assumed to list them in that order.

    A topic with fewer than three plotted scores (for example an empty score dictionary)
    contributes only the scores it has, so the DataFrame columns always have equal length.
    """
    sentiment_labels = ['Negative', 'Positive', 'Compound']

    sentiments = []
    topics = []
    scores = []
    for topic, topic_scores in data.items():
        plotted = [abs(score) for name, score in topic_scores.items() if name != 'neu']
        # zip stops at the shorter sequence, which keeps the three columns in lockstep
        # even when a topic has no scores at all.
        for label, score in zip(sentiment_labels, plotted):
            sentiments.append(label)
            topics.append(topic)
            scores.append(score)

    df = pd.DataFrame({
        'Sentiment': sentiments,
        'Topics': topics,
        'Sentiment Score': scores
    })

    # graph dataframe
    fig1 = px.bar(df, x='Topics', y='Sentiment Score', animation_frame='Sentiment',
                  animation_group='Topics', range_y=[0, 1], color='Sentiment', barmode='group')
    fig1.update_layout(title_text='Topics to Average Sentiment Scores')
    fig1.show()
    # To save the figure to a file instead: fig1.write_html('graph1.html')
if __name__ == '__main__':
    import python_ta
    import python_ta.contracts

    # Set the contracts debug flag to False, then switch on contract checking.
    python_ta.contracts.DEBUG_CONTRACTS = False
    python_ta.contracts.check_all_contracts()

    # Configuration passed to the python_ta check of this module.
    # IMPORTANT: keep this code indented inside the "if __name__ == '__main__'" block
    ta_config = {
        'extra-imports': ['plotly.express', 'pandas', 'tweet'],
        'allowed-io': [],  # the names (strs) of functions that call print/open/input
        'max-line-length': 100,
        'disable': ['R1705', 'C0200']
    }
    python_ta.check_all(config=ta_config)