-
Notifications
You must be signed in to change notification settings - Fork 2
/
sent.py
92 lines (74 loc) · 2.79 KB
/
sent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from __future__ import absolute_import, division
import csv
import argparse
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from builtins import range, str, zip
from datetime import datetime
from io import open
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# CSV file name constant. Nothing in the visible part of this file reads it;
# the input path is taken from the -i/--input command-line option instead.
CALTECH_FILE = 'caltech.csv'
def plot_sentiments(filename, outfilename, analyzer=None):
    """Score every confession in a text file and write the scores to a CSV.

    Despite its name this function draws no plot: it reads ``filename``,
    scores each confession, and writes one CSV row per confession to
    ``outfilename``.

    Each input line is split at its first comma only. The text before that
    comma is the timestamp and everything after it is the confession, so
    commas inside the confession are preserved. Lines without any comma
    (for example blank lines) are skipped instead of raising IndexError,
    and the line terminator is not included in the confession text.

    Args:
        filename: Path of the input file, one ``timestamp,confession``
            entry per line.
        outfilename: Path of the CSV file to write. Its columns are
            confession, compound, positive, negative, neutral, timestamp.
        analyzer: Optional object with a ``polarity_scores(text)`` method
            returning a mapping with the keys 'compound', 'pos', 'neg'
            and 'neu'. When omitted, a new SentimentIntensityAnalyzer is
            created, which matches the previous behaviour.
    """
    entries = []
    with open(filename) as source:
        for line in source:
            timestamp, comma, confession = line.rstrip('\n').partition(',')
            if not comma:
                # No separator on this line, so there is no confession.
                continue
            entries.append((timestamp, confession))

    # Progress output kept from the original script: number of confessions.
    print(len(entries))

    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()

    rows = []
    for timestamp, confession in entries:
        scores = analyzer.polarity_scores(confession)
        # Scores stay stringified, as before, so the CSV text is unchanged.
        rows.append([
            confession,
            str(scores['compound']),
            str(scores['pos']),
            str(scores['neg']),
            str(scores['neu']),
            timestamp,
        ])

    columns = ["confession", "compound", "positive", "negative", "neutral",
               "timestamp"]
    frame = pd.DataFrame(rows, columns=columns)
    frame.to_csv(outfilename, index=False, header=True)
if __name__ == "__main__":
    # Command-line entry point: both options are mandatory and their values
    # are handed straight to plot_sentiments.
    cli = argparse.ArgumentParser(description="Plots sentiment data from csv")
    option_table = (
        ('-i', '--input', 'filename', "Path to CSV input"),
        ('-o', '--output', 'outfilename', "Path to CSV output"),
    )
    for short_flag, long_flag, target, help_text in option_table:
        cli.add_argument(short_flag, long_flag, action='store', dest=target,
                         type=str, required=True, help=help_text)
    options = cli.parse_args()
    plot_sentiments(options.filename, options.outfilename)