-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathBook.py
58 lines (51 loc) · 1.73 KB
/
Book.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# python3
from textblob import TextBlob
'''
NLTK corpora also need to be installed:
run `python -m textblob.download_corpora` from the command line
to install.
'''
import pandas as pd
import collections
import re
class Book:
    '''A plain-text book on disk with sentiment and word-frequency helpers.

    Each method caches its result on the instance (``reader``, ``tb``,
    ``df``, ``df_freq``) and also returns it.
    '''

    kind = 'book'

    def __init__(self, filename, **kwargs):
        '''Remember the path of the text file to analyse.

        Extra keyword arguments are accepted and ignored.
        '''
        self.filename = filename

    def read_book(self):
        '''Read the whole file into ``self.reader`` and return it.

        Python 3 only: the text comes back as ``str`` (under Python 2 the
        result had to be wrapped in ``unicode()`` before use).
        '''
        with open(self.filename, 'r') as t:
            self.reader = t.read()
        return self.reader

    def textblobify(self):
        '''Re-read the file, wrap it in a TextBlob, store it in ``self.tb``.'''
        self.read_book()
        self.tb = TextBlob(self.reader)
        return self.tb

    def sentimentify(self):
        '''Score every sentence's polarity and return the scores as a frame.

        Side effect: (over)writes ``temp.csv`` in the current working
        directory with one ``number,polarity`` row per sentence.

        Returns the DataFrame loaded back from that file, indexed by the
        sentence number; it is also stored in ``self.df``.
        '''
        self.textblobify()
        # Open the file once and always emit the header, so a text with no
        # sentences still yields a fresh file instead of a stale/missing one.
        with open('temp.csv', 'w') as text_file:
            # NOTE(review): the trailing comma declares a third, unnamed
            # column -- presumably unintended; kept so the CSV and the
            # resulting frame layout stay exactly as before.
            text_file.write('number,' + 'polarity,' + '\n')
            for i, sentence in enumerate(self.tb.sentences):
                pol = sentence.sentiment.polarity
                text_file.write(str(i) + ',' + str(pol) + '\n')
        # DataFrame.from_csv was removed in pandas 1.0; read_csv with the
        # first column as index is the supported replacement.
        self.df = pd.read_csv('temp.csv', index_col=0)
        return self.df

    def count_words(self, n):
        '''Return the ``n`` most common words as a DataFrame.

        Words are runs of ``\\w`` characters taken from the lower-cased
        text. The frame has the columns ``word`` and ``freq`` (``word`` is
        a regular column, not the index) and is also stored in
        ``self.df_freq``.
        '''
        # Context manager so the file handle is closed deterministically.
        with open(self.filename) as source:
            words = re.findall(r'\w+', source.read().lower())
        common = collections.Counter(words).most_common(n)
        self.df_freq = pd.DataFrame(common, columns=['word', 'freq'])
        return self.df_freq