-
Notifications
You must be signed in to change notification settings - Fork 0
/
KakaoTalk Conversation Analyzer.py
396 lines (343 loc) · 13.2 KB
/
KakaoTalk Conversation Analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
#KakaoTalk Conversation Analyzer
#import python modules
#for date and time analysis
from datetime import datetime
#for frequency analysis
from collections import Counter
#for graphical analysis
import turtle
def Introduction():
    '''
    Prints the welcome banner and the export instructions, one message per line
    '''
    #the last message carries its own '\n' so a blank line follows the banner
    banner = (
        'Welcome, this is the <KakaoTalk Conversation Analyzer> program',
        'This program will analyze your KakaoTalk conversation with your friend',
        'Please use PC (not mobile) to open the KakaoTalk coversation you wish to analyze',
        'Then, click <chat settings>, <storage management>, and <save> to export all data',
        'Export the data file as a .txt file and place in the same folder as this program',
        'Once all this is done, you are ready\n',
    )
    for message in banner:
        print(message)
def OpenFile():
    '''
    Prompts for the conversation file name until the file can be opened

    The user gets up to five attempts. The file is opened only to confirm
    that it is readable and is closed again immediately; only the name is
    handed back.

    Returns:
        The file name that was entered and opened successfully
    Raises:
        IOError: when every attempt failed
    '''
    #init
    attempts = 5
    #prompt for file name and attempt to open it until successful
    while attempts > 0:
        conversation_file = input('Enter input file name (.txt file): ')
        try:
            #open only as a readability check; the handle is released right away
            with open(conversation_file, 'r'):
                pass
        except IOError:
            print('<',conversation_file, '> file not found. Please re-enter')
            attempts -= 1
            print(attempts, 'attempts left\n')
        else:
            return conversation_file
    #all attempts used up: terminate
    raise IOError('Too many attempts. Please restart the program.')
def GetNames():
    '''
    Asks for the user's name and then the friend's name

    Returns:
        (my_name, friend_name) exactly as typed
    '''
    #user first, friend second
    my_prompt = 'Enter your name as saved in your conversation file: '
    friend_prompt = "Enter your friend's name as saved in your conversation file: "
    my_name = str(input(my_prompt))
    friend_name = str(input(friend_prompt))
    return my_name, friend_name
def FilesNameAssignment(my_name,friend_name):
    '''
    Derives the per-speaker file names from the two speaker names

    Returns:
        (my_file, friend_file), each being the name with '.txt' appended
    '''
    extension = '.txt'
    return (my_name + extension, friend_name + extension)
def Extractconvo(conversation_file, my_file, friend_file, my_name, friend_name):
    '''
    Extracts conversation contents to create a file each for user and friend

    A line is attributed to a speaker when it contains the speaker name
    followed by <","> . Each kept line is written as
    <first field>,<message> with every double quote removed.
    NOTE(review): this presumes export lines shaped like
    <date time>,"<name>","<message>" - confirm against a real export.

    Args:
        conversation_file: name of the exported conversation file to read
        my_file: name of the file that receives the user's lines
        friend_file: name of the file that receives the friend's lines
        my_name: user name as it appears in the export
        friend_name: friend name as it appears in the export
    '''
    #read the whole export once; the handle is closed as soon as it is read
    with open(conversation_file, 'r') as input_file:
        lines = input_file.readlines()
    #same extraction for both speakers, user first
    for speaker_file, speaker_name in ((my_file, my_name), (friend_file, friend_name)):
        marker = speaker_name + '","'
        with open(speaker_file, 'w') as speaker_chat:
            for line in lines:
                if marker not in line:
                    continue
                #maxsplit=2 keeps commas that belong to the message itself and
                #preserves the trailing newline, so records never run together
                fields = line.split(',', 2)
                if len(fields) < 3:
                    #no leading date field to keep; not a usable record
                    continue
                entry = fields[0] + ',' + fields[2]
                speaker_chat.write(entry.replace('"', ''))
def SetParameter():
    '''
    Prompts for time period to be analyzed and trims both chat files to it

    Reads the module-level names my_file and friend_file (they are not
    parameters) and rewrites each of those files in place, keeping only the
    lines whose date lies between the start and finish date, both inclusive.
    Only the date part of each line (text before the first space of the
    first comma-separated field) is compared.

    Returns:
        (start_date, finish_date) as datetime objects
    Raises:
        ValueError: when an entered date or a line's date is not YYYY-MM-DD
    '''
    def FilterByDate(file_name):
        '''
        Rewrites file_name so that only lines inside the period remain
        '''
        with open(file_name, 'r') as chat:
            lines = chat.readlines()
        #decide what to keep before reopening for writing, so a line with a
        #malformed date raises while the file is still intact
        kept_lines = []
        for line in lines:
            line_date = line.split(',')[0].split(' ')[0]
            line_date = datetime.strptime(line_date, "%Y-%m-%d")
            if start_date <= line_date <= finish_date:
                kept_lines.append(line)
        with open(file_name, 'w') as chat:
            chat.writelines(kept_lines)

    #prompt for start and finish date
    start_date = str(input('Enter start date of period you wish to analyze (YYYY-MM-DD): '))
    start_date = datetime.strptime(start_date, "%Y-%m-%d")
    finish_date = str(input("Enter finish date of period you wish to analyze (YYYY-MM-DD): "))
    finish_date = datetime.strptime(finish_date, "%Y-%m-%d")
    print('\nThe length of given period is',(finish_date - start_date).days, 'days')
    #alter my file according to parameter
    FilterByDate(my_file)
    print('\nA new file containing all texts you sent has been created')
    #alter friend file according to parameter
    FilterByDate(friend_file)
    print("A new file containing all texts your friend sent has been created\n")
    return (start_date, finish_date)
def ArrangeData(my_file, friend_file):
    '''
    Places all words spoken by each person in respective lists

    For every line of a speaker file, the text after the first comma is
    split on single spaces and each piece has its newline removed. Every
    line is processed, including the first one.

    Args:
        my_file: name of the file holding the user's lines
        friend_file: name of the file holding the friend's lines
    Returns:
        (my_words, friend_words), two lists of strings
    '''
    def CollectWords(file_name):
        '''
        Returns the list of words found in one speaker file
        '''
        words = []
        with open(file_name, 'r') as chat:
            #iterate from the very first line: there is no header to skip
            for line in chat:
                message = line.split(',', 1)[1]
                #strip '\n' from the word at the end of each line
                words.extend(word.replace('\n', '') for word in message.split(' '))
        return words

    my_words = CollectWords(my_file)
    friend_words = CollectWords(friend_file)
    return (my_words, friend_words)
def TimeDictionary(my_file, friend_file):
    '''
    Collects the hour of every line in both files into a single list

    The hour is the integer before the first ':' in the second
    space-separated token of the first comma-separated field.

    Returns:
        list of int hours, the user's lines first, then the friend's
    '''
    hours = []
    #user file first, friend file second
    for chat_file in (my_file, friend_file):
        with open(chat_file, 'r') as chat:
            for entry in chat.readlines():
                timestamp = entry.split(',')[0]
                clock = timestamp.split(' ')[1]
                hours.append(int(clock.split(':')[0]))
    return hours
def NumberofLines(my_file, friend_file, my_name, friend_name):
    '''
    Counts and prints the total number of texts for each person

    Every line of a speaker file counts as one text, the first line included.

    Args:
        my_file: name of the file holding the user's lines
        friend_file: name of the file holding the friend's lines
        my_name: name printed for the user's total
        friend_name: name printed for the friend's total
    '''
    #form a sub_function to avoid repetition
    def CountingLines(file_name, speaker):
        '''
        Counts and prints the total number of texts
        '''
        #count all lines; nothing is read ahead, so the first text is included
        with open(file_name, 'r') as input_file:
            num_lines = sum(1 for _ in input_file)
        print(speaker, 'sent', num_lines, 'texts in total')
    #sub_main
    CountingLines(my_file, my_name)
    CountingLines(friend_file, friend_name)
def CommonWordsAnalyzer(my_words, friend_words):
    '''
    Analyzes the most common words spoken by each person

    Prompts for x and prints up to x words per person, most frequent first.
    When a person used fewer than x distinct words, only the available words
    are printed.

    Args:
        my_words: list of words used by the user
        friend_words: list of words used by the friend
    Returns:
        (my_most_common_words, friend_most_common_words), each a list of
        (word, frequency) tuples
    Raises:
        ValueError: when the entered x is not an integer
    '''
    #prompt for how many most frequently used words user wishes to find
    x = int(input('\nEnter value of x for which you wish to find the top x most frequently used words: '))
    #analyze both conversations
    my_most_common_words = Counter(my_words).most_common(x)
    friend_most_common_words = Counter(friend_words).most_common(x)
    #print my result; iterate the result itself so a short list cannot overrun
    for rank, (word, frequency) in enumerate(my_most_common_words, 1):
        print('Your top', rank, 'most commonly used word is <', word, '> and it was used <', frequency, '> times')
    #print friend result
    for rank, (word, frequency) in enumerate(friend_most_common_words, 1):
        print("Your friend's top", rank, 'most commonly used word is <', word, '> and it was used <', frequency, '> times')
    return (my_most_common_words, friend_most_common_words)
def WordFrequencyCounter(my_words, friend_words):
    '''
    Repeatedly asks for a word and reports how often each person used it

    The loop ends when the answer to the retry prompt is exactly 'n'.

    Args:
        my_words: list of words used by the user
        friend_words: list of words used by the friend
    '''
    while True:
        inputword = input('\nEnter the word of which you wish to find its frequency: ')
        #exact-match counts in each word list
        mine = my_words.count(str(inputword))
        theirs = friend_words.count(str(inputword))
        print('You used <', inputword, '>', mine, ' times')
        print('Your friend used <', inputword, '>', theirs, ' times')
        #any answer other than 'n' starts another round
        if input('\nDo you wish to enter another word? Press any key to retry or <n> to quit') == 'n':
            break
    print('Word Frequency Counter terminated\n')
def ActiveTimeTracker(talk_time):
    '''
    Counts texts per hour of the day and draws the counts as a turtle barchart

    Args:
        talk_time: list of int hours, one entry per text
    '''
    def CountHours(hours):
        '''
        Maps each hour '0'..'23' (as a string) to how often it occurs
        '''
        return {str(hour): int(hours.count(int(hour))) for hour in range(0, 24)}

    def DrawBar(t, height, label):
        '''
        Draws one filled bar with its value on top and <label>h underneath
        '''
        t.begin_fill()
        #left edge of the bar
        t.left(90)
        t.forward(height)
        t.right(90)
        #first half of the top edge, then hop up to write the data value
        t.forward(20)
        t.penup()
        t.left(90)
        t.forward(2)
        t.pendown()
        t.write(str(height), align = "center")
        t.penup()
        t.right(180)
        t.forward(2)
        t.left(90)
        #second half of the top edge and the right edge
        t.pendown()
        t.forward(20)
        t.right(90)
        t.forward(height)
        #step back to the middle of the base to write the group name
        t.penup()
        t.right(90)
        t.forward(20)
        t.pendown()
        t.write(str(label)+'h', align = "center")
        t.penup()
        t.right(180)
        t.forward(20)
        t.pendown()
        t.end_fill()

    #sub_main
    #interpret talk time for use in turtle mode
    time = CountHours(talk_time)
    maxheight = max(time.values())
    numbers = len(time)
    border = 10
    #set turtle mode window; each bar is 40 units wide
    window = turtle.Screen()
    window.title('Talk Activity Graph')
    window.setworldcoordinates(0 - border, 0 - border, 40 * numbers + border, maxheight + border)
    window.bgcolor("white")
    #create pen and fix options
    pen = turtle.Turtle()
    pen.color("black")
    pen.fillcolor("gray")
    pen.pensize(3)
    pen.speed(100000)
    pen.hideturtle()
    #one bar per hour, in dictionary order
    for hour_label, frequency in time.items():
        DrawBar(pen, frequency, hour_label)
    #explain result
    print('This is a barchart of at what hours in the day the texts were sent')
    print('The numbers on the bottom of the bars indicate the hours in the day')
    print('The numbers on the top of the bars indicate the data value\n')
    print('Click screen to exit graph\n')
    #exit turtle mode on click
    window.exitonclick()
def Outro():
    '''
    Prints the closing thank-you message
    '''
    farewell = 'Thank you for using the "KakaoTalk Conversation Analyzer"'
    print(farewell)
# ----- main -----
#program welcome and introduction
Introduction()
#obtain the conversation file name, the speaker names and the per-speaker file names
conversation_file = OpenFile()
my_name, friend_name = GetNames()
#NOTE: my_file and friend_file must stay module-level names because
#SetParameter() takes no arguments and reads them as globals
my_file, friend_file = FilesNameAssignment(my_name,friend_name)
#process data: split by speaker, then trim both files to the chosen period
Extractconvo(conversation_file, my_file, friend_file, my_name, friend_name)
start_date, finish_date = SetParameter()
my_words, friend_words = ArrangeData(my_file, friend_file)
talk_time = TimeDictionary(my_file, friend_file)
#analyze and present results
NumberofLines(my_file, friend_file, my_name, friend_name)
my_most_common_words, friend_most_common_words = CommonWordsAnalyzer(my_words, friend_words)
WordFrequencyCounter(my_words, friend_words)
ActiveTimeTracker(talk_time)
#outro
Outro()