forked from timothybeal/kjvbot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
kjv_markovizer1.py
49 lines (38 loc) · 1.36 KB
/
kjv_markovizer1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
'''
Created on Feb 23, 2016
@author: timothybeal
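This simple module generates an utterance as a Markov chain: beginning with a
single starting word, each following word is chosen at random from the words
that immediately follow the current word somewhere in the text of the KJV Bible.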
'''
import re
from nltk.tokenize import sent_tokenize
from collections import defaultdict
from random import choice
# Read the whole corpus into memory; the file is closed when the block exits.
with open("kjv.txt", encoding="UTF8") as kjv:
    kjv_text = kjv.read()

# Split the raw text into sentences with NLTK's sentence tokenizer.
sentences = sent_tokenize(kjv_text)
# Transition table for the chain: every token maps to the list of tokens
# observed immediately after it. Repeats are kept, so a uniform random pick
# from one of these lists favours the more frequent successors.
token_nextwords = defaultdict(list)
for sentence in sentences:
    # Tokens are runs of word characters or a single period; all other
    # punctuation is dropped.
    tokens = re.findall(r"\w+|[.]", sentence)
    # Pair each token with the one right after it. Pairs never span two
    # sentences, and a sentence with fewer than two tokens adds nothing.
    for current, following in zip(tokens, tokens[1:]):
        token_nextwords[current].append(following)
# Define starting word
present_word = "Behold"
# Collect the utterance so it is printed once, on a single line, with the
# words (and the closing period) separated by spaces.
utterance = [present_word]
# Until a period is selected, randomly choose a word to follow the current
# word from the list keyed to it in the dictionary.
while present_word != ".":
    # .get() looks the word up without inserting an empty entry into the
    # defaultdict when the word is missing.
    followers = token_nextwords.get(present_word)
    # A word with no recorded successor (for example one seen only right
    # before "?" or "!", which the tokenizer drops, or a starting word that
    # is absent from the text) would make choice() raise IndexError on an
    # empty list; close the utterance with a period instead.
    present_word = choice(followers) if followers else "."
    utterance.append(present_word)
print(" ".join(utterance))