-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPoemExtractor.py
171 lines (142 loc) · 7.87 KB
/
PoemExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
class PoemExtractor:
def __init__(self) -> None:
self.poem_names = list()
self.poem = list()
self.start_mark = False
self.end_mark = False
def getPoemNames(self, file_name):
# Using readlines()
file = open(file_name, 'r')
Lines = file.readlines()
for line in Lines:
char = line[0]
if char.isalpha():
self.poem_names.append(line)
return self.poem_names
# designed to use with automatically detected poem names, then it doesn't require of conversion for lower case or upper case.
def getPoem(self, file_name, poem_name):
self.poem.clear()
self.start_mark = False
self.end_mark = False
# Using readlines()
file = open(file_name, 'r')
Lines = file.readlines()
for line in Lines:
if line == poem_name :
self.start_mark = True
continue
else :
if self.start_mark == True:
char = line[0]
if char.isalpha():
self.end_mark = True
break
if self.end_mark == False:
self.poem.append(line)
return self.poem
# designed to use poem names which are prepared by human, and then there is some poem name errors.
# then, it convert the lower case for titles and compare it.
def getPoemWithRange(self, file_name, start_poem_name, end_poem_name):
self.poem.clear()
self.start_mark = False
self.end_mark = False
# Using readlines()
file = open(file_name, 'r')
Lines = file.readlines()
# there is some mismatching due to lower case & upper case characters, then make all to lower case for the comparison
# poem names doesn't include the new line character('\n'), added it to make easy comparion.
# there is an alternative comparing way with elimination of the new line character('\n'), but the treatment is much more complicate,
# then, simply adding the new line character('\n')
start_poem_name = start_poem_name.lower()
start_poem_name = start_poem_name + '\n'
end_poem_name = end_poem_name.lower()
end_poem_name = end_poem_name + '\n'
# Strips the newline character
for line in Lines:
if line.lower() == start_poem_name :
self.start_mark = True
continue
else :
if self.start_mark == True:
if line.lower() == end_poem_name :
self.end_mark = True
break
if self.end_mark == False:
self.poem.append(line)
return self.poem
# designed to use poem names which are prepared by human, and then there is some poem name errors.
# then, it convert the lower case for titles and compare it.
def getLastPoem(self, file_name, poem_name):
self.poem.clear()
self.start_mark = False
self.end_mark = False
# Using readlines()
file = open(file_name, 'r')
Lines = file.readlines()
# there is some mismatching due to lower case & upper case characters, then make all to lower case for the comparison
# poem names doesn't include the new line character('\n'), added it to make easy comparion.
# there is an alternative comparing way with elimination of the new line character('\n'), but the treatment is much more complicate,
# then, simply adding the new line character('\n')
poem_name = poem_name.lower()
poem_name = poem_name + '\n'
# Strips the newline character
for line in Lines:
if line.lower() == poem_name :
self.start_mark = True
continue
else :
if self.start_mark == True:
end_value = line.find('THE END')
if end_value != -1: # return value -1 doesn't mean the False, then it should use the comparison way
break
else :
self.poem.append(line)
return self.poem
if __name__ == '__main__':
file_directory = os.path.dirname(os.path.realpath(__file__))
file_name = file_directory + '\\A dome of many coloredglass2.txt'
poemExtractor = PoemExtractor()
# poem_names = poemExtractor.getPoemNames(file_name)
# for name in poem_names:
# poem = poemExtractor.getPoem(file_name, name)
# print('POEM : {}'.format(name))
# print('\t{}'.format(poem))
# poem = poemExtractor.getPoem(file_name, poem_names[0])
# print('POEM : {}'.format(poem_names[0]))
# print('\t{}'.format(poem))
# some of the poem name is different against the actual name in book, then the table is modified
# i.e. Diya {original title is Greek, Delta-iota-psi-alpha}, Teatro Bambino. Dublin, N. H.,J--K. Huysmans
# On Carpaccio's Picture: The Dream of St. Ursula
# In odrder to eliminate the human error, this table should be firstly generated by computer with calling getPoemNames(...)
# and then it needs to be retouched by human.
# due to human error, debugging it by junior programmer is not an easy job, then it should be automatically created.
poem_names = ['Before the Altar', "Suggested by the Cover of a Volume of Keats's Poems", 'Apples of Hesperides',
'Azure and Gold', 'Petals', 'Venetian Glass', 'Fatigue', 'A Japanese Wood-Carving', 'A Little Song',
'Behind a Wall', 'A Winter Ride', 'A Coloured Print by Shokei', 'Song', 'The Fool Errant',
'The Green Bowl', 'Hora Stellatrix', 'Fragment', 'Loon Point', 'Summer',
'"To-morrow to Fresh Woods and Pastures New"', 'The Way',
'Diya {original title is Greek, Delta-iota-psi-alpha}', 'Roads', 'Teatro Bambino. Dublin, N. H.',
'The Road to Avignon', 'New York at Night', 'A Fairy Tale', 'Crowned', 'To Elizabeth Ward Perkins',
'The Promise of the Morning Star', 'J--K. Huysmans', 'March Evening', 'Leisure',
"On Carpaccio's Picture: The Dream of St. Ursula", 'The Matrix', 'Monadnock in Early Spring',
'The Little Garden', 'To an Early Daffodil', 'Listening', 'The Lamp of Life', 'Hero-Worship',
'In Darkness', 'Before Dawn', 'The Poet', 'At Night', 'The Fruit Garden Path', 'Mirage', 'To a Friend',
'A Fixed Idea', 'Dreams', 'Frankincense and Myrrh', 'From One Who Stays', 'Crepuscule du Matin',
'Aftermath', 'The End', 'The Starling', 'Market Day',
'Epitaph in a Church-Yard in Charleston, South Carolina', 'Francis II, King of Naples', 'To John Keats',
'The Boston Athenaeum', 'Sea Shell', 'Fringed Gentians', 'The Painted Ceiling', 'The Crescent Moon',
'Climbing', 'The Trout', 'Wind', 'The Pleiades']
count = 0
number_of_poems = len(poem_names)
# paired poem titles are required for loop
for count in range(0,number_of_poems - 1):
poem = poemExtractor.getPoemWithRange(file_name, poem_names[count], poem_names[count+1])
print('POEM : {}'.format(poem_names[count]))
print('\t{}'.format(poem))
print('--------------------------------------------------------------------------------')
# the last poem needs a special care because it doesn't have the next poem title.
poem = poemExtractor.getLastPoem(file_name, poem_names[number_of_poems - 1])
print('POEM : {}'.format(poem_names[number_of_poems - 1]))
print('\t{}'.format(poem))
print('--------------------------------------------------------------------------------')