-
Notifications
You must be signed in to change notification settings - Fork 0
/
srt_maker.py
259 lines (233 loc) · 8.1 KB
/
srt_maker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
import sys
import re
def main():
    """Command-line entry point.

    Usage: ``srt_maker.py -help`` prints the extended help, and
    ``srt_maker.py <in-file> <out-file>`` reads the input file, runs it
    through convert() and writes the result to the output file.  Any other
    argument count prints the short usage text.
    """
    helpText = """
Usage: srt_maker.py -help OR <in-file> <out-file>
Will convert the input file to SRT format and write it to the output file"""
    moreHelp = """
===Sample subtitle===
index 1
start and end time 00:00:00,000 --> 00:00:10,000
subtitle text Text here (multiple lines allowed)
empty line
===Special formatting===
<SCRIPT>
[start time] --> [end time of first subtitle]
multiline text ending in a line that contains only
</SCRIPT>
===Other directives===
@OFFSET <time in milliseconds>
will push all following subtitles later by the given time. Use 0 to stop the offset.
@DELAY <time in milliseconds>
will increase or decrease the time offset by the specified time.
@SPEED <time in milliseconds>
will change how long each letter increments the duration of the next subtitle. Default 50. Resets for each subtitle.
@SPEED = <time in milliseconds>
changes the default speed factor for all subsequent subtitles
@GAP <time in milliseconds>
sets the gap time in between subtitles. Default 0.
@EXTEND <time in milliseconds>
@EXTEND <HH:MM:SS,mmm>
extend the previous subtitle's duration by a number of milliseconds or make it end at the specified time.
"""
    args = sys.argv[1:]
    # Check for -help before counting arguments: the documented invocation
    # "srt_maker.py -help" has only one argument and would otherwise be
    # answered with the short usage text.
    if args and args[0] == "-help":
        print(helpText + "\n" + moreHelp)
        return
    if len(args) != 2:
        print(helpText)
        return
    inPath, outPath = args
    # Context managers close the files even if reading or converting fails.
    with open(inPath, "r") as source:
        converted = convert(source.read())
    with open(outPath, "w") as target:
        target.write(converted)
#input should be a string; returns a string
def convert(input):
    """Translate subtitle source text into SRT-formatted text.

    The input is processed line by line.  Lines matching one of the
    @-directives are handled first, whatever the current state; every other
    line is interpreted according to ``state``:

    * "heading" - waiting for a subtitle index, "<SCRIPT>" or "<SCRIPT names>"
    * "time"    - expecting the "start --> end" line
    * "text"    - collecting the lines of one ordinary subtitle
    * "script"  - inside a script section, one subtitle per line

    Returns the string form of every subtitle except the initial placeholder
    ``subs[0]``, joined with newlines.
    """
    # Directive patterns.  They are applied with .match(), which anchors at
    # the start of the line only; just directiveExtend1 is anchored at the end.
    # A matching line with trailing non-digits therefore reaches int() below
    # and raises ValueError.
    directiveOffset = re.compile("@OFFSET [+-]?\\d{1,9}")
    directiveDelay = re.compile("@DELAY [+-]?\\d{1,9}")
    directiveSpeed = re.compile("@SPEED \\d{1,6}")
    directiveDefaultSpeed = re.compile("@SPEED = \\d{1,6}")
    directiveGap = re.compile("@GAP \\d{1,6}")
    directiveExtend1 = re.compile("@EXTEND [+-]?\\d{1,6}$") #extend by milliseconds
    directiveExtend2 = re.compile("@EXTEND \\d\\d:\\d\\d:\\d\\d,\\d{3}") #extend to specific endpoint
    lines = input.splitlines()
    state = "heading"
    defaultTimeMultiplier = 50 #milliseconds of duration per character of text
    timeMultiplier = defaultTimeMultiplier #per-subtitle override set by "@SPEED <n>"
    timeOffset = 0 #milliseconds added to every parsed time line
    gap = 0 #milliseconds each script subtitle is shifted by
    subs = [Subtitle()] #subs[0] is only a seed for next(); it is never emitted
    unknownEndTime = False #True when the end time has to be estimated from the text length
    script = False #True while inside a <SCRIPT> section
    keepNames = False #True for "<SCRIPT names>": keep the "Name: " prefix
    for l in lines:
        # The slices below (l[8:], l[7:], ...) skip the directive keyword and
        # the separator that follows it.
        if directiveOffset.match(l):
            newOffset = int(l[8:])
            if script:
                # the pending script subtitle was already shifted by the old offset
                subs[-1].adjustTime(newOffset - timeOffset)
            timeOffset = newOffset
        elif directiveDelay.match(l):
            if script:
                subs[-1].adjustTime(int(l[7:]))
            timeOffset += int(l[7:])
        elif directiveSpeed.match(l):
            timeMultiplier = int(l[7:])
        elif directiveDefaultSpeed.match(l):
            defaultTimeMultiplier = int(l[9:])
            timeMultiplier = defaultTimeMultiplier
        elif directiveGap.match(l):
            gap = int(l[5:])
        elif directiveExtend1.match(l):
            # In script mode subs[-1] is the template appended after the last
            # script line, so the subtitle being extended is subs[-2].
            i = -2 if script else -1
            ext = int(l[8:])
            subs[i].setDuration(max(0, subs[i].getDuration() + ext))
            if script:
                subs[-1].adjustTime(ext) #keep the pending subtitle right after the extended one
        elif directiveExtend2.match(l):
            if script:
                ext = subs[-2].end.difference(parseTime(l[8:]))
                subs[-2].setDuration(subs[-2].getDuration() + ext)
                subs[-1].adjustTime(ext)
            else:
                subs[-1].end = parseTime(l[8:])
        elif state == "heading":
            # Lines matching none of the three cases below are ignored.
            unknownEndTime = False
            if l.find("<SCRIPT>") == 0: #this is the start of a script section
                n = subs[-1].next()
                subs.append(n)
                state = "time"
                timeMultiplier = defaultTimeMultiplier
                script = True
                keepNames = False
            elif l.find("<SCRIPT names>") == 0: #also a script section, but keep the names preceding text
                n = subs[-1].next()
                subs.append(n)
                state = "time"
                timeMultiplier = defaultTimeMultiplier
                script = True
                keepNames = True
            elif isInt(l): #this is already an SRT style timed text
                n = subs[-1].next()
                subs.append(n)
                state = "time"
                timeMultiplier = defaultTimeMultiplier
                script = False
        elif state == "time":
            # NOTE(review): the nesting of this branch is reconstructed; the two
            # "if valid" tests are assumed to be siblings (the repeated "valid"
            # check suggests so) - confirm against the original layout.
            valid = "-->" in l #true if this contains the arrow, false otherwise so it can't have two times in it
            times = l.replace(" ", "").split("-->")
            if valid and isTime(times[0]):
                subs[-1].start = parseTime(times[0])
            if valid and isTime(times[1]):
                subs[-1].end = parseTime(times[1])
                unknownEndTime = False
            else:
                subs[-1].setDuration(0) #makes this a 0 length subtitle because we don't know when it ends
                unknownEndTime = True
            subs[-1].adjustTime(timeOffset) #account for time offset
            state = "script" if script else "text"
        elif state == "script":
            if l == '</SCRIPT>':
                script = False
                state = "heading"
            else:
                if not keepNames: #remove names from subtitles if that option was not set
                    # NOTE(review): maxsplit=2 with [-1] drops up to two
                    # ": "-separated prefixes, not just one - confirm intent.
                    l = l.split(": ", 2)[-1]
                subs[-1].text += l #add text to subtitle
                if unknownEndTime:
                    subs[-1].setDuration(len(l) * timeMultiplier)
                # NOTE(review): the three statements below are assumed to run for
                # every script line (not only when the end time is unknown) - confirm.
                timeMultiplier = defaultTimeMultiplier
                unknownEndTime = True #only the first script line can carry an explicit end time
                subs[-1].adjustTime(gap) #move this subtitle forward/backward in time based on the current gap between subtitles
                subs.append(subs[-1].next()) #add next subtitle to the chain
        elif state == "text":
            if l != "":
                if len(subs[-1].text) != 0: #add newline if this is a multiline subtitle
                    subs[-1].text += "\n"
                subs[-1].text += l #add text to subtitle
                if unknownEndTime: #if the end time is not specified, then approximate based on the speaking speed and length of text
                    subs[-1].adjustEnd(len(l) * timeMultiplier)
                timeMultiplier = defaultTimeMultiplier
            else:
                state = "heading" #an empty line ends the current subtitle
    return "\n".join([x.toString() for x in subs[1:]])
def isInt(thing):
    """Return True if int(thing) succeeds, False otherwise.

    Only the conversion errors int() raises for unsuitable values are treated
    as "not an integer"; anything else (for example KeyboardInterrupt)
    propagates instead of being silently swallowed.
    """
    try:
        int(thing)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
#Tells whether the string starts with a time written as HH:MM:SS,LLL (LLL = milliseconds)
def isTime(thing):
    """Return a match object if thing begins with HH:MM:SS,LLL, else None.

    The pattern is anchored at the start only, so characters following the
    timestamp do not prevent a match.
    """
    timestamp = re.compile("\\d\\d:\\d\\d:\\d\\d,\\d{3}")
    found = timestamp.match(thing)
    return found
#Builds a Time object from a string for which isTime(string) holds
def parseTime(s):
    """Read the fixed-width fields of HH:MM:SS,LLL and return them as a Time.

    Only the first twelve characters are looked at; the separators at
    positions 2, 5 and 8 are skipped without being checked.
    """
    hours = int(s[0:2])
    minutes = int(s[3:5])
    seconds = int(s[6:8])
    millis = int(s[9:12])
    return Time(hours, minutes, seconds, millis)
class Time:
    """A timestamp split into hours, minutes, seconds and milliseconds."""

    # Class-level defaults; __init__ overrides all of them on every instance.
    h = 0
    m = 0
    s = 0
    l = 0

    def __init__(self, hours=0, minutes=0, seconds=0, millis=0):
        """Store the four fields as given (no normalisation is performed)."""
        self.h = hours
        self.m = minutes
        self.s = seconds
        self.l = millis

    def addTime(self, millis):
        """Shift this time in place by ``millis`` milliseconds (may be negative).

        The fields are re-normalised from the total millisecond count.  divmod
        performs floor division, so the fields stay integers on Python 3 as
        well as Python 2 (plain ``/`` would produce floats on Python 3 and
        break toString()).  Hours wrap around modulo 100.
        """
        total = self.toMillis() + millis
        total, self.l = divmod(total, 1000)
        total, self.s = divmod(total, 60)
        total, self.m = divmod(total, 60)
        self.h = total % 100

    def toMillis(self):
        """Return this time as a total number of milliseconds."""
        return self.l + self.s * 1000 + self.m * 1000 * 60 + self.h * 1000 * 60 * 60

    def difference(self, otherTime):
        """Return otherTime minus this time, in milliseconds."""
        return otherTime.toMillis() - self.toMillis()

    def copy(self):
        """Return a new, independent Time with the same field values."""
        return Time(self.h, self.m, self.s, self.l)

    def toString(self):
        """Format as HH:MM:SS,LLL with zero padding."""
        return "{:0>2}:{:0>2}:{:0>2},{:0>3}".format(self.h, self.m, self.s, self.l)
#Each subtitle object holds one subtitle, which can then be turned into SRT format
class Subtitle:
    """One SRT entry: an index, a start time, an end time and the text."""

    # Class-level defaults; __init__ overrides all of them on every instance.
    index = 1
    start = Time()
    end = Time()
    text = ""

    # @param i the index of this subtitle in the entire transcript
    # @param s the time that this subtitle appears on screen
    # @param e the time that this subtitle ends
    # @param t the actual text of this subtitle, can contain newlines
    def __init__(self, i=1, s=None, e=None, t=""):
        """Create a subtitle; omitted times default to fresh zero Time objects.

        The time defaults are None rather than Time() because a default value
        is evaluated once and would be shared by every Subtitle built without
        explicit times; the in-place adjust* methods would then change all of
        them at once and leak state between conversions.
        """
        self.index = i
        self.start = Time() if s is None else s
        self.end = Time() if e is None else e
        self.text = t

    def adjustStart(self, millis):
        """Shift the start time in place by millis milliseconds."""
        self.start.addTime(millis)

    def adjustEnd(self, millis):
        """Shift the end time in place by millis milliseconds."""
        self.end.addTime(millis)

    #Gives an offset to this subtitle
    def adjustTime(self, millis):
        """Shift both start and end by millis, keeping the duration."""
        self.adjustStart(millis)
        self.adjustEnd(millis)

    #Adjusts the end time of this subtitle to be <millis> milliseconds after the start time
    def setDuration(self, millis):
        """Move the end time so that it lies millis after the start time."""
        self.adjustEnd(millis - self.getDuration())

    def getDuration(self):
        """Return end minus start, in milliseconds."""
        return self.start.difference(self.end)

    def getStart(self):
        """Return the start time formatted by Time.toString()."""
        return self.start.toString()

    def getEnd(self):
        """Return the end time formatted by Time.toString()."""
        return self.end.toString()

    #Returns a template for the next subtitle in the series. Index is increased by one, duration is 100 milliseconds, and it starts right when this one ended.
    def next(self):
        """Return a new Subtitle that follows this one (see comment above)."""
        # Both times are copies, so adjusting the new subtitle never moves this one.
        n = Subtitle(i=(self.index + 1), s=self.end.copy(), e=self.end.copy())
        n.adjustEnd(100)
        return n

    def toString(self):
        """Return the SRT form: index line, "start --> end" line, text, newline."""
        return str(self.index) + "\n" + self.getStart() + " --> " + self.getEnd() + "\n" + self.text + "\n"
# Run the command-line tool only when this file is executed as a script, so
# that importing the module (e.g. to call convert()) does not read sys.argv
# or write any file.
if __name__ == "__main__":
    main()