-
Notifications
You must be signed in to change notification settings - Fork 1
/
find-text
executable file
·157 lines (134 loc) · 5.08 KB
/
find-text
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python3
# Copyright 2022 by Chris Osborn <[email protected]>
#
# This file is part of viddin.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License at <http://www.gnu.org/licenses/> for
# more details.
import argparse
import os, sys
import re
import csv
import json
import viddin
# Default list of words that occur after the title; main() uses it unless
# --past-title supplies a replacement list.
PAST_TITLE = [
    "producer",
    "guest",
    "starring",
    "directed",
    "produced",
    "written",
]
def build_argparser():
    """Build the command-line parser for this script.

    Positional arguments are the titles CSV file followed by one or more
    video files.  ``--chapter`` is intentionally left untyped: its default is
    the integer 1, while a value given on the command line arrives as a
    string, and ``main`` handles both forms itself.

    Returns:
        argparse.ArgumentParser: the configured parser; its help output
        includes each option's default value.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("titles", help="csv file with titles")
    parser.add_argument("files", nargs="+", help="Video file to guess")
    parser.add_argument("--chapter", default=1,
                        help="Chapter to search for title")
    parser.add_argument("--offset", type=float,
                        help="Position within chapter to start searching for title")
    parser.add_argument("--within", type=float, default=3 * 60.0,
                        help="Title is within this many seconds of beginning of chapter")
    parser.add_argument("--skip-matched", action="store_true",
                        help="Skip any files that already have a match in order file")
    parser.add_argument("--past-title",
                        help="comma separated list of words that occur after the title")
    parser.add_argument("--crop", help="bounding box to crop to [x,y,w,h]")
    return parser
def append_order(path, epnum, text, position):
    """Append one result row to the season's order log next to *path*.

    The log is a CSV file named ``<season>order.txt`` in the same directory
    as the video.  The season is the integer before the first ``x`` in the
    file's base name; when the name does not provide one, ``epnum.season`` is
    used instead.  If neither is available (for example when the caller
    passes an empty string as *epnum* to record "no match"), the row goes to
    a plain ``order.txt`` rather than raising.

    Args:
        path: Video file path, written verbatim as the first column.
        epnum: Episode identifier; its string form plus the file extension
            becomes the second column.  May be an empty string.
        text: Text stored in the last column.
        position: Position string; only the part before the first space is
            stored.
    """
    base, ext = os.path.splitext(os.path.basename(path))

    season = None
    if 'x' in base:
        try:
            season = int(base.split('x')[0])
        except ValueError:
            # The name contains an "x" but no numeric season prefix.
            season = None
    if season is None:
        # A plain-string epnum has no .season attribute; fall back to an
        # empty prefix so the row is still recorded.
        season = getattr(epnum, "season", "")

    log_dir = os.path.dirname(os.path.abspath(path))
    log_path = os.path.join(log_dir, str(season) + "order.txt")
    timecode = position.split(" ")[0]
    row = [path, f"{epnum}{ext}", timecode, text]
    # newline="" lets the csv module control line endings itself, as the csv
    # documentation requires for files handed to csv.writer.
    with open(log_path, "a", newline="") as f:
        csv.writer(f).writerow(row)
    return
def _season_from_filename(path):
    """Return ``(season, ext)`` for a ``<season>x<episode>`` style file name.

    ``season`` is None when the base name contains no ``x`` or when the text
    before the first ``x`` is not an integer.
    """
    base, ext = os.path.splitext(os.path.basename(path))
    if 'x' not in base:
        return None, ext
    try:
        return int(base.split('x')[0]), ext
    except ValueError:
        return None, ext


def _already_matched(path, season, ext):
    """Return True if the season's order log holds a real match for *path*."""
    log_dir = os.path.dirname(os.path.abspath(path))
    log_path = os.path.join(log_dir, str(season) + "order.txt")
    if not os.path.exists(log_path):
        return False
    with open(log_path, newline='') as f:
        rows = list(csv.reader(f))
    # append_order writes "<episode><ext>" in the second column, so a failed
    # search is logged as the bare extension; anything else is a match.
    return any(len(row) >= 2 and row[0] == path and row[1] != ext
               for row in rows)


def main():
    """Search each video file for its title card and log the result.

    Loads the episode list from the titles CSV, then for every file given on
    the command line runs ``viddin.OCR.searchForTitleCard`` over the selected
    chapter.  Every file that is searched gets a row appended to the season's
    order log, whether or not an episode was identified, and the list of
    ``[path, dvdID]`` matches is printed once all files are done.
    """
    args = build_argparser().parse_args()

    titles = viddin.loadEpisodeInfoFromCSV(args.titles)

    # FIXME - generate list of words of all titles to quickly discard OCR words not in titles

    # Group the titles by aired season so a file whose name carries a season
    # number is only compared against that season's titles.
    season_titles = {}
    for title in titles:
        season_titles.setdefault(title.airedID.season, []).append(title)

    ep_match = []

    past_words = PAST_TITLE
    if args.past_title:
        past_words = [x.strip() for x in args.past_title.split(",")]

    bounds = json.loads(args.crop) if args.crop else None

    for path in args.files:
        season, ext = _season_from_filename(path)

        if args.skip_matched and season is not None \
           and _already_matched(path, season, ext):
            # Skip this video file
            continue

        print()
        print("Working on", path)
        vfile = viddin.Media(path, None)
        chapters = list(vfile.chapters)
        # Sentinel entry so the chapter after the last real one has a start.
        chapters.append(viddin.Chapter(vfile.length, "end"))

        print("Chapter", args.chapter)
        usechap = args.chapter
        if isinstance(usechap, int) or re.match("^-?[0-9]+$", usechap):
            usechap = int(usechap)
            if usechap < 0:
                # NOTE(review): presumably compensates for the "end" sentinel
                # appended above - confirm against Media.chapterWithID.
                usechap -= 1
        usechap, _ = vfile.chapterWithID(usechap)
        if usechap is None:
            print("Chapter not found")
            continue

        # NOTE(review): a season number taken from the file name that is
        # absent from the titles CSV raises KeyError here.
        searcher = viddin.OCR(vfile, season_titles[season] if season is not None else titles,
                              bounds=bounds, pastTitleWords=past_words)
        start = chapters[usechap][0]
        end = chapters[usechap+1][0]
        if args.offset is not None:
            offset = args.offset
            if offset < 0:
                # A negative offset is measured back from the chapter's end.
                start = end + offset
            else:
                start += args.offset
        chap_len = end - start
        print("Chapter len", viddin.formatTimecode(chap_len))

        episode, offset, text = searcher.searchForTitleCard(start, chap_len, args.within)
        if episode is not None:
            timecode = viddin.formatTimecode(offset)
            print(file=sys.stderr)
            print(path, "is", episode, "-- Title card at", timecode,
                  "within", offset - start, file=sys.stderr)
            ep_match.append([path, episode.dvdID])
            append_order(path, episode.dvdID, text, timecode)
            print("\r", end="", file=sys.stderr)
        else:
            # Record the failed search too, with an empty episode id.
            append_order(path, "", str(text), "")
            print()

    print()
    print(ep_match)
    return
if __name__ == '__main__':
    # Use sys.exit rather than the bare exit() helper: exit() is installed by
    # the site module and is not available when site is disabled.
    sys.exit(main() or 0)