-
Notifications
You must be signed in to change notification settings - Fork 0
/
subtitle-cleanup
executable file
·35 lines (31 loc) · 1.07 KB
/
subtitle-cleanup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
# Directory the script was started from. The main loop changes into each
# page's directory and comes back here afterwards.
HOMEBASE="$PWD"
# Report whether a command is available.
# Arguments: $1 - command name to look up
# Outputs:   nothing (stdout and stderr of the lookup are discarded)
# Returns:   0 when the command can be run, non-zero otherwise
#
# Uses the shell's own "command -v" instead of the external "which": the
# latter may not be installed and its exit status is not standardized.
# "$1" is quoted so the name is never word-split or glob-expanded.
__exists () {
  command -v -- "$1" >/dev/null 2>&1
}
# Choose the download tool and the flag that names its output target.
# Both checks always run, so when wget and fetch are both installed the
# later one (fetch) is the one that stays selected.
if __exists wget; then
  getCommand="wget"
  getArgs="-O"
fi
if __exists fetch; then
  getCommand="fetch"
  getArgs="-o"
fi
# Download a URL with the tool selected in $getCommand.
# Globals:   getCommand (read) - download program
#            getArgs    (read) - option that introduces the output target
# Arguments: $1 - output target (callers in this file pass "-" and read the
#                 data from the pipe)
#            $2 - URL to download
# Returns:   exits the current (sub)shell with the tool's status on failure,
#            or with 127 when no download tool has been selected
grab() {
  # Without this guard an empty $getCommand would make the shell run "$1"
  # itself as the command.
  if [[ -z "${getCommand:-}" ]]; then
    printf '%s\n' 'grab: no download tool available (need wget or fetch)' >&2
    exit 127
  fi
  # $getArgs is deliberately unquoted so it may carry more than one option.
  # shellcheck disable=SC2086
  "$getCommand" $getArgs "$1" "$2" || exit $?
}
# Convert standard input from ISO-8859-1 (Latin-1) to UTF-8 and store the
# result in a file.
# Arguments: $1 - path of the file to write
# Returns:   exit status of iconv
#
# The output goes through a shell redirection because "iconv -o" is a GNU
# extension that other iconv implementations may lack; ISO-8859-1 is the
# canonical name of the encoding formerly spelled "latin1" here.
utf8() {
  iconv -f ISO-8859-1 -t UTF-8 >"$1"
}
# For every "<docid>.html" page below the current directory:
#   1. download the Google Video transcript track list to "<docid>.xml"
#      in the page's own directory;
#   2. download every track listed there to "<docid>.<trackid>.xml".
# Downloads go through grab and are stored as UTF-8 by utf8.
#
# "find ." names the start directory explicitly (required by non-GNU find);
# the NUL-delimited read keeps paths with spaces, backslashes or newlines
# intact.
docid_re='^([-0-9]+)\.html$'
find . -name '*.html' -print0 | while IFS= read -r -d '' file; do
  # The docid is the page's base name: digits and dashes before ".html".
  # Other HTML files are skipped instead of being sent to the server as a
  # bogus id.
  base=${file##*/}
  [[ "$base" =~ $docid_re ]] || continue
  id=${BASH_REMATCH[1]}
  dir=${file%/*}
  xml="$id.xml"

  # Never download into the wrong directory when the cd fails.
  if ! cd -- "$dir"; then
    printf 'Cannot enter %s, skipping %s\n' "$dir" "$file" >&2
    continue
  fi

  # grab runs on the left side of a pipe, i.e. in a subshell, so its own
  # "exit" cannot stop this loop; inspect the pipeline status instead.
  # stdin is detached so the download tool can never eat the loop's input.
  grab - "http://video.google.com/videotranscript?docid=$id&type=list" </dev/null | utf8 "$xml"
  status=("${PIPESTATUS[@]}")

  if (( status[0] == 0 && status[1] == 0 )); then
    # Put each tag on its own line (split at ">") and keep the <track ...> ones.
    tr '>' '\n' <"$xml" | grep '^<track ' | while IFS= read -r line; do
      tid=$(printf '%s\n' "$line" | sed -n -e 's/.*id="\([^"]*\)".*/\1/p')
      name=$(printf '%s\n' "$line" | sed -n -e 's/.*name="\([^"]*\)".*/\1/p')
      lang=$(printf '%s\n' "$line" | sed -n -e 's/.*lang_code="\([^"]*\)".*/\1/p')
      lang_name=$(printf '%s\n' "$line" | sed -n -e 's/.*lang_translated="\([^"]*\)".*/\1/p')
      printf 'Found %s subtitle track %s named %s...\n' "$lang_name" "$tid" "$name"

      # The track id comes from the server and becomes part of a file name:
      # refuse anything that could point outside the current directory.
      case $tid in
        */*)
          printf 'Ignoring track with unsafe id %s\n' "$tid" >&2
          continue
          ;;
      esac
      tfile="$id.$tid.xml"

      # NOTE(review): $name and $lang are inserted into the URL as received
      # (no percent-encoding, XML entities not decoded) - confirm this is
      # acceptable for track names containing spaces or "&".
      grab - "http://video.google.com/videotranscript?docid=$id&type=track&name=$name&lang=$lang" </dev/null | utf8 "$tfile"
    done
  else
    printf 'Could not download the track list for %s\n' "$id" >&2
  fi

  # $dir is relative to the start directory, so the next iteration only
  # works from there; give up if we cannot get back.
  cd -- "$HOMEBASE" || exit 1
  # Pause between pages to avoid hammering the server.
  sleep 1
done