-
Notifications
You must be signed in to change notification settings - Fork 12
/
grep-verse-from-textgrid.sh
executable file
·30 lines (25 loc) · 1.16 KB
/
grep-verse-from-textgrid.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/bin/bash
#
# grep-verse-from-textgrid.sh
#
# Usage: grep-verse-from-textgrid.sh PATH/TO/mfa-output/BOOK.TextGrid
#
# Looks up every verse of a book inside the "words" tier of a long-format
# TextGrid file and records the time span of each verse that is delimited by
# an empty-text interval (silence) on both sides.
#
# Inputs
#   $1  TextGrid file. The verses file is derived from this path by deleting
#       every "mfa-output/" and replacing every "TextGrid" with "txt"
#       (e.g. data/mfa-output/GEN.TextGrid -> data/GEN.txt). It holds one
#       verse per line, words separated by whitespace.
#
# Output
#   Appends one line per accepted verse to <TextGrid minus .TextGrid>_best_verses.tsv:
#       BOOK START,END VERSE
#   where BOOK is the verses file name without ".txt", START is the xmin of
#   the silence before the verse and END is the xmax of the silence after it.
#
# Exit status: 0 on success, 2 on usage error, 1 when an input is unusable.

# No `-e`: grep legitimately exits 1 for every verse that is not found.
set -uo pipefail

# A number as written in the TextGrid (integer or decimal).
readonly NUMBER_REGEX='[0-9]+\.*[0-9]*'
# "xmin,xmax" prefix of one flattened interval.
readonly WORD_REGEX="${NUMBER_REGEX},${NUMBER_REGEX}"
# A flattened interval whose text was empty (rewritten to the token SPACE).
readonly SPACE_REGEX="${WORD_REGEX},SPACE"

# Temp file holding the flattened words tier; global so the EXIT trap sees it.
FLAT_INTERVALS_FILE=''

# Print a diagnostic prefixed with the script name to stderr.
err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Print the usage line to stderr.
usage() {
  printf 'Usage: %s PATH/TO/mfa-output/BOOK.TextGrid\n' "${0##*/}" >&2
}

#######################################
# Flatten the words tier of a TextGrid into a single line of
# space-separated "xmin,xmax,text" triples; empty texts become SPACE.
# Arguments: $1 - TextGrid path
# Outputs:   the flattened tier on stdout
# Returns:   non-zero when nothing between "words" and "phones" is found
#######################################
flatten_words_tier() {
  local textgrid=$1
  # Everything is joined onto one line first so the greedy "words.*phones"
  # can span the whole tier. CR and TAB are mapped to spaces as well so files
  # with CRLF line ends or tab indentation flatten the same way.
  tr '\r\n\t' '   ' <"$textgrid" \
    | grep -o 'words.*phones' \
    | tr -s ' ' \
    | sed -E \
        -e 's/""/SPACE/g' \
        -e 's/intervals \[[0-9]+\]: //g' \
        -e 's/xmin = //g' \
        -e 's/ xmax = /,/g' \
        -e 's/ text = /,/g' \
    | tr -d '"'
}

#######################################
# Build the PCRE that matches a verse in the flattened tier: the words in
# order, each as "xmin,xmax,word", with an optional SPACE interval before the
# first word and after every word.
# Arguments: the words of the verse, one per argument
# Outputs:   the regex on stdout
#######################################
build_verse_regex() {
  local regex="(${SPACE_REGEX})?"
  local -r quote_end='\E'
  local -r quote_end_literal='\E\\E\Q'
  local word
  for word in "$@"; do
    # Words are matched literally via \Q...\E so that punctuation in the
    # verse text is not interpreted as regex syntax; a literal "\E" inside a
    # word has to be spliced in outside of the quoted run.
    word=${word//"$quote_end"/"$quote_end_literal"}
    regex+=" ${WORD_REGEX},\\Q${word}\\E( ${SPACE_REGEX})?"
  done
  printf '%s\n' "$regex"
}

#######################################
# Entry point; see the file header for the contract.
# Globals:   FLAT_INTERVALS_FILE (written)
# Arguments: $1 - TextGrid path
# Returns:   0 on success, 2 on usage error, 1 on unusable input
#######################################
main() {
  if (( $# < 1 )) || [[ -z $1 ]]; then
    usage
    return 2
  fi

  local textgrid=$1
  local -r mfa_dir='mfa-output/'
  local verses_file=${textgrid//"$mfa_dir"/}
  verses_file=${verses_file//TextGrid/txt}
  # Book name: file name of the verses file with only the ".txt" suffix
  # removed (a "txt" elsewhere in the path must not affect it).
  local book=${verses_file##*/}
  book=${book%.txt}
  # Suffix replacement guarantees the output can never be the input file.
  local out_file=${textgrid%.TextGrid}_best_verses.tsv

  if [[ ! -r $textgrid ]]; then
    err "cannot read TextGrid: $textgrid"
    return 1
  fi
  if [[ ! -r $verses_file ]]; then
    err "cannot read verses file: $verses_file"
    return 1
  fi

  FLAT_INTERVALS_FILE=$(mktemp) || { err 'mktemp failed'; return 1; }
  trap 'rm -f -- "${FLAT_INTERVALS_FILE:-}"' EXIT
  if ! flatten_words_tier "$textgrid" >"$FLAT_INTERVALS_FILE"; then
    err "no words tier found in: $textgrid"
    rm -f -- "$FLAT_INTERVALS_FILE"
    return 1
  fi

  local verse regex match first last tmp start end
  local -a words
  # `|| [[ -n $verse ]]` keeps a final line that lacks a trailing newline.
  while read -r verse || [[ -n $verse ]]; do
    read -r -a words <<<"$verse"
    # A blank line would otherwise produce a regex matching every silence.
    (( ${#words[@]} > 0 )) || continue

    regex=$(build_verse_regex "${words[@]}")
    # Only the first occurrence is used: merging several occurrences would
    # report a span running from the first one's start to the last one's end.
    match=$(grep -P -o -e "$regex" "$FLAT_INTERVALS_FILE" | head -n 1)
    [[ -n $match ]] || continue

    # Without a leading silence the match starts with the separator space.
    match=${match# }
    first=${match%% *}
    last=${match##* }
    # Keep the verse only when it is bounded by silence on both sides.
    [[ ${first##*,} == SPACE && ${last##*,} == SPACE ]] || continue

    start=${first%%,*}
    tmp=${last%,*}
    end=${tmp##*,}
    printf '%s %s,%s %s\n' "$book" "$start" "$end" "$verse" >>"$out_file"
  done <"$verses_file"

  rm -f -- "$FLAT_INTERVALS_FILE"
  return 0
}

main "$@"