-
Notifications
You must be signed in to change notification settings - Fork 1
/
transformer.go
104 lines (90 loc) · 3.38 KB
/
transformer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package vcat
import (
"encoding/json"
"encoding/xml"
"strconv"
"strings"
"time"
"github.com/mitchellh/mapstructure"
)
// getRawVideoDetailFromInitialHttpResponse transforms the provided HTML body into a struct,
// which represents the available captions, and other metadata, of the video.
//
// It receives the initial HTML YouTube response as a byte slice. The body is not
// parsed as HTML: the text following the "captions" key and the text following
// the "videoDetails" key are cut out of it and each is decoded as JSON.
//
// It returns ErrCaptionsNotFound when either fragment is missing from the body
// or cannot be decoded.
//
// @TODO: This is a very naive function
func getRawVideoDetailFromInitialHttpResponse(b []byte) (*rawVideoDetail, error) {
	// Unfortunately the response is in HTML, so we parse it as a string
	// and only load the necessary parts as a valid JSON.
	parts := strings.Split(string(b), "\"captions\":")
	if len(parts) < 2 {
		return nil, ErrCaptionsNotFound
	}

	// parts[0] becomes the captions JSON; parts[1] begins with the videoDetails value.
	parts = strings.Split(parts[1], ",\"videoDetails\"")
	if len(parts) < 2 {
		// No videoDetails marker: indexing parts[1] below would panic.
		return nil, ErrCaptionsNotFound
	}

	// We also care about the title, thumbnail, etc.
	metadataParts := strings.Split(parts[1], ",\"playerConfig\"")
	if metadataParts[0] == "" {
		// Nothing follows the videoDetails key, so there is no ":" prefix to strip.
		return nil, ErrCaptionsNotFound
	}

	var (
		rawCaptions = parts[0]
		// Remove the ":" prefix of the string and everything from ",\"annotations\""
		// onwards, so that what is left is a valid JSON.
		rawMetadata = strings.Split(metadataParts[0][1:], ",\"annotations\"")[0]
		captionsMap map[string]interface{}
		metadataMap map[string]interface{}
	)

	if err := json.Unmarshal([]byte(rawCaptions), &captionsMap); err != nil {
		return nil, ErrCaptionsNotFound
	}
	var captions captions
	if err := mapstructure.Decode(captionsMap, &captions); err != nil {
		return nil, ErrCaptionsNotFound
	}

	if err := json.Unmarshal([]byte(rawMetadata), &metadataMap); err != nil {
		return nil, ErrCaptionsNotFound
	}
	var metadata VideoMetadata
	if err := mapstructure.Decode(metadataMap, &metadata); err != nil {
		return nil, ErrCaptionsNotFound
	}

	return &rawVideoDetail{metadata: &metadata, captions: captions}, nil
}
// getTranscriptFromXMLResponse decodes the provided XML body into a transcript,
// which represents the available transcript for the given YouTube video.
//
// It receives the timedtext (https://www.youtube.com/api/timedtext) response as a byte slice.
// Any XML decoding failure is reported as ErrTranscriptNotFound. On success the
// decoded value is passed through unescapeCharactersFromText and then
// parseStartTimeEndTimeTimestamps, whose result is returned as-is.
func getTranscriptFromXMLResponse(b []byte) (*transcript, error) {
	parsed := new(transcript)
	if xml.Unmarshal(b, parsed) != nil {
		return nil, ErrTranscriptNotFound
	}

	unescapeCharactersFromText(parsed)

	return parseStartTimeEndTimeTimestamps(*parsed)
}
// parseStartTimeEndTimeTimestamps parses the raw transcript into a more human-readable
// form with normalised datetime representations.
// {"start": "0.0", "duration": "1.0", "text": "hello"} => {"start": "00:00:00", "end": "00:00:01", "duration": 1.0, "text": "hello"}
//
// For every entry, Start is parsed as a decimal number of seconds and rewritten
// as "HH:MM:SS"; End is set to the "HH:MM:SS" form of start + duration. The
// entries of t.Text are overwritten in place and a pointer to t is returned.
//
// It returns the strconv error when an entry's Start is not a valid number.
func parseStartTimeEndTimeTimestamps(t transcript) (*transcript, error) {
	// Scale to nanoseconds in floating point *before* converting to an integer
	// Duration; converting first would drop the fractional seconds, so that
	// e.g. 0.6s + 0.6s would end at 00:00:00 instead of 00:00:01.
	// The +0.5 rounds to the nearest nanosecond (offsets are expected to be non-negative).
	toDuration := func(seconds float64) time.Duration {
		return time.Duration(seconds*float64(time.Second) + 0.5)
	}

	// The zero time has a 00:00:00 clock, so adding an offset and formatting only
	// the clock part renders the offset itself (wrapping around after 24 hours).
	var midnight time.Time

	for i := range t.Text {
		startSeconds, err := strconv.ParseFloat(t.Text[i].Start, 64)
		if err != nil {
			return nil, err
		}

		startOffset := toDuration(startSeconds)
		endOffset := startOffset + toDuration(float64(t.Text[i].Duration))

		t.Text[i].Start = midnight.Add(startOffset).Format("15:04:05")
		t.Text[i].End = midnight.Add(endOffset).Format("15:04:05")
	}

	return &t, nil
}
// unescapeCharactersFromText removes unnecessary character encoding possibly present
// in the raw text field: both the escaped apostrophe "&#39;" and its doubly-escaped
// form "&amp;#39;" are replaced with a plain "'".
//
// The entries of t.Text are modified in place.
func unescapeCharactersFromText(t *transcript) {
	// "\u0026" is "&". A single Replacer applies both substitutions to the same
	// string, so neither replacement discards the result of the other.
	apostrophes := strings.NewReplacer(
		"\u0026amp;#39;", "'",
		"\u0026#39;", "'",
	)

	for i := range t.Text {
		t.Text[i].Text = apostrophes.Replace(t.Text[i].Text)
	}
}