From 057970b1217d785790bdbf1a0d8d24805813e31a Mon Sep 17 00:00:00 2001 From: "F. Talha Altinel" Date: Mon, 23 Dec 2024 23:45:46 +0000 Subject: [PATCH] add YAML file input --- .gitignore | 4 +- README.md | 33 +++++++++++++++ go.mod | 13 ++++++ go.sum | 22 ++++++++++ main.go | 73 +++++++++++++++++++++++++------- synthesize/synthesize.go | 66 +++++++++++++++++++++++------ synthesize/synthesize_test.go | 67 ++++++++++++++++++++++++++++- testdata/synthesize-example.yaml | 9 ++++ testdata/synthesize.yaml | 3 ++ 9 files changed, 258 insertions(+), 32 deletions(-) create mode 100644 go.sum create mode 100644 testdata/synthesize-example.yaml create mode 100644 testdata/synthesize.yaml diff --git a/.gitignore b/.gitignore index bf9e782..7d4fbd0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -*.mp3 \ No newline at end of file +*.mp3 +.idea/ +.code/ \ No newline at end of file diff --git a/README.md b/README.md index 9b7a02e..d4ebd73 100644 --- a/README.md +++ b/README.md @@ -25,5 +25,38 @@ the goddess of the thieves, helps you steal translation speeches from the G-dadd w*W*W*W*w ``` +### Install Via Go + +```shell +go install github.com/lingua-sensei/laverna@latest +``` + +### Grab Binaries + +You can find binaries through GitHub releases. + +### Sample Usage + +Let's create example YAML. + +```yaml +- speed: normal + voice: th + text: "สวัสดีครับ" +- speed: slower + voice: en + text: "Hello there" +- speed: slowest + voice: ja + text: "こんにちは~" +``` + +Running below command will generate audios in the same directory. + +```shell +go run main.go -file example.yaml +``` + + > [!IMPORTANT] > There seems to be heavy obfuscatication over 200 characters, I am working on an algo that to crack that. \ No newline at end of file diff --git a/go.mod b/go.mod index 29af81d..09d51c6 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,16 @@ module github.com/lingua-sensei/laverna go 1.23.4 + +require ( + github.com/google/go-cmp v0.6.0 + github.com/mrwormhole/errdiff v1.1.1 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + golang.org/x/sys v0.28.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb // indirect + google.golang.org/grpc v1.69.2 // indirect + google.golang.org/protobuf v1.36.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..0c74afa --- /dev/null +++ b/go.sum @@ -0,0 +1,22 @@ +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/mrwormhole/errdiff v1.1.1 h1:q9qZ3jLbSakQcBeyLY2N/Ae4z0wLJBGiqMTiWVGwOrA= +github.com/mrwormhole/errdiff v1.1.1/go.mod h1:qnClTDpmPWOsOUWnf2MlkkntSyTRQ9MbGwkRjrQk5yU= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb h1:3oy2tynMOP1QbTC0MsNNAV+Se8M2Bd0A5+x1QHyw+pI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb/go.mod h1:lcTa1sDdWEIHMWlITnIczmw5w60CF9ffkb8Z+DVmmjA= +google.golang.org/grpc v1.69.2 h1:U3S9QEtbXC0bYNvRtcoklF3xGtLViumSYxWykJS+7AU= +google.golang.org/grpc v1.69.2/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= +google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= +google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index eab0fbf..e4baccb 100644 --- a/main.go +++ b/main.go @@ -1,33 +1,76 @@ package main import ( + "flag" + "fmt" "log" + "net/http" "os" + "runtime" + "sync" "github.com/lingua-sensei/laverna/synthesize" ) -// New feature, make a CLI that accepts 1 opt or multiple opts via YAML file -// when 1 opt is used via CLI, text should be the filename -// when YAML file used, go with sequential ID name generation but there should be a default start number settable on YAML +var ( + filenamePath = flag.String("file", "", "filename path that is used for reading YAML file") + maxWorkers = flag.Int("workers", runtime.GOMAXPROCS(0), "maximum number of concurrent downloads") + generationNumber = flag.Int("n", 1, "generation number that is used in output filenames") +) func main() { - opts := synthesize.Opts{ - Text: "สวัสดีชาวโลก วันนี้เราจะมาพูดคุยกันถึงปัญหาของโลก", - Voice: synthesize.ThaiVoice, - Speed: synthesize.NormalSpeed, + flag.Parse() + if *filenamePath == "" { + flag.Usage() + os.Exit(0) + } + + raw, err := os.ReadFile(*filenamePath) + if err != nil { + log.Fatalf("[ERR] failed to read filename path: %v", err) } - audio, err := synthesize.Run(opts) + opts, err := synthesize.UnmarshalYAML(raw) if err != nil { - log.Printf("[ERR] Synthesize(%v): %v\n", opts, err) - return + log.Fatalf("[ERR] failed to unmarshal YAML: %v", err) } - const filename = "hello_world_thai_slowest.mp3" - if err := os.WriteFile(filename, audio, 0644); err != nil { - log.Printf("[ERR] os.WriteFile(%v): %v\n", audio, err) - return + c := &http.Client{} + for err := range batchSave(c, *maxWorkers, *generationNumber, opts) { + log.Printf("[WARN] failed to batch save: %v", err) } - log.Printf("[INFO] Successfully saved audio to %v\n", filename) +} + +func batchSave(client *http.Client, workerCount, generationNumber int, opts []synthesize.Opt) <-chan error { + errChan := make(chan error, len(opts)) + throttle := make(chan struct{}, workerCount) + var wg sync.WaitGroup + + for i := range opts { + wg.Add(1) + go func(generationNumber int) { + defer wg.Done() + throttle <- struct{}{} + defer func() { + <-throttle + }() + + audio, err := synthesize.Run(client, opts[i]) + if err != nil { + errChan <- fmt.Errorf("failed to run opt(%s): %w", opts[i].Text, err) + return + } + + filename := fmt.Sprintf("audio_%d.mp3", generationNumber) + if err := os.WriteFile(filename, audio, 0644); err != nil { + errChan <- fmt.Errorf("failed to write file(%s): %w", filename, err) + } + }(generationNumber + i) + } + + go func() { + wg.Wait() + close(errChan) + }() + return errChan } diff --git a/synthesize/synthesize.go b/synthesize/synthesize.go index b8f84cd..d794353 100644 --- a/synthesize/synthesize.go +++ b/synthesize/synthesize.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "gopkg.in/yaml.v3" "io" "net/http" "net/url" @@ -20,11 +21,45 @@ const ( SlowestSpeed ) -type Opts struct { - Speed Speed - Voice Voice - Text string - Client http.Client +type Opt struct { + Speed Speed + Voice Voice + Text string +} + +var ErrEmptyYAML = errors.New("empty yaml") + +// UnmarshalYAML reads raw bytes from YAML and turns into Opts +func UnmarshalYAML(raw []byte) ([]Opt, error) { + if len(raw) == 0 { + return nil, ErrEmptyYAML + } + + type YAML struct { + Speed string `yaml:"speed"` + Voice string `yaml:"voice"` + Text string `yaml:"text"` + } + var in []YAML + if err := yaml.Unmarshal(raw, &in); err != nil { + return nil, fmt.Errorf("yaml.Unmarshal(): %w", err) + } + + opts := make([]Opt, len(in)) + for i, v := range in { + switch strings.ToLower(v.Speed) { + case "normal": + opts[i].Speed = NormalSpeed + case "slower": + opts[i].Speed = SlowerSpeed + case "slowest": + + opts[i].Speed = SlowestSpeed + } + opts[i].Voice = Voice(strings.ToLower(v.Voice)) + opts[i].Text = v.Text + } + return opts, nil } // Request will look as below, since it is a form, the key is f.req @@ -47,7 +82,7 @@ type Opts struct { ] ] */ -func makeFormData(opts Opts) (string, error) { +func makeFormData(opts Opt) (string, error) { genericOpts := []any{opts.Text, opts.Voice, nil, nil, []Speed{opts.Speed}} rawOpts, err := json.Marshal(genericOpts) if err != nil { @@ -137,19 +172,22 @@ const hostname = "https://translate.google.com" var ErrTextTooLong = errors.New("text must be less than 200 chars") -func Run(opts Opts) ([]byte, error) { - const url = hostname + "/_/TranslateWebserverUi/data/batchexecute" +func Run(c *http.Client, opt Opt) ([]byte, error) { + const URL = hostname + "/_/TranslateWebserverUi/data/batchexecute" - if len(opts.Text) > 200 { + if len(opt.Text) > 200 { return nil, ErrTextTooLong } + if c == nil { + c = http.DefaultClient + } - formData, err := makeFormData(opts) + formData, err := makeFormData(opt) if err != nil { - return nil, fmt.Errorf("makeFormData(%v): %v", opts, err) + return nil, fmt.Errorf("makeFormData(%v): %v", opt, err) } - req, err := http.NewRequest(http.MethodPost, url, bytes.NewBufferString(formData)) + req, err := http.NewRequest(http.MethodPost, URL, bytes.NewBufferString(formData)) if err != nil { return nil, fmt.Errorf("http.NewRequest(): %v", err) } @@ -158,9 +196,9 @@ func Run(opts Opts) ([]byte, error) { req.Header.Set("Accept", "*/*") req.Header.Set("Origin", hostname) req.Header.Set("Referer", hostname) - resp, err := opts.Client.Do(req) + resp, err := c.Do(req) if err != nil { - return nil, fmt.Errorf("%T.Do(): %v", opts.Client, err) + return nil, fmt.Errorf("%T.Do(): %v", c, err) } defer resp.Body.Close() diff --git a/synthesize/synthesize_test.go b/synthesize/synthesize_test.go index 2593145..8a87e71 100644 --- a/synthesize/synthesize_test.go +++ b/synthesize/synthesize_test.go @@ -1,17 +1,23 @@ package synthesize import ( + "errors" + "github.com/google/go-cmp/cmp" + "github.com/mrwormhole/errdiff" + "net/http" + "os" "testing" ) func TestRun(t *testing.T) { - opts := Opts{ + c := &http.Client{} + opts := Opt{ Text: "สวัสดีชาวโลก วันนี้เราจะมาพูดคุยกันถึงปัญหาของโลก", Voice: ThaiVoice, Speed: SlowestSpeed, } - audio, err := Run(opts) + audio, err := Run(c, opts) if err != nil { t.Fatalf("Synthesize(%v): %v", opts, err) } @@ -19,3 +25,60 @@ func TestRun(t *testing.T) { t.Error("audio must not be empty") } } + +func TestOptsUnmarshalYAML(t *testing.T) { + tests := []struct { + name string + wantOpts []Opt + rawYAML func() []byte + wantErr error + }{ + { + name: "example YAML", + rawYAML: func() []byte { + const filename = "../testdata/synthesize-example.yaml" + raw, err := os.ReadFile(filename) + if err != nil { + t.Fatalf("os.ReadFile(%s): %v", filename, err) + } + return raw + }, + wantOpts: []Opt{ + { + Speed: NormalSpeed, + Voice: ThaiVoice, + Text: "สวัสดีครับ", + }, + { + Speed: SlowerSpeed, + Voice: EnglishVoice, + Text: "Hello there", + }, + { + Speed: SlowestSpeed, + Voice: JapaneseVoice, + Text: "こんにちは~", + }, + }, + }, + { + name: "empty YAML", + rawYAML: func() []byte { + return nil + }, + wantErr: errors.New("empty yaml"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := UnmarshalYAML(tt.rawYAML()) + if diff := errdiff.Check(err, tt.wantErr); diff != "" { + t.Errorf("UnmarshalYAML(): err diff=\n%s", diff) + } + + if diff := cmp.Diff(tt.wantOpts, got); diff != "" { + t.Errorf("UnmarshalYAML(): opts diff=\n%s", diff) + } + }) + } +} diff --git a/testdata/synthesize-example.yaml b/testdata/synthesize-example.yaml new file mode 100644 index 0000000..b6ff669 --- /dev/null +++ b/testdata/synthesize-example.yaml @@ -0,0 +1,9 @@ +- speed: normal + voice: th + text: "สวัสดีครับ" +- speed: slower + voice: en + text: "Hello there" +- speed: slowest + voice: ja + text: "こんにちは~" \ No newline at end of file diff --git a/testdata/synthesize.yaml b/testdata/synthesize.yaml new file mode 100644 index 0000000..b1baa36 --- /dev/null +++ b/testdata/synthesize.yaml @@ -0,0 +1,3 @@ +- speed: slowest + voice: th + text: "สวัสดีชาวโลก วันนี้เราจะมาพูดคุยกันถึงปัญหาของโลก" \ No newline at end of file