From 3b35d444a9ea972a4cf1c6c3b664c6a06c380c36 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 13 Nov 2024 18:59:59 +0100
Subject: [PATCH] feat(vad): add silero-vad backend (WIP)

Signed-off-by: Ettore Di Giacinto
---
Reviewer notes (text in this section is not part of the commit message):
 - restored the line structure of the patch (one diff line per line)
 - vad.go: reset the detector before every Detect call so that requests do
   not share state, return nil explicitly from Load, pre-size the result
   slice, add doc comments and fix the file header comment
 - the blob id on the vad.go index line predates these changes

 Makefile                      | 16 +++++++++
 backend/go/vad/silero/main.go | 21 ++++++++++++
 backend/go/vad/silero/vad.go  | 63 +++++++++++++++++++++++++++++++++++
 go.mod                        |  1 +
 go.sum                        |  2 ++
 5 files changed, 103 insertions(+)
 create mode 100644 backend/go/vad/silero/main.go
 create mode 100644 backend/go/vad/silero/vad.go

diff --git a/Makefile b/Makefile
index 5f9e7b3205f..2ff1ece2a06 100644
--- a/Makefile
+++ b/Makefile
@@ -275,6 +275,15 @@ sources/go-stable-diffusion:
 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
 	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
 
+sources/onnxruntime:
+	mkdir -p sources/onnxruntime
+	wget https://github.com/microsoft/onnxruntime/releases/download/v1.20.0/onnxruntime-linux-x64-1.20.0.tgz -O sources/onnxruntime/onnxruntime-linux-x64-1.20.0.tgz
+	cd sources/onnxruntime && tar -xvf onnxruntime-linux-x64-1.20.0.tgz && rm onnxruntime-linux-x64-1.20.0.tgz
+	cd sources/onnxruntime && mv onnxruntime-linux-x64-1.20.0/* ./
+
+backend-assets/lib/libonnxruntime.so: backend-assets/lib sources/onnxruntime
+	cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
+
 ## tiny-dream
 sources/go-tiny-dream:
 	mkdir -p sources/go-tiny-dream
@@ -831,6 +840,13 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/stablediffusion
 endif
 
+backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/silero-vad
+endif
+
 backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
diff --git a/backend/go/vad/silero/main.go b/backend/go/vad/silero/main.go
new file mode 100644
index 00000000000..28f51e49298
--- /dev/null
+++ b/backend/go/vad/silero/main.go
@@ -0,0 +1,21 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each model
+
+import (
+	"flag"
+
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+	flag.Parse()
+
+	if err := grpc.StartServer(*addr, &VAD{}); err != nil {
+		panic(err)
+	}
+}
diff --git a/backend/go/vad/silero/vad.go b/backend/go/vad/silero/vad.go
new file mode 100644
index 00000000000..5a164d2a858
--- /dev/null
+++ b/backend/go/vad/silero/vad.go
@@ -0,0 +1,63 @@
+package main
+
+// This is a wrapper to satisfy the GRPC service interface.
+// It is meant to be used by the main executable that is the server for the silero-vad backend.
+import (
+	"fmt"
+
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/streamer45/silero-vad-go/speech"
+)
+
+// VAD is the gRPC backend that runs voice activity detection with a silero detector.
+type VAD struct {
+	base.SingleThread
+	detector *speech.Detector
+}
+
+// Load creates the silero detector from the model file given in opts.
+func (vad *VAD) Load(opts *pb.ModelOptions) error {
+	v, err := speech.NewDetector(speech.DetectorConfig{
+		ModelPath:  opts.ModelFile,
+		SampleRate: 16000,
+		//WindowSize:           1024,
+		Threshold:            0.5,
+		MinSilenceDurationMs: 0,
+		SpeechPadMs:          0,
+	})
+	if err != nil {
+		return fmt.Errorf("create silero detector: %w", err)
+	}
+
+	vad.detector = v
+	return nil
+}
+
+// VAD runs the detector over req.Audio and returns the detected speech segments.
+func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
+	audio := req.Audio
+
+	// The detector keeps state between Detect calls; reset it so that every
+	// request is analysed independently of the previous one.
+	if err := vad.detector.Reset(); err != nil {
+		return pb.VADResponse{}, fmt.Errorf("reset: %w", err)
+	}
+
+	segments, err := vad.detector.Detect(audio)
+	if err != nil {
+		return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
+	}
+
+	vadSegments := make([]*pb.VADSegment, 0, len(segments))
+	for _, s := range segments {
+		vadSegments = append(vadSegments, &pb.VADSegment{
+			Start: float32(s.SpeechStartAt),
+			End:   float32(s.SpeechEndAt),
+		})
+	}
+
+	return pb.VADResponse{
+		Segments: vadSegments,
+	}, nil
+}
diff --git a/go.mod b/go.mod
index 57373450876..de5fe0b8080 100644
--- a/go.mod
+++ b/go.mod
@@ -108,6 +108,7 @@ require (
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/savsgio/gotils v0.0.0-20240303185622-093b76447511 // indirect
 	github.com/shirou/gopsutil/v4 v4.24.7 // indirect
+	github.com/streamer45/silero-vad-go v0.2.1 // indirect
 	github.com/urfave/cli/v2 v2.27.4 // indirect
 	github.com/valyala/fasttemplate v1.2.2 // indirect
 	github.com/wlynxg/anet v0.0.4 // indirect
diff --git a/go.sum b/go.sum
index a2ee8c1fa31..13ef9a73be5 100644
--- a/go.sum
+++ b/go.sum
@@ -737,6 +737,8 @@ github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2
 github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
 github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w=
 github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU=
+github.com/streamer45/silero-vad-go v0.2.1 h1:Li1/tTC4H/3cyw6q4weX+U8GWwEL3lTekK/nYa1Cvuk=
+github.com/streamer45/silero-vad-go v0.2.1/go.mod h1:B+2FXs/5fZ6pzl6unUZYhZqkYdOB+3saBVzjOzdZnUs=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=