centralized request middleware
Signed-off-by: Dave Lee <[email protected]>
dave-gray101 committed Oct 16, 2024
1 parent 5f130fe commit 7dfddc7
Showing 54 changed files with 904 additions and 679 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=a89f75e1b7b90cb2d4d4c52ca53ef9e9b466aa45
+CPPLLAMA_VERSION?=9e041024481f6b249ab8918e18b9477f873b5a5e
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
2 changes: 1 addition & 1 deletion backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 transformers
2 changes: 1 addition & 1 deletion backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
2 changes: 1 addition & 1 deletion backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
2 changes: 1 addition & 1 deletion backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
 coqui-tts
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
2 changes: 1 addition & 1 deletion backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.66.2
+grpcio==1.67.0
 pillow
 protobuf
 certifi
2 changes: 1 addition & 1 deletion backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 wheel
2 changes: 1 addition & 1 deletion backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 librosa
 faster-whisper
2 changes: 1 addition & 1 deletion backend/python/parler-tts/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 llvmlite==0.43.0
2 changes: 1 addition & 1 deletion backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
2 changes: 1 addition & 1 deletion backend/python/sentencetransformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 datasets
2 changes: 1 addition & 1 deletion backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 scipy==1.14.0
 certifi
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
2 changes: 1 addition & 1 deletion backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
2 changes: 1 addition & 1 deletion backend/python/vllm/install.sh
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
 git clone https://github.com/vllm-project/vllm
 fi
 pushd vllm
-uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
+uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
 uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
 VLLM_TARGET_DEVICE=cpu python setup.py install
 popd
2 changes: 1 addition & 1 deletion backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.2
+grpcio==1.67.0
 protobuf
 certifi
 setuptools
6 changes: 3 additions & 3 deletions core/backend/llm.go
@@ -31,13 +31,13 @@ type TokenUsage struct {
     Completion int
 }
 
-func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
     modelFile := c.Model
 
     var inferenceModel grpc.Backend
     var err error
 
-    opts := ModelOptions(c, o, []model.Option{})
+    opts := ModelOptions(*c, o, []model.Option{})
 
     if c.Backend != "" {
         opts = append(opts, model.WithBackendString(c.Backend))
@@ -85,7 +85,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 
     // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
     fn := func() (LLMResponse, error) {
-        opts := gRPCPredictOpts(c, loader.ModelPath)
+        opts := gRPCPredictOpts(*c, loader.ModelPath)
         opts.Prompt = s
         opts.Messages = protoMessages
         opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
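Note: ModelInference now takes c *config.BackendConfig rather than a struct copy, so a config resolved once per request (by the new request-extractor middleware) is shared down the call chain; helpers that still accept a value, such as ModelOptions and gRPCPredictOpts, are called with *c. A minimal sketch of why the pointer matters, using illustrative stand-in types rather than the real LocalAI structs:

package main

import "fmt"

// Illustrative stand-in for config.BackendConfig; the real struct is far
// larger, which also makes copying it on every call needlessly expensive.
type BackendConfig struct {
    Model   string
    Backend string
}

func tweakByValue(c BackendConfig)    { c.Backend = "llama.cpp" } // mutates a private copy
func tweakByPointer(c *BackendConfig) { c.Backend = "llama.cpp" } // mutates the shared instance

func main() {
    cfg := BackendConfig{Model: "my-model"}
    tweakByValue(cfg)
    fmt.Println(cfg.Backend == "") // true: the copy's change was discarded
    tweakByPointer(&cfg)
    fmt.Println(cfg.Backend) // llama.cpp
}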
4 changes: 1 addition & 3 deletions core/backend/tokenize.go
@@ -7,9 +7,7 @@ import (
     model "github.com/mudler/LocalAI/pkg/model"
 )
 
-func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
-
-    modelFile := backendConfig.Model
+func ModelTokenize(s string, modelFile string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
 
     var inferenceModel grpc.Backend
     var err error
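Note: ModelTokenize now receives the model file explicitly instead of re-deriving it from the config, which lets the caller (for example, the new middleware) decide what to pass. A sketch of a call site under the new signature; the wrapper function and import paths are assumptions based on the repository layout, and only the ModelTokenize call itself is grounded in this hunk:

package example

import (
    "github.com/mudler/LocalAI/core/backend"
    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/core/schema"
    "github.com/mudler/LocalAI/pkg/model"
)

// tokenize passes cfg.Model explicitly; under the old signature the same
// value was read from backendConfig inside ModelTokenize itself.
func tokenize(input string, cfg *config.BackendConfig, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
    return backend.ModelTokenize(input, cfg.Model, ml, *cfg, appConfig)
}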
31 changes: 20 additions & 11 deletions core/config/backend_config.go
@@ -433,19 +433,20 @@ func (c *BackendConfig) HasTemplate() bool {
 type BackendConfigUsecases int
 
 const (
-    FLAG_ANY BackendConfigUsecases = 0b000000000
-    FLAG_CHAT BackendConfigUsecases = 0b000000001
-    FLAG_COMPLETION BackendConfigUsecases = 0b000000010
-    FLAG_EDIT BackendConfigUsecases = 0b000000100
-    FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
-    FLAG_RERANK BackendConfigUsecases = 0b000010000
-    FLAG_IMAGE BackendConfigUsecases = 0b000100000
-    FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
-    FLAG_TTS BackendConfigUsecases = 0b010000000
-    FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
+    FLAG_ANY BackendConfigUsecases = 0b0000000000
+    FLAG_CHAT BackendConfigUsecases = 0b0000000001
+    FLAG_COMPLETION BackendConfigUsecases = 0b0000000010
+    FLAG_EDIT BackendConfigUsecases = 0b0000000100
+    FLAG_EMBEDDINGS BackendConfigUsecases = 0b0000001000
+    FLAG_RERANK BackendConfigUsecases = 0b0000010000
+    FLAG_IMAGE BackendConfigUsecases = 0b0000100000
+    FLAG_TRANSCRIPT BackendConfigUsecases = 0b0001000000
+    FLAG_TTS BackendConfigUsecases = 0b0010000000
+    FLAG_SOUND_GENERATION BackendConfigUsecases = 0b0100000000
+    FLAG_TOKENIZE BackendConfigUsecases = 0b1000000000
 
     // Common Subsets
-    FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
+    FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
 )
 
 func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
@@ -460,6 +461,7 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
     "FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
     "FLAG_TTS": FLAG_TTS,
     "FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
+    "FLAG_TOKENIZE": FLAG_TOKENIZE,
     "FLAG_LLM": FLAG_LLM,
     }
 }
@@ -545,5 +547,12 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
         }
     }
 
+    if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
+        tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
+        if !slices.Contains(tokenizeCapableBackends, c.Backend) {
+            return false
+        }
+    }
+
     return true
 }
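Note: the FLAG_LLM change is a behavioral fix, not cosmetics. The usecase flags are disjoint bits, so AND-ing them always yields zero, which made the old FLAG_LLM equal to FLAG_ANY and match every config; OR builds the intended subset mask, and (u & FLAG_X) == FLAG_X checks such as the new FLAG_TOKENIZE gate then behave as expected. A self-contained reproduction with illustrative names, not the LocalAI source:

package main

import "fmt"

type Usecases int

const (
    FlagChat       Usecases = 0b001
    FlagCompletion Usecases = 0b010
    FlagEdit       Usecases = 0b100

    // Before: AND of disjoint bits collapses to 0, i.e. "any usecase".
    FlagLLMBroken Usecases = FlagChat & FlagCompletion & FlagEdit // 0b000
    // After: OR builds the union of the three usecases.
    FlagLLM Usecases = FlagChat | FlagCompletion | FlagEdit // 0b111
)

// hasAll mirrors the (u & FLAG_X) == FLAG_X checks in GuessUsecases.
func hasAll(u, flag Usecases) bool { return u&flag == flag }

func main() {
    fmt.Println(FlagLLMBroken)                   // 0
    fmt.Println(hasAll(FlagChat, FlagLLMBroken)) // true: the broken mask matched everything
    fmt.Println(hasAll(FlagChat, FlagLLM))       // false: chat alone is not the full LLM subset
    fmt.Println(hasAll(FlagLLM, FlagChat))       // true
}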
12 changes: 11 additions & 1 deletion core/config/backend_config_loader.go
@@ -117,7 +117,9 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
     // Load a config file if present after the model name
     cfg := &BackendConfig{
         PredictionOptions: schema.PredictionOptions{
-            Model: modelName,
+            BasicModelRequest: schema.BasicModelRequest{
+                Model: modelName,
+            },
         },
     }
 
@@ -145,6 +147,14 @@
     return cfg, nil
 }
 
+func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
+    return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
+        LoadOptionDebug(appConfig.Debug),
+        LoadOptionThreads(appConfig.Threads),
+        LoadOptionContextSize(appConfig.ContextSize),
+        LoadOptionF16(appConfig.F16))
+}
+
 // This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
 func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
     bcl.Lock()
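Note: the new LoadBackendConfigFileByNameDefaultOptions wraps the four ApplicationConfig-derived load options that call sites previously had to pass by hand. A sketch of what a typical call site reduces to; the surrounding function is illustrative:

package example

import "github.com/mudler/LocalAI/core/config"

func resolve(bcl *config.BackendConfigLoader, modelName string, appConfig *config.ApplicationConfig) (*config.BackendConfig, error) {
    // Before, each caller repeated the option plumbing:
    //   bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
    //       config.LoadOptionDebug(appConfig.Debug),
    //       config.LoadOptionThreads(appConfig.Threads),
    //       config.LoadOptionContextSize(appConfig.ContextSize),
    //       config.LoadOptionF16(appConfig.F16))
    // After, the helper applies those defaults itself:
    return bcl.LoadBackendConfigFileByNameDefaultOptions(modelName, appConfig)
}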
9 changes: 5 additions & 4 deletions core/config/guesser.go
@@ -26,14 +26,14 @@
 type settingsConfig struct {
     StopWords []string
     TemplateConfig TemplateConfig
-    RepeatPenalty float64
+    RepeatPenalty float64
 }
 
 // default settings to adopt with a given model family
 var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
     Gemma: {
         RepeatPenalty: 1.0,
-        StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
+        StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
         TemplateConfig: TemplateConfig{
             Chat: "{{.Input }}\n<start_of_turn>model\n",
             ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
@@ -161,10 +161,11 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
     }
 
     // We try to guess only if we don't have a template defined already
-    f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
+    guessPath := filepath.Join(modelPath, cfg.ModelFileName())
+    f, err := gguf.ParseGGUFFile(guessPath)
     if err != nil {
         // Only valid for gguf files
-        log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
+        log.Debug().Str("filePath", guessPath).Msg("guessDefaultsFromFile: not a GGUF file")
         return
     }
 
12 changes: 7 additions & 5 deletions core/http/app.go
@@ -121,7 +121,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
         })
     }
 
-    // Health Checks should always be exempt from auth, so register these first
+    // Health Checks should always be exempt from auth, so register these first
     routes.HealthRoutes(app)
 
     kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
@@ -156,13 +156,15 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
     galleryService := services.NewGalleryService(appConfig)
     galleryService.Start(appConfig.Context, cl)
 
-    routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
-    routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
-    routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
+    requestExtractor := middleware.NewRequestExtractor(cl, ml, appConfig)
+
+    routes.RegisterElevenLabsRoutes(app, requestExtractor, cl, ml, appConfig)
+    routes.RegisterLocalAIRoutes(app, requestExtractor, cl, ml, appConfig, galleryService)
+    routes.RegisterOpenAIRoutes(app, requestExtractor, cl, ml, appConfig)
     if !appConfig.DisableWebUI {
         routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
     }
-    routes.RegisterJINARoutes(app, cl, ml, appConfig)
+    routes.RegisterJINARoutes(app, requestExtractor, cl, ml, appConfig)
 
     httpFS := http.FS(embedDirStatic)
 
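Note: requestExtractor is the centralized middleware this commit is named for; it supersedes the per-handler model resolution that lived in the now-deleted core/http/ctx/fiber.go. Only the NewRequestExtractor(cl, ml, appConfig) constructor and the extra route-registration parameter are visible in this diff, so the sketch below shows the general shape such a Fiber middleware takes; the SetModelAndConfig method name, route parameter, and Locals key are hypothetical:

package middleware

import (
    "github.com/gofiber/fiber/v2"

    "github.com/mudler/LocalAI/core/config"
    "github.com/mudler/LocalAI/pkg/model"
)

// RequestExtractor resolves the requested model and its BackendConfig once,
// up front, instead of in every endpoint handler.
type RequestExtractor struct {
    cl        *config.BackendConfigLoader
    ml        *model.ModelLoader
    appConfig *config.ApplicationConfig
}

func NewRequestExtractor(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) *RequestExtractor {
    return &RequestExtractor{cl: cl, ml: ml, appConfig: appConfig}
}

// SetModelAndConfig loads the BackendConfig for the requested model and
// stashes it in the request context for downstream handlers.
func (re *RequestExtractor) SetModelAndConfig() fiber.Handler {
    return func(c *fiber.Ctx) error {
        modelName := c.Params("model") // however the route exposes the model name
        cfg, err := re.cl.LoadBackendConfigFileByNameDefaultOptions(modelName, re.appConfig)
        if err != nil {
            return fiber.ErrBadRequest
        }
        c.Locals("backend_config", cfg) // a shared *config.BackendConfig
        return c.Next()
    }
}

A downstream handler would recover the shared config with c.Locals("backend_config").(*config.BackendConfig), which lines up with ModelInference now accepting a pointer instead of a copy.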
47 changes: 0 additions & 47 deletions core/http/ctx/fiber.go

This file was deleted.

(remaining changed files not shown)

