diff --git a/.gitattributes b/.gitattributes index dfdb8b771ce0..ef774d4c65c4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ *.sh text eol=lf +backend/cpp/llama/*.hpp linguist-vendored \ No newline at end of file diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go index 23a725e32fd4..92d80a3a2ea9 100644 --- a/core/http/endpoints/localai/system.go +++ b/core/http/endpoints/localai/system.go @@ -21,10 +21,15 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf for b := range appConfig.ExternalGRPCBackends { availableBackends = append(availableBackends, b) } + + sysmodels := []schema.SysInfoModel{} + for _, m := range loadedModels { + sysmodels = append(sysmodels, schema.SysInfoModel{ID: m.ID}) + } return c.JSON( schema.SystemInformationResponse{ Backends: availableBackends, - Models: loadedModels, + Models: sysmodels, }, ) } diff --git a/core/schema/localai.go b/core/schema/localai.go index 48379b747453..5206372e6cc0 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -2,7 +2,6 @@ package schema import ( "github.com/mudler/LocalAI/core/p2p" - "github.com/mudler/LocalAI/pkg/model" gopsutil "github.com/shirou/gopsutil/v3/process" ) @@ -77,7 +76,11 @@ type P2PNodesResponse struct { FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"` } +type SysInfoModel struct { + ID string `json:"id"` +} + type SystemInformationResponse struct { - Backends []string `json:"backends"` - Models []model.Model `json:"loaded_models"` + Backends []string `json:"backends"` + Models []SysInfoModel `json:"loaded_models"` } diff --git a/gallery/index.yaml b/gallery/index.yaml index 952b6d903412..482a54771b7e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -587,6 +587,21 @@ - filename: calme-3.1-llamaloi-3b.Q5_K_M.gguf sha256: 06b900c7252423329ca57a02a8b8d18a1294934709861d09af96e74694c9a3f1 uri: 
huggingface://MaziyarPanahi/calme-3.1-llamaloi-3b-GGUF/calme-3.1-llamaloi-3b.Q5_K_M.gguf +- !!merge <<: *llama32 + name: "llama3.2-3b-enigma" + icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg + urls: + - https://huggingface.co/QuantFactory/Llama3.2-3B-Enigma-GGUF + - https://huggingface.co/QuantFactory/Llama3.2-3B-Enigma-GGUF + description: | + ValiantLabs/Llama3.2-3B-Enigma is an Enigma model built on Llama 3.2 3b. It is a high-quality code-instruct model with the Llama 3.2 Instruct chat format. The model is finetuned on synthetic code-instruct data generated using Llama 3.1 405b and supplemented with generalist synthetic data. This model is suitable for both code-instruct and general chat applications. + overrides: + parameters: + model: Llama3.2-3B-Enigma.Q4_K_M.gguf + files: + - filename: Llama3.2-3B-Enigma.Q4_K_M.gguf + sha256: 4304e6ee1e348b228470700ec1e9423f5972333d376295195ce6cd5c70cae5e4 + uri: huggingface://QuantFactory/Llama3.2-3B-Enigma-GGUF/Llama3.2-3B-Enigma.Q4_K_M.gguf - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" @@ -1308,6 +1323,40 @@ - filename: EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf sha256: b05dbc02eeb286c41122b103ac31431fc8dcbd80b8979422541a05cda53df61b uri: huggingface://mradermacher/EVA-Qwen2.5-72B-v0.1-i1-GGUF/EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "celestial-harmony-14b-v1.0-experimental-1016-i1" + urls: + - https://huggingface.co/ProdeusUnity/Celestial-Harmony-14b-v1.0-Experimental-1016 + - https://huggingface.co/mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF + description: | + Yet Another merge, this one for AuriAetherwiing, at their request. + This is a merge of pre-trained language models created using mergekit. 
+ The following models were included in the merge: + EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1 + v000000/Qwen2.5-Lumen-14B + arcee-ai/SuperNova-Medius + overrides: + parameters: + model: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf + files: + - filename: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf + sha256: 536a6d98e30e9d52f91672daf49eeb7efe076e161a5da8beaca204adedd76864 + uri: huggingface://mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF/Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwen2.5-32b-arliai-rpmax-v1.3" + urls: + - https://huggingface.co/ArliAI/Qwen2.5-32B-ArliAI-RPMax-v1.3 + - https://huggingface.co/bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF + description: | + RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations. + Many RPMax users mentioned that these models does not feel like any other RP models, having a different writing style and generally doesn't feel in-bred. 
+ overrides: + parameters: + model: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf + files: + - filename: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf + sha256: 51b369068b124165b1b8c253371b88b573af9dd350e331ce93d7e47b6b710003 + uri: huggingface://bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF/Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: @@ -2619,6 +2668,91 @@ - filename: Tess-R1-Limerick-Llama-3.1-70B-Q4_K_M.gguf sha256: 92da5dad8a36ed5060becf78a83537d776079b7eaa4de73733d3ca57156286ab uri: huggingface://bartowski/Tess-R1-Limerick-Llama-3.1-70B-GGUF/Tess-R1-Limerick-Llama-3.1-70B-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "tess-3-llama-3.1-70b" + icon: https://huggingface.co/migtissera/Tess-M-v1.0/resolve/main/Tess.png + urls: + - https://huggingface.co/migtissera/Tess-3-Llama-3.1-70B + - https://huggingface.co/mradermacher/Tess-3-Llama-3.1-70B-GGUF + description: | + Tess, short for Tesoro (Treasure in Italian), is a general purpose Large Language Model series created by Migel Tissera. + overrides: + parameters: + model: Tess-3-Llama-3.1-70B.Q4_K_M.gguf + files: + - filename: Tess-3-Llama-3.1-70B.Q4_K_M.gguf + sha256: 81625defcbea414282f490dd960b14afdecd7734e0d77d8db2da2bf5c21261aa + uri: huggingface://mradermacher/Tess-3-Llama-3.1-70B-GGUF/Tess-3-Llama-3.1-70B.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama3.1-8b-enigma" + icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg + urls: + - https://huggingface.co/ValiantLabs/Llama3.1-8B-Enigma + - https://huggingface.co/mradermacher/Llama3.1-8B-Enigma-GGUF + description: | + Enigma is a code-instruct model built on Llama 3.1 8b. + High quality code instruct performance within the Llama 3 Instruct chat format + Finetuned on synthetic code-instruct data generated with Llama 3.1 405b. Find the current version of the dataset here! + Overall chat performance supplemented with generalist synthetic data. 
+ This is the 2024-10-02 release of Enigma for Llama 3.1 8b, enhancing code-instruct and general chat capabilities. + overrides: + parameters: + model: Llama3.1-8B-Enigma.Q4_K_M.gguf + files: + - filename: Llama3.1-8B-Enigma.Q4_K_M.gguf + sha256: e98c9909ee3b74b11d50d4c4f17178502e42cd936215ede0c64a7b217ae665bb + uri: huggingface://mradermacher/Llama3.1-8B-Enigma-GGUF/Llama3.1-8B-Enigma.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama3.1-8b-cobalt" + urls: + - https://huggingface.co/ValiantLabs/Llama3.1-8B-Cobalt + - https://huggingface.co/mradermacher/Llama3.1-8B-Cobalt-GGUF + description: | + Cobalt is a math-instruct model built on Llama 3.1 8b. + High quality math instruct performance within the Llama 3 Instruct chat format + Finetuned on synthetic math-instruct data generated with Llama 3.1 405b. Find the current version of the dataset here! + Version + This is the 2024-08-16 release of Cobalt for Llama 3.1 8b. + Help us and recommend Cobalt to your friends! We're excited for more Cobalt releases in the future. + overrides: + parameters: + model: Llama3.1-8B-Cobalt.Q4_K_M.gguf + files: + - filename: Llama3.1-8B-Cobalt.Q4_K_M.gguf + sha256: 44340f1ebbc3bf4e4e23d04ac3580c26fdc0b5717f23b45ce30743aa1eeed7ed + uri: huggingface://mradermacher/Llama3.1-8B-Cobalt-GGUF/Llama3.1-8B-Cobalt.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-3.1-8b-arliai-rpmax-v1.3" + urls: + - https://huggingface.co/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.3 + - https://huggingface.co/bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.3-GGUF + description: | + RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. 
This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations. + Many RPMax users mentioned that these models does not feel like any other RP models, having a different writing style and generally doesn't feel in-bred. + overrides: + parameters: + model: Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf + files: + - filename: Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf + sha256: 66fcbbe96950cc3424cba866f929180d83f1bffdb0d4eedfa9b1f55cf0ea5c26 + uri: huggingface://bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.3-GGUF/Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "l3.1-8b-slush-i1" + icon: https://huggingface.co/crestf411/L3.1-8B-Slush/resolve/main/slush.jpg? + urls: + - https://huggingface.co/crestf411/L3.1-8B-Slush + - https://huggingface.co/mradermacher/L3.1-8B-Slush-i1-GGUF + description: | + Slush is a two-stage model trained with high LoRA dropout, where stage 1 is a pretraining continuation on the base model, aimed at boosting the model's creativity and writing capabilities. This is then merged into the instruction tune model, and stage 2 is a fine tuning step on top of this to further enhance its roleplaying capabilities and/or to repair any damage caused in the stage 1 merge. + This is an initial experiment done on the at-this-point-infamous Llama 3.1 8B model, in an attempt to retain its smartness while addressing its abysmal lack of imagination/creativity. As always, feedback is welcome, and begone if you demand perfection. + The second stage, like the Sunfall series, follows the Silly Tavern preset, so ymmv in particular if you use some other tool and/or preset. 
+ overrides: + parameters: + model: L3.1-8B-Slush.i1-Q4_K_M.gguf + files: + - filename: L3.1-8B-Slush.i1-Q4_K_M.gguf + sha256: 98c53cd1ec0e2b00400c5968cd076a589d0c889bca13ec52abfe4456cfa039be + uri: huggingface://mradermacher/L3.1-8B-Slush-i1-GGUF/L3.1-8B-Slush.i1-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" @@ -3382,6 +3516,21 @@ - filename: Mistral-Nemo-Prism-12B-Q4_K_M.gguf sha256: 96b922c6d55d94ffb91e869b8cccaf2b6dc449d75b1456f4d4578c92c8184c25 uri: huggingface://bartowski/Mistral-Nemo-Prism-12B-GGUF/Mistral-Nemo-Prism-12B-Q4_K_M.gguf +- !!merge <<: *mistral03 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "magnum-12b-v2.5-kto-i1" + icon: https://cdn-uploads.huggingface.co/production/uploads/658a46cbfb9c2bdfae75b3a6/sWYs3iHkn36lw6FT_Y7nn.png + urls: + - https://huggingface.co/mradermacher/magnum-12b-v2.5-kto-i1-GGUF + description: | + v2.5 KTO is an experimental release; we are testing a hybrid reinforcement learning strategy of KTO + DPOP, using rejected data sampled from the original model as "rejected". For "chosen", we use data from the original finetuning dataset as "chosen". This was done on a limited portion of primarily instruction following data; we plan to scale up a larger KTO dataset in the future for better generalization. This is the 5th in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of anthracite-org/magnum-12b-v2. 
+ overrides: + parameters: + model: magnum-12b-v2.5-kto.i1-Q4_K_M.gguf + files: + - filename: magnum-12b-v2.5-kto.i1-Q4_K_M.gguf + sha256: 07e91d2c6d4e42312e65a69c54f16be467575f7a596fe052993b388e38b90d76 + uri: huggingface://mradermacher/magnum-12b-v2.5-kto-i1-GGUF/magnum-12b-v2.5-kto.i1-Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" diff --git a/pkg/model/loader.go b/pkg/model/loader.go index ade9944ad727..b32e3745efc9 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -103,13 +103,13 @@ FILE: return models, nil } -func (ml *ModelLoader) ListModels() []Model { +func (ml *ModelLoader) ListModels() []*Model { ml.mu.Lock() defer ml.mu.Unlock() - models := []Model{} + models := []*Model{} for _, model := range ml.models { - models = append(models, *model) + models = append(models, model) } return models