Skip to content

Commit

Permalink
Merge branch 'master' into feat-request-middleware
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-gray101 authored Nov 14, 2024
2 parents 833bd99 + de2b574 commit 990e752
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*.sh text eol=lf
backend/cpp/llama/*.hpp linguist-vendored
7 changes: 6 additions & 1 deletion core/http/endpoints/localai/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,15 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
}

sysmodels := []schema.SysInfoModel{}
for _, m := range loadedModels {
sysmodels = append(sysmodels, schema.SysInfoModel{ID: m.ID})
}
return c.JSON(
schema.SystemInformationResponse{
Backends: availableBackends,
Models: loadedModels,
Models: sysmodels,
},
)
}
Expand Down
9 changes: 6 additions & 3 deletions core/schema/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package schema

import (
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/pkg/model"
gopsutil "github.com/shirou/gopsutil/v3/process"
)

Expand Down Expand Up @@ -77,7 +76,11 @@ type P2PNodesResponse struct {
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
}

type SysInfoModel struct {
ID string `json:"id"`
}

type SystemInformationResponse struct {
Backends []string `json:"backends"`
Models []model.Model `json:"loaded_models"`
Backends []string `json:"backends"`
Models []SysInfoModel `json:"loaded_models"`
}
149 changes: 149 additions & 0 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,21 @@
- filename: calme-3.1-llamaloi-3b.Q5_K_M.gguf
sha256: 06b900c7252423329ca57a02a8b8d18a1294934709861d09af96e74694c9a3f1
uri: huggingface://MaziyarPanahi/calme-3.1-llamaloi-3b-GGUF/calme-3.1-llamaloi-3b.Q5_K_M.gguf
- !!merge <<: *llama32
name: "llama3.2-3b-enigma"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg
urls:
- https://huggingface.co/QuantFactory/Llama3.2-3B-Enigma-GGUF
      - https://huggingface.co/ValiantLabs/Llama3.2-3B-Enigma
description: |
ValiantLabs/Llama3.2-3B-Enigma is an Enigma model built on Llama 3.2 3b. It is a high-quality code-instruct model with the Llama 3.2 Instruct chat format. The model is finetuned on synthetic code-instruct data generated using Llama 3.1 405b and supplemented with generalist synthetic data. This model is suitable for both code-instruct and general chat applications.
overrides:
parameters:
model: Llama3.2-3B-Enigma.Q4_K_M.gguf
files:
- filename: Llama3.2-3B-Enigma.Q4_K_M.gguf
sha256: 4304e6ee1e348b228470700ec1e9423f5972333d376295195ce6cd5c70cae5e4
uri: huggingface://QuantFactory/Llama3.2-3B-Enigma-GGUF/Llama3.2-3B-Enigma.Q4_K_M.gguf
- &qwen25
## Qwen2.5
name: "qwen2.5-14b-instruct"
Expand Down Expand Up @@ -1308,6 +1323,40 @@
- filename: EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf
sha256: b05dbc02eeb286c41122b103ac31431fc8dcbd80b8979422541a05cda53df61b
uri: huggingface://mradermacher/EVA-Qwen2.5-72B-v0.1-i1-GGUF/EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "celestial-harmony-14b-v1.0-experimental-1016-i1"
urls:
- https://huggingface.co/ProdeusUnity/Celestial-Harmony-14b-v1.0-Experimental-1016
- https://huggingface.co/mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF
description: |
Yet Another merge, this one for AuriAetherwiing, at their request.
This is a merge of pre-trained language models created using mergekit.
The following models were included in the merge:
EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1
v000000/Qwen2.5-Lumen-14B
arcee-ai/SuperNova-Medius
overrides:
parameters:
model: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf
files:
- filename: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf
sha256: 536a6d98e30e9d52f91672daf49eeb7efe076e161a5da8beaca204adedd76864
uri: huggingface://mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF/Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-32b-arliai-rpmax-v1.3"
urls:
- https://huggingface.co/ArliAI/Qwen2.5-32B-ArliAI-RPMax-v1.3
- https://huggingface.co/bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF
description: |
    RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and is capable of understanding and acting appropriately in any character or situation.
    Many RPMax users mentioned that these models do not feel like any other RP models, as they have a different writing style and generally don't feel in-bred.
overrides:
parameters:
model: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
files:
- filename: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
sha256: 51b369068b124165b1b8c253371b88b573af9dd350e331ce93d7e47b6b710003
uri: huggingface://bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF/Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
- &archfunct
license: apache-2.0
tags:
Expand Down Expand Up @@ -2619,6 +2668,91 @@
- filename: Tess-R1-Limerick-Llama-3.1-70B-Q4_K_M.gguf
sha256: 92da5dad8a36ed5060becf78a83537d776079b7eaa4de73733d3ca57156286ab
uri: huggingface://bartowski/Tess-R1-Limerick-Llama-3.1-70B-GGUF/Tess-R1-Limerick-Llama-3.1-70B-Q4_K_M.gguf
- !!merge <<: *llama31
name: "tess-3-llama-3.1-70b"
icon: https://huggingface.co/migtissera/Tess-M-v1.0/resolve/main/Tess.png
urls:
- https://huggingface.co/migtissera/Tess-3-Llama-3.1-70B
- https://huggingface.co/mradermacher/Tess-3-Llama-3.1-70B-GGUF
description: |
Tess, short for Tesoro (Treasure in Italian), is a general purpose Large Language Model series created by Migel Tissera.
overrides:
parameters:
model: Tess-3-Llama-3.1-70B.Q4_K_M.gguf
files:
- filename: Tess-3-Llama-3.1-70B.Q4_K_M.gguf
sha256: 81625defcbea414282f490dd960b14afdecd7734e0d77d8db2da2bf5c21261aa
uri: huggingface://mradermacher/Tess-3-Llama-3.1-70B-GGUF/Tess-3-Llama-3.1-70B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-enigma"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg
urls:
- https://huggingface.co/ValiantLabs/Llama3.1-8B-Enigma
- https://huggingface.co/mradermacher/Llama3.1-8B-Enigma-GGUF
description: |
Enigma is a code-instruct model built on Llama 3.1 8b.
High quality code instruct performance within the Llama 3 Instruct chat format
Finetuned on synthetic code-instruct data generated with Llama 3.1 405b. Find the current version of the dataset here!
Overall chat performance supplemented with generalist synthetic data.
This is the 2024-10-02 release of Enigma for Llama 3.1 8b, enhancing code-instruct and general chat capabilities.
overrides:
parameters:
model: Llama3.1-8B-Enigma.Q4_K_M.gguf
files:
- filename: Llama3.1-8B-Enigma.Q4_K_M.gguf
sha256: e98c9909ee3b74b11d50d4c4f17178502e42cd936215ede0c64a7b217ae665bb
uri: huggingface://mradermacher/Llama3.1-8B-Enigma-GGUF/Llama3.1-8B-Enigma.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-cobalt"
urls:
- https://huggingface.co/ValiantLabs/Llama3.1-8B-Cobalt
- https://huggingface.co/mradermacher/Llama3.1-8B-Cobalt-GGUF
description: |
Cobalt is a math-instruct model built on Llama 3.1 8b.
High quality math instruct performance within the Llama 3 Instruct chat format
Finetuned on synthetic math-instruct data generated with Llama 3.1 405b. Find the current version of the dataset here!
Version
This is the 2024-08-16 release of Cobalt for Llama 3.1 8b.
Help us and recommend Cobalt to your friends! We're excited for more Cobalt releases in the future.
overrides:
parameters:
model: Llama3.1-8B-Cobalt.Q4_K_M.gguf
files:
- filename: Llama3.1-8B-Cobalt.Q4_K_M.gguf
sha256: 44340f1ebbc3bf4e4e23d04ac3580c26fdc0b5717f23b45ce30743aa1eeed7ed
uri: huggingface://mradermacher/Llama3.1-8B-Cobalt-GGUF/Llama3.1-8B-Cobalt.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-3.1-8b-arliai-rpmax-v1.3"
urls:
- https://huggingface.co/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.3
- https://huggingface.co/bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.3-GGUF
description: |
    RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and is capable of understanding and acting appropriately in any character or situation.
    Many RPMax users mentioned that these models do not feel like any other RP models, as they have a different writing style and generally don't feel in-bred.
overrides:
parameters:
model: Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
files:
- filename: Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
sha256: 66fcbbe96950cc3424cba866f929180d83f1bffdb0d4eedfa9b1f55cf0ea5c26
uri: huggingface://bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.3-GGUF/Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
- !!merge <<: *llama31
name: "l3.1-8b-slush-i1"
icon: https://huggingface.co/crestf411/L3.1-8B-Slush/resolve/main/slush.jpg?
urls:
- https://huggingface.co/crestf411/L3.1-8B-Slush
- https://huggingface.co/mradermacher/L3.1-8B-Slush-i1-GGUF
description: |
Slush is a two-stage model trained with high LoRA dropout, where stage 1 is a pretraining continuation on the base model, aimed at boosting the model's creativity and writing capabilities. This is then merged into the instruction tune model, and stage 2 is a fine tuning step on top of this to further enhance its roleplaying capabilities and/or to repair any damage caused in the stage 1 merge.
This is an initial experiment done on the at-this-point-infamous Llama 3.1 8B model, in an attempt to retain its smartness while addressing its abysmal lack of imagination/creativity. As always, feedback is welcome, and begone if you demand perfection.
The second stage, like the Sunfall series, follows the Silly Tavern preset, so ymmv in particular if you use some other tool and/or preset.
overrides:
parameters:
model: L3.1-8B-Slush.i1-Q4_K_M.gguf
files:
- filename: L3.1-8B-Slush.i1-Q4_K_M.gguf
sha256: 98c53cd1ec0e2b00400c5968cd076a589d0c889bca13ec52abfe4456cfa039be
uri: huggingface://mradermacher/L3.1-8B-Slush-i1-GGUF/L3.1-8B-Slush.i1-Q4_K_M.gguf
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
Expand Down Expand Up @@ -3382,6 +3516,21 @@
- filename: Mistral-Nemo-Prism-12B-Q4_K_M.gguf
sha256: 96b922c6d55d94ffb91e869b8cccaf2b6dc449d75b1456f4d4578c92c8184c25
uri: huggingface://bartowski/Mistral-Nemo-Prism-12B-GGUF/Mistral-Nemo-Prism-12B-Q4_K_M.gguf
- !!merge <<: *mistral03
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "magnum-12b-v2.5-kto-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/658a46cbfb9c2bdfae75b3a6/sWYs3iHkn36lw6FT_Y7nn.png
urls:
- https://huggingface.co/mradermacher/magnum-12b-v2.5-kto-i1-GGUF
description: |
    v2.5 KTO is an experimental release; we are testing a hybrid reinforcement learning strategy of KTO + DPOP, using rejected data sampled from the original model as "rejected". For "chosen", we use data from the original finetuning dataset as "chosen". This was done on a limited portion of primarily instruction following data; we plan to scale up a larger KTO dataset in the future for better generalization. This is the 5th in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of anthracite-org/magnum-12b-v2.
overrides:
parameters:
model: magnum-12b-v2.5-kto.i1-Q4_K_M.gguf
files:
- filename: magnum-12b-v2.5-kto.i1-Q4_K_M.gguf
sha256: 07e91d2c6d4e42312e65a69c54f16be467575f7a596fe052993b388e38b90d76
uri: huggingface://mradermacher/magnum-12b-v2.5-kto-i1-GGUF/magnum-12b-v2.5-kto.i1-Q4_K_M.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
Expand Down
6 changes: 3 additions & 3 deletions pkg/model/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ FILE:
return models, nil
}

func (ml *ModelLoader) ListModels() []Model {
func (ml *ModelLoader) ListModels() []*Model {
ml.mu.Lock()
defer ml.mu.Unlock()

models := []Model{}
models := []*Model{}
for _, model := range ml.models {
models = append(models, *model)
models = append(models, model)
}

return models
Expand Down

0 comments on commit 990e752

Please sign in to comment.