diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index e1ac1e37e991..e2ffaf0ed24a 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -83,7 +83,7 @@ jobs:
             ${{ env.IMAGE_NAME }}
           # generate Docker tags based on the following events/attributes
           tags: |
-            type=raw,value={{branch}}-{{sha}},enable={{#if branch}}true{{else}}false{{/if}}
+            type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
             type=schedule,pattern=nightly
             type=schedule,pattern={{date 'YYYYMMDD'}}
             type=semver,pattern={{version}}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f74cbcb34bb..66ae41847b92 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,17 +1,25 @@
-# v0.5.0 [Unreleased]
+# v0.6.0 [Unreleased]
+
+## Features
+
+# v0.5.0
 
 ## Notice
+
 * llama.cpp backend (CPU, Metal) now requires a redownload of gguf model due to upstream format changes: https://github.com/TabbyML/tabby/pull/645 https://github.com/ggerganov/llama.cpp/pull/3252
 * Due to indexing format changes, the `~/.tabby/index` needs to be manually removed before any further runs of `tabby scheduler`.
+* `TABBY_REGISTRY` is replaced with `TABBY_DOWNLOAD_HOST` for the GitHub-based registry implementation.
 
 ## Features
 
+* Improved dashboard UI.
+
 ## Fixes and Improvements
 
-* Switch cpu backend to llama.cpp: https://github.com/TabbyML/tabby/pull/638
+* CPU backend is switched to llama.cpp: https://github.com/TabbyML/tabby/pull/638
 * add `server.completion_timeout` to control the code completion interface timeout: https://github.com/TabbyML/tabby/pull/637
-* Switch cuda backend to llama.cpp: https://github.com/TabbyML/tabby/pull/656
-* Switch tokenizer to llama.cpp, so tabby no longer need to download additional tokenizer file: https://github.com/TabbyML/tabby/pull/683
+* CUDA backend is switched to llama.cpp: https://github.com/TabbyML/tabby/pull/656
+* Tokenizer implementation is switched to llama.cpp, so Tabby no longer needs to download an additional tokenizer file: https://github.com/TabbyML/tabby/pull/683
 
 # v0.4.0
 
diff --git a/Cargo.lock b/Cargo.lock
index 269c161e07c8..8221708146b9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1176,7 +1176,7 @@ dependencies = [
 
 [[package]]
 name = "http-api-bindings"
-version = "0.5.0-dev"
+version = "0.5.0"
 dependencies = [
  "async-trait",
  "futures",
@@ -1489,7 +1489,7 @@ checksum = "3852614a3bd9ca9804678ba6be5e3b8ce76dfc902cae004e3e0c44051b6e88db"
 
 [[package]]
 name = "llama-cpp-bindings"
-version = "0.5.0-dev"
+version = "0.5.0"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -2823,7 +2823,7 @@ dependencies = [
 
 [[package]]
 name = "tabby"
-version = "0.5.0-dev"
+version = "0.5.0"
 dependencies = [
  "anyhow",
  "assert-json-diff",
@@ -2872,7 +2872,7 @@ dependencies = [
 
 [[package]]
 name = "tabby-common"
-version = "0.5.0-dev"
+version = "0.5.0"
 dependencies = [
  "anyhow",
  "chrono",
@@ -2889,7 +2889,7 @@ dependencies = [
 
 [[package]]
 name = "tabby-download"
-version = "0.5.0-dev"
+version = "0.5.0"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -2909,7 +2909,7 @@ dependencies = [
 
 [[package]]
 name = "tabby-inference"
-version = "0.5.0-dev"
+version = "0.5.0"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -2922,7 +2922,7 @@ dependencies = [
 
 [[package]]
 name = "tabby-scheduler"
-version = "0.5.0-dev"
+version = "0.5.0"
 dependencies = [
  "anyhow",
  "file-rotate",
diff --git a/Cargo.toml b/Cargo.toml
index 33506afeb906..4f5213d59cbd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,7 +11,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 authors = ["Meng Zhang"]
 homepage = "https://github.com/TabbyML/tabby"
diff --git a/crates/http-api-bindings/Cargo.toml b/crates/http-api-bindings/Cargo.toml
index a7490ba589ad..7a1cabda76db 100644
--- a/crates/http-api-bindings/Cargo.toml
+++ b/crates/http-api-bindings/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "http-api-bindings"
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/llama-cpp-bindings/Cargo.toml b/crates/llama-cpp-bindings/Cargo.toml
index 1c706d47b920..e50a12ccf117 100644
--- a/crates/llama-cpp-bindings/Cargo.toml
+++ b/crates/llama-cpp-bindings/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "llama-cpp-bindings"
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 
 [features]
diff --git a/crates/tabby-common/Cargo.toml b/crates/tabby-common/Cargo.toml
index 02ac08b50c23..727d733cec17 100644
--- a/crates/tabby-common/Cargo.toml
+++ b/crates/tabby-common/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tabby-common"
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/tabby-download/Cargo.toml b/crates/tabby-download/Cargo.toml
index aaead140c25e..5992a07e53d4 100644
--- a/crates/tabby-download/Cargo.toml
+++ b/crates/tabby-download/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tabby-download"
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/tabby-inference/Cargo.toml b/crates/tabby-inference/Cargo.toml
index 7df2574307db..457ab02b9843 100644
--- a/crates/tabby-inference/Cargo.toml
+++ b/crates/tabby-inference/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tabby-inference"
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
diff --git a/crates/tabby-scheduler/Cargo.toml b/crates/tabby-scheduler/Cargo.toml
index 92b058856a31..088fb3839d1e 100644
--- a/crates/tabby-scheduler/Cargo.toml
+++ b/crates/tabby-scheduler/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tabby-scheduler"
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml
index ba88d452008b..eb569132d23e 100644
--- a/crates/tabby/Cargo.toml
+++ b/crates/tabby/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tabby"
-version = "0.5.0-dev"
+version = "0.5.0"
 edition = "2021"
 
 [features]
diff --git a/crates/tabby/src/serve/completions/prompt.rs b/crates/tabby/src/serve/completions/prompt.rs
index d78f4b861ee3..b5380303ce0b 100644
--- a/crates/tabby/src/serve/completions/prompt.rs
+++ b/crates/tabby/src/serve/completions/prompt.rs
@@ -109,8 +109,12 @@ fn collect_snippets(index_server: &IndexServer, language: &str, text: &str) -> V
     let mut ret = Vec::new();
     let mut tokens = tokenize_text(text);
 
-    let language_query = index_server.language_query(language).unwrap();
-    let body_query = index_server.body_query(&tokens).unwrap();
+    let Ok(language_query) = index_server.language_query(language) else {
+        return vec![];
+    };
+    let Ok(body_query) = index_server.body_query(&tokens) else {
+        return vec![];
+    };
     let query = BooleanQuery::new(vec![
         (Occur::Must, language_query),
         (Occur::Must, body_query),
diff --git a/website/docs/models/index.md b/website/docs/models/index.md
deleted file mode 100644
index b8ef18b827ff..000000000000
--- a/website/docs/models/index.md
+++ /dev/null
@@ -1,37 +0,0 @@
----
-sidebar_position: 4
----
-
-# 🧑‍🔬 Models Directory
-
-## Completion models (`--model`)
-
-We recommend using
-
-* For **1B to 3B models**, it's advisable to have at least **NVIDIA T4, 10 Series, or 20 Series GPUs**.
-* For **7B to 13B models**, we recommend using **NVIDIA V100, A100, 30 Series, or 40 Series GPUs**.
-
-| Model ID                                                              | License                                                                                      | Infilling Support |
-| ---------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------: | :---------------: |
-| [TabbyML/CodeLlama-13B](https://huggingface.co/TabbyML/CodeLlama-13B) | [Llama2](https://github.com/facebookresearch/llama/blob/main/LICENSE)                       | ✅ |
-| [TabbyML/CodeLlama-7B](https://huggingface.co/TabbyML/CodeLlama-7B)   | [Llama2](https://github.com/facebookresearch/llama/blob/main/LICENSE)                       | ✅ |
-| [TabbyML/StarCoder-7B](https://huggingface.co/TabbyML/StarCoder-7B)   | [BigCode-OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) | ✅ |
-| [TabbyML/StarCoder-3B](https://huggingface.co/TabbyML/StarCoder-3B)   | [BigCode-OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) | ✅ |
-| [TabbyML/StarCoder-1B](https://huggingface.co/TabbyML/StarCoder-1B)   | [BigCode-OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) | ✅ |
-
-## Chat models (`--chat-model`)
-
-To ensure optimal response quality, and given that latency requirements are not stringent in this scenario, we recommend using a model with at least 3B parameters.
-
-| Model ID                                                                 | License                                                                              |
-| -------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------: |
-| [TabbyML/Mistral-7B](https://huggingface.co/TabbyML/Mistral-7B)         | [Apache 2.0](https://opensource.org/licenses/Apache-2.0)                            |
-| [TabbyML/WizardCoder-3B](https://huggingface.co/TabbyML/WizardCoder-3B) | [OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) |
-
-## Alternative Registry
-
-By default, Tabby utilizes the [Hugging Face organization](https://huggingface.co/TabbyML) as its model registry. Mainland Chinese users have encountered challenges accessing Hugging Face for various reasons. The Tabby team has established a mirrored at [modelscope](https://www.modelscope.cn/organization/TabbyML), which can be utilized using the following environment variable:
-
-```bash
-TABBY_REGISTRY=modelscope tabby serve --model TabbyML/StarCoder-1B
-```
diff --git a/website/docs/models/index.mdx b/website/docs/models/index.mdx
new file mode 100644
index 000000000000..2927a2cd54cc
--- /dev/null
+++ b/website/docs/models/index.mdx
@@ -0,0 +1,18 @@
+---
+sidebar_position: 4
+hide_table_of_contents: true
+---
+
+import GitHubReadme from "./readme";
+
+# 🧑‍🔬 Models Registry
+
+<GitHubReadme src="..." />
+
+## Model Mirrors
+
+Mainland Chinese users might encounter challenges accessing Hugging Face. For models mirrored to ModelScope, you can download the model by setting the following environment variable:
+
+```bash
+TABBY_DOWNLOAD_HOST=modelscope.cn tabby serve --model TabbyML/StarCoder-1B
+```
diff --git a/website/docs/models/readme.tsx b/website/docs/models/readme.tsx
new file mode 100644
index 000000000000..9344318d63bb
--- /dev/null
+++ b/website/docs/models/readme.tsx
@@ -0,0 +1,74 @@
+import React, { useState, useEffect } from "react";
+import { marked } from "marked";
+
+const GitHubReadme: React.FC<{
+  src?: string;
+}> = ({
+  src,
+}) => {
+  if (!src) {
+    console.error(
+      "react-github-readme-md: You must provide either a src or username and repo"
+    );
+    return null;
+  }
+
+  const [readmeContent, setReadmeContent] = useState("");
+
+  useEffect(() => {
+    // Function to fetch the README content from GitHub
+    const fetchReadme = async () => {
+      try {
+        let readmeUrl = "";
+
+        if (src) {
+          // Allow passing a URL directly as a prop
+          readmeUrl = src;
+        }
+
+        if (!readmeUrl) {
+          throw new Error("Failed to fetch README path");
+        }
+
+        const response = await fetch(readmeUrl);
+
+        if (!response.ok) {
+          throw new Error("Failed to fetch README");
+        }
+
+        const data = await response.text();
+
+        if (data) {
+          setReadmeContent(data.split("\n").splice(1).join("\n"));
+        }
+      } catch (error) {
+        console.error("react-github-readme-md: ", error);
+      }
+    };
+
+    fetchReadme();
+  }, []);
+
+  if (!readmeContent) {
+    return null;
+  }
+
+  // Parse the markdown content into HTML
+  try {
+    const ghContent = marked.parse(readmeContent);
+    return (
+      <div dangerouslySetInnerHTML={{ __html: ghContent }} />
+    );
+  } catch (error) {
+    console.error("react-github-readme-md: ", error);
+    return null;
+  }
+};
+
+export default GitHubReadme;
\ No newline at end of file
diff --git a/website/package.json b/website/package.json
index 98a32d9dfb63..512ff7b241f7 100644
--- a/website/package.json
+++ b/website/package.json
@@ -22,6 +22,7 @@
     "axios": "^1.4.0",
     "clsx": "^1.2.1",
     "docusaurus-preset-openapi": "^0.6.4",
+    "marked": "^9.1.5",
     "postcss": "^8.4.24",
     "posthog-docusaurus": "^2.0.0",
     "prism-react-renderer": "^1.3.5",
diff --git a/website/yarn.lock b/website/yarn.lock
index afb00735bd71..be2c46dd4b5e 100644
--- a/website/yarn.lock
+++ b/website/yarn.lock
@@ -5559,6 +5559,11 @@ marked@2.0.1:
   resolved "https://registry.yarnpkg.com/marked/-/marked-2.0.1.tgz#5e7ed7009bfa5c95182e4eb696f85e948cefcee3"
   integrity sha512-5+/fKgMv2hARmMW7DOpykr2iLhl0NgjyELk5yn92iE7z8Se1IS9n3UsFm86hFXIkvMBmVxki8+ckcpjBeyo/hw==
 
+marked@^9.1.5:
+  version "9.1.5"
+  resolved "https://registry.yarnpkg.com/marked/-/marked-9.1.5.tgz#fcada4702ea64a5c05a4ff0e0639628aac8a1e5f"
+  integrity sha512-14QG3shv8Kg/xc0Yh6TNkMj90wXH9mmldi5941I2OevfJ/FQAFLEwtwU2/FfgSAOMlWHrEukWSGQf8MiVYNG2A==
+
 mdast-squeeze-paragraphs@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/mdast-squeeze-paragraphs/-/mdast-squeeze-paragraphs-4.0.0.tgz#7c4c114679c3bee27ef10b58e2e015be79f1ef97"
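The prompt.rs hunk above swaps two `.unwrap()` calls for `let ... else` early returns, so a failed index query yields an empty snippet list instead of a panic. A minimal standalone sketch of that fallback pattern (the `squares_up_to` helper is illustrative only, not Tabby code):

```rust
// `let ... else` binds the success value or takes the divergent branch.
fn squares_up_to(input: &str) -> Vec<u64> {
    // A parse failure returns an empty Vec instead of panicking,
    // mirroring the `else { return vec![]; }` shape used in prompt.rs.
    let Ok(n) = input.parse::<u64>() else {
        return vec![];
    };
    (0..n).map(|i| i * i).collect()
}

fn main() {
    assert_eq!(squares_up_to("3"), vec![0, 1, 4]);
    assert_eq!(squares_up_to("not a number"), Vec::<u64>::new());
}
```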