Skip to content

Commit

Permalink
Merge branch 'main' into rocm-release
Browse files Browse the repository at this point in the history
# Conflicts:
#	.github/workflows/release.yml
#	crates/llama-cpp-bindings/src/llama.rs
  • Loading branch information
cromefire committed Dec 12, 2023
2 parents a5e69ca + ca80413 commit d4d38d7
Show file tree
Hide file tree
Showing 66 changed files with 354 additions and 237 deletions.
37 changes: 32 additions & 5 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ jobs:
container: ${{ matrix.container }}
strategy:
matrix:
binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, x86_64-manylinux2014-rocm5.7]
binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117,
x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm5.7]
include:
- os: macos-latest
target: aarch64-apple-darwin
Expand All @@ -40,6 +41,18 @@ jobs:
binary: x86_64-manylinux2014-cuda117
container: sameli/manylinux2014_x86_64_cuda_11.7
build_args: --features cuda
- os: windows-latest
target: x86_64-pc-windows-msvc
binary: x86_64-windows-msvc-cuda117
ext: .exe
build_args: --features cuda
windows_cuda: '11.7.1'
- os: windows-latest
target: x86_64-pc-windows-msvc
binary: x86_64-windows-msvc-cuda122
ext: .exe
build_args: --features cuda
windows_cuda: '12.2.0'
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
binary: x86_64-manylinux2014-rocm5.7
Expand Down Expand Up @@ -83,20 +96,34 @@ jobs:
~/.cargo/registry
~/.cargo/git
- run: bash ./ci/prepare_build_environment.sh
- name: Prepare build environment for macOS & Linux
run: bash ./ci/prepare_build_environment.sh
if: runner.os != 'Windows'

- name: Prepare build environment for Windows
run: ./ci/prepare_build_environment.ps1
if: runner.os == 'Windows'

- name: Install CUDA toolkit for Windows
uses: Jimver/[email protected]
with:
cuda: ${{ matrix.windows_cuda }}
method: 'network'
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
if: runner.os == 'Windows'

- name: Build release binary
run: cargo build ${{ matrix.build_args }} --release --target ${{ matrix.target }} --package tabby

- name: Rename release binary
run: mv target/${{ matrix.target }}/release/tabby tabby_${{ matrix.binary }}
run: mv target/${{ matrix.target }}/release/tabby${{ matrix.ext }} tabby_${{ matrix.binary }}${{ matrix.ext }}

- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
retention-days: 3
name: tabby_${{ matrix.binary }}
path: tabby_${{ matrix.binary }}
name: tabby_${{ matrix.binary }}${{ matrix.ext }}
path: tabby_${{ matrix.binary }}${{ matrix.ext }}

pre-release:
if: github.event_name == 'push'
Expand Down
53 changes: 43 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,7 @@ utoipa = "3.3"
axum = "0.6"
hyper = "0.14"
juniper = "0.15"

[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3
1 change: 1 addition & 0 deletions ci/prepare_build_environment.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
choco install --yes protoc
39 changes: 34 additions & 5 deletions crates/llama-cpp-bindings/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ fn main() {
"Please init submodules with `git submodule update --init --recursive` and try again"
);

println!("cargo:rerun-if-changed=cc/*.h");
println!("cargo:rerun-if-changed=cc/*.cc");
println!("cargo:rerun-if-changed=include/engine.h");
println!("cargo:rerun-if-changed=src/engine.cc");
println!("cargo:rustc-link-lib=llama");
println!("cargo:rustc-link-lib=ggml_static");

build_llama_cpp();
build_cxx_binding();
}

fn build_llama_cpp() {
let mut config = Config::new("llama.cpp");
if cfg!(target_os = "macos") {
config.define("LLAMA_METAL", "ON");
Expand All @@ -26,9 +31,16 @@ fn main() {
if cfg!(feature = "cuda") {
config.define("LLAMA_CUBLAS", "ON");
config.define("CMAKE_POSITION_INDEPENDENT_CODE", "ON");
println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
if cfg!(target_os = "windows") {
let Ok(cuda_path) = env::var("CUDA_PATH") else {
panic!("CUDA_PATH is not set");
};
println!(r"cargo:rustc-link-search=native={}\lib\x64", cuda_path);
} else {
println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
println!("cargo:rustc-link-lib=culibos");
}
println!("cargo:rustc-link-lib=cudart");
println!("cargo:rustc-link-lib=culibos");
println!("cargo:rustc-link-lib=cublas");
println!("cargo:rustc-link-lib=cublasLt");
}
Expand Down Expand Up @@ -71,9 +83,26 @@ fn main() {
println!("cargo:rustc-link-lib=hipblas");
}

// By default, the build profile is automatically inferred from Rust’s compilation profile.
// On Windows, we always build llama.cpp in release mode.
// See https://github.com/TabbyML/tabby/pull/948 for more details.
if cfg!(target_os = "windows") {
config.profile("Release");
}

let dst = config.build();
println!("cargo:rustc-link-search=native={}/build", dst.display());
if cfg!(target_os = "windows") {
println!(
r"cargo:rustc-link-search=native={}\build\{}",
dst.display(),
config.get_profile()
);
} else {
println!("cargo:rustc-link-search=native={}/build", dst.display());
}
}

fn build_cxx_binding() {
cxx_build::bridge("src/lib.rs")
.file("src/engine.cc")
.include("include")
Expand Down
3 changes: 1 addition & 2 deletions crates/llama-cpp-bindings/src/engine.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#include "engine.h"

#include <functional>
#include <vector>
#include <deque>
Expand All @@ -9,6 +7,7 @@
#include <ggml.h>
#include <llama.h>

#include "llama-cpp-bindings/include/engine.h"
#include "llama-cpp-bindings/src/lib.rs.h"

namespace llama {
Expand Down
2 changes: 1 addition & 1 deletion crates/llama-cpp-bindings/src/llama.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ impl LlamaServiceImpl {
};

for ffi::StepOutput { request_id, text } in result {
let mut stopped;
let mut stopped: bool;
let LlamaRunningRequest { tx, stop_condition } =
self.requests.get_mut(&request_id).unwrap();

Expand Down
1 change: 1 addition & 0 deletions crates/tabby-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ edition = "2021"

[dependencies]
filenamify = "0.1.0"
home = "0.5.5"
lazy_static = { workspace = true }
serde = { workspace = true }
serdeconv = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion crates/tabby-common/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ lazy_static! {
static ref TABBY_ROOT: Mutex<Cell<PathBuf>> = {
Mutex::new(Cell::new(match env::var("TABBY_ROOT") {
Ok(x) => PathBuf::from(x),
Err(_) => PathBuf::from(env::var("HOME").unwrap()).join(".tabby"),
Err(_) => home::home_dir().unwrap().join(".tabby"),
}))
};
}
Expand Down
1 change: 1 addition & 0 deletions crates/tabby/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,6 @@ vergen = { version = "8.0.0", features = ["build", "git", "gitcl"] }

[dev-dependencies]
assert-json-diff = "2.0.2"
insta = { version = "1.34.0", features = ["yaml", "redactions"] }
reqwest.workspace = true
serde-jsonlines = "0.5.0"
7 changes: 3 additions & 4 deletions crates/tabby/src/routes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
mod metrics;

use std::{
net::{Ipv4Addr, SocketAddr},
net::{IpAddr, SocketAddr},
sync::Arc,
};

Expand All @@ -14,7 +14,7 @@ use tracing::info;

use crate::fatal;

pub async fn run_app(api: Router, ui: Option<Router>, port: u16) {
pub async fn run_app(api: Router, ui: Option<Router>, host: IpAddr, port: u16) {
let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair();
let app = api
.layer(CorsLayer::permissive())
Expand All @@ -31,9 +31,8 @@ pub async fn run_app(api: Router, ui: Option<Router>, port: u16) {
app
};

let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, port));
let address = SocketAddr::from((host, port));
info!("Listening at {}", address);

Server::bind(&address)
.serve(app.into_make_service_with_connect_info::<SocketAddr>())
.await
Expand Down
9 changes: 6 additions & 3 deletions crates/tabby/src/serve.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{sync::Arc, time::Duration};
use std::{net::IpAddr, sync::Arc, time::Duration};

use axum::{routing, Router};
use clap::Args;
Expand Down Expand Up @@ -81,6 +81,9 @@ pub struct ServeArgs {
#[clap(long)]
chat_model: Option<String>,

#[clap(long, default_value = "0.0.0.0")]
host: IpAddr,

#[clap(long, default_value_t = 8080)]
port: u16,

Expand Down Expand Up @@ -108,7 +111,7 @@ pub async fn main(config: &Config, args: &ServeArgs) {
#[cfg(not(feature = "experimental-http"))]
load_model(args).await;

info!("Starting server, this might takes a few minutes...");
info!("Starting server, this might take a few minutes...");

let logger = Arc::new(create_logger());
let code = Arc::new(create_code_search());
Expand All @@ -129,7 +132,7 @@ pub async fn main(config: &Config, args: &ServeArgs) {
let ui = ui.fallback(|| async { axum::response::Redirect::permanent("/swagger-ui") });

start_heartbeat(args);
run_app(api, Some(ui), args.port).await
run_app(api, Some(ui), args.host, args.port).await
}

async fn load_model(args: &ServeArgs) {
Expand Down
Loading

0 comments on commit d4d38d7

Please sign in to comment.