Skip to content

Commit

Permalink
chore: add foundationdb to dev-full & document avx support (#1426)
Browse files Browse the repository at this point in the history
Fixes RVT-4097
  • Loading branch information
NathanFlurry committed Nov 23, 2024
1 parent 47d01df commit f6708f3
Show file tree
Hide file tree
Showing 20 changed files with 195 additions and 53 deletions.
28 changes: 18 additions & 10 deletions docker/client/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# MARK: Builder
FROM rust:1.82.0-bullseye AS builder
# The FDB version should match `cluster::workflows::server::install::install_scripts::components::fdb::FDB_VERSION`

RUN apt-get update && apt-get install --yes protobuf-compiler pkg-config libssl-dev g++ git libpq-dev wget && \
ln -s /bin/g++ /bin/musl-g++ && \
ln -s /bin/gcc-ar /bin/musl-ar
# TODO(RVT-4168): Compile libfdb from scratch for ARM
FROM --platform=linux/amd64 rust:1.82.0-bullseye AS builder

RUN apt-get update && apt-get install --yes libclang-dev protobuf-compiler pkg-config libssl-dev g++ git libpq-dev wget curl && \
curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"

WORKDIR /app
COPY . .
Expand All @@ -18,19 +20,25 @@ RUN \
mv target/release/rivet-client target/release/rivet-isolate-v8-runner target/release/rivet-container-runner /app/dist/

# MARK: Runner (Full)
FROM debian:12-slim AS full-runner
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && apt-get install -y --no-install-recommends ca-certificates
FROM --platform=linux/amd64 debian:12-slim AS full-runner
# Installs CA certificates plus the prebuilt x86_64 FoundationDB client library.
# The library is only published here as an x86_64 binary, which is why the stage is pinned to linux/amd64.
#
# DEBIAN_FRONTEND is exported so that it applies to every apt-get call in this RUN (a bare
# `VAR=value cmd1 && cmd2` prefix only reaches cmd1). The apt lists are removed in the same layer so
# they do not end up in the image.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends ca-certificates curl && \
    rm -rf /var/lib/apt/lists/* && \
    curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"
COPY --from=builder /app/dist/rivet-client /app/dist/rivet-isolate-v8-runner /app/dist/rivet-container-runner /usr/local/bin/
ENTRYPOINT ["rivet-client"]

# MARK: Runner (Isolate V8)
FROM debian:12-slim AS isolate-v8-runner
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && apt-get install -y --no-install-recommends ca-certificates
FROM --platform=linux/amd64 debian:12-slim AS isolate-v8-runner
# Installs CA certificates plus the prebuilt x86_64 FoundationDB client library.
# The library is only published here as an x86_64 binary, which is why the stage is pinned to linux/amd64.
#
# DEBIAN_FRONTEND is exported so that it applies to every apt-get call in this RUN (a bare
# `VAR=value cmd1 && cmd2` prefix only reaches cmd1). The apt lists are removed in the same layer so
# they do not end up in the image.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends ca-certificates curl && \
    rm -rf /var/lib/apt/lists/* && \
    curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"
COPY --from=builder /app/dist/rivet-isolate-v8-runner /usr/local/bin/
# Only `rivet-isolate-v8-runner` is copied into this image, so the entrypoint must be that binary;
# `rivet-client` does not exist here and the container would fail to start.
ENTRYPOINT ["rivet-isolate-v8-runner"]

# MARK: Runner (Container)
FROM debian:12-slim AS container-runner
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && apt-get install -y --no-install-recommends ca-certificates
FROM --platform=linux/amd64 debian:12-slim AS container-runner
# Installs CA certificates plus the prebuilt x86_64 FoundationDB client library.
# The library is only published here as an x86_64 binary, which is why the stage is pinned to linux/amd64.
#
# DEBIAN_FRONTEND is exported so that it applies to every apt-get call in this RUN (a bare
# `VAR=value cmd1 && cmd2` prefix only reaches cmd1). The apt lists are removed in the same layer so
# they do not end up in the image.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends ca-certificates curl && \
    rm -rf /var/lib/apt/lists/* && \
    curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"
COPY --from=builder /app/dist/rivet-container-runner /usr/local/bin/
# Only `rivet-container-runner` is copied into this image, so the entrypoint must be that binary;
# `rivet-client` does not exist here and the container would fail to start.
ENTRYPOINT ["rivet-container-runner"]
17 changes: 11 additions & 6 deletions docker/dev-full/client.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# MARK: Builder
FROM rust:1.82.0-bullseye AS builder
# TODO(RVT-4168): Compile libfdb from scratch for ARM
FROM --platform=linux/amd64 rust:1.82.0-bullseye AS builder

RUN apt-get update && apt-get install --yes protobuf-compiler pkg-config libssl-dev g++ git libpq-dev wget && \
ln -s /bin/g++ /bin/musl-g++ && \
ln -s /bin/gcc-ar /bin/musl-ar
RUN apt-get update && apt-get install --yes libclang-dev protobuf-compiler pkg-config libssl-dev g++ git wget curl && \
curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"

WORKDIR /app
COPY . .
Expand All @@ -18,8 +18,13 @@ RUN \
mv target/debug/rivet-client target/debug/rivet-isolate-v8-runner target/debug/rivet-container-runner /app/dist/

# MARK: Runner
FROM debian:12-slim
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && apt-get install -y --no-install-recommends ca-certificates
FROM --platform=linux/amd64 debian:12-slim
# The FDB version should match `cluster::workflows::server::install::install_scripts::components::fdb::FDB_VERSION`
#
# Installs the prebuilt x86_64 FoundationDB client library and the `fdbcli` binary from the same release.
#
# DEBIAN_FRONTEND is exported so that it applies to every apt-get call in this RUN (a bare
# `VAR=value cmd1 && cmd2` prefix only reaches cmd1). The apt lists are removed in the same layer so
# they do not end up in the image.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends ca-certificates curl && \
    rm -rf /var/lib/apt/lists/* && \
    curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so" && \
    curl -Lf -o /usr/local/bin/fdbcli "https://github.com/apple/foundationdb/releases/download/7.1.60/fdbcli.x86_64" && \
    chmod +x /usr/local/bin/fdbcli
COPY --from=builder /app/dist/rivet-client /app/dist/rivet-isolate-v8-runner /app/dist/rivet-container-runner /usr/local/bin/
ENTRYPOINT ["rivet-client"]
# NOTE(review): docker/dev-full/docker-compose.yml overrides this default with
# `-c /etc/rivet-client/config.yaml`; confirm a `config.json` actually exists for this default path.
CMD ["-c", "/etc/rivet-client/config.json"]
26 changes: 24 additions & 2 deletions docker/dev-full/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,15 @@ services:
build:
context: ../..
dockerfile: docker/dev-full/client.Dockerfile
# TODO(RVT-4168): Compile libfdb from scratch for ARM
platform: linux/amd64
command: -c /etc/rivet-client/config.yaml
environment:
- RUST_BACKTRACE=1
stop_grace_period: 0s
depends_on:
foundationdb:
condition: service_healthy
volumes:
- ./rivet-client:/etc/rivet-client:ro
- client-data:/var/lib/rivet-client
Expand Down Expand Up @@ -153,6 +158,22 @@ services:
interval: 2s
timeout: 10s

# FoundationDB server for the dev-full stack.
foundationdb:
# See docs-internal/infrastructure/fdb/AVX.md
# 7.1.60 is an even-numbered release, i.e. built without AVX (see the doc above for why Docker images avoid AVX builds).
image: foundationdb/foundationdb:7.1.60
platform: linux/amd64
# Runs the wrapper script mounted below (instead of the image's own entrypoint) under tini.
entrypoint: ["/usr/bin/tini", "-g", "--", "/usr/local/bin/entrypoint.sh"]
volumes:
- ./foundationdb/entrypoint.sh:/usr/local/bin/entrypoint.sh
- foundationdb-data:/var/fdb:rw
# The service built from client.Dockerfile declares `depends_on: foundationdb: condition: service_healthy`,
# so it is not started until this check passes.
healthcheck:
test: ["CMD", "fdbcli", "--exec", "status"]
interval: 2s
timeout: 10s
retries: 10
networks:
- rivet-network

vector-client:
image: timberio/vector:0.42.0-distroless-static
command: -C /etc/vector
Expand All @@ -167,7 +188,7 @@ services:
image: timberio/vector:0.42.0-distroless-static
command: -C /etc/vector
volumes:
- vector-data:/var/lib/vector
- vector-server-data:/var/lib/vector
- ./vector-server:/etc/vector
networks:
- rivet-network
Expand All @@ -182,4 +203,5 @@ volumes:
redis-data:
clickhouse-data:
seaweedfs-data:
vector-data:
vector-server-data:
foundationdb-data:
20 changes: 20 additions & 0 deletions docker/dev-full/foundationdb/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

# Issues the initial `configure new single ssd` command through fdbcli.
# A failed attempt (fdbcli is given a timeout of 10) is retried after a 2 second pause,
# with no upper bound on the number of attempts.
configure_database() {
	echo "Configuring database..."
	while ! fdbcli --exec 'configure new single ssd' --timeout 10; do
		sleep 2
	done
	echo "Database configured."
}

# When no cluster file exists yet, start a background job that keeps retrying until FoundationDB is up
# and then configures it. It has to run in the background because the server itself is only launched
# by the `exec` at the bottom of this script.
if [ ! -e /var/fdb/fdb.cluster ]; then
configure_database &
else
# NOTE(review): an existing cluster file is taken to mean the database was configured on a
# previous run; this is not verified here.
echo "Database already configured."
fi

# This will automatically populate the cluster file contents (presumably /var/fdb/fdb.cluster)
# with `docker:docker@$PUBLIC_IP:$FDB_PORT`
export FDB_NETWORKING_MODE=container
# `exec` replaces this shell with the launcher script instead of running it as a child process.
exec /var/fdb/scripts/fdb.bash "$@"
6 changes: 6 additions & 0 deletions docker/dev-full/rivet-client/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,11 @@ client:
memory: 0
logs:
redirect_logs: true
foundationdb:
username: docker
password: docker
address:
!static
- "foundationdb:4500"
vector:
address: vector-server:6100
13 changes: 7 additions & 6 deletions docker/monolith/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# syntax=docker/dockerfile:1.2

# MARK: Rivet Server Builder
FROM rust:1.82.0-slim AS server-builder
# TODO(RVT-4168): Compile libfdb from scratch for ARM
FROM --platform=linux/amd64 rust:1.82.0-slim AS server-builder

RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y \
Expand Down Expand Up @@ -29,11 +30,11 @@ RUN \
mv target/debug/rivet-server /usr/bin/rivet-server

# MARK: Rivet Client Builder
FROM rust:1.82.0-bullseye AS client-builder
# TODO(RVT-4168): Switch back to ARM
FROM --platform=linux/amd64 rust:1.82.0-bullseye AS client-builder

RUN apt-get update && apt-get install --yes protobuf-compiler pkg-config libssl-dev g++ git libpq-dev && \
ln -s /bin/g++ /bin/musl-g++ && \
ln -s /bin/gcc-ar /bin/musl-ar
RUN apt-get update && apt-get install --yes protobuf-compiler pkg-config libssl-dev g++ git libpq-dev curl libclang-dev && \
curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"

WORKDIR /app
COPY . .
Expand All @@ -51,7 +52,7 @@ RUN \
#
# Requires OpenSSL 1.1, so we pin this to Debian 11 (which uses OpenSSL 1.1)
# instead of 12 (which uses OpenSSL 3).
FROM debian:12-slim AS runner
FROM --platform=linux/amd64 debian:12-slim AS runner

ARG COCKROACHDB_VERSION=24.2.3
ARG NATS_VERSION=2.9.2
Expand Down
2 changes: 1 addition & 1 deletion docker/monolith/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ vector top --url http://0.0.0.0:9510/graphql
## Testing

```bash
docker build -f docker/monolith/Dockerfile -t rivet . && docker run --name rivet --rm -v "/tmp/rivet-data:/data" -p 8080:8080 -p 9000:9000 -p 20000-20100:20000-20100 rivet
docker build -f docker/monolith/Dockerfile -t rivet . && docker run --platform linux/amd64 --name rivet --rm -v "/tmp/rivet-data:/data" -p 8080:8080 -p 9000:9000 -p 20000-20100:20000-20100 rivet
```

## Port collisions
Expand Down
6 changes: 5 additions & 1 deletion docker/monolith/build-scripts/install.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
TARGET_ARCH=$(uname -m | sed 's/aarch64/arm64/' | sed 's/x86_64/amd64/')

# Install required packages
#
# The FDB version should match `cluster::workflows::server::install::install_scripts::components::fdb::FDB_VERSION`
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libclang-dev \
ca-certificates \
openssl \
curl \
Expand All @@ -15,7 +18,8 @@ apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
(curl -L https://github.com/golang-migrate/migrate/releases/download/v4.18.1/migrate.linux-${TARGET_ARCH}.tar.gz | tar xvz) && \
mv migrate /usr/local/bin/migrate && \
curl -fsSL https://deno.land/x/install/install.sh | sh && \
ln -s /root/.deno/bin/deno /usr/local/bin/deno
ln -s /root/.deno/bin/deno /usr/local/bin/deno && \
curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"

# === CockroachDB ===
useradd -m -s /bin/bash cockroachdb && \
Expand Down
2 changes: 2 additions & 0 deletions docker/monolith/rivet-client/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,7 @@ client:
memory: 0
logs:
redirect_logs: false
# NOTE(review): the `FoundationDb` struct in packages/infra/client/config/src/manager.rs declares
# `username` and `password` without defaults, and `address` is an enum whose variant is selected
# with a tag in docker/dev-full/rivet-client/config.yaml (`!static`). This entry has neither —
# confirm that it still deserializes.
foundationdb:
address: ["foundationdb:4500"]
vector:
address: vector-server:6100
23 changes: 23 additions & 0 deletions docs-internal/infrastructure/fdb/AVX.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# AVX

AVX (Advanced Vector Extensions) is a set of SIMD instructions on x86 CPUs (Intel and AMD) that
FoundationDB can use to run parallel data processing tasks faster.

## Even & odd versions

Even versions of FoundationDB are compiled without AVX. Odd versions are compiled with it.

For example, [7.1.61](https://github.com/apple/foundationdb/releases/tag/7.1.61) is the AVX version of [7.1.60](https://github.com/apple/foundationdb/releases/tag/7.1.60).

Make sure to pay attention to whether you're using a version of FDB with AVX.

## When to use AVX

Use AVX on production Linux servers.

## When not to use AVX

Don't use AVX for Docker images, since QEMU can't emulate AVX and will crash ([source](https://github.com/apple/foundationdb/issues/4111#issuecomment-1284040423)). This will prevent ARM users from being able to run FoundationDB.

This might be easy to fix if we build a Docker image built on the prebuilt AMD & ARM binaries on GitHub.

14 changes: 14 additions & 0 deletions docs-internal/infrastructure/fdb/TROUBLESHOOTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Troubleshooting

## Cannot connect to cluster

This may be caused by many reasons.

### `fdb.cluster` does not match between client and host

The client and host must be able to address the server with the same IP.

Validate that the client config (e.g. `/var/lib/rivet-client/fdb.cluster` on a Rivet client) matches the file `/var/fdb/fdb.cluster` on the FoundationDB server.

If you're using DNS to resolve the cluster, make sure that the DNS address resolves to the correct location.

12 changes: 12 additions & 0 deletions docs-internal/infrastructure/pegboard/TROUBLESHOOTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ Common causes:

Trying to manually find and run the binary usually resolves these issues.

## `fdb ping missed`

The `rivet-client` container in `docker/dev-full/docker-compose.yml` has the `fdbcli` CLI installed.

Check that the cluster can be connected to with:

```bash
fdbcli -C /var/lib/rivet-client/fdb.cluster --exec status
```

For further troubleshooting, see [FoundationDB troubleshooting](../fdb/TROUBLESHOOTING.md).

## Getting logs of crashed client in Docker

If the container crashes, the logs have to be extracted from the volume.
Expand Down
7 changes: 7 additions & 0 deletions packages/infra/client/actor-kv/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ impl ActorKv {
}
}

/// Initializes actor's KV.
///
/// If FDB is down, this will hang indefinitely until connected.
pub async fn init(&mut self) -> Result<()> {
tracing::info!("initializing actor KV");

let root = fdb::directory::DirectoryLayer::default();

let tx = self.db.create_trx()?;
Expand All @@ -69,6 +74,8 @@ impl ActorKv {

self.subspace = Some(kv_dir.subspace(&()).map_err(|err| anyhow!("{err:?}"))?);

tracing::info!("successfully initialized kv");

Ok(())
}

Expand Down
26 changes: 17 additions & 9 deletions packages/infra/client/config/src/manager.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::{
net::{IpAddr, Ipv4Addr, SocketAddr},
net::{IpAddr, Ipv4Addr},
path::{Path, PathBuf},
};

Expand Down Expand Up @@ -54,6 +54,7 @@ pub struct Client {
pub logs: Logs,
#[serde(default)]
pub metrics: Metrics,
pub foundationdb: FoundationDb,
#[serde(default)]
pub vector: Option<Vector>,
}
Expand All @@ -73,14 +74,6 @@ pub struct Cluster {
pub datacenter_id: Uuid,
pub api_endpoint: Url,
pub pegboard_endpoint: Url,
pub foundationdb: FoundationDb,
}

#[derive(Clone, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub enum FoundationDb {
Dynamic { fetch_endpoint: Url },
Addresses(Vec<SocketAddr>),
}

#[derive(Clone, Deserialize)]
Expand Down Expand Up @@ -228,6 +221,21 @@ impl Metrics {
}
}

/// FoundationDB connection settings (the `foundationdb` section of the client config).
///
/// No field has a serde default and unknown fields are denied, so a config must provide exactly
/// `username`, `password` and `address`.
///
/// NOTE(review): `username`/`password` presumably correspond to the `docker:docker` prefix of the
/// cluster file line written by the dev-full FoundationDB container — confirm against the code
/// that generates `fdb.cluster`.
#[derive(Clone, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct FoundationDb {
pub username: String,
pub password: String,
/// How the FoundationDB address(es) are obtained; see `FoundationDbAddress`.
pub address: FoundationDbAddress,
}

/// Source of the FoundationDB address(es) used by the client.
///
/// Variant names are deserialized in snake_case (`dynamic` / `static`); the dev-full YAML config
/// selects the variant with a `!static` tag.
#[derive(Clone, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub enum FoundationDbAddress {
/// Addresses are looked up via `fetch_endpoint`.
/// NOTE(review): the request/response format is not visible from this definition.
Dynamic { fetch_endpoint: Url },
/// Fixed list of addresses kept as plain strings (e.g. `foundationdb:4500` in the dev-full
/// config), so they are not parsed or validated as socket addresses at deserialization time.
Static(Vec<String>),
}

#[derive(Clone, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Vector {
Expand Down
6 changes: 4 additions & 2 deletions packages/infra/client/isolate-v8-runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ WORKDIR /app
COPY . .

# Installs shared libs
RUN apt-get update && apt-get install -y libclang-dev protobuf-compiler
RUN curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"
#
# The FDB version should match `cluster::workflows::server::install::install_scripts::components::fdb::FDB_VERSION`
RUN apt-get update && apt-get install -y libclang-dev protobuf-compiler && \
curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"

RUN \
--mount=type=cache,target=/root/.cargo/git \
Expand Down
4 changes: 2 additions & 2 deletions packages/infra/client/isolate-v8-runner/src/isolate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,13 @@ pub async fn run_inner(
msg_tx: Option<smpsc::SyncSender<log_shipper::ReceivedMessage>>,
actor_config: config::actor::Config,
) -> Result<i32> {
tracing::info!(?actor_id, "Starting isolate");
tracing::info!(?actor_id, "starting isolate");

// Init KV store (create or open)
let mut kv = ActorKv::new(utils::fdb_handle(&config)?, actor_config.owner.clone());
kv.init().await?;

tracing::info!(?actor_id, "Isolate KV initialized");
tracing::info!(?actor_id, "isolate kv initialized");

// Should match the path from `Actor::download_image` in manager/src/actor/setup.rs
let entrypoint = actor_path.join("js-bundle").join("index.js");
Expand Down
Loading

0 comments on commit f6708f3

Please sign in to comment.