Skip to content

Commit

Permalink
feat: s3 replica and pitr (#17)
Browse files Browse the repository at this point in the history
This commit adds support for syncing the local database from an S3 backup and for performing point-in-time recovery (PITR) on the synced database.
  • Loading branch information
losfair authored Nov 9, 2023
1 parent cc499da commit 7e77d08
Show file tree
Hide file tree
Showing 17 changed files with 2,807 additions and 82 deletions.
1,228 changes: 1,163 additions & 65 deletions Cargo.lock

Large diffs are not rendered by default.

16 changes: 15 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]
members = ["denokv", "proto", "remote", "sqlite"]
members = ["denokv", "proto", "remote", "sqlite", "timemachine"]
resolver = "2"

[workspace.package]
Expand All @@ -12,16 +12,27 @@ edition = "2021"
denokv_proto = { version = "0.2.1", path = "./proto" }
denokv_sqlite = { version = "0.2.1", path = "./sqlite" }
denokv_remote = { version = "0.2.2", path = "./remote" }
denokv_timemachine = { version = "0.2.1", path = "./timemachine" }

anyhow = "1"
async-trait = "0.1"
aws-config = "0.55.3"
aws-credential-types = "0.55.3"
aws-sdk-s3 = "0.28.0"
aws-smithy-async = "0.55.3"
aws-smithy-client = "0.55.3"
aws-smithy-types = "0.55.3"
aws-types = "0.55.3"
axum = { version = "0.6", features = ["macros", "http2"] }
bytes = "1"
chrono = { version = "0.4", default-features = false, features = ["std", "serde"] }
clap = { version = "4", features = ["derive", "env"] }
constant_time_eq = "0.3"
env_logger = "0.10.0"
futures = "0.3.28"
hex = "0.4"
hyper = { version = "0.14", features = ["client"] }
hyper-proxy = "0.9.1"
log = "0.4.20"
num-bigint = "0.4"
prost = "0.11"
Expand All @@ -34,5 +45,8 @@ serde_json = "1.0.107"
tempfile = "3"
thiserror = "1"
tokio = { version = "1.33.0", features = ["full"] }
tracing = "0.1"
tracing-futures = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
url = "2"
uuid = { version = "1.4.1", features = ["v4", "serde"] }
37 changes: 35 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ backend.
To run `denokv`, just run:

```sh
docker run -it --init -p 4512:4512 -v ./data:/data ghcr.io/denoland/denokv --sqlite-path /data/denokv.sqlite --access-token <random-token>
docker run -it --init -p 4512:4512 -v ./data:/data ghcr.io/denoland/denokv --sqlite-path /data/denokv.sqlite serve --access-token <random-token>
```

Then run your Deno program and specify the access token in the
Expand Down Expand Up @@ -69,7 +69,7 @@ Then run the `denokv` Docker image, mounting the `/data` directory as a volume
and specifying a random access token.

```sh
docker run -it --init -p 4512:4512 -v ./data:/data ghcr.io/denoland/denokv --sqlite-path /data/denokv.sqlite --access-token --access-token <random-token>
docker run -it --init -p 4512:4512 -v ./data:/data ghcr.io/denoland/denokv --sqlite-path /data/denokv.sqlite serve --access-token <random-token>
```

You can now access the database from your Deno programs by specifying the access
Expand Down Expand Up @@ -155,6 +155,39 @@ const kv = await Deno.openKv("http://localhost:4512");
Make sure to specify your access token in the `DENO_KV_ACCESS_TOKEN` environment
variable.

## Running as a replica of a hosted KV database

`denokv` can run as a replica of a KV database hosted on Deno Deploy, using the S3 backup feature.

To run as a replica:

```sh
docker run -it --init -p 4512:4512 -v ./data:/data \
-e AWS_ACCESS_KEY_ID="<aws-access-key-id>" \
-e AWS_SECRET_ACCESS_KEY="<aws-secret-access-key>" \
-e AWS_REGION="<aws-region>" \
ghcr.io/denoland/denokv --sqlite-path /data/denokv.sqlite serve \
--access-token <random-token> --sync-from-s3 --s3-bucket your-bucket --s3-prefix some-prefix/6aea9765-2b1e-41c7-8904-0bdcd70b21d3/
```

To sync the local database from S3, without updating the snapshot:

```sh
denokv --sqlite-path /data/denokv.sqlite pitr sync --s3-bucket your-bucket --s3-prefix some-prefix/6aea9765-2b1e-41c7-8904-0bdcd70b21d3/
```

To list recoverable points:

```sh
denokv --sqlite-path /data/denokv.sqlite pitr list
```

To check out the snapshot at a specific recoverable point:

```sh
denokv --sqlite-path /data/denokv.sqlite pitr checkout 0100000002c0f4c10000
```

<!-- TBD: ### Node.js -->

## Advanced setup
Expand Down
11 changes: 11 additions & 0 deletions denokv/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,27 @@ name = "denokv"

[dependencies]
anyhow.workspace = true
aws-config.workspace = true
aws-sdk-s3.workspace = true
aws-smithy-async.workspace = true
aws-smithy-client.workspace = true
aws-smithy-types.workspace = true
axum.workspace = true
chrono.workspace = true
clap.workspace = true
constant_time_eq.workspace = true
denokv_proto.workspace = true
denokv_sqlite.workspace = true
denokv_timemachine.workspace = true
env_logger.workspace = true
futures.workspace = true
hex.workspace = true
hyper.workspace = true
hyper-proxy.workspace = true
log.workspace = true
prost.workspace = true
rand.workspace = true
rusqlite.workspace = true
serde.workspace = true
thiserror.workspace = true
tokio.workspace = true
Expand Down
93 changes: 89 additions & 4 deletions denokv/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,100 @@ use clap::Parser;

// Top-level command-line configuration, parsed by clap through
// `#[derive(Parser)]`. It carries the options that belong to the `denokv`
// command itself plus the subcommand that was selected.
//
// Additions here are plain `//` comments on purpose: `///` doc comments on
// clap-derived items are used as help text, so editing them changes `--help`.
#[derive(Parser)]
pub struct Config {
// NOTE(review): an `access_token` option with the same `DENO_KV_ACCESS_TOKEN`
// env var is also declared on `ServeOptions`, and the README examples pass
// `--access-token` only after `serve`. This copy looks like a removed line
// left over from a diff rendering -- confirm before relying on it.
/// The access token used by the CLI to connect to this KV instance.
#[clap(long, env = "DENO_KV_ACCESS_TOKEN")]
pub access_token: String,

/// The path to the SQLite database KV will persist to.
#[clap(long, env = "DENO_KV_SQLITE_PATH")]
pub sqlite_path: String,

// The selected subcommand; see `SubCmd` for the available variants.
#[command(subcommand)]
pub subcommand: SubCmd,
}

// Top-level subcommands of the CLI. Each variant wraps the option struct for
// that subcommand. The README invokes them as `serve` and `pitr`.
// The `///` lines on the variants are the help text clap shows for each one.
#[derive(Parser)]
pub enum SubCmd {
/// Starts the Deno KV HTTP server.
Serve(ServeOptions),

/// Point-in-time recovery tools.
Pitr(PitrOptions),
}

// Options accepted by the `serve` subcommand (`SubCmd::Serve`).
#[derive(Parser)]
pub struct ServeOptions {
/// The access token used by the CLI to connect to this KV instance.
#[clap(long, env = "DENO_KV_ACCESS_TOKEN")]
pub access_token: String,

/// The address to bind the Deno KV HTTP endpoint to.
#[clap(long = "addr", default_value = "0.0.0.0:4512")]
pub addr: SocketAddr,

/// Open in read-only mode.
#[clap(long)]
pub read_only: bool,

// `conflicts_with = "read_only"` makes clap reject a command line that sets
// both `--sync-from-s3` and `--read-only`.
/// Sync changes from S3 continuously.
#[clap(long, conflicts_with = "read_only")]
pub sync_from_s3: bool,

// Flattened: the S3 options of `ReplicaOptions` appear directly on `serve`
// (e.g. `--s3-bucket`, `--s3-prefix`). They are all `Option`s, so nothing in
// this declaration forces them to be present when `--sync-from-s3` is set.
// NOTE(review): presumably the caller validates that combination -- confirm.
#[command(flatten)]
pub replica: ReplicaOptions,
}

// S3 location settings for syncing from a backup. This struct is embedded with
// `#[command(flatten)]` in both `ServeOptions` and `SyncOptions`, so the same
// flags and environment variables apply to `serve` and `pitr sync`.
// Every field is optional at the argument-parsing level.
#[derive(Parser)]
pub struct ReplicaOptions {
/// The name of the S3 bucket to sync changes from.
#[clap(long, env = "DENO_KV_S3_BUCKET")]
pub s3_bucket: Option<String>,

/// Prefix in the S3 bucket.
#[clap(long, env = "DENO_KV_S3_PREFIX")]
pub s3_prefix: Option<String>,

/// S3 endpoint URL like `https://storage.googleapis.com`.
/// Only needed for S3-compatible services other than Amazon S3.
#[clap(long, env = "DENO_KV_S3_ENDPOINT")]
pub s3_endpoint: Option<String>,
}

// Options for the `pitr` subcommand (`SubCmd::Pitr`). It has no flags of its
// own; it only selects one of the nested `PitrSubCmd` operations.
#[derive(Parser)]
pub struct PitrOptions {
#[command(subcommand)]
pub subcommand: PitrSubCmd,
}

// Operations available under `pitr`. The README shows them invoked as
// `pitr sync`, `pitr list` and `pitr checkout <versionstamp>`.
// The `///` lines on the variants are the help text clap shows for each one.
#[derive(Parser)]
pub enum PitrSubCmd {
/// Sync changes from S3 to the SQLite database, without updating the current snapshot.
Sync(SyncOptions),

/// List available recovery points.
List(PitrListOptions),

// Unit variant: this operation takes no options of its own.
/// Show information about the current snapshot.
Info,

/// Checkout the snapshot at a specific versionstamp.
Checkout(PitrCheckoutOptions),
}

// Options for `pitr sync` (`PitrSubCmd::Sync`): just the shared S3 settings,
// flattened so they appear directly on the subcommand.
#[derive(Parser)]
pub struct SyncOptions {
#[command(flatten)]
pub replica: ReplicaOptions,
}

// Options for `pitr list` (`PitrSubCmd::List`): an optional time window.
// Both bounds are stored as the raw strings given on the command line; no
// timestamp parsing or validation happens in this declaration.
// NOTE(review): presumably the caller parses them as RFC3339 -- confirm.
#[derive(Parser)]
pub struct PitrListOptions {
/// Start time in RFC3339 format, e.g. 2021-01-01T00:00:00Z
#[clap(long = "start")]
pub start: Option<String>,

/// End time in RFC3339 format, e.g. 2021-01-01T00:00:00Z
#[clap(long = "end")]
pub end: Option<String>,
}

// Options for `pitr checkout` (`PitrSubCmd::Checkout`).
#[derive(Parser)]
pub struct PitrCheckoutOptions {
// No `long`/`short` attribute, so clap treats this as a required positional
// argument; the README passes it as `pitr checkout 0100000002c0f4c10000`.
// Kept as the raw string from the command line; it is not decoded here.
pub versionstamp: String,
}
Loading

0 comments on commit 7e77d08

Please sign in to comment.