Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cluster provision functionality #2452

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions crates/admin/src/cluster_controller/grpc_svc_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

use std::num::NonZeroU16;
use std::time::Duration;

use bytes::{Bytes, BytesMut};
Expand Down Expand Up @@ -324,11 +323,6 @@ impl ClusterCtrlSvc for ClusterCtrlSvcHandler {

self.controller_handle
.update_cluster_configuration(
NonZeroU16::new(
u16::try_from(request.num_partitions)
.map_err(|_| Status::invalid_argument("num_partitions is too big"))?,
)
.ok_or(Status::invalid_argument("num_partitions cannot be zero"))?,
request
.replication_strategy
.ok_or_else(|| {
Expand Down
35 changes: 10 additions & 25 deletions crates/admin/src/cluster_controller/logs_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,8 @@ use tracing::{debug, error, trace, trace_span, Instrument};
use xxhash_rust::xxh3::Xxh3Builder;

use restate_bifrost::{Bifrost, BifrostAdmin, Error as BifrostError};
use restate_core::metadata_store::{
retry_on_network_error, MetadataStoreClient, Precondition, ReadWriteError, WriteError,
};
use restate_core::metadata_store::{MetadataStoreClient, Precondition, ReadWriteError, WriteError};
use restate_core::{Metadata, MetadataWriter, ShutdownError, TaskCenterFutureExt};
use restate_types::config::Configuration;
use restate_types::errors::GenericError;
use restate_types::identifiers::PartitionId;
use restate_types::live::Pinned;
Expand Down Expand Up @@ -639,9 +636,9 @@ struct LogsControllerInner {
}

impl LogsControllerInner {
fn new(configuration: LogsConfiguration, retry_policy: RetryPolicy) -> Self {
fn new(current_logs: Arc<Logs>, retry_policy: RetryPolicy) -> Self {
Self {
current_logs: Arc::new(Logs::with_logs_configuration(configuration)),
current_logs,
logs_state: HashMap::with_hasher(Xxh3Builder::default()),
logs_write_in_progress: None,
retry_policy,
Expand Down Expand Up @@ -925,26 +922,11 @@ pub struct LogsController {
}

impl LogsController {
pub async fn init(
configuration: &Configuration,
pub fn new(
bifrost: Bifrost,
metadata_store_client: MetadataStoreClient,
metadata_writer: MetadataWriter,
) -> Result<Self> {
// obtain the latest logs or init it with an empty logs variant
let logs = retry_on_network_error(
configuration.common.network_error_retry_policy.clone(),
|| {
metadata_store_client.get_or_insert(BIFROST_CONFIG_KEY.clone(), || {
Logs::from_configuration(configuration)
})
},
)
.await?;

let logs_configuration = logs.configuration().clone();
metadata_writer.update(Arc::new(logs)).await?;

) -> Self {
//todo(azmy): make configurable
let retry_policy = RetryPolicy::exponential(
Duration::from_millis(10),
Expand All @@ -955,7 +937,10 @@ impl LogsController {

let mut this = Self {
effects: Some(Vec::new()),
inner: LogsControllerInner::new(logs_configuration, retry_policy),
inner: LogsControllerInner::new(
Metadata::with_current(|m| m.logs_snapshot()),
retry_policy,
),
bifrost,
metadata_store_client,
metadata_writer,
Expand All @@ -964,7 +949,7 @@ impl LogsController {
};

this.find_logs_tail();
Ok(this)
this
}

pub fn find_logs_tail(&mut self) {
Expand Down
80 changes: 9 additions & 71 deletions crates/admin/src/cluster_controller/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

mod state;

use std::num::NonZeroU16;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
Expand All @@ -37,7 +36,7 @@ use restate_types::partition_table::{
use restate_types::replicated_loglet::ReplicatedLogletParams;

use restate_bifrost::{Bifrost, BifrostAdmin, SealedSegment};
use restate_core::metadata_store::{retry_on_network_error, MetadataStoreClient};
use restate_core::metadata_store::MetadataStoreClient;
use restate_core::network::rpc_router::RpcRouter;
use restate_core::network::tonic_service_filter::{TonicServiceFilter, WaitForReady};
use restate_core::network::{
Expand Down Expand Up @@ -181,7 +180,6 @@ enum ClusterControllerCommand {
response_tx: oneshot::Sender<anyhow::Result<SnapshotId>>,
},
UpdateClusterConfiguration {
num_partitions: NonZeroU16,
replication_strategy: ReplicationStrategy,
default_provider: DefaultProvider,
response_tx: oneshot::Sender<anyhow::Result<()>>,
Expand Down Expand Up @@ -247,7 +245,6 @@ impl ClusterControllerHandle {

pub async fn update_cluster_configuration(
&self,
num_partitions: NonZeroU16,
replication_strategy: ReplicationStrategy,
default_provider: DefaultProvider,
) -> Result<anyhow::Result<()>, ShutdownError> {
Expand All @@ -256,7 +253,6 @@ impl ClusterControllerHandle {
let _ = self
.tx
.send(ClusterControllerCommand::UpdateClusterConfiguration {
num_partitions,
replication_strategy,
default_provider,
response_tx,
Expand Down Expand Up @@ -296,8 +292,6 @@ impl<T: TransportConnect> Service<T> {
}

pub async fn run(mut self) -> anyhow::Result<()> {
self.init_partition_table().await?;

let mut config_watcher = Configuration::watcher();
let mut cluster_state_watcher = self.cluster_state_refresher.cluster_state_watcher();

Expand Down Expand Up @@ -353,34 +347,6 @@ impl<T: TransportConnect> Service<T> {
}
}

/// creates partition table iff it does not exist
async fn init_partition_table(&mut self) -> anyhow::Result<()> {
let configuration = self.configuration.live_load();

let partition_table = retry_on_network_error(
configuration.common.network_error_retry_policy.clone(),
|| {
self.metadata_store_client
.get_or_insert(PARTITION_TABLE_KEY.clone(), || {
let partition_table = PartitionTable::with_equally_sized_partitions(
Version::MIN,
configuration.common.bootstrap_num_partitions.get(),
);

debug!("Initializing the partition table with '{partition_table:?}'");

partition_table
})
},
)
.await?;

self.metadata_writer
.update(Arc::new(partition_table))
.await?;

Ok(())
}
/// Triggers a snapshot creation for the given partition by issuing an RPC
/// to the node hosting the active leader.
async fn create_partition_snapshot(
Expand Down Expand Up @@ -437,23 +403,13 @@ impl<T: TransportConnect> Service<T> {

async fn update_cluster_configuration(
&self,
num_partitions: u16,
replication_strategy: ReplicationStrategy,
default_provider: DefaultProvider,
) -> anyhow::Result<()> {
let logs = self
.metadata_store_client
.read_modify_write(BIFROST_CONFIG_KEY.clone(), |current: Option<Logs>| {
let logs = match current {
Some(logs) => logs,
None => {
let mut builder = Logs::empty().into_builder();
builder.set_configuration(LogsConfiguration {
default_provider: default_provider.clone(),
});
return Ok(builder.build());
}
};
let logs = current.ok_or(ClusterConfigurationUpdateError::MissingLogs)?;

// we can only change the default provider
if logs.version() != Version::INVALID
Expand Down Expand Up @@ -498,25 +454,10 @@ impl<T: TransportConnect> Service<T> {
.read_modify_write(
PARTITION_TABLE_KEY.clone(),
|current: Option<PartitionTable>| {
let partition_table = match current {
Some(partition_table) => partition_table,
None => {
// while not possible because we always initialize a partition table
// we still can just create and return a new one
let mut builder = PartitionTableBuilder::default();
builder.with_equally_sized_partitions(num_partitions)?;
builder.set_replication_strategy(replication_strategy);

return Ok(builder.build());
}
};
let partition_table =
current.ok_or(ClusterConfigurationUpdateError::MissingPartitionTable)?;

let mut builder: PartitionTableBuilder = partition_table.into();
if builder.num_partitions() != 0 && builder.num_partitions() != num_partitions {
return Err(ClusterConfigurationUpdateError::RepartitionNotSupported);
} else if builder.num_partitions() != num_partitions {
builder.with_equally_sized_partitions(num_partitions)?;
}

if builder.replication_strategy() != replication_strategy {
builder.set_replication_strategy(replication_strategy);
Expand Down Expand Up @@ -600,17 +541,12 @@ impl<T: TransportConnect> Service<T> {
.await;
}
ClusterControllerCommand::UpdateClusterConfiguration {
num_partitions,
replication_strategy,
default_provider,
response_tx,
} => {
let result = self
.update_cluster_configuration(
num_partitions.get(),
replication_strategy,
default_provider,
)
.update_cluster_configuration(replication_strategy, default_provider)
.await;
let _ = response_tx.send(result);
}
Expand Down Expand Up @@ -658,12 +594,14 @@ async fn sync_cluster_controller_metadata() -> anyhow::Result<()> {
// Reasons an update to the cluster configuration can be rejected.
// NOTE(review): this span comes from a diff view; some variants shown here may be
// added/removed by the PR rather than coexisting — confirm against the merged file.
enum ClusterConfigurationUpdateError {
// the requested configuration is identical to the current one; nothing to write
#[error("Unchanged")]
Unchanged,
// changing the number of partitions after creation is rejected
#[error("Repartitioning is not supported")]
RepartitionNotSupported,
// the bifrost default provider kind is immutable once set
#[error("Changing default provider kind is not supported")]
ChangingDefaultProviderNotSupported,
// forwarded from the partition-table builder (transparent: displays the inner error)
#[error(transparent)]
BuildError(#[from] partition_table::BuilderError),
// logs metadata absent — presumably the cluster has not been provisioned yet
#[error("missing logs; cluster seems to be not provisioned")]
MissingLogs,
// partition table metadata absent — presumably the cluster has not been provisioned yet
#[error("missing partition table; cluster seems to be not provisioned")]
MissingPartitionTable,
}

#[derive(Clone)]
Expand Down
6 changes: 2 additions & 4 deletions crates/admin/src/cluster_controller/service/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,11 @@ where
)
.await?;

let logs_controller = LogsController::init(
&configuration,
let logs_controller = LogsController::new(
service.bifrost.clone(),
service.metadata_store_client.clone(),
service.metadata_writer.clone(),
)
.await?;
);

let (log_trim_interval, log_trim_threshold) =
create_log_trim_interval(&configuration.admin);
Expand Down
8 changes: 4 additions & 4 deletions crates/bifrost/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ impl BifrostService {
pub fn handle(&self) -> Bifrost {
self.bifrost.clone()
}
/// Runs initialization phase, then returns a handle to join on shutdown.
/// In this phase the system should wait until this is completed before
/// continuing. For instance, a worker marks itself as `STARTING_UP` and does not
/// accept any requests until this is completed.

/// Runs initialization phase. In this phase the system should wait until this is completed
/// before continuing. For instance, a worker marks itself as `STARTING_UP` and does not accept any
/// requests until this is completed.
///
/// This requires to run within a task_center context.
pub async fn start(self) -> anyhow::Result<()> {
Expand Down
1 change: 1 addition & 0 deletions crates/core/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.protoc_arg("--experimental_allow_proto3_optional")
.extern_path(".restate.node", "::restate_types::protobuf::node")
.extern_path(".restate.common", "::restate_types::protobuf::common")
.extern_path(".restate.cluster", "::restate_types::protobuf::cluster")
.compile_protos(
&["./protobuf/node_ctl_svc.proto"],
&["protobuf", "../types/protobuf"],
Expand Down
19 changes: 19 additions & 0 deletions crates/core/protobuf/node_ctl_svc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
syntax = "proto3";

import "google/protobuf/empty.proto";
import "restate/cluster.proto";
import "restate/common.proto";
import "restate/node.proto";

Expand All @@ -20,6 +21,24 @@ service NodeCtlSvc {
rpc GetIdent(google.protobuf.Empty) returns (IdentResponse);

rpc GetMetadata(GetMetadataRequest) returns (GetMetadataResponse);

// Provision the Restate cluster on this node.
rpc ProvisionCluster(ProvisionClusterRequest) returns (ProvisionClusterResponse);
}

// Request to provision the Restate cluster via NodeCtlSvc.ProvisionCluster.
message ProvisionClusterRequest {
// if true, the resulting configuration is computed and returned without being
// applied (presumably — confirm against the handler implementation)
bool dry_run = 1;
// if unset then the configured cluster num partitions will be used
optional uint32 num_partitions = 2;
// if unset then the configured cluster placement strategy will be used
optional restate.cluster.ReplicationStrategy placement_strategy = 3;
// if unset then the configured cluster default log provider will be used
optional restate.cluster.DefaultProvider log_provider = 4;
}

// Response to NodeCtlSvc.ProvisionCluster.
message ProvisionClusterResponse {
// echoes whether the request was processed as a dry run
bool dry_run = 1;
// the cluster configuration that was applied (or, on a dry run, would be applied)
restate.cluster.ClusterConfiguration cluster_configuration = 2;
}

message IdentResponse {
Expand Down
1 change: 1 addition & 0 deletions crates/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub mod metadata_store;
mod metric_definitions;
pub mod network;
pub mod partitions;
pub mod protobuf;
pub mod task_center;
pub mod worker_api;
pub use error::*;
Expand Down
Loading
Loading