[Bifrost] Auto provision logs metadata #2458

Open · wants to merge 3 commits into base: main
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

25 changes: 10 additions & 15 deletions crates/admin/src/cluster_controller/grpc_svc_handler.rs
@@ -16,9 +16,8 @@ use restate_types::protobuf::cluster::ClusterConfiguration;
use tonic::{async_trait, Request, Response, Status};
use tracing::info;

use restate_bifrost::{Bifrost, BifrostAdmin, Error as BiforstError};
use restate_bifrost::{Bifrost, Error as BiforstError};
use restate_core::{Metadata, MetadataWriter};
use restate_metadata_store::MetadataStoreClient;
use restate_types::identifiers::PartitionId;
use restate_types::logs::metadata::{Logs, SegmentIndex};
use restate_types::logs::{LogId, Lsn, SequenceNumber};
@@ -44,7 +43,6 @@ use super::service::ChainExtension;
use super::ClusterControllerHandle;

pub(crate) struct ClusterCtrlSvcHandler {
metadata_store_client: MetadataStoreClient,
controller_handle: ClusterControllerHandle,
bifrost: Bifrost,
metadata_writer: MetadataWriter,
@@ -53,20 +51,19 @@ pub(crate) struct ClusterCtrlSvcHandler {
impl ClusterCtrlSvcHandler {
pub fn new(
controller_handle: ClusterControllerHandle,
metadata_store_client: MetadataStoreClient,
bifrost: Bifrost,
metadata_writer: MetadataWriter,
) -> Self {
Self {
controller_handle,
metadata_store_client,
bifrost,
metadata_writer,
}
}

async fn get_logs(&self) -> Result<Logs, Status> {
self.metadata_store_client
self.metadata_writer
.metadata_store_client()
.get::<Logs>(BIFROST_CONFIG_KEY.clone())
.await
.map_err(|error| Status::unknown(format!("Failed to get log metadata: {error:?}")))?
@@ -120,7 +117,8 @@ impl ClusterCtrlSvc for ClusterCtrlSvcHandler {

let (trim_point, nodes_config) = tokio::join!(
self.bifrost.get_trim_point(log_id),
self.metadata_store_client
self.metadata_writer
.metadata_store_client()
.get::<NodesConfiguration>(NODES_CONFIG_KEY.clone()),
);

@@ -151,7 +149,8 @@ impl ClusterCtrlSvc for ClusterCtrlSvcHandler {
_request: Request<ListNodesRequest>,
) -> Result<Response<ListNodesResponse>, Status> {
let nodes_config = self
.metadata_store_client
.metadata_writer
.metadata_store_client()
.get::<NodesConfiguration>(NODES_CONFIG_KEY.clone())
.await
.map_err(|error| {
@@ -261,13 +260,9 @@ impl ClusterCtrlSvc for ClusterCtrlSvcHandler {
let request = request.into_inner();
let log_id: LogId = request.log_id.into();

let admin = BifrostAdmin::new(
&self.bifrost,
&self.metadata_writer,
&self.metadata_store_client,
);

let writable_loglet = admin
let writable_loglet = self
.bifrost
.admin()
.writeable_loglet(log_id)
.await
.map_err(|err| match err {
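The net effect in this file: `ClusterCtrlSvcHandler` no longer stores its own `MetadataStoreClient`. It reaches the client through `MetadataWriter::metadata_store_client()` and builds the admin handle via `self.bifrost.admin()` instead of constructing a `BifrostAdmin` by hand. A minimal, self-contained sketch of the accessor pattern — every type and signature below is a simplified stand-in, not the real restate API:

```rust
// Stand-in for restate_metadata_store::MetadataStoreClient (real one is async).
#[derive(Clone)]
struct MetadataStoreClient;

impl MetadataStoreClient {
    // Stub for the `get::<T>(key)` call seen in the diff.
    fn get(&self, _key: &str) -> Option<String> {
        None
    }
}

// Stand-in for restate_core::MetadataWriter.
#[derive(Clone)]
struct MetadataWriter {
    store_client: MetadataStoreClient,
}

impl MetadataWriter {
    // The accessor that makes a separately injected client redundant.
    fn metadata_store_client(&self) -> &MetadataStoreClient {
        &self.store_client
    }
}

// After the change, the handler keeps one handle instead of two.
struct ClusterCtrlSvcHandler {
    metadata_writer: MetadataWriter,
}

impl ClusterCtrlSvcHandler {
    fn get_logs(&self) -> Option<String> {
        self.metadata_writer
            .metadata_store_client()
            .get("bifrost_config")
    }
}

fn main() {
    let handler = ClusterCtrlSvcHandler {
        metadata_writer: MetadataWriter {
            store_client: MetadataStoreClient,
        },
    };
    assert!(handler.get_logs().is_none());
}
```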
104 changes: 39 additions & 65 deletions crates/admin/src/cluster_controller/logs_controller.rs
@@ -10,32 +10,30 @@

mod nodeset_selection;

use futures::never::Never;
use rand::prelude::IteratorRandom;
use rand::thread_rng;
use std::collections::HashMap;
use std::iter;
use std::ops::Deref;
use std::sync::Arc;
use std::time::Duration;

use futures::never::Never;
use rand::prelude::IteratorRandom;
use rand::thread_rng;
use tokio::sync::Semaphore;
use tokio::task::JoinSet;
use tracing::{debug, error, trace, trace_span, Instrument};
use xxhash_rust::xxh3::Xxh3Builder;

use restate_bifrost::{Bifrost, BifrostAdmin, Error as BifrostError};
use restate_core::metadata_store::{
retry_on_network_error, MetadataStoreClient, Precondition, ReadWriteError, WriteError,
};
use restate_bifrost::{Bifrost, Error as BifrostError};
use restate_core::metadata_store::{Precondition, ReadWriteError, WriteError};
use restate_core::{Metadata, MetadataWriter, ShutdownError, TaskCenterFutureExt};
use restate_types::config::Configuration;
use restate_types::errors::GenericError;
use restate_types::identifiers::PartitionId;
use restate_types::live::Pinned;
use restate_types::logs::builder::LogsBuilder;
use restate_types::logs::metadata::{
Chain, DefaultProvider, LogletConfig, LogletParams, Logs, LogsConfiguration,
NodeSetSelectionStrategy, ProviderKind, ReplicatedLogletConfig, SegmentIndex,
Chain, LogletConfig, LogletParams, Logs, LogsConfiguration, NodeSetSelectionStrategy,
ProviderConfiguration, ProviderKind, ReplicatedLogletConfig, SegmentIndex,
};
use restate_types::logs::{LogId, LogletId, Lsn, TailState};
use restate_types::metadata_store::keys::BIFROST_CONFIG_KEY;
@@ -320,17 +318,17 @@ fn try_provisioning(
node_set_selector_hints: impl NodeSetSelectorHints,
) -> Option<LogletConfiguration> {
match logs_configuration.default_provider {
DefaultProvider::Local => {
ProviderConfiguration::Local => {
let log_id = LogletId::new(log_id, SegmentIndex::OLDEST);
Some(LogletConfiguration::Local(log_id.into()))
}
#[cfg(any(test, feature = "memory-loglet"))]
DefaultProvider::InMemory => {
ProviderConfiguration::InMemory => {
let log_id = LogletId::new(log_id, SegmentIndex::OLDEST);
Some(LogletConfiguration::Memory(log_id.into()))
}
#[cfg(feature = "replicated-loglet")]
DefaultProvider::Replicated(ref config) => build_new_replicated_loglet_configuration(
ProviderConfiguration::Replicated(ref config) => build_new_replicated_loglet_configuration(
config,
LogletId::new(log_id, SegmentIndex::OLDEST),
&Metadata::with_current(|m| m.nodes_config_ref()),
@@ -436,10 +434,10 @@ impl LogletConfiguration {
) -> bool {
match (self, &logs_configuration.default_provider) {
#[cfg(any(test, feature = "memory-loglet"))]
(Self::Memory(_), DefaultProvider::InMemory) => false,
(Self::Local(_), DefaultProvider::Local) => false,
(Self::Memory(_), ProviderConfiguration::InMemory) => false,
(Self::Local(_), ProviderConfiguration::Local) => false,
#[cfg(feature = "replicated-loglet")]
(Self::Replicated(params), DefaultProvider::Replicated(config)) => {
(Self::Replicated(params), ProviderConfiguration::Replicated(config)) => {
let sequencer_change_required = !observed_cluster_state
.is_node_alive(params.sequencer)
&& !observed_cluster_state.alive_nodes.is_empty();
@@ -468,9 +466,10 @@ impl LogletConfiguration {

sequencer_change_required || nodeset_improvement_possible
}
_ => {
(x, y) => {
debug!(
"Changing provider type is not supporter at the moment. Ignoring reconfigure"
"Changing provider type from {} to {} is not supporter at the moment. Ignoring reconfigure",
x.as_provider(), y.kind(),
);
false
}
@@ -501,10 +500,14 @@

match logs_configuration.default_provider {
#[cfg(any(test, feature = "memory-loglet"))]
DefaultProvider::InMemory => Some(LogletConfiguration::Memory(loglet_id.next().into())),
DefaultProvider::Local => Some(LogletConfiguration::Local(loglet_id.next().into())),
ProviderConfiguration::InMemory => {
Some(LogletConfiguration::Memory(loglet_id.next().into()))
}
ProviderConfiguration::Local => {
Some(LogletConfiguration::Local(loglet_id.next().into()))
}
#[cfg(feature = "replicated-loglet")]
DefaultProvider::Replicated(ref config) => {
ProviderConfiguration::Replicated(ref config) => {
let previous_params = match self {
Self::Replicated(previous_params) => Some(previous_params),
_ => None,
@@ -639,9 +642,9 @@ struct LogsControllerInner {
}

impl LogsControllerInner {
fn new(configuration: LogsConfiguration, retry_policy: RetryPolicy) -> Self {
fn new(current_logs: Arc<Logs>, retry_policy: RetryPolicy) -> Self {
Self {
current_logs: Arc::new(Logs::with_logs_configuration(configuration)),
current_logs,
logs_state: HashMap::with_hasher(Xxh3Builder::default()),
logs_write_in_progress: None,
retry_policy,
@@ -918,33 +921,13 @@ pub struct LogsController {
effects: Option<Vec<Effect>>,
inner: LogsControllerInner,
bifrost: Bifrost,
metadata_store_client: MetadataStoreClient,
metadata_writer: MetadataWriter,
async_operations: JoinSet<Event>,
find_logs_tail_semaphore: Arc<Semaphore>,
}

impl LogsController {
pub async fn init(
configuration: &Configuration,
bifrost: Bifrost,
metadata_store_client: MetadataStoreClient,
metadata_writer: MetadataWriter,
) -> Result<Self> {
// obtain the latest logs or init it with an empty logs variant
let logs = retry_on_network_error(
configuration.common.network_error_retry_policy.clone(),
|| {
metadata_store_client.get_or_insert(BIFROST_CONFIG_KEY.clone(), || {
Logs::from_configuration(configuration)
})
},
)
.await?;

let logs_configuration = logs.configuration().clone();
metadata_writer.update(Arc::new(logs)).await?;

pub async fn init(bifrost: Bifrost, metadata_writer: MetadataWriter) -> Result<Self> {
//todo(azmy): make configurable
let retry_policy = RetryPolicy::exponential(
Duration::from_millis(10),
@@ -955,9 +938,11 @@

let mut this = Self {
effects: Some(Vec::new()),
inner: LogsControllerInner::new(logs_configuration, retry_policy),
inner: LogsControllerInner::new(
Metadata::with_current(|m| m.logs_snapshot()),
retry_policy,
),
bifrost,
metadata_store_client,
metadata_writer,
async_operations: JoinSet::default(),
find_logs_tail_semaphore: Arc::new(Semaphore::new(1)),
@@ -976,17 +961,12 @@

let logs = Arc::clone(&self.inner.current_logs);
let bifrost = self.bifrost.clone();
let metadata_store_client = self.metadata_store_client.clone();
let metadata_writer = self.metadata_writer.clone();
let find_tail = async move {
let bifrost_admin =
BifrostAdmin::new(&bifrost, &metadata_writer, &metadata_store_client);

let mut updates = LogsTailUpdates::default();
for (log_id, chain) in logs.iter() {
let tail_segment = chain.tail();

let writable_loglet = match bifrost_admin.writeable_loglet(*log_id).await {
let writable_loglet = match bifrost.admin().writeable_loglet(*log_id).await {
Ok(loglet) => loglet,
Err(BifrostError::Shutdown(_)) => break,
Err(err) => {
@@ -1090,7 +1070,6 @@
logs: Arc<Logs>,
mut debounce: Option<RetryIter<'static>>,
) {
let metadata_store_client = self.metadata_store_client.clone();
let metadata_writer = self.metadata_writer.clone();

self.async_operations.spawn(async move {
@@ -1100,7 +1079,7 @@
tokio::time::sleep(delay).await;
}

if let Err(err) = metadata_store_client
if let Err(err) = metadata_writer.metadata_store_client()
.put(
BIFROST_CONFIG_KEY.clone(),
logs.deref(),
@@ -1112,7 +1091,7 @@
WriteError::FailedPrecondition(_) => {
debug!("Detected a concurrent modification of logs. Fetching the latest logs now.");
// There was a concurrent modification of the logs. Fetch the latest version.
match metadata_store_client
match metadata_writer.metadata_store_client()
.get::<Logs>(BIFROST_CONFIG_KEY.clone())
.await
{
@@ -1158,8 +1137,6 @@
mut debounce: Option<RetryIter<'static>>,
) {
let bifrost = self.bifrost.clone();
let metadata_store_client = self.metadata_store_client.clone();
let metadata_writer = self.metadata_writer.clone();

self.async_operations.spawn(
async move {
@@ -1169,10 +1146,7 @@
tokio::time::sleep(delay).await;
}

let bifrost_admin =
BifrostAdmin::new(&bifrost, &metadata_writer, &metadata_store_client);

match bifrost_admin.seal(log_id, segment_index).await {
match bifrost.admin().seal(log_id, segment_index).await {
Ok(sealed_segment) => {
if sealed_segment.tail.is_sealed() {
Event::SealSucceeded {
@@ -1279,7 +1253,7 @@ pub mod tests {

use enumset::{enum_set, EnumSet};
use restate_types::logs::metadata::{
DefaultProvider, LogsConfiguration, NodeSetSelectionStrategy, ReplicatedLogletConfig,
LogsConfiguration, NodeSetSelectionStrategy, ProviderConfiguration, ReplicatedLogletConfig,
};
use restate_types::logs::LogletId;
use restate_types::nodes_config::{
@@ -1452,7 +1426,7 @@

fn logs_configuration(replication_factor: u8) -> LogsConfiguration {
LogsConfiguration {
default_provider: DefaultProvider::Replicated(ReplicatedLogletConfig {
default_provider: ProviderConfiguration::Replicated(ReplicatedLogletConfig {
replication_property: ReplicationProperty::new(
NonZeroU8::new(replication_factor).expect("must be non zero"),
),
@@ -1537,7 +1511,7 @@
&nodes.observed_state
));

let DefaultProvider::Replicated(ref replicated_loglet_config) =
let ProviderConfiguration::Replicated(ref replicated_loglet_config) =
logs_config.default_provider
else {
unreachable!()
@@ -1571,7 +1545,7 @@

let logs_config = logs_configuration(2);

let DefaultProvider::Replicated(ref replicated_loglet_config) =
let ProviderConfiguration::Replicated(ref replicated_loglet_config) =
logs_config.default_provider
else {
unreachable!()
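The same simplification runs through this file: every `BifrostAdmin::new(&bifrost, &metadata_writer, &metadata_store_client)` triple collapses into `bifrost.admin()`. A rough sketch of that accessor shape, with stubbed stand-in types (the real `BifrostAdmin` methods are async and carry much more state):

```rust
// Stand-in for restate_bifrost::Bifrost; in the real crate it already holds
// the metadata writer internally, which is what lets `admin()` be built
// on demand with no extra arguments.
struct Bifrost;

struct BifrostAdmin<'a> {
    bifrost: &'a Bifrost,
}

impl Bifrost {
    // Before: callers assembled the admin from three separate references.
    // After: the handle is derived from the Bifrost handle alone.
    fn admin(&self) -> BifrostAdmin<'_> {
        BifrostAdmin { bifrost: self }
    }
}

impl BifrostAdmin<'_> {
    // Stubbed stand-in for the `seal` call used in the sealing task.
    fn seal(&self, log_id: u64, segment_index: u32) -> Result<(), String> {
        let _ = (self.bifrost, log_id, segment_index);
        Ok(())
    }
}

fn main() {
    let bifrost = Bifrost;
    // Call sites shrink to a single expression chain, mirroring the diff's
    // `bifrost.admin().seal(log_id, segment_index).await`.
    bifrost.admin().seal(1, 0).expect("stub never fails");
}
```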
crates/admin/src/cluster_controller/logs_controller/nodeset_selection.rs
@@ -13,9 +13,9 @@ use std::cmp::{max, Ordering};
use itertools::Itertools;
use rand::prelude::IteratorRandom;
use rand::Rng;
use restate_types::logs::metadata::NodeSetSelectionStrategy;
use tracing::trace;

use restate_types::logs::metadata::NodeSetSelectionStrategy;
use restate_types::nodes_config::NodesConfiguration;
use restate_types::replicated_loglet::{LocationScope, NodeSet, ReplicationProperty};

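Finally, the change that gives the PR its title: `LogsController::init` no longer performs a retried `get_or_insert` of `Logs` against the metadata store. Logs metadata is assumed to be provisioned before the controller starts, so the controller simply seeds its state from `Metadata::with_current(|m| m.logs_snapshot())`. A toy sketch of that seeding, again with stand-in types (`Logs`, `Metadata`) rather than the real crates:

```rust
use std::sync::Arc;

// Stand-in for restate_types::logs::metadata::Logs.
#[derive(Default)]
struct Logs;

// Stand-in for restate_core::Metadata; the real one is accessed via
// `Metadata::with_current(|m| ...)` rather than owned directly.
struct Metadata {
    logs: Arc<Logs>,
}

impl Metadata {
    fn logs_snapshot(&self) -> Arc<Logs> {
        Arc::clone(&self.logs)
    }
}

struct LogsControllerInner {
    current_logs: Arc<Logs>,
}

impl LogsControllerInner {
    // After the change, the inner state is seeded directly from the
    // already-provisioned snapshot: no MetadataStoreClient, no retry loop.
    fn new(current_logs: Arc<Logs>) -> Self {
        Self { current_logs }
    }
}

fn main() {
    let metadata = Metadata {
        logs: Arc::new(Logs::default()),
    };
    let inner = LogsControllerInner::new(metadata.logs_snapshot());
    let _ = &inner.current_logs;
}
```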