Skip to content

Commit

Permalink
Add bootstrap change capture consumer (#685)
Browse files Browse the repository at this point in the history
* Add bootstrap change capture consumer
  • Loading branch information
zx0401 authored Oct 25, 2023
1 parent f7291f6 commit e16291a
Show file tree
Hide file tree
Showing 15 changed files with 1,624 additions and 59 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.linkedin.davinci.consumer;

import com.linkedin.venice.pubsub.api.PubSubMessage;
import java.util.Collection;
import java.util.Set;
import java.util.concurrent.CompletableFuture;


/**
* This interface is meant for users where local state must be built off of the entirety of a venice data set
* (i.e. Non-idempotent event ingestion), rather than dealing with an event at a time. THIS IS EXPENSIVE.
* It's highly recommended that users use the {@link VeniceChangelogConsumer} interface as a means to consume Venice
* Change capture data.
*
* Implementations of this interface rely on access to a compacted view to the data and scanning the entirety of that
* compacted view initial calls to poll(). This is the only supported pattern with this interface today. {@link VeniceChangelogConsumer}
* enables finer control. This interface is intentionally limited as implementors rely on local checkpointing and
* maintenance of state which might be easily corrupted with byzantine seek() calls.
* @param <K>
* @param <V>
*/
public interface BootstrappingVeniceChangelogConsumer<K, V> {
/**
* Start performs both a topic subscription and catch up. The client will look at the latest offset in the server and
* sync bootstrap data up to that point in changes. Once that is done for all partitions, the future will complete.
*
* NOTE: This future may take some time to complete depending on how much data needs to be ingested in order to catch
* up with the time that this client started.
*
* @param partitions which partition id's to catch up with
* @return a future that completes once catch up is complete for all passed in partitions.
*/
CompletableFuture<Void> start(Set<Integer> partitions);

CompletableFuture<Void> start();

void stop() throws Exception;

/**
* polls for the next batch of change events. The first records returned following calling 'start()' will be from the bootstrap state.
* Once this state is consumed, subsequent calls to poll will be based off of recent updates to the Venice store.
* @param timeoutInMs
* @return
*/
Collection<PubSubMessage<K, ChangeEvent<V>, VeniceChangeCoordinate>> poll(long timeoutInMs);

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ public class ChangelogClientConfig<T extends SpecificRecord> {
private String controllerD2ServiceName;
private int controllerRequestRetryCount;

private String bootstrapFileSystemPath;

public ChangelogClientConfig(String storeName) {
this.innerClientConfig = new ClientConfig<>(storeName);
}
Expand Down Expand Up @@ -120,6 +122,15 @@ public ClientConfig<T> getInnerClientConfig() {
return this.innerClientConfig;
}

public ChangelogClientConfig<T> setBootstrapFileSystemPath(String bootstrapFileSystemPath) {
this.bootstrapFileSystemPath = bootstrapFileSystemPath;
return this;
}

public String getBootstrapFileSystemPath() {
return this.bootstrapFileSystemPath;
}

public static <V extends SpecificRecord> ChangelogClientConfig<V> cloneConfig(ChangelogClientConfig<V> config) {
ChangelogClientConfig<V> newConfig = new ChangelogClientConfig<V>().setStoreName(config.getStoreName())
.setLocalD2ZkHosts(config.getLocalD2ZkHosts())
Expand All @@ -130,7 +141,8 @@ public static <V extends SpecificRecord> ChangelogClientConfig<V> cloneConfig(Ch
.setD2ControllerClient(config.getD2ControllerClient())
.setControllerD2ServiceName(config.controllerD2ServiceName)
.setD2Client(config.getD2Client())
.setControllerRequestRetryCount(config.getControllerRequestRetryCount());
.setControllerRequestRetryCount(config.getControllerRequestRetryCount())
.setBootstrapFileSystemPath(config.getBootstrapFileSystemPath());
return newConfig;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@ public class ImmutableChangeCapturePubSubMessage<K, V> implements PubSubMessage<
private final VeniceChangeCoordinate offset;
private final long timestamp;
private final int payloadSize;
private final boolean isEndOfBootstrap;

public ImmutableChangeCapturePubSubMessage(
K key,
V value,
PubSubTopicPartition topicPartition,
long offset,
long timestamp,
int payloadSize) {
int payloadSize,
boolean isEndOfBootstrap) {
this.key = key;
this.value = value;
this.topicPartition = Objects.requireNonNull(topicPartition);
Expand All @@ -30,6 +32,7 @@ public ImmutableChangeCapturePubSubMessage(
this.topicPartition.getPubSubTopic().getName(),
new ApacheKafkaOffsetPosition(offset),
this.topicPartition.getPartitionNumber());
this.isEndOfBootstrap = isEndOfBootstrap;
}

@Override
Expand Down Expand Up @@ -61,4 +64,9 @@ public long getPubSubMessageTime() {
public int getPayloadSize() {
return payloadSize;
}

@Override
public boolean isEndOfBootstrap() {
return isEndOfBootstrap;
}
}
Loading

0 comments on commit e16291a

Please sign in to comment.