diff --git a/.wordlist.txt b/.wordlist.txt
index cb44c07c9..18df8593c 100644
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -121,6 +121,7 @@ SSD
SSDs
SSL
SSO
+Sample
SearchRequest
ShardIteratorType
Signifier
@@ -336,6 +337,7 @@ repo
rowset
rowsets
runtime
+sample
sasl
scalability
scalable
diff --git a/docs/examples/api-reference/sources/source.py b/docs/examples/api-reference/sources/source.py
index 837b41285..79949e223 100644
--- a/docs/examples/api-reference/sources/source.py
+++ b/docs/examples/api-reference/sources/source.py
@@ -18,6 +18,7 @@ def test_source_decorator(client):
import pandas as pd
from fennel.connectors import source, S3, ref, eval
from fennel.datasets import dataset, field
+ from fennel.connectors.connectors import Sample
s3 = S3(name="my_s3") # using IAM role based access
@@ -41,6 +42,7 @@ def test_source_decorator(client):
), # converting age dtype to int
},
env="prod",
+ sample=Sample(0.2, using=["email"]),
bounded=True,
idleness="1h",
)
diff --git a/docs/pages/api-reference/decorators/source.md b/docs/pages/api-reference/decorators/source.md
index 3953f61d7..f42da5616 100644
--- a/docs/pages/api-reference/decorators/source.md
+++ b/docs/pages/api-reference/decorators/source.md
@@ -49,6 +49,21 @@ can ever arrive. And if such rows do arrive, Fennel has the liberty of discardin
them and not including them in the computation.
+
+Sampling for a dataset can be specified in two ways:
+1. **Simply specify the sampling rate** when you want to sample the dataset without specifying the columns used for sampling.
+ - **Sampling Rate**: A float between 0 and 1 that determines the proportion of the dataset to include.
+2. **Use the `Sample` object** when you want to specify both the sampling rate and the specific columns used for sampling.
+ - **Sampling Rate**: A float between 0 and 1 that determines the proportion of the dataset to include.
+  - **Using**: A list of columns whose values are hashed to select the sampled rows. Preproc columns and the timestamp field cannot be included in this list.
+
+Default behavior when no columns are specified:
+1. **For keyed datasets**:
+   All key columns are used for sampling, excluding any preproc columns.
+2. **For non-keyed datasets**:
+   All columns are used for sampling, excluding the timestamp and preproc columns.
+
+
Specifies how should valid change data be constructed from the ingested data.
diff --git a/fennel/connectors/connectors.py b/fennel/connectors/connectors.py
index d20b0cf16..a2781f1a1 100644
--- a/fennel/connectors/connectors.py
+++ b/fennel/connectors/connectors.py
@@ -94,6 +94,21 @@ def preproc_has_indirection(preproc: Optional[Dict[str, PreProcValue]]):
return False
+class Sample:  # Sampling spec for a @source: keep `rate` fraction of rows, hashed on `using` columns
+    rate: float  # fraction of the dataset to include, must lie in [0, 1]
+    using: List[str]  # columns hashed for sampling; must be non-empty (use a plain float otherwise)
+
+    def __init__(self, rate: float, using: List[str]):
+        if rate < 0 or rate > 1:
+            raise ValueError("Sample rate should be between 0 and 1")
+        if using is None or len(using) == 0:
+            raise ValueError(
+                f"Using must be a non-empty list, try using sample={rate} instead"
+            )
+        self.rate = rate
+        self.using = using
+
+
def source(
conn: DataConnector,
disorder: Duration,
@@ -106,6 +121,7 @@ def source(
bounded: bool = False,
idleness: Optional[Duration] = None,
where: Optional[Callable] = None,
+ sample: Optional[Union[float, Sample]] = None,
) -> Callable[[T], Any]:
"""
Decorator to specify the source of data for a dataset. The source can be
@@ -184,6 +200,24 @@ def source(
"idleness parameter should not be passed when bounded is set as False"
)
+ if sample is not None:
+ if isinstance(sample, float):
+ if sample < 0 or sample > 1:
+ raise ValueError("Sample rate should be between 0 and 1")
+ elif isinstance(sample, Sample):
+ disallowed_columns = []
+ if preproc is not None:
+ disallowed_columns.extend(list(preproc.keys()))
+ for column in sample.using:
+ if column in disallowed_columns:
+ raise ValueError(
+ f"Column {column} is part of preproc so cannot be used for sampling"
+ )
+ else:
+ raise ValueError(
+ "Sample should be either a float or a Sample object"
+ )
+
def decorator(dataset_cls: T):
connector = copy.deepcopy(conn)
connector.every = every if every is not None else DEFAULT_EVERY
@@ -196,6 +230,7 @@ def decorator(dataset_cls: T):
connector.bounded = bounded
connector.idleness = idleness
connector.where = where
+ connector.sample = sample
connectors = getattr(dataset_cls, SOURCE_FIELD, [])
connectors.append(connector)
setattr(dataset_cls, SOURCE_FIELD, connectors)
@@ -711,6 +746,7 @@ class DataConnector:
bounded: bool = False
idleness: Optional[Duration] = None
where: Optional[Callable] = None
+ sample: Optional[Union[float, Sample]] = None
how: Optional[Literal["incremental", "recreate"] | SnapshotData] = None
create: Optional[bool] = None
renames: Optional[Dict[str, str]] = {}
diff --git a/fennel/connectors/test_connectors.py b/fennel/connectors/test_connectors.py
index 9ac07900e..79d9a7673 100644
--- a/fennel/connectors/test_connectors.py
+++ b/fennel/connectors/test_connectors.py
@@ -25,7 +25,7 @@
PubSub,
eval,
)
-from fennel.connectors.connectors import CSV, Postgres
+from fennel.connectors.connectors import CSV, Postgres, Sample
from fennel.datasets import dataset, field, pipeline, Dataset
from fennel.expr import col, lit
from fennel.integrations.aws import Secret
@@ -1004,6 +1004,406 @@ def create_user_transactions(cls, dataset: Dataset):
)
+def test_source_with_sample():
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=Sample(0.1, ["user_id", "name"]),
+ since=datetime.strptime("2021-08-10T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ source_request = sync_request.sources[0]
+ s = {
+ "table": {
+ "mysql_table": {
+ "db": {
+ "name": "mysql",
+ "mysql": {
+ "host": "localhost",
+ "database": "test",
+ "user": "root",
+ "password": "root",
+ "port": 3306,
+ },
+ },
+ "table_name": "users_mysql",
+ },
+ },
+ "dataset": "UserInfoDatasetMySql",
+ "dsVersion": 1,
+ "every": "3600s",
+ "cdc": "Upsert",
+ "disorder": "1209600s",
+ "samplingStrategy": {
+ "columnsUsed": ["name", "user_id"],
+ "samplingRate": 0.1,
+ },
+ "cursor": "added_on",
+ "timestampField": "timestamp",
+ "startingFrom": "2021-08-10T00:00:00Z",
+ }
+ expected_source_request = ParseDict(s, connector_proto.Source())
+ assert source_request == expected_source_request, error_message(
+ source_request, expected_source_request
+ )
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=0.1,
+ since=datetime.strptime("2021-08-10T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ source_request = sync_request.sources[0]
+ s = {
+ "table": {
+ "mysql_table": {
+ "db": {
+ "name": "mysql",
+ "mysql": {
+ "host": "localhost",
+ "database": "test",
+ "user": "root",
+ "password": "root",
+ "port": 3306,
+ },
+ },
+ "table_name": "users_mysql",
+ },
+ },
+ "dataset": "UserInfoDatasetMySql",
+ "dsVersion": 1,
+ "every": "3600s",
+ "cdc": "Upsert",
+ "disorder": "1209600s",
+ "samplingStrategy": {
+ "samplingRate": 0.1,
+ "columnsUsed": ["user_id"],
+ },
+ "cursor": "added_on",
+ "timestampField": "timestamp",
+ "startingFrom": "2021-08-10T00:00:00Z",
+ }
+ expected_source_request = ParseDict(s, connector_proto.Source())
+ assert source_request == expected_source_request, error_message(
+ source_request, expected_source_request
+ )
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="append",
+ sample=0.1,
+ since=datetime.strptime("2021-08-10T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ source_request = sync_request.sources[0]
+ s = {
+ "table": {
+ "mysql_table": {
+ "db": {
+ "name": "mysql",
+ "mysql": {
+ "host": "localhost",
+ "database": "test",
+ "user": "root",
+ "password": "root",
+ "port": 3306,
+ },
+ },
+ "table_name": "users_mysql",
+ },
+ },
+ "dataset": "UserInfoDatasetMySql",
+ "dsVersion": 1,
+ "every": "3600s",
+ "cdc": "Append",
+ "disorder": "1209600s",
+ "samplingStrategy": {
+ "samplingRate": 0.1,
+ "columnsUsed": ["country", "name", "user_id"],
+ },
+ "cursor": "added_on",
+ "timestampField": "timestamp",
+ "startingFrom": "2021-08-10T00:00:00Z",
+ }
+ expected_source_request = ParseDict(s, connector_proto.Source())
+ assert source_request == expected_source_request, error_message(
+ source_request, expected_source_request
+ )
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=Sample(0.1, ["user_id", "age"]),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ try:
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert str(e) == "Column age is not part of dataset columns"
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=Sample(0.1, ["user_id", "timestamp"]),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ try:
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert (
+ str(e)
+ == "Timestamp column: timestamp cannot be part of sampling columns"
+ )
+
+ try:
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ preproc={"name": "abc"},
+ disorder="14d",
+ cdc="upsert",
+ sample=Sample(0.1, ["user_id", "name"]),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert (
+ str(e)
+ == "Column name is part of preproc so cannot be used for sampling"
+ )
+
+ try:
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ preproc={"user_id": 1},
+ disorder="14d",
+ cdc="upsert",
+ sample=0.1,
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert (
+ str(e)
+ == "No columns left to sample from. all key columns are part of preproc"
+ )
+
+ try:
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ preproc={"user_id": 1},
+ disorder="14d",
+ cdc="append",
+ sample=0.1,
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert (
+ str(e)
+ == "No columns left to sample from. all columns are part of preproc"
+ )
+
+ try:
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=Sample(1.1, ["user_id", "name"]),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert str(e) == "Sample rate should be between 0 and 1"
+
+ try:
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=1.1,
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert str(e) == "Sample rate should be between 0 and 1"
+
+ try:
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=Sample(0.5, None),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert (
+ str(e)
+ == "Using must be a non-empty list, try using sample=0.5 instead"
+ )
+
+ try:
+
+ @meta(owner="test@test.com")
+ @source(
+ mysql.table("users_mysql", cursor="added_on"),
+ every="1h",
+ disorder="14d",
+ cdc="upsert",
+ sample=Sample(0.5, []),
+ )
+ @dataset
+ class UserInfoDatasetMySql:
+ user_id: int = field(key=True)
+ name: str
+ country: Optional[str]
+ timestamp: datetime = field(timestamp=True)
+
+ # mysql source
+ view = InternalTestClient()
+ view.add(UserInfoDatasetMySql)
+ sync_request = view._get_sync_request_proto()
+ except ValueError as e:
+ assert (
+ str(e)
+ == "Using must be a non-empty list, try using sample=0.5 instead"
+ )
+
+
def test_multiple_sources_mysql():
@meta(owner="test@test.com")
@source(
diff --git a/fennel/gen/connector_pb2.py b/fennel/gen/connector_pb2.py
index 52ca69dd1..5fb047ba6 100644
--- a/fennel/gen/connector_pb2.py
+++ b/fennel/gen/connector_pb2.py
@@ -22,7 +22,7 @@
import fennel.gen.secret_pb2 as secret__pb2
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x63onnector.proto\x12\x16\x66\x65nnel.proto.connector\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x10\x65xpression.proto\x1a\rkinesis.proto\x1a\x0cpycode.proto\x1a\x15schema_registry.proto\x1a\x0cschema.proto\x1a\x0csecret.proto\"\x8c\x05\n\x0b\x45xtDatabase\x12\x0c\n\x04name\x18\x01 \x01(\t\x12.\n\x05mysql\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.connector.MySQLH\x00\x12\x34\n\x08postgres\x18\x03 \x01(\x0b\x32 .fennel.proto.connector.PostgresH\x00\x12\x36\n\treference\x18\x04 \x01(\x0b\x32!.fennel.proto.connector.ReferenceH\x00\x12(\n\x02s3\x18\x05 \x01(\x0b\x32\x1a.fennel.proto.connector.S3H\x00\x12\x34\n\x08\x62igquery\x18\x06 \x01(\x0b\x32 .fennel.proto.connector.BigqueryH\x00\x12\x36\n\tsnowflake\x18\x07 \x01(\x0b\x32!.fennel.proto.connector.SnowflakeH\x00\x12.\n\x05kafka\x18\x08 \x01(\x0b\x32\x1d.fennel.proto.connector.KafkaH\x00\x12\x32\n\x07webhook\x18\t \x01(\x0b\x32\x1f.fennel.proto.connector.WebhookH\x00\x12\x32\n\x07kinesis\x18\n \x01(\x0b\x32\x1f.fennel.proto.connector.KinesisH\x00\x12\x34\n\x08redshift\x18\x0b \x01(\x0b\x32 .fennel.proto.connector.RedshiftH\x00\x12.\n\x05mongo\x18\x0c \x01(\x0b\x32\x1d.fennel.proto.connector.MongoH\x00\x12\x30\n\x06pubsub\x18\r \x01(\x0b\x32\x1e.fennel.proto.connector.PubSubH\x00\x42\t\n\x07variant\"M\n\x0cPubSubFormat\x12\x32\n\x04json\x18\x01 \x01(\x0b\x32\".fennel.proto.connector.JsonFormatH\x00\x42\t\n\x07variant\"\xbc\x01\n\x0bKafkaFormat\x12\x32\n\x04json\x18\x01 \x01(\x0b\x32\".fennel.proto.connector.JsonFormatH\x00\x12\x32\n\x04\x61vro\x18\x02 \x01(\x0b\x32\".fennel.proto.connector.AvroFormatH\x00\x12:\n\x08protobuf\x18\x03 \x01(\x0b\x32&.fennel.proto.connector.ProtobufFormatH\x00\x42\t\n\x07variant\"\x0c\n\nJsonFormat\"S\n\nAvroFormat\x12\x45\n\x0fschema_registry\x18\x01 \x01(\x0b\x32,.fennel.proto.schema_registry.SchemaRegistry\"W\n\x0eProtobufFormat\x12\x45\n\x0fschema_registry\x18\x01 
\x01(\x0b\x32,.fennel.proto.schema_registry.SchemaRegistry\"\xde\x01\n\tReference\x12;\n\x06\x64\x62type\x18\x01 \x01(\x0e\x32+.fennel.proto.connector.Reference.ExtDBType\"\x93\x01\n\tExtDBType\x12\t\n\x05MYSQL\x10\x00\x12\x0c\n\x08POSTGRES\x10\x01\x12\x06\n\x02S3\x10\x02\x12\t\n\x05KAFKA\x10\x03\x12\x0c\n\x08\x42IGQUERY\x10\x04\x12\r\n\tSNOWFLAKE\x10\x05\x12\x0b\n\x07WEBHOOK\x10\x06\x12\x0b\n\x07KINESIS\x10\x07\x12\x0c\n\x08REDSHIFT\x10\x08\x12\t\n\x05MONGO\x10\t\x12\n\n\x06PUBSUB\x10\n\"E\n\x07Webhook\x12\x0c\n\x04name\x18\x01 \x01(\t\x12,\n\tretention\x18\x02 \x01(\x0b\x32\x19.google.protobuf.Duration\"\x8c\x02\n\x05MySQL\x12\x0e\n\x04user\x18\x03 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x07 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x04 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x05 \x01(\r\x12\x13\n\x0bjdbc_params\x18\x06 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\x8f\x02\n\x08Postgres\x12\x0e\n\x04user\x18\x03 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x07 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x04 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x05 \x01(\r\x12\x13\n\x0bjdbc_params\x18\x06 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\xb0\x02\n\x02S3\x12\x1f\n\x15\x61ws_secret_access_key\x18\x01 \x01(\tH\x00\x12\x46\n\x1c\x61ws_secret_access_key_secret\x18\x04 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x1b\n\x11\x61ws_access_key_id\x18\x02 \x01(\tH\x01\x12\x42\n\x18\x61ws_access_key_id_secret\x18\x05 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x15\n\x08role_arn\x18\x03 
\x01(\tH\x02\x88\x01\x01\x42\x1f\n\x1d\x61ws_secret_access_key_variantB\x1b\n\x19\x61ws_access_key_id_variantB\x0b\n\t_role_arn\"\xb6\x01\n\x08\x42igquery\x12\x1d\n\x13service_account_key\x18\x02 \x01(\tH\x00\x12\x44\n\x1aservice_account_key_secret\x18\x04 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\ndataset_id\x18\x01 \x01(\t\x12\x12\n\nproject_id\x18\x03 \x01(\tB\x1d\n\x1bservice_account_key_variant\"\xa1\x02\n\tSnowflake\x12\x0e\n\x04user\x18\x02 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x03 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\t \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0f\n\x07\x61\x63\x63ount\x18\x01 \x01(\t\x12\x0e\n\x06schema\x18\x04 \x01(\t\x12\x11\n\twarehouse\x18\x05 \x01(\t\x12\x0c\n\x04role\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\xf9\x02\n\x05Kafka\x12\x1d\n\x13sasl_plain_username\x18\x05 \x01(\tH\x00\x12>\n\x14sasl_username_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x1d\n\x13sasl_plain_password\x18\x06 \x01(\tH\x01\x12>\n\x14sasl_password_secret\x18\t \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x19\n\x11\x62ootstrap_servers\x18\x01 \x01(\t\x12\x19\n\x11security_protocol\x18\x02 \x01(\t\x12\x16\n\x0esasl_mechanism\x18\x03 \x01(\t\x12\x1c\n\x10sasl_jaas_config\x18\x04 \x01(\tB\x02\x18\x01\x12\x14\n\x08group_id\x18\x07 \x01(\tB\x02\x18\x01\x42\x17\n\x15sasl_username_variantB\x17\n\x15sasl_password_variant\"\x1b\n\x07Kinesis\x12\x10\n\x08role_arn\x18\x01 \x01(\t\"\xd3\x01\n\x0b\x43redentials\x12\x12\n\x08username\x18\x01 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x03 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x02 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x04 
\x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x42\x12\n\x10username_variantB\x12\n\x10password_variant\"}\n\x16RedshiftAuthentication\x12\x1c\n\x12s3_access_role_arn\x18\x01 \x01(\tH\x00\x12:\n\x0b\x63redentials\x18\x02 \x01(\x0b\x32#.fennel.proto.connector.CredentialsH\x00\x42\t\n\x07variant\"\x99\x01\n\x08Redshift\x12\x10\n\x08\x64\x61tabase\x18\x01 \x01(\t\x12\x0c\n\x04host\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x03 \x01(\r\x12\x0e\n\x06schema\x18\x04 \x01(\t\x12O\n\x17redshift_authentication\x18\x05 \x01(\x0b\x32..fennel.proto.connector.RedshiftAuthentication\"\xe9\x01\n\x05Mongo\x12\x0e\n\x04user\x18\x03 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x05 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x04 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x06 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x02 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\xa0\x01\n\x06PubSub\x12\x1d\n\x13service_account_key\x18\x02 \x01(\tH\x00\x12\x44\n\x1aservice_account_key_secret\x18\x03 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\nproject_id\x18\x01 \x01(\tB\x1d\n\x1bservice_account_key_variant\"\xc0\x05\n\x08\x45xtTable\x12\x39\n\x0bmysql_table\x18\x01 \x01(\x0b\x32\".fennel.proto.connector.MySQLTableH\x00\x12\x39\n\x08pg_table\x18\x02 \x01(\x0b\x32%.fennel.proto.connector.PostgresTableH\x00\x12\x33\n\x08s3_table\x18\x03 \x01(\x0b\x32\x1f.fennel.proto.connector.S3TableH\x00\x12\x39\n\x0bkafka_topic\x18\x04 \x01(\x0b\x32\".fennel.proto.connector.KafkaTopicH\x00\x12\x41\n\x0fsnowflake_table\x18\x05 \x01(\x0b\x32&.fennel.proto.connector.SnowflakeTableH\x00\x12?\n\x0e\x62igquery_table\x18\x06 \x01(\x0b\x32%.fennel.proto.connector.BigqueryTableH\x00\x12;\n\x08\x65ndpoint\x18\x07 \x01(\x0b\x32\'.fennel.proto.connector.WebhookEndpointH\x00\x12?\n\x0ekinesis_stream\x18\x08 
\x01(\x0b\x32%.fennel.proto.connector.KinesisStreamH\x00\x12?\n\x0eredshift_table\x18\t \x01(\x0b\x32%.fennel.proto.connector.RedshiftTableH\x00\x12\x43\n\x10mongo_collection\x18\n \x01(\x0b\x32\'.fennel.proto.connector.MongoCollectionH\x00\x12;\n\x0cpubsub_topic\x18\x0b \x01(\x0b\x32#.fennel.proto.connector.PubSubTopicH\x00\x42\t\n\x07variant\"Q\n\nMySQLTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"z\n\rPostgresTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\x12\x16\n\tslot_name\x18\x03 \x01(\tH\x00\x88\x01\x01\x42\x0c\n\n_slot_name\"\xf7\x01\n\x07S3Table\x12\x0e\n\x06\x62ucket\x18\x01 \x01(\t\x12\x13\n\x0bpath_prefix\x18\x02 \x01(\t\x12\x11\n\tdelimiter\x18\x04 \x01(\t\x12\x0e\n\x06\x66ormat\x18\x05 \x01(\t\x12/\n\x02\x64\x62\x18\x06 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\npre_sorted\x18\x07 \x01(\x08\x12\x13\n\x0bpath_suffix\x18\x08 \x01(\t\x12.\n\x06spread\x18\x03 \x01(\x0b\x32\x19.google.protobuf.DurationH\x00\x88\x01\x01\x12\x0f\n\x07headers\x18\t \x03(\tB\t\n\x07_spread\"\x81\x01\n\nKafkaTopic\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\r\n\x05topic\x18\x02 \x01(\t\x12\x33\n\x06\x66ormat\x18\x03 \x01(\x0b\x32#.fennel.proto.connector.KafkaFormat\"T\n\rBigqueryTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"U\n\x0eSnowflakeTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"\x81\x01\n\x0fWebhookEndpoint\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x10\n\x08\x65ndpoint\x18\x02 \x01(\t\x12+\n\x08\x64uration\x18\x03 \x01(\x0b\x32\x19.google.protobuf.Duration\"\xd3\x01\n\rKinesisStream\x12\x12\n\nstream_arn\x18\x01 \x01(\t\x12\x39\n\rinit_position\x18\x02 
\x01(\x0e\x32\".fennel.proto.kinesis.InitPosition\x12\x32\n\x0einit_timestamp\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12/\n\x02\x64\x62\x18\x05 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\"T\n\rRedshiftTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"\x86\x01\n\x0bPubSubTopic\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x10\n\x08topic_id\x18\x02 \x01(\t\x12\x34\n\x06\x66ormat\x18\x03 \x01(\x0b\x32$.fennel.proto.connector.PubSubFormat\"\xb1\x02\n\x0cPreProcValue\x12\r\n\x03ref\x18\x01 \x01(\tH\x00\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.schema.ValueH\x00\x12\x39\n\x04\x65val\x18\x03 \x01(\x0b\x32).fennel.proto.connector.PreProcValue.EvalH\x00\x1a\x9e\x01\n\x04\x45val\x12+\n\x06schema\x18\x01 \x01(\x0b\x32\x1b.fennel.proto.schema.Schema\x12-\n\x04\x65xpr\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expression.ExprH\x00\x12-\n\x06pycode\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCodeH\x00\x42\x0b\n\teval_typeB\t\n\x07variant\"[\n\x0fMongoCollection\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x17\n\x0f\x63ollection_name\x18\x02 \x01(\t\"2\n\x0cSnapshotData\x12\x0e\n\x06marker\x18\x01 \x01(\t\x12\x12\n\nnum_retain\x18\x02 \x01(\r\"\r\n\x0bIncremental\"\n\n\x08Recreate\"\xbc\x01\n\x05Style\x12:\n\x0bincremental\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.IncrementalH\x00\x12\x34\n\x08recreate\x18\x02 \x01(\x0b\x32 .fennel.proto.connector.RecreateH\x00\x12\x38\n\x08snapshot\x18\x03 \x01(\x0b\x32$.fennel.proto.connector.SnapshotDataH\x00\x42\x07\n\x05Style\"\xb1\x05\n\x06Source\x12/\n\x05table\x18\x01 \x01(\x0b\x32 .fennel.proto.connector.ExtTable\x12\x0f\n\x07\x64\x61taset\x18\x02 \x01(\t\x12\x12\n\nds_version\x18\x03 \x01(\r\x12(\n\x05\x65very\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\x13\n\x06\x63ursor\x18\x05 
\x01(\tH\x00\x88\x01\x01\x12+\n\x08\x64isorder\x18\x06 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\x17\n\x0ftimestamp_field\x18\x07 \x01(\t\x12\x30\n\x03\x63\x64\x63\x18\x08 \x01(\x0e\x32#.fennel.proto.connector.CDCStrategy\x12\x31\n\rstarting_from\x18\t \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12=\n\x08pre_proc\x18\n \x03(\x0b\x32+.fennel.proto.connector.Source.PreProcEntry\x12\x0f\n\x07version\x18\x0b \x01(\r\x12\x0f\n\x07\x62ounded\x18\x0c \x01(\x08\x12\x30\n\x08idleness\x18\r \x01(\x0b\x32\x19.google.protobuf.DurationH\x01\x88\x01\x01\x12)\n\x05until\x18\x0e \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x30\n\x06\x66ilter\x18\x0f \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCodeH\x02\x88\x01\x01\x1aT\n\x0cPreProcEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.fennel.proto.connector.PreProcValue:\x02\x38\x01\x42\t\n\x07_cursorB\x0b\n\t_idlenessB\t\n\x07_filter\"\xee\x03\n\x04Sink\x12/\n\x05table\x18\x01 \x01(\x0b\x32 .fennel.proto.connector.ExtTable\x12\x0f\n\x07\x64\x61taset\x18\x02 \x01(\t\x12\x12\n\nds_version\x18\x03 \x01(\r\x12\x35\n\x03\x63\x64\x63\x18\x04 \x01(\x0e\x32#.fennel.proto.connector.CDCStrategyH\x00\x88\x01\x01\x12(\n\x05\x65very\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12/\n\x03how\x18\x06 \x01(\x0b\x32\x1d.fennel.proto.connector.StyleH\x01\x88\x01\x01\x12\x0e\n\x06\x63reate\x18\x07 \x01(\x08\x12:\n\x07renames\x18\x08 \x03(\x0b\x32).fennel.proto.connector.Sink.RenamesEntry\x12.\n\x05since\x18\t \x01(\x0b\x32\x1a.google.protobuf.TimestampH\x02\x88\x01\x01\x12.\n\x05until\x18\n \x01(\x0b\x32\x1a.google.protobuf.TimestampH\x03\x88\x01\x01\x1a.\n\x0cRenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x06\n\x04_cdcB\x06\n\x04_howB\x08\n\x06_sinceB\x08\n\x06_until*K\n\x0b\x43\x44\x43Strategy\x12\n\n\x06\x41ppend\x10\x00\x12\n\n\x06Upsert\x10\x01\x12\x0c\n\x08\x44\x65\x62\x65zium\x10\x02\x12\n\n\x06Native\x10\x03\x12\n\n\x06\x44\x65lete\x10\x04\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x63onnector.proto\x12\x16\x66\x65nnel.proto.connector\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x10\x65xpression.proto\x1a\rkinesis.proto\x1a\x0cpycode.proto\x1a\x15schema_registry.proto\x1a\x0cschema.proto\x1a\x0csecret.proto\"\x8c\x05\n\x0b\x45xtDatabase\x12\x0c\n\x04name\x18\x01 \x01(\t\x12.\n\x05mysql\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.connector.MySQLH\x00\x12\x34\n\x08postgres\x18\x03 \x01(\x0b\x32 .fennel.proto.connector.PostgresH\x00\x12\x36\n\treference\x18\x04 \x01(\x0b\x32!.fennel.proto.connector.ReferenceH\x00\x12(\n\x02s3\x18\x05 \x01(\x0b\x32\x1a.fennel.proto.connector.S3H\x00\x12\x34\n\x08\x62igquery\x18\x06 \x01(\x0b\x32 .fennel.proto.connector.BigqueryH\x00\x12\x36\n\tsnowflake\x18\x07 \x01(\x0b\x32!.fennel.proto.connector.SnowflakeH\x00\x12.\n\x05kafka\x18\x08 \x01(\x0b\x32\x1d.fennel.proto.connector.KafkaH\x00\x12\x32\n\x07webhook\x18\t \x01(\x0b\x32\x1f.fennel.proto.connector.WebhookH\x00\x12\x32\n\x07kinesis\x18\n \x01(\x0b\x32\x1f.fennel.proto.connector.KinesisH\x00\x12\x34\n\x08redshift\x18\x0b \x01(\x0b\x32 .fennel.proto.connector.RedshiftH\x00\x12.\n\x05mongo\x18\x0c \x01(\x0b\x32\x1d.fennel.proto.connector.MongoH\x00\x12\x30\n\x06pubsub\x18\r \x01(\x0b\x32\x1e.fennel.proto.connector.PubSubH\x00\x42\t\n\x07variant\"?\n\x10SamplingStrategy\x12\x15\n\rsampling_rate\x18\x01 \x01(\x01\x12\x14\n\x0c\x63olumns_used\x18\x02 \x03(\t\"M\n\x0cPubSubFormat\x12\x32\n\x04json\x18\x01 \x01(\x0b\x32\".fennel.proto.connector.JsonFormatH\x00\x42\t\n\x07variant\"\xbc\x01\n\x0bKafkaFormat\x12\x32\n\x04json\x18\x01 \x01(\x0b\x32\".fennel.proto.connector.JsonFormatH\x00\x12\x32\n\x04\x61vro\x18\x02 \x01(\x0b\x32\".fennel.proto.connector.AvroFormatH\x00\x12:\n\x08protobuf\x18\x03 \x01(\x0b\x32&.fennel.proto.connector.ProtobufFormatH\x00\x42\t\n\x07variant\"\x0c\n\nJsonFormat\"S\n\nAvroFormat\x12\x45\n\x0fschema_registry\x18\x01 
\x01(\x0b\x32,.fennel.proto.schema_registry.SchemaRegistry\"W\n\x0eProtobufFormat\x12\x45\n\x0fschema_registry\x18\x01 \x01(\x0b\x32,.fennel.proto.schema_registry.SchemaRegistry\"\xde\x01\n\tReference\x12;\n\x06\x64\x62type\x18\x01 \x01(\x0e\x32+.fennel.proto.connector.Reference.ExtDBType\"\x93\x01\n\tExtDBType\x12\t\n\x05MYSQL\x10\x00\x12\x0c\n\x08POSTGRES\x10\x01\x12\x06\n\x02S3\x10\x02\x12\t\n\x05KAFKA\x10\x03\x12\x0c\n\x08\x42IGQUERY\x10\x04\x12\r\n\tSNOWFLAKE\x10\x05\x12\x0b\n\x07WEBHOOK\x10\x06\x12\x0b\n\x07KINESIS\x10\x07\x12\x0c\n\x08REDSHIFT\x10\x08\x12\t\n\x05MONGO\x10\t\x12\n\n\x06PUBSUB\x10\n\"E\n\x07Webhook\x12\x0c\n\x04name\x18\x01 \x01(\t\x12,\n\tretention\x18\x02 \x01(\x0b\x32\x19.google.protobuf.Duration\"\x8c\x02\n\x05MySQL\x12\x0e\n\x04user\x18\x03 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x07 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x04 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x05 \x01(\r\x12\x13\n\x0bjdbc_params\x18\x06 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\x8f\x02\n\x08Postgres\x12\x0e\n\x04user\x18\x03 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x07 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x04 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x05 \x01(\r\x12\x13\n\x0bjdbc_params\x18\x06 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\xb0\x02\n\x02S3\x12\x1f\n\x15\x61ws_secret_access_key\x18\x01 \x01(\tH\x00\x12\x46\n\x1c\x61ws_secret_access_key_secret\x18\x04 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x1b\n\x11\x61ws_access_key_id\x18\x02 
\x01(\tH\x01\x12\x42\n\x18\x61ws_access_key_id_secret\x18\x05 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x15\n\x08role_arn\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x1f\n\x1d\x61ws_secret_access_key_variantB\x1b\n\x19\x61ws_access_key_id_variantB\x0b\n\t_role_arn\"\xb6\x01\n\x08\x42igquery\x12\x1d\n\x13service_account_key\x18\x02 \x01(\tH\x00\x12\x44\n\x1aservice_account_key_secret\x18\x04 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\ndataset_id\x18\x01 \x01(\t\x12\x12\n\nproject_id\x18\x03 \x01(\tB\x1d\n\x1bservice_account_key_variant\"\xa1\x02\n\tSnowflake\x12\x0e\n\x04user\x18\x02 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x03 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\t \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0f\n\x07\x61\x63\x63ount\x18\x01 \x01(\t\x12\x0e\n\x06schema\x18\x04 \x01(\t\x12\x11\n\twarehouse\x18\x05 \x01(\t\x12\x0c\n\x04role\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\xf9\x02\n\x05Kafka\x12\x1d\n\x13sasl_plain_username\x18\x05 \x01(\tH\x00\x12>\n\x14sasl_username_secret\x18\x08 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x1d\n\x13sasl_plain_password\x18\x06 \x01(\tH\x01\x12>\n\x14sasl_password_secret\x18\t \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x19\n\x11\x62ootstrap_servers\x18\x01 \x01(\t\x12\x19\n\x11security_protocol\x18\x02 \x01(\t\x12\x16\n\x0esasl_mechanism\x18\x03 \x01(\t\x12\x1c\n\x10sasl_jaas_config\x18\x04 \x01(\tB\x02\x18\x01\x12\x14\n\x08group_id\x18\x07 \x01(\tB\x02\x18\x01\x42\x17\n\x15sasl_username_variantB\x17\n\x15sasl_password_variant\"\x1b\n\x07Kinesis\x12\x10\n\x08role_arn\x18\x01 \x01(\t\"\xd3\x01\n\x0b\x43redentials\x12\x12\n\x08username\x18\x01 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x03 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x02 
\x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x04 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x42\x12\n\x10username_variantB\x12\n\x10password_variant\"}\n\x16RedshiftAuthentication\x12\x1c\n\x12s3_access_role_arn\x18\x01 \x01(\tH\x00\x12:\n\x0b\x63redentials\x18\x02 \x01(\x0b\x32#.fennel.proto.connector.CredentialsH\x00\x42\t\n\x07variant\"\x99\x01\n\x08Redshift\x12\x10\n\x08\x64\x61tabase\x18\x01 \x01(\t\x12\x0c\n\x04host\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x03 \x01(\r\x12\x0e\n\x06schema\x18\x04 \x01(\t\x12O\n\x17redshift_authentication\x18\x05 \x01(\x0b\x32..fennel.proto.connector.RedshiftAuthentication\"\xe9\x01\n\x05Mongo\x12\x0e\n\x04user\x18\x03 \x01(\tH\x00\x12\x39\n\x0fusername_secret\x18\x05 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\x08password\x18\x04 \x01(\tH\x01\x12\x39\n\x0fpassword_secret\x18\x06 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x01\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x02 \x01(\tB\x12\n\x10username_variantB\x12\n\x10password_variant\"\xa0\x01\n\x06PubSub\x12\x1d\n\x13service_account_key\x18\x02 \x01(\tH\x00\x12\x44\n\x1aservice_account_key_secret\x18\x03 \x01(\x0b\x32\x1e.fennel.proto.secret.SecretRefH\x00\x12\x12\n\nproject_id\x18\x01 \x01(\tB\x1d\n\x1bservice_account_key_variant\"\xc0\x05\n\x08\x45xtTable\x12\x39\n\x0bmysql_table\x18\x01 \x01(\x0b\x32\".fennel.proto.connector.MySQLTableH\x00\x12\x39\n\x08pg_table\x18\x02 \x01(\x0b\x32%.fennel.proto.connector.PostgresTableH\x00\x12\x33\n\x08s3_table\x18\x03 \x01(\x0b\x32\x1f.fennel.proto.connector.S3TableH\x00\x12\x39\n\x0bkafka_topic\x18\x04 \x01(\x0b\x32\".fennel.proto.connector.KafkaTopicH\x00\x12\x41\n\x0fsnowflake_table\x18\x05 \x01(\x0b\x32&.fennel.proto.connector.SnowflakeTableH\x00\x12?\n\x0e\x62igquery_table\x18\x06 \x01(\x0b\x32%.fennel.proto.connector.BigqueryTableH\x00\x12;\n\x08\x65ndpoint\x18\x07 \x01(\x0b\x32\'.fennel.proto.connector.WebhookEndpointH\x00\x12?\n\x0ekinesis_stream\x18\x08 
\x01(\x0b\x32%.fennel.proto.connector.KinesisStreamH\x00\x12?\n\x0eredshift_table\x18\t \x01(\x0b\x32%.fennel.proto.connector.RedshiftTableH\x00\x12\x43\n\x10mongo_collection\x18\n \x01(\x0b\x32\'.fennel.proto.connector.MongoCollectionH\x00\x12;\n\x0cpubsub_topic\x18\x0b \x01(\x0b\x32#.fennel.proto.connector.PubSubTopicH\x00\x42\t\n\x07variant\"Q\n\nMySQLTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"z\n\rPostgresTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\x12\x16\n\tslot_name\x18\x03 \x01(\tH\x00\x88\x01\x01\x42\x0c\n\n_slot_name\"\xf7\x01\n\x07S3Table\x12\x0e\n\x06\x62ucket\x18\x01 \x01(\t\x12\x13\n\x0bpath_prefix\x18\x02 \x01(\t\x12\x11\n\tdelimiter\x18\x04 \x01(\t\x12\x0e\n\x06\x66ormat\x18\x05 \x01(\t\x12/\n\x02\x64\x62\x18\x06 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\npre_sorted\x18\x07 \x01(\x08\x12\x13\n\x0bpath_suffix\x18\x08 \x01(\t\x12.\n\x06spread\x18\x03 \x01(\x0b\x32\x19.google.protobuf.DurationH\x00\x88\x01\x01\x12\x0f\n\x07headers\x18\t \x03(\tB\t\n\x07_spread\"\x81\x01\n\nKafkaTopic\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\r\n\x05topic\x18\x02 \x01(\t\x12\x33\n\x06\x66ormat\x18\x03 \x01(\x0b\x32#.fennel.proto.connector.KafkaFormat\"T\n\rBigqueryTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"U\n\x0eSnowflakeTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"\x81\x01\n\x0fWebhookEndpoint\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x10\n\x08\x65ndpoint\x18\x02 \x01(\t\x12+\n\x08\x64uration\x18\x03 \x01(\x0b\x32\x19.google.protobuf.Duration\"\xd3\x01\n\rKinesisStream\x12\x12\n\nstream_arn\x18\x01 \x01(\t\x12\x39\n\rinit_position\x18\x02 
\x01(\x0e\x32\".fennel.proto.kinesis.InitPosition\x12\x32\n\x0einit_timestamp\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12/\n\x02\x64\x62\x18\x05 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\"T\n\rRedshiftTable\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x12\n\ntable_name\x18\x02 \x01(\t\"\x86\x01\n\x0bPubSubTopic\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x10\n\x08topic_id\x18\x02 \x01(\t\x12\x34\n\x06\x66ormat\x18\x03 \x01(\x0b\x32$.fennel.proto.connector.PubSubFormat\"\xb1\x02\n\x0cPreProcValue\x12\r\n\x03ref\x18\x01 \x01(\tH\x00\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.schema.ValueH\x00\x12\x39\n\x04\x65val\x18\x03 \x01(\x0b\x32).fennel.proto.connector.PreProcValue.EvalH\x00\x1a\x9e\x01\n\x04\x45val\x12+\n\x06schema\x18\x01 \x01(\x0b\x32\x1b.fennel.proto.schema.Schema\x12-\n\x04\x65xpr\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expression.ExprH\x00\x12-\n\x06pycode\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCodeH\x00\x42\x0b\n\teval_typeB\t\n\x07variant\"[\n\x0fMongoCollection\x12/\n\x02\x64\x62\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.ExtDatabase\x12\x17\n\x0f\x63ollection_name\x18\x02 \x01(\t\"2\n\x0cSnapshotData\x12\x0e\n\x06marker\x18\x01 \x01(\t\x12\x12\n\nnum_retain\x18\x02 \x01(\r\"\r\n\x0bIncremental\"\n\n\x08Recreate\"\xbc\x01\n\x05Style\x12:\n\x0bincremental\x18\x01 \x01(\x0b\x32#.fennel.proto.connector.IncrementalH\x00\x12\x34\n\x08recreate\x18\x02 \x01(\x0b\x32 .fennel.proto.connector.RecreateH\x00\x12\x38\n\x08snapshot\x18\x03 \x01(\x0b\x32$.fennel.proto.connector.SnapshotDataH\x00\x42\x07\n\x05Style\"\x91\x06\n\x06Source\x12/\n\x05table\x18\x01 \x01(\x0b\x32 .fennel.proto.connector.ExtTable\x12\x0f\n\x07\x64\x61taset\x18\x02 \x01(\t\x12\x12\n\nds_version\x18\x03 \x01(\r\x12(\n\x05\x65very\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\x13\n\x06\x63ursor\x18\x05 
\x01(\tH\x00\x88\x01\x01\x12+\n\x08\x64isorder\x18\x06 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\x17\n\x0ftimestamp_field\x18\x07 \x01(\t\x12\x30\n\x03\x63\x64\x63\x18\x08 \x01(\x0e\x32#.fennel.proto.connector.CDCStrategy\x12\x31\n\rstarting_from\x18\t \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12=\n\x08pre_proc\x18\n \x03(\x0b\x32+.fennel.proto.connector.Source.PreProcEntry\x12\x0f\n\x07version\x18\x0b \x01(\r\x12\x0f\n\x07\x62ounded\x18\x0c \x01(\x08\x12\x30\n\x08idleness\x18\r \x01(\x0b\x32\x19.google.protobuf.DurationH\x01\x88\x01\x01\x12)\n\x05until\x18\x0e \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x30\n\x06\x66ilter\x18\x0f \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCodeH\x02\x88\x01\x01\x12H\n\x11sampling_strategy\x18\x10 \x01(\x0b\x32(.fennel.proto.connector.SamplingStrategyH\x03\x88\x01\x01\x1aT\n\x0cPreProcEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.fennel.proto.connector.PreProcValue:\x02\x38\x01\x42\t\n\x07_cursorB\x0b\n\t_idlenessB\t\n\x07_filterB\x14\n\x12_sampling_strategy\"\xee\x03\n\x04Sink\x12/\n\x05table\x18\x01 \x01(\x0b\x32 .fennel.proto.connector.ExtTable\x12\x0f\n\x07\x64\x61taset\x18\x02 \x01(\t\x12\x12\n\nds_version\x18\x03 \x01(\r\x12\x35\n\x03\x63\x64\x63\x18\x04 \x01(\x0e\x32#.fennel.proto.connector.CDCStrategyH\x00\x88\x01\x01\x12(\n\x05\x65very\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12/\n\x03how\x18\x06 \x01(\x0b\x32\x1d.fennel.proto.connector.StyleH\x01\x88\x01\x01\x12\x0e\n\x06\x63reate\x18\x07 \x01(\x08\x12:\n\x07renames\x18\x08 \x03(\x0b\x32).fennel.proto.connector.Sink.RenamesEntry\x12.\n\x05since\x18\t \x01(\x0b\x32\x1a.google.protobuf.TimestampH\x02\x88\x01\x01\x12.\n\x05until\x18\n \x01(\x0b\x32\x1a.google.protobuf.TimestampH\x03\x88\x01\x01\x1a.\n\x0cRenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\x42\x06\n\x04_cdcB\x06\n\x04_howB\x08\n\x06_sinceB\x08\n\x06_until*K\n\x0b\x43\x44\x43Strategy\x12\n\n\x06\x41ppend\x10\x00\x12\n\n\x06Upsert\x10\x01\x12\x0c\n\x08\x44\x65\x62\x65zium\x10\x02\x12\n\n\x06Native\x10\x03\x12\n\n\x06\x44\x65lete\x10\x04\x62\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -37,92 +37,94 @@
_globals['_SOURCE_PREPROCENTRY']._serialized_options = b'8\001'
_globals['_SINK_RENAMESENTRY']._loaded_options = None
_globals['_SINK_RENAMESENTRY']._serialized_options = b'8\001'
- _globals['_CDCSTRATEGY']._serialized_start=8147
- _globals['_CDCSTRATEGY']._serialized_end=8222
+ _globals['_CDCSTRATEGY']._serialized_start=8308
+ _globals['_CDCSTRATEGY']._serialized_end=8383
_globals['_EXTDATABASE']._serialized_start=207
_globals['_EXTDATABASE']._serialized_end=859
- _globals['_PUBSUBFORMAT']._serialized_start=861
- _globals['_PUBSUBFORMAT']._serialized_end=938
- _globals['_KAFKAFORMAT']._serialized_start=941
- _globals['_KAFKAFORMAT']._serialized_end=1129
- _globals['_JSONFORMAT']._serialized_start=1131
- _globals['_JSONFORMAT']._serialized_end=1143
- _globals['_AVROFORMAT']._serialized_start=1145
- _globals['_AVROFORMAT']._serialized_end=1228
- _globals['_PROTOBUFFORMAT']._serialized_start=1230
- _globals['_PROTOBUFFORMAT']._serialized_end=1317
- _globals['_REFERENCE']._serialized_start=1320
- _globals['_REFERENCE']._serialized_end=1542
- _globals['_REFERENCE_EXTDBTYPE']._serialized_start=1395
- _globals['_REFERENCE_EXTDBTYPE']._serialized_end=1542
- _globals['_WEBHOOK']._serialized_start=1544
- _globals['_WEBHOOK']._serialized_end=1613
- _globals['_MYSQL']._serialized_start=1616
- _globals['_MYSQL']._serialized_end=1884
- _globals['_POSTGRES']._serialized_start=1887
- _globals['_POSTGRES']._serialized_end=2158
- _globals['_S3']._serialized_start=2161
- _globals['_S3']._serialized_end=2465
- _globals['_BIGQUERY']._serialized_start=2468
- _globals['_BIGQUERY']._serialized_end=2650
- _globals['_SNOWFLAKE']._serialized_start=2653
- _globals['_SNOWFLAKE']._serialized_end=2942
- _globals['_KAFKA']._serialized_start=2945
- _globals['_KAFKA']._serialized_end=3322
- _globals['_KINESIS']._serialized_start=3324
- _globals['_KINESIS']._serialized_end=3351
- _globals['_CREDENTIALS']._serialized_start=3354
- _globals['_CREDENTIALS']._serialized_end=3565
- _globals['_REDSHIFTAUTHENTICATION']._serialized_start=3567
- _globals['_REDSHIFTAUTHENTICATION']._serialized_end=3692
- _globals['_REDSHIFT']._serialized_start=3695
- _globals['_REDSHIFT']._serialized_end=3848
- _globals['_MONGO']._serialized_start=3851
- _globals['_MONGO']._serialized_end=4084
- _globals['_PUBSUB']._serialized_start=4087
- _globals['_PUBSUB']._serialized_end=4247
- _globals['_EXTTABLE']._serialized_start=4250
- _globals['_EXTTABLE']._serialized_end=4954
- _globals['_MYSQLTABLE']._serialized_start=4956
- _globals['_MYSQLTABLE']._serialized_end=5037
- _globals['_POSTGRESTABLE']._serialized_start=5039
- _globals['_POSTGRESTABLE']._serialized_end=5161
- _globals['_S3TABLE']._serialized_start=5164
- _globals['_S3TABLE']._serialized_end=5411
- _globals['_KAFKATOPIC']._serialized_start=5414
- _globals['_KAFKATOPIC']._serialized_end=5543
- _globals['_BIGQUERYTABLE']._serialized_start=5545
- _globals['_BIGQUERYTABLE']._serialized_end=5629
- _globals['_SNOWFLAKETABLE']._serialized_start=5631
- _globals['_SNOWFLAKETABLE']._serialized_end=5716
- _globals['_WEBHOOKENDPOINT']._serialized_start=5719
- _globals['_WEBHOOKENDPOINT']._serialized_end=5848
- _globals['_KINESISSTREAM']._serialized_start=5851
- _globals['_KINESISSTREAM']._serialized_end=6062
- _globals['_REDSHIFTTABLE']._serialized_start=6064
- _globals['_REDSHIFTTABLE']._serialized_end=6148
- _globals['_PUBSUBTOPIC']._serialized_start=6151
- _globals['_PUBSUBTOPIC']._serialized_end=6285
- _globals['_PREPROCVALUE']._serialized_start=6288
- _globals['_PREPROCVALUE']._serialized_end=6593
- _globals['_PREPROCVALUE_EVAL']._serialized_start=6424
- _globals['_PREPROCVALUE_EVAL']._serialized_end=6582
- _globals['_MONGOCOLLECTION']._serialized_start=6595
- _globals['_MONGOCOLLECTION']._serialized_end=6686
- _globals['_SNAPSHOTDATA']._serialized_start=6688
- _globals['_SNAPSHOTDATA']._serialized_end=6738
- _globals['_INCREMENTAL']._serialized_start=6740
- _globals['_INCREMENTAL']._serialized_end=6753
- _globals['_RECREATE']._serialized_start=6755
- _globals['_RECREATE']._serialized_end=6765
- _globals['_STYLE']._serialized_start=6768
- _globals['_STYLE']._serialized_end=6956
- _globals['_SOURCE']._serialized_start=6959
- _globals['_SOURCE']._serialized_end=7648
- _globals['_SOURCE_PREPROCENTRY']._serialized_start=7529
- _globals['_SOURCE_PREPROCENTRY']._serialized_end=7613
- _globals['_SINK']._serialized_start=7651
- _globals['_SINK']._serialized_end=8145
- _globals['_SINK_RENAMESENTRY']._serialized_start=8063
- _globals['_SINK_RENAMESENTRY']._serialized_end=8109
+ _globals['_SAMPLINGSTRATEGY']._serialized_start=861
+ _globals['_SAMPLINGSTRATEGY']._serialized_end=924
+ _globals['_PUBSUBFORMAT']._serialized_start=926
+ _globals['_PUBSUBFORMAT']._serialized_end=1003
+ _globals['_KAFKAFORMAT']._serialized_start=1006
+ _globals['_KAFKAFORMAT']._serialized_end=1194
+ _globals['_JSONFORMAT']._serialized_start=1196
+ _globals['_JSONFORMAT']._serialized_end=1208
+ _globals['_AVROFORMAT']._serialized_start=1210
+ _globals['_AVROFORMAT']._serialized_end=1293
+ _globals['_PROTOBUFFORMAT']._serialized_start=1295
+ _globals['_PROTOBUFFORMAT']._serialized_end=1382
+ _globals['_REFERENCE']._serialized_start=1385
+ _globals['_REFERENCE']._serialized_end=1607
+ _globals['_REFERENCE_EXTDBTYPE']._serialized_start=1460
+ _globals['_REFERENCE_EXTDBTYPE']._serialized_end=1607
+ _globals['_WEBHOOK']._serialized_start=1609
+ _globals['_WEBHOOK']._serialized_end=1678
+ _globals['_MYSQL']._serialized_start=1681
+ _globals['_MYSQL']._serialized_end=1949
+ _globals['_POSTGRES']._serialized_start=1952
+ _globals['_POSTGRES']._serialized_end=2223
+ _globals['_S3']._serialized_start=2226
+ _globals['_S3']._serialized_end=2530
+ _globals['_BIGQUERY']._serialized_start=2533
+ _globals['_BIGQUERY']._serialized_end=2715
+ _globals['_SNOWFLAKE']._serialized_start=2718
+ _globals['_SNOWFLAKE']._serialized_end=3007
+ _globals['_KAFKA']._serialized_start=3010
+ _globals['_KAFKA']._serialized_end=3387
+ _globals['_KINESIS']._serialized_start=3389
+ _globals['_KINESIS']._serialized_end=3416
+ _globals['_CREDENTIALS']._serialized_start=3419
+ _globals['_CREDENTIALS']._serialized_end=3630
+ _globals['_REDSHIFTAUTHENTICATION']._serialized_start=3632
+ _globals['_REDSHIFTAUTHENTICATION']._serialized_end=3757
+ _globals['_REDSHIFT']._serialized_start=3760
+ _globals['_REDSHIFT']._serialized_end=3913
+ _globals['_MONGO']._serialized_start=3916
+ _globals['_MONGO']._serialized_end=4149
+ _globals['_PUBSUB']._serialized_start=4152
+ _globals['_PUBSUB']._serialized_end=4312
+ _globals['_EXTTABLE']._serialized_start=4315
+ _globals['_EXTTABLE']._serialized_end=5019
+ _globals['_MYSQLTABLE']._serialized_start=5021
+ _globals['_MYSQLTABLE']._serialized_end=5102
+ _globals['_POSTGRESTABLE']._serialized_start=5104
+ _globals['_POSTGRESTABLE']._serialized_end=5226
+ _globals['_S3TABLE']._serialized_start=5229
+ _globals['_S3TABLE']._serialized_end=5476
+ _globals['_KAFKATOPIC']._serialized_start=5479
+ _globals['_KAFKATOPIC']._serialized_end=5608
+ _globals['_BIGQUERYTABLE']._serialized_start=5610
+ _globals['_BIGQUERYTABLE']._serialized_end=5694
+ _globals['_SNOWFLAKETABLE']._serialized_start=5696
+ _globals['_SNOWFLAKETABLE']._serialized_end=5781
+ _globals['_WEBHOOKENDPOINT']._serialized_start=5784
+ _globals['_WEBHOOKENDPOINT']._serialized_end=5913
+ _globals['_KINESISSTREAM']._serialized_start=5916
+ _globals['_KINESISSTREAM']._serialized_end=6127
+ _globals['_REDSHIFTTABLE']._serialized_start=6129
+ _globals['_REDSHIFTTABLE']._serialized_end=6213
+ _globals['_PUBSUBTOPIC']._serialized_start=6216
+ _globals['_PUBSUBTOPIC']._serialized_end=6350
+ _globals['_PREPROCVALUE']._serialized_start=6353
+ _globals['_PREPROCVALUE']._serialized_end=6658
+ _globals['_PREPROCVALUE_EVAL']._serialized_start=6489
+ _globals['_PREPROCVALUE_EVAL']._serialized_end=6647
+ _globals['_MONGOCOLLECTION']._serialized_start=6660
+ _globals['_MONGOCOLLECTION']._serialized_end=6751
+ _globals['_SNAPSHOTDATA']._serialized_start=6753
+ _globals['_SNAPSHOTDATA']._serialized_end=6803
+ _globals['_INCREMENTAL']._serialized_start=6805
+ _globals['_INCREMENTAL']._serialized_end=6818
+ _globals['_RECREATE']._serialized_start=6820
+ _globals['_RECREATE']._serialized_end=6830
+ _globals['_STYLE']._serialized_start=6833
+ _globals['_STYLE']._serialized_end=7021
+ _globals['_SOURCE']._serialized_start=7024
+ _globals['_SOURCE']._serialized_end=7809
+ _globals['_SOURCE_PREPROCENTRY']._serialized_start=7668
+ _globals['_SOURCE_PREPROCENTRY']._serialized_end=7752
+ _globals['_SINK']._serialized_start=7812
+ _globals['_SINK']._serialized_end=8306
+ _globals['_SINK_RENAMESENTRY']._serialized_start=8224
+ _globals['_SINK_RENAMESENTRY']._serialized_end=8270
# @@protoc_insertion_point(module_scope)
diff --git a/fennel/gen/connector_pb2.pyi b/fennel/gen/connector_pb2.pyi
index ca83dab06..06ebd48f8 100644
--- a/fennel/gen/connector_pb2.pyi
+++ b/fennel/gen/connector_pb2.pyi
@@ -129,6 +129,26 @@ class ExtDatabase(google.protobuf.message.Message):
global___ExtDatabase = ExtDatabase
+@typing_extensions.final
+class SamplingStrategy(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ SAMPLING_RATE_FIELD_NUMBER: builtins.int
+ COLUMNS_USED_FIELD_NUMBER: builtins.int
+ sampling_rate: builtins.float
+ @property
+ def columns_used(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
+ """columns_used is a list of columns used for sampling"""
+ def __init__(
+ self,
+ *,
+ sampling_rate: builtins.float = ...,
+ columns_used: collections.abc.Iterable[builtins.str] | None = ...,
+ ) -> None: ...
+ def ClearField(self, field_name: typing_extensions.Literal["columns_used", b"columns_used", "sampling_rate", b"sampling_rate"]) -> None: ...
+
+global___SamplingStrategy = SamplingStrategy
+
@typing_extensions.final
class PubSubFormat(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
@@ -225,7 +245,7 @@ class Reference(google.protobuf.message.Message):
ValueType = typing.NewType("ValueType", builtins.int)
V: typing_extensions.TypeAlias = ValueType
- class _ExtDBTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Reference._ExtDBType.ValueType], builtins.type): # noqa: F821
+ class _ExtDBTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Reference._ExtDBType.ValueType], builtins.type):
DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
MYSQL: Reference._ExtDBType.ValueType # 0
POSTGRES: Reference._ExtDBType.ValueType # 1
@@ -1179,6 +1199,7 @@ class Source(google.protobuf.message.Message):
IDLENESS_FIELD_NUMBER: builtins.int
UNTIL_FIELD_NUMBER: builtins.int
FILTER_FIELD_NUMBER: builtins.int
+ SAMPLING_STRATEGY_FIELD_NUMBER: builtins.int
@property
def table(self) -> global___ExtTable: ...
dataset: builtins.str
@@ -1202,6 +1223,8 @@ class Source(google.protobuf.message.Message):
def until(self) -> google.protobuf.timestamp_pb2.Timestamp: ...
@property
def filter(self) -> pycode_pb2.PyCode: ...
+ @property
+ def sampling_strategy(self) -> global___SamplingStrategy: ...
def __init__(
self,
*,
@@ -1220,15 +1243,18 @@ class Source(google.protobuf.message.Message):
idleness: google.protobuf.duration_pb2.Duration | None = ...,
until: google.protobuf.timestamp_pb2.Timestamp | None = ...,
filter: pycode_pb2.PyCode | None = ...,
+ sampling_strategy: global___SamplingStrategy | None = ...,
) -> None: ...
- def HasField(self, field_name: typing_extensions.Literal["_cursor", b"_cursor", "_filter", b"_filter", "_idleness", b"_idleness", "cursor", b"cursor", "disorder", b"disorder", "every", b"every", "filter", b"filter", "idleness", b"idleness", "starting_from", b"starting_from", "table", b"table", "until", b"until"]) -> builtins.bool: ...
- def ClearField(self, field_name: typing_extensions.Literal["_cursor", b"_cursor", "_filter", b"_filter", "_idleness", b"_idleness", "bounded", b"bounded", "cdc", b"cdc", "cursor", b"cursor", "dataset", b"dataset", "disorder", b"disorder", "ds_version", b"ds_version", "every", b"every", "filter", b"filter", "idleness", b"idleness", "pre_proc", b"pre_proc", "starting_from", b"starting_from", "table", b"table", "timestamp_field", b"timestamp_field", "until", b"until", "version", b"version"]) -> None: ...
+ def HasField(self, field_name: typing_extensions.Literal["_cursor", b"_cursor", "_filter", b"_filter", "_idleness", b"_idleness", "_sampling_strategy", b"_sampling_strategy", "cursor", b"cursor", "disorder", b"disorder", "every", b"every", "filter", b"filter", "idleness", b"idleness", "sampling_strategy", b"sampling_strategy", "starting_from", b"starting_from", "table", b"table", "until", b"until"]) -> builtins.bool: ...
+ def ClearField(self, field_name: typing_extensions.Literal["_cursor", b"_cursor", "_filter", b"_filter", "_idleness", b"_idleness", "_sampling_strategy", b"_sampling_strategy", "bounded", b"bounded", "cdc", b"cdc", "cursor", b"cursor", "dataset", b"dataset", "disorder", b"disorder", "ds_version", b"ds_version", "every", b"every", "filter", b"filter", "idleness", b"idleness", "pre_proc", b"pre_proc", "sampling_strategy", b"sampling_strategy", "starting_from", b"starting_from", "table", b"table", "timestamp_field", b"timestamp_field", "until", b"until", "version", b"version"]) -> None: ...
@typing.overload
def WhichOneof(self, oneof_group: typing_extensions.Literal["_cursor", b"_cursor"]) -> typing_extensions.Literal["cursor"] | None: ...
@typing.overload
def WhichOneof(self, oneof_group: typing_extensions.Literal["_filter", b"_filter"]) -> typing_extensions.Literal["filter"] | None: ...
@typing.overload
def WhichOneof(self, oneof_group: typing_extensions.Literal["_idleness", b"_idleness"]) -> typing_extensions.Literal["idleness"] | None: ...
+ @typing.overload
+ def WhichOneof(self, oneof_group: typing_extensions.Literal["_sampling_strategy", b"_sampling_strategy"]) -> typing_extensions.Literal["sampling_strategy"] | None: ...
global___Source = Source
diff --git a/fennel/gen/dataset_pb2.pyi b/fennel/gen/dataset_pb2.pyi
index 146a4b2af..22f8d2ff9 100644
--- a/fennel/gen/dataset_pb2.pyi
+++ b/fennel/gen/dataset_pb2.pyi
@@ -337,7 +337,7 @@ class Join(google.protobuf.message.Message):
ValueType = typing.NewType("ValueType", builtins.int)
V: typing_extensions.TypeAlias = ValueType
- class _HowEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Join._How.ValueType], builtins.type): # noqa: F821
+ class _HowEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Join._How.ValueType], builtins.type):
DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
Left: Join._How.ValueType # 0
Inner: Join._How.ValueType # 1
diff --git a/fennel/gen/expectations_pb2.pyi b/fennel/gen/expectations_pb2.pyi
index 28a9a9a95..ae399ad75 100644
--- a/fennel/gen/expectations_pb2.pyi
+++ b/fennel/gen/expectations_pb2.pyi
@@ -27,7 +27,7 @@ class Expectations(google.protobuf.message.Message):
ValueType = typing.NewType("ValueType", builtins.int)
V: typing_extensions.TypeAlias = ValueType
- class _EntityTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Expectations._EntityType.ValueType], builtins.type): # noqa: F821
+ class _EntityTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Expectations._EntityType.ValueType], builtins.type):
DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
Dataset: Expectations._EntityType.ValueType # 0
Featureset: Expectations._EntityType.ValueType # 1
diff --git a/fennel/internal_lib/to_proto/to_proto.py b/fennel/internal_lib/to_proto/to_proto.py
index 61426f0e6..b2dcde7ea 100644
--- a/fennel/internal_lib/to_proto/to_proto.py
+++ b/fennel/internal_lib/to_proto/to_proto.py
@@ -35,7 +35,7 @@
import fennel.gen.schema_registry_pb2 as schema_registry_proto
import fennel.gen.services_pb2 as services_proto
from fennel.connectors import kinesis
-from fennel.connectors.connectors import CSV, SnapshotData
+from fennel.connectors.connectors import CSV, SnapshotData, Sample, PreProcValue
from fennel.datasets import Dataset, Pipeline, Field
from fennel.datasets.datasets import (
indices_from_ds,
@@ -938,6 +938,9 @@ def _webhook_to_source_proto(
cdc=to_cdc_proto(connector.cdc),
pre_proc=_pre_proc_to_proto(dataset, connector.pre_proc),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -981,6 +984,9 @@ def _kafka_conn_to_source_proto(
starting_from=_to_timestamp_proto(connector.since),
until=_to_timestamp_proto(connector.until),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1084,6 +1090,9 @@ def _s3_conn_to_source_proto(
cdc=to_cdc_proto(connector.cdc),
pre_proc=_pre_proc_to_proto(dataset, connector.pre_proc),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1104,6 +1113,88 @@ def _renames_to_proto(renames: Optional[Dict[str, str]]) -> Mapping[str, str]:
return renames
def get_sampling_rate(sample: Optional[Union[float, Sample]]) -> float:
    """Extract and validate the sampling rate from a ``sample`` spec.

    Accepts either a bare numeric rate (e.g. ``0.2`` — ints such as ``0``
    or ``1`` are allowed too) or a ``Sample`` object carrying a ``rate``
    attribute. The rate must lie in the closed interval [0, 1].

    Returns:
        The validated sampling rate as a float.

    Raises:
        ValueError: if the rate is outside [0, 1], or ``sample`` is
            neither a number nor a ``Sample``.
    """
    # bool is a subclass of int — reject it explicitly so True/False
    # don't silently become rates 1.0/0.0.
    if isinstance(sample, (int, float)) and not isinstance(sample, bool):
        if sample < 0 or sample > 1:
            raise ValueError("Sample rate should be between 0 and 1")
        return float(sample)
    if isinstance(sample, Sample):
        if sample.rate < 0 or sample.rate > 1:
            raise ValueError("Sample rate should be between 0 and 1")
        return sample.rate
    raise ValueError(f"Invalid sample type: {type(sample)}")
+
+
def get_sampling_columns(
    sample: Optional[Union[float, Sample]],
    dataset: Dataset,
    preproc: Optional[Dict[str, PreProcValue]],
):
    """Resolve the list of columns used to hash-sample *dataset*.

    Preproc columns and the timestamp field are never eligible. When the
    user supplied explicit columns via ``Sample.using``, each one is
    validated against the dataset schema and the disallowed set;
    otherwise the dataset's key fields (or, when there are none, all of
    its fields) minus the disallowed columns are used. The result is
    always sorted for a deterministic proto representation.
    """
    blocked = [dataset.timestamp_field]
    if preproc is not None:
        blocked.extend(preproc.keys())

    # Explicit user-chosen columns: validate every one of them.
    if isinstance(sample, Sample) and len(sample.using) != 0:
        known = [str(f) for f in dataset.fields]
        for name in sample.using:
            if name == dataset.timestamp_field:
                raise ValueError(
                    f"Timestamp column: {name} cannot be part of sampling columns"
                )
            if name in blocked:
                raise ValueError(
                    f"Column {name} is part of preproc so cannot be used for sampling"
                )
            if name not in known:
                raise ValueError(
                    f"Column {name} is not part of dataset columns"
                )
        return sorted(sample.using)

    # No explicit columns: fall back to key fields when the dataset has
    # them, otherwise consider every field.
    if len(dataset.key_fields) != 0:
        candidates = dataset.key_fields
        empty_msg = (
            "No columns left to sample from. all key columns are part of preproc"
        )
    else:
        candidates = [str(f) for f in dataset.fields]
        empty_msg = (
            "No columns left to sample from. all columns are part of preproc"
        )
    chosen = sorted(name for name in candidates if name not in blocked)
    if len(chosen) == 0:
        raise ValueError(empty_msg)
    return chosen
+
+
def _sample_to_proto(
    sample: Optional[Union[float, Sample]],
    dataset: Dataset,
    preproc: Optional[Dict[str, PreProcValue]],
) -> Optional[connector_proto.SamplingStrategy]:
    """Convert a connector's ``sample`` spec into a SamplingStrategy proto.

    Returns ``None`` when no sampling was requested; otherwise validates
    the rate and resolves the sampling columns before building the proto.
    """
    if sample is None:
        return None
    rate = get_sampling_rate(sample)
    columns = get_sampling_columns(sample, dataset, preproc)
    return connector_proto.SamplingStrategy(
        sampling_rate=rate,
        columns_used=columns,
    )
+
+
def _how_to_proto(
how: Optional[Literal["incremental", "recreate"] | SnapshotData]
) -> Optional[connector_proto.Style]:
@@ -1300,6 +1391,9 @@ def _bigquery_conn_to_source_proto(
cdc=to_cdc_proto(connector.cdc),
pre_proc=_pre_proc_to_proto(dataset, connector.pre_proc),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1377,6 +1471,9 @@ def _redshift_conn_to_source_proto(
starting_from=_to_timestamp_proto(connector.since),
until=_to_timestamp_proto(connector.until),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1478,6 +1575,9 @@ def _mongo_conn_to_source_proto(
starting_from=_to_timestamp_proto(connector.since),
until=_to_timestamp_proto(connector.until),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1554,6 +1654,9 @@ def _pubsub_conn_to_source_proto(
cdc=to_cdc_proto(connector.cdc),
pre_proc=_pre_proc_to_proto(dataset, connector.pre_proc),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1641,6 +1744,9 @@ def _snowflake_conn_to_source_proto(
starting_from=_to_timestamp_proto(connector.since),
until=_to_timestamp_proto(connector.until),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1749,6 +1855,9 @@ def _mysql_conn_to_source_proto(
until=_to_timestamp_proto(connector.until),
pre_proc=_pre_proc_to_proto(dataset, connector.pre_proc),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1844,6 +1953,9 @@ def _pg_conn_to_source_proto(
until=_to_timestamp_proto(connector.until),
pre_proc=_pre_proc_to_proto(dataset, connector.pre_proc),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness
@@ -1933,6 +2045,9 @@ def _kinesis_conn_to_source_proto(
until=_to_timestamp_proto(connector.until),
pre_proc=_pre_proc_to_proto(dataset, connector.pre_proc),
bounded=connector.bounded,
+ sampling_strategy=_sample_to_proto(
+ connector.sample, dataset, connector.pre_proc
+ ),
idleness=(
to_duration_proto(connector.idleness)
if connector.idleness