-
-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(subscriptions): Use RPC in subscriptions pipeline #6499
Changes from 31 commits
fb123b4
0f862e9
0e90ffc
efa8aaa
69cfc87
01f952c
f532342
2cd4713
6c05e65
958926d
9a8f16e
67372da
c16e31b
d152941
76a7f0d
5726ade
66270bf
baaa03c
7191833
c99d49c
17a49fe
83f1693
49e6e9b
e7fa5fb
15d29f6
df861f5
356d368
baf5566
6e9424a
6321e0c
f7f8448
f4b81d3
dc11a7e
a642b1b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,24 @@ | ||
import base64 | ||
import json | ||
from datetime import datetime | ||
from typing import cast | ||
|
||
import rapidjson | ||
from arroyo.backends.kafka import KafkaPayload | ||
from google.protobuf.message import Message as ProtobufMessage | ||
from sentry_kafka_schemas.schema_types import events_subscription_results_v1 | ||
|
||
from snuba.datasets.entities.entity_key import EntityKey | ||
from snuba.query.exceptions import InvalidQueryException | ||
from snuba.subscriptions.data import ( | ||
RPCSubscriptionData, | ||
ScheduledSubscriptionTask, | ||
SnQLSubscriptionData, | ||
Subscription, | ||
SubscriptionData, | ||
SubscriptionIdentifier, | ||
SubscriptionTaskResult, | ||
SubscriptionType, | ||
SubscriptionWithMetadata, | ||
) | ||
from snuba.utils.codecs import Codec, Encoder | ||
|
@@ -33,6 +37,9 @@ def decode(self, value: bytes) -> SubscriptionData: | |
except json.JSONDecodeError: | ||
raise InvalidQueryException("Invalid JSON") | ||
|
||
if data.get("subscription_type") == SubscriptionType.RPC.value: | ||
return RPCSubscriptionData.from_dict(data, self.entity_key) | ||
|
||
return SnQLSubscriptionData.from_dict(data, self.entity_key) | ||
|
||
|
||
|
@@ -42,11 +49,22 @@ def encode(self, value: SubscriptionTaskResult) -> KafkaPayload: | |
subscription_id = str(subscription.identifier) | ||
request, result = value.result | ||
|
||
if isinstance(request, ProtobufMessage): | ||
original_body = { | ||
"request": base64.b64encode(request.SerializeToString()).decode( | ||
"utf-8" | ||
), | ||
"request_name": request.__class__.__name__, | ||
"request_version": request.__class__.__module__.split(".", 3)[2], | ||
} | ||
else: | ||
original_body = {**request.original_body} | ||
|
||
data: events_subscription_results_v1.SubscriptionResult = { | ||
"version": 3, | ||
"payload": { | ||
"subscription_id": subscription_id, | ||
"request": {**request.original_body}, | ||
"request": original_body, | ||
"result": { | ||
"data": result["data"], | ||
"meta": result["meta"], | ||
|
@@ -98,15 +116,20 @@ def decode(self, value: KafkaPayload) -> ScheduledSubscriptionTask: | |
|
||
entity_key = EntityKey(scheduled_subscription_dict["entity"]) | ||
|
||
data = scheduled_subscription_dict["task"]["data"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI: the docstring of this class should probably change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
subscription: SubscriptionData | ||
if data.get("subscription_type") == SubscriptionType.RPC.value: | ||
subscription = RPCSubscriptionData.from_dict(data, entity_key) | ||
else: | ||
subscription = SnQLSubscriptionData.from_dict(data, entity_key) | ||
|
||
return ScheduledSubscriptionTask( | ||
datetime.fromisoformat(scheduled_subscription_dict["timestamp"]), | ||
SubscriptionWithMetadata( | ||
entity_key, | ||
Subscription( | ||
SubscriptionIdentifier.from_string(subscription_identifier), | ||
SnQLSubscriptionData.from_dict( | ||
scheduled_subscription_dict["task"]["data"], entity_key | ||
), | ||
subscription, | ||
), | ||
scheduled_subscription_dict["tick_upper_offset"], | ||
), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,22 +5,25 @@ | |
from abc import ABC, abstractmethod | ||
from concurrent.futures import Future | ||
from dataclasses import dataclass, field | ||
from datetime import datetime, timedelta | ||
from datetime import UTC, datetime, timedelta | ||
from enum import Enum | ||
from functools import partial | ||
from typing import ( | ||
Any, | ||
Generic, | ||
Iterator, | ||
List, | ||
Mapping, | ||
NamedTuple, | ||
NewType, | ||
Optional, | ||
Tuple, | ||
TypeVar, | ||
Union, | ||
) | ||
from uuid import UUID | ||
|
||
from google.protobuf.timestamp_pb2 import Timestamp | ||
from sentry_protos.snuba.v1.endpoint_create_subscription_pb2 import ( | ||
CreateSubscriptionRequest, | ||
) | ||
|
@@ -50,7 +53,11 @@ | |
from snuba.request.validation import build_request, parse_snql_query | ||
from snuba.subscriptions.utils import Tick | ||
from snuba.utils.metrics import MetricsBackend | ||
from snuba.utils.metrics.gauge import Gauge | ||
from snuba.utils.metrics.timer import Timer | ||
from snuba.web import QueryResult | ||
from snuba.web.query import run_query | ||
from snuba.web.rpc.v1.endpoint_time_series import EndpointTimeSeries | ||
|
||
SUBSCRIPTION_REFERRER = "subscription" | ||
|
||
|
@@ -79,6 +86,8 @@ class SubscriptionType(Enum): | |
|
||
PartitionId = NewType("PartitionId", int) | ||
|
||
TRequest = TypeVar("TRequest") | ||
|
||
|
||
@dataclass(frozen=True) | ||
class SubscriptionIdentifier: | ||
|
@@ -95,7 +104,7 @@ def from_string(cls, value: str) -> SubscriptionIdentifier: | |
|
||
|
||
@dataclass(frozen=True, kw_only=True) | ||
class SubscriptionData(ABC): | ||
class _SubscriptionData(ABC, Generic[TRequest]): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
project_id: int | ||
resolution_sec: int | ||
time_window_sec: int | ||
|
@@ -127,14 +136,25 @@ def build_request( | |
timer: Timer, | ||
metrics: Optional[MetricsBackend] = None, | ||
referrer: str = SUBSCRIPTION_REFERRER, | ||
) -> Request: | ||
) -> TRequest: | ||
raise NotImplementedError | ||
|
||
@abstractmethod | ||
def run_query( | ||
self, | ||
dataset: Dataset, | ||
request: TRequest, | ||
timer: Timer, | ||
robust: bool = False, | ||
concurrent_queries_gauge: Optional[Gauge] = None, | ||
) -> QueryResult: | ||
raise NotImplementedError | ||
|
||
@classmethod | ||
@abstractmethod | ||
def from_dict( | ||
cls, data: Mapping[str, Any], entity_key: EntityKey | ||
) -> SubscriptionData: | ||
) -> _SubscriptionData[TRequest]: | ||
raise NotImplementedError | ||
|
||
@abstractmethod | ||
|
@@ -143,7 +163,7 @@ def to_dict(self) -> Mapping[str, Any]: | |
|
||
|
||
@dataclass(frozen=True, kw_only=True) | ||
class RPCSubscriptionData(SubscriptionData): | ||
class RPCSubscriptionData(_SubscriptionData[TimeSeriesRequest]): | ||
""" | ||
Represents the state of an RPC subscription. | ||
""" | ||
|
@@ -192,8 +212,58 @@ def build_request( | |
timer: Timer, | ||
metrics: Optional[MetricsBackend] = None, | ||
referrer: str = SUBSCRIPTION_REFERRER, | ||
) -> Request: | ||
raise NotImplementedError | ||
) -> TimeSeriesRequest: | ||
|
||
request_class = EndpointTimeSeries().request_class()() | ||
request_class.ParseFromString(base64.b64decode(self.time_series_request)) | ||
|
||
# TODO: update it to round to the lowest granularity | ||
# rounded_ts = int(timestamp.replace(tzinfo=UTC).timestamp() / 15) * 15 | ||
Comment on lines
+220
to
+221
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this TODO still relevant? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, waiting on https://github.com/getsentry/projects/issues/364 to be finished |
||
rounded_ts = ( | ||
int(timestamp.replace(tzinfo=UTC).timestamp() / self.time_window_sec) | ||
* self.time_window_sec | ||
) | ||
rounded_start = datetime.utcfromtimestamp(rounded_ts) | ||
|
||
start_time_proto = Timestamp() | ||
start_time_proto.FromDatetime( | ||
rounded_start - timedelta(seconds=self.time_window_sec) | ||
) | ||
end_time_proto = Timestamp() | ||
end_time_proto.FromDatetime(rounded_start) | ||
request_class.meta.start_timestamp.CopyFrom(start_time_proto) | ||
request_class.meta.end_timestamp.CopyFrom(end_time_proto) | ||
|
||
request_class.granularity_secs = self.time_window_sec | ||
|
||
return request_class | ||
|
||
def run_query( | ||
self, | ||
dataset: Dataset, | ||
request: TimeSeriesRequest, | ||
timer: Timer, | ||
robust: bool = False, | ||
concurrent_queries_gauge: Optional[Gauge] = None, | ||
) -> QueryResult: | ||
response = EndpointTimeSeries().execute(request) | ||
if not response.result_timeseries: | ||
result: Result = { | ||
"meta": [], | ||
"data": [{request.aggregations[0].label: None}], | ||
"trace_output": "", | ||
} | ||
return QueryResult( | ||
result=result, extra={"stats": {}, "sql": "", "experiments": {}} | ||
) | ||
|
||
timeseries = response.result_timeseries[0] | ||
data = [{timeseries.label: timeseries.data_points[0].data}] | ||
|
||
result = {"meta": [], "data": data, "trace_output": ""} | ||
return QueryResult( | ||
result=result, extra={"stats": {}, "sql": "", "experiments": {}} | ||
) | ||
|
||
@classmethod | ||
def from_dict( | ||
|
@@ -263,7 +333,7 @@ def to_dict(self) -> Mapping[str, Any]: | |
|
||
|
||
@dataclass(frozen=True, kw_only=True) | ||
class SnQLSubscriptionData(SubscriptionData): | ||
class SnQLSubscriptionData(_SubscriptionData[Request]): | ||
""" | ||
Represents the state of a subscription. | ||
""" | ||
|
@@ -379,10 +449,26 @@ def build_request( | |
) | ||
return request | ||
|
||
def run_query( | ||
self, | ||
dataset: Dataset, | ||
request: Request, | ||
timer: Timer, | ||
robust: bool = False, | ||
concurrent_queries_gauge: Optional[Gauge] = None, | ||
) -> QueryResult: | ||
return run_query( | ||
dataset, | ||
request, | ||
timer, | ||
robust=robust, | ||
concurrent_queries_gauge=concurrent_queries_gauge, | ||
) | ||
|
||
@classmethod | ||
def from_dict( | ||
cls, data: Mapping[str, Any], entity_key: EntityKey | ||
) -> SubscriptionData: | ||
) -> SnQLSubscriptionData: | ||
entity: Entity = get_entity(entity_key) | ||
|
||
metadata = {} | ||
|
@@ -407,6 +493,7 @@ def to_dict(self) -> Mapping[str, Any]: | |
"time_window": self.time_window_sec, | ||
"resolution": self.resolution_sec, | ||
"query": self.query, | ||
"subscription_type": SubscriptionType.SNQL.value, | ||
} | ||
|
||
subscription_processors = self.entity.get_subscription_processors() | ||
|
@@ -416,6 +503,10 @@ def to_dict(self) -> Mapping[str, Any]: | |
return subscription_data_dict | ||
|
||
|
||
SubscriptionData = Union[RPCSubscriptionData, SnQLSubscriptionData] | ||
SubscriptionRequest = Union[Request, TimeSeriesRequest] | ||
|
||
|
||
class Subscription(NamedTuple): | ||
identifier: SubscriptionIdentifier | ||
data: SubscriptionData | ||
|
@@ -461,9 +552,9 @@ def find(self, tick: Tick) -> Iterator[ScheduledSubscriptionTask]: | |
|
||
class SubscriptionTaskResultFuture(NamedTuple): | ||
task: ScheduledSubscriptionTask | ||
future: Future[Tuple[Request, Result]] | ||
future: Future[Tuple[SubscriptionRequest, Result]] | ||
|
||
|
||
class SubscriptionTaskResult(NamedTuple): | ||
task: ScheduledSubscriptionTask | ||
result: Tuple[Request, Result] | ||
result: Tuple[SubscriptionRequest, Result] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔪