diff --git a/Cargo.lock b/Cargo.lock index 605afcc3f..3242c114a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6633,6 +6633,7 @@ dependencies = [ "opentelemetry-otlp", "opentelemetry-semantic-conventions", "opentelemetry_sdk", + "terminal", "tracing", "tracing-appender", "tracing-opentelemetry", @@ -6797,6 +6798,7 @@ dependencies = [ "spin-common", "spin-core", "spin-expressions", + "spin-telemetry", "spin-testing", "spin-trigger", "spin-world", diff --git a/crates/telemetry/Cargo.toml b/crates/telemetry/Cargo.toml index 659afd779..25a46692c 100644 --- a/crates/telemetry/Cargo.toml +++ b/crates/telemetry/Cargo.toml @@ -17,3 +17,4 @@ tracing-appender = "0.2.2" tracing-opentelemetry = "0.23.0" tracing-subscriber = { version = "0.3.17", features = ["env-filter", "json", "registry"] } url = "2.2.2" +terminal = { path = "../terminal" } \ No newline at end of file diff --git a/crates/telemetry/src/env.rs b/crates/telemetry/src/env.rs index e455209fc..d5d4b1b8f 100644 --- a/crates/telemetry/src/env.rs +++ b/crates/telemetry/src/env.rs @@ -1,18 +1,44 @@ -/// Returns a boolean indicating if the OTEL layer should be enabled. +use std::env::VarError; + +use opentelemetry_otlp::{ + OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, OTEL_EXPORTER_OTLP_PROTOCOL, + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, +}; + +const OTEL_SDK_DISABLED: &str = "OTEL_SDK_DISABLED"; +const OTEL_EXPORTER_OTLP_TRACES_PROTOCOL: &str = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"; +const OTEL_EXPORTER_OTLP_METRICS_PROTOCOL: &str = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"; + +/// Returns a boolean indicating if the OTEL tracing layer should be enabled. /// /// It is considered enabled if any of the following environment variables are set and not empty: /// - `OTEL_EXPORTER_OTLP_ENDPOINT` /// - `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` +/// +/// Note that this is overridden if OTEL_SDK_DISABLED is set and not empty. 
+pub(crate) fn otel_tracing_enabled() -> bool { + any_vars_set(&[ + OTEL_EXPORTER_OTLP_ENDPOINT, + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, + ]) && !otel_sdk_disabled() +} + +/// Returns a boolean indicating if the OTEL metrics layer should be enabled. +/// +/// It is considered enabled if any of the following environment variables are set and not empty: +/// - `OTEL_EXPORTER_OTLP_ENDPOINT` /// - `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` /// /// Note that this is overridden if OTEL_SDK_DISABLED is set and not empty. -pub(crate) fn otel_enabled() -> bool { - const ENABLING_VARS: &[&str] = &[ - "OTEL_EXPORTER_OTLP_ENDPOINT", - "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", - "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", - ]; - ENABLING_VARS +pub(crate) fn otel_metrics_enabled() -> bool { + any_vars_set(&[ + OTEL_EXPORTER_OTLP_ENDPOINT, + OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, + ]) && !otel_sdk_disabled() +} + +fn any_vars_set(enabling_vars: &[&str]) -> bool { + enabling_vars .iter() .any(|key| std::env::var_os(key).is_some_and(|val| !val.is_empty())) } @@ -21,7 +47,7 @@ pub(crate) fn otel_enabled() -> bool { /// /// It is considered disabled if the environment variable `OTEL_SDK_DISABLED` is set and not empty. pub(crate) fn otel_sdk_disabled() -> bool { - std::env::var_os("OTEL_SDK_DISABLED").is_some_and(|val| !val.is_empty()) + std::env::var_os(OTEL_SDK_DISABLED).is_some_and(|val| !val.is_empty()) } /// The protocol to use for OTLP exporter. @@ -34,15 +60,41 @@ pub(crate) enum OtlpProtocol { impl OtlpProtocol { /// Returns the protocol to be used for exporting traces as defined by the environment. 
pub(crate) fn traces_protocol_from_env() -> Self { - let trace_protocol = std::env::var("OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"); - let general_protocol = std::env::var("OTEL_EXPORTER_OTLP_PROTOCOL"); - let protocol = trace_protocol.unwrap_or(general_protocol.unwrap_or_default()); + Self::protocol_from_env( + std::env::var(OTEL_EXPORTER_OTLP_TRACES_PROTOCOL), + std::env::var(OTEL_EXPORTER_OTLP_PROTOCOL), + ) + } + + /// Returns the protocol to be used for exporting metrics as defined by the environment. + pub(crate) fn metrics_protocol_from_env() -> Self { + Self::protocol_from_env( + std::env::var(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL), + std::env::var(OTEL_EXPORTER_OTLP_PROTOCOL), + ) + } + + fn protocol_from_env( + specific_protocol: Result, + general_protocol: Result, + ) -> Self { + let protocol = + specific_protocol.unwrap_or(general_protocol.unwrap_or("http/protobuf".to_string())); + + static WARN_ONCE: std::sync::Once = std::sync::Once::new(); match protocol.as_str() { "grpc" => Self::Grpc, "http/protobuf" => Self::HttpProtobuf, "http/json" => Self::HttpJson, - _ => Self::HttpProtobuf, + s => { + WARN_ONCE.call_once(|| { + terminal::warn!( + "'{s}' is not a valid OTLP protocol, defaulting to http/protobuf" + ); + }); + Self::HttpProtobuf + } } } } diff --git a/crates/telemetry/src/lib.rs b/crates/telemetry/src/lib.rs index 207216582..92f2d56f3 100644 --- a/crates/telemetry/src/lib.rs +++ b/crates/telemetry/src/lib.rs @@ -1,12 +1,13 @@ use std::io::IsTerminal; -use env::otel_enabled; -use env::otel_sdk_disabled; +use env::otel_metrics_enabled; +use env::otel_tracing_enabled; use opentelemetry_sdk::propagation::TraceContextPropagator; use tracing_subscriber::{fmt, prelude::*, registry, EnvFilter, Layer}; pub mod detector; mod env; +pub mod metrics; mod propagation; mod traces; @@ -16,9 +17,34 @@ pub use propagation::inject_trace_context; /// Initializes telemetry for Spin using the [tracing] library. 
/// /// Under the hood this involves initializing a [tracing::Subscriber] with multiple [Layer]s. One -/// [Layer] emits [tracing] events to stderr, and another sends spans to an OTEL collector. +/// [Layer] emits [tracing] events to stderr, another sends spans to an OTel collector, and another +/// sends metrics to an OTel collector. /// -/// Configuration is pulled from the environment. +/// Configuration for the OTel layers is pulled from the environment. +/// +/// Examples of emitting traces from Spin: +/// +/// ```no_run +/// # use tracing::instrument; +/// # use tracing::Level; +/// #[instrument(name = "span_name", err(level = Level::INFO), fields(otel.name = "dynamically set name"))] +/// fn func_you_want_to_trace() -> anyhow::Result { +/// Ok("Hello, world!".to_string()) +/// } +/// ``` +/// +/// Some notes on tracing: +/// +/// - If you don't want the span to be collected by default emit it at a trace or debug level. +/// - Make sure you `.in_current_span()` any spawned tasks so the span context is propagated. +/// - Use the otel.name attribute to dynamically set the span name. +/// - Use the err argument to have instrument automatically handle errors. +/// +/// Examples of emitting metrics from Spin: +/// +/// ```no_run +/// spin_telemetry::metrics::monotonic_counter!(spin.metric_name = 1, metric_attribute = "value"); +/// ``` pub fn init(spin_version: String) -> anyhow::Result { // This layer will print all tracing library log messages to stderr. let fmt_layer = fmt::layer() @@ -30,19 +56,27 @@ pub fn init(spin_version: String) -> anyhow::Result { .add_directive("watchexec=off".parse()?), ); - // We only want to build the otel layer if the user passed some endpoint configuration and it wasn't explicitly disabled. - let build_otel_layer = !otel_sdk_disabled() && otel_enabled(); - let otel_layer = if build_otel_layer { - // In this case we want to set the error handler to log errors to the tracing layer. 
- opentelemetry::global::set_error_handler(otel_error_handler)?; + // Even if metrics or tracing aren't enabled we're okay to turn on the global error handler + opentelemetry::global::set_error_handler(otel_error_handler)?; + + let otel_tracing_layer = if otel_tracing_enabled() { + Some(traces::otel_tracing_layer(spin_version.clone())?) + } else { + None + }; - Some(traces::otel_tracing_layer(spin_version)?) + let otel_metrics_layer = if otel_metrics_enabled() { + Some(metrics::otel_metrics_layer(spin_version)?) } else { None }; // Build a registry subscriber with the layers we want to use. - registry().with(otel_layer).with(fmt_layer).init(); + registry() + .with(otel_tracing_layer) + .with(otel_metrics_layer) + .with(fmt_layer) + .init(); // Used to propagate trace information in the standard W3C TraceContext format. Even if the otel // layer is disabled we still want to propagate trace context. diff --git a/crates/telemetry/src/metrics.rs b/crates/telemetry/src/metrics.rs new file mode 100644 index 000000000..42de6a053 --- /dev/null +++ b/crates/telemetry/src/metrics.rs @@ -0,0 +1,121 @@ +use std::time::Duration; + +use anyhow::{bail, Result}; +use opentelemetry_otlp::MetricsExporterBuilder; +use opentelemetry_sdk::{ + metrics::{ + reader::{DefaultAggregationSelector, DefaultTemporalitySelector}, + PeriodicReader, SdkMeterProvider, + }, + resource::{EnvResourceDetector, TelemetryResourceDetector}, + runtime, Resource, +}; +use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer}; +use tracing_subscriber::{filter::Filtered, layer::Layered, EnvFilter, Registry}; + +use crate::{detector::SpinResourceDetector, env::OtlpProtocol}; + +/// Constructs a layer for the tracing subscriber that sends metrics to an OTEL collector. 
+/// +/// It pulls OTEL configuration from the environment based on the variables defined +/// [here](https://opentelemetry.io/docs/specs/otel/protocol/exporter/) and +/// [here](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration). +pub(crate) fn otel_metrics_layer(spin_version: String) -> Result { + let resource = Resource::from_detectors( + Duration::from_secs(5), + vec![ + // Set service.name from env OTEL_SERVICE_NAME > env OTEL_RESOURCE_ATTRIBUTES > spin + // Set service.version from Spin metadata + Box::new(SpinResourceDetector::new(spin_version)), + // Sets fields from env OTEL_RESOURCE_ATTRIBUTES + Box::new(EnvResourceDetector::new()), + // Sets telemetry.sdk{name, language, version} + Box::new(TelemetryResourceDetector), + ], + ); + + // This will configure the exporter based on the OTEL_EXPORTER_* environment variables. The + // transport (grpc or http/protobuf) is selected from OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, + // falling back to OTEL_EXPORTER_OTLP_PROTOCOL. The http/json protocol is not supported and + // is rejected with an error. + let exporter_builder: MetricsExporterBuilder = match OtlpProtocol::metrics_protocol_from_env() { + OtlpProtocol::Grpc => opentelemetry_otlp::new_exporter().tonic().into(), + OtlpProtocol::HttpProtobuf => opentelemetry_otlp::new_exporter().http().into(), + OtlpProtocol::HttpJson => bail!("http/json OTLP protocol is not supported"), + }; + let exporter = exporter_builder.build_metrics_exporter( + Box::new(DefaultTemporalitySelector::new()), + Box::new(DefaultAggregationSelector::new()), + )?; + + let reader = PeriodicReader::builder(exporter, runtime::Tokio).build(); + let meter_provider = SdkMeterProvider::builder() + .with_reader(reader) + .with_resource(resource) + .build(); + + Ok(MetricsLayer::new(meter_provider)) +} + +#[macro_export] +/// Records an increment to the named counter with the given attributes. 
+/// +/// The increment may only be an i64 or f64. You must not mix types for the same metric. +/// +/// ```no_run +/// # use spin_telemetry::metrics::counter; +/// counter!(spin.metric_name = 1, metric_attribute = "value"); +/// ``` +macro_rules! counter { + ($metric:ident $(. $suffixes:ident)* = $metric_value:expr $(, $attrs:ident=$values:expr)*) => { + tracing::trace!(counter.$metric $(. $suffixes)* = $metric_value $(, $attrs=$values)*); + } +} + +#[macro_export] +/// Adds an additional value to the distribution of the named histogram with the given attributes. +/// +/// The recorded value may only be an i64 or f64. You must not mix types for the same metric. +/// +/// ```no_run +/// # use spin_telemetry::metrics::histogram; +/// histogram!(spin.metric_name = 1.5, metric_attribute = "value"); +/// ``` +macro_rules! histogram { + ($metric:ident $(. $suffixes:ident)* = $metric_value:expr $(, $attrs:ident=$values:expr)*) => { + tracing::trace!(histogram.$metric $(. $suffixes)* = $metric_value $(, $attrs=$values)*); + } +} + +#[macro_export] +/// Records an increment to the named monotonic counter with the given attributes. +/// +/// The increment may only be a positive i64 or f64. You must not mix types for the same metric. +/// +/// ```no_run +/// # use spin_telemetry::metrics::monotonic_counter; +/// monotonic_counter!(spin.metric_name = 1, metric_attribute = "value"); +/// ``` +macro_rules! monotonic_counter { + ($metric:ident $(. $suffixes:ident)* = $metric_value:expr $(, $attrs:ident=$values:expr)*) => { + tracing::trace!(monotonic_counter.$metric $(. $suffixes)* = $metric_value $(, $attrs=$values)*); + } +} + +pub use counter; +pub use histogram; +pub use monotonic_counter; + +/// This really large type alias is required to make the registry.with() pattern happy. 
+type CustomMetricsLayer = MetricsLayer< + Layered< + Option< + Filtered< + OpenTelemetryLayer, + EnvFilter, + Registry, + >, + >, + Registry, + >, +>; diff --git a/crates/telemetry/src/traces.rs b/crates/telemetry/src/traces.rs index 80d6e0c08..2112fb096 100644 --- a/crates/telemetry/src/traces.rs +++ b/crates/telemetry/src/traces.rs @@ -1,8 +1,7 @@ use std::time::Duration; use anyhow::bail; -use opentelemetry_otlp::{SpanExporterBuilder, WithExportConfig}; -use opentelemetry_otlp::{OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_TRACES_ENDPOINT}; +use opentelemetry_otlp::SpanExporterBuilder; use opentelemetry_sdk::{ resource::{EnvResourceDetector, TelemetryResourceDetector}, trace::Tracer, @@ -41,18 +40,11 @@ pub(crate) fn otel_tracing_layer( // currently default to using the HTTP exporter but in the future we could select off of the // combination of OTEL_EXPORTER_OTLP_PROTOCOL and OTEL_EXPORTER_OTLP_TRACES_PROTOCOL to // determine whether we should use http/protobuf or grpc. - let mut exporter: SpanExporterBuilder = match OtlpProtocol::traces_protocol_from_env() { + let exporter: SpanExporterBuilder = match OtlpProtocol::traces_protocol_from_env() { OtlpProtocol::Grpc => opentelemetry_otlp::new_exporter().tonic().into(), OtlpProtocol::HttpProtobuf => opentelemetry_otlp::new_exporter().http().into(), OtlpProtocol::HttpJson => bail!("http/json OTLP protocol is not supported"), }; - if let Some(endpoint) = fix_endpoint_bug() { - match exporter { - SpanExporterBuilder::Tonic(inner) => exporter = inner.with_endpoint(endpoint).into(), - SpanExporterBuilder::Http(inner) => exporter = inner.with_endpoint(endpoint).into(), - _ => {} - } - } let tracer = opentelemetry_otlp::new_pipeline() .tracing() @@ -71,36 +63,3 @@ pub(crate) fn otel_tracing_layer( .with_threads(false) .with_filter(env_filter)) } - -// This mitigation was taken from https://github.com/neondatabase/neon/blob/main/libs/tracing-utils/src/lib.rs -// -// opentelemetry-otlp v0.15.0 has a bug in how it uses 
the -// OTEL_EXPORTER_OTLP_ENDPOINT env variable. According to the -// OpenTelemetry spec at -// , -// the full exporter URL is formed by appending "/v1/traces" to the value -// of OTEL_EXPORTER_OTLP_ENDPOINT. However, opentelemetry-otlp only does -// that with the grpc-tonic exporter. Other exporters, like the HTTP -// exporter, use the URL from OTEL_EXPORTER_OTLP_ENDPOINT as is, without -// appending "/v1/traces". -// -// See https://github.com/open-telemetry/opentelemetry-rust/pull/950 -// -// Work around that by checking OTEL_EXPORTER_OTLP_ENDPOINT, and setting -// the endpoint url with the "/v1/traces" path ourselves. If the bug is -// fixed in a later version, we can remove this code. But if we don't -// remember to remove this, it won't do any harm either, as the crate will -// just ignore the OTEL_EXPORTER_OTLP_ENDPOINT setting when the endpoint -// is set directly with `with_endpoint`. -fn fix_endpoint_bug() -> Option { - if std::env::var(OTEL_EXPORTER_OTLP_TRACES_ENDPOINT).is_err() { - if let Ok(mut endpoint) = std::env::var(OTEL_EXPORTER_OTLP_ENDPOINT) { - if !endpoint.ends_with('/') { - endpoint.push('/'); - } - endpoint.push_str("v1/traces"); - return Some(endpoint); - } - } - None -} diff --git a/crates/trigger-http/src/lib.rs b/crates/trigger-http/src/lib.rs index 4d735507f..7720a8d23 100644 --- a/crates/trigger-http/src/lib.rs +++ b/crates/trigger-http/src/lib.rs @@ -257,6 +257,13 @@ impl HttpTrigger { // Route to app component match self.router.route(&path) { Ok(component_id) => { + spin_telemetry::metrics::monotonic_counter!( + spin.request_count = 1, + trigger_type = "http", + app_id = &self.engine.app_name, + component_id = component_id + ); + let trigger = self.component_trigger_configs.get(component_id).unwrap(); let executor = trigger.executor.as_ref().unwrap_or(&HttpExecutorType::Http); diff --git a/crates/trigger-redis/Cargo.toml b/crates/trigger-redis/Cargo.toml index 49e74b2ac..70a90a61d 100644 --- a/crates/trigger-redis/Cargo.toml +++ 
b/crates/trigger-redis/Cargo.toml @@ -21,6 +21,7 @@ spin-world = { path = "../world" } redis = { version = "0.21", features = ["tokio-comp"] } tracing = { workspace = true } tokio = { version = "1.23", features = ["full"] } +spin-telemetry = { path = "../telemetry" } [dev-dependencies] spin-testing = { path = "../testing" } diff --git a/crates/trigger-redis/src/spin.rs b/crates/trigger-redis/src/spin.rs index f4d8f837d..290265a21 100644 --- a/crates/trigger-redis/src/spin.rs +++ b/crates/trigger-redis/src/spin.rs @@ -22,6 +22,13 @@ impl RedisExecutor for SpinRedisExecutor { ) -> Result<()> { tracing::trace!("Executing request using the Spin executor for component {component_id}"); + spin_telemetry::metrics::monotonic_counter!( + spin.request_count = 1, + trigger_type = "redis", + app_id = engine.app_name, + component_id = component_id + ); + let (instance, store) = engine.prepare_instance(component_id).await?; match Self::execute_impl(store, instance, channel, payload.to_vec()).await { diff --git a/examples/spin-timer/Cargo.lock b/examples/spin-timer/Cargo.lock index cc89f9ec6..e3a2c2b8c 100644 --- a/examples/spin-timer/Cargo.lock +++ b/examples/spin-timer/Cargo.lock @@ -4271,6 +4271,7 @@ dependencies = [ "opentelemetry-otlp", "opentelemetry-semantic-conventions", "opentelemetry_sdk", + "terminal", "tracing", "tracing-appender", "tracing-opentelemetry",