Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(flags): add more metrics for new flags service #26447

Merged
merged 10 commits into from
Nov 27, 2024
46 changes: 42 additions & 4 deletions rust/feature-flags/src/cohort/cohort_cache_manager.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use crate::api::errors::FlagError;
use crate::cohort::cohort_models::Cohort;
use crate::flags::flag_matching::{PostgresReader, TeamId};
use crate::metrics::metrics_consts::{
COHORT_CACHE_HIT_COUNTER, COHORT_CACHE_MISS_COUNTER, DB_COHORT_ERRORS_COUNTER,
DB_COHORT_READS_COUNTER,
};
use moka::future::Cache;
use std::sync::Arc;
use std::time::Duration;
Expand Down Expand Up @@ -63,22 +67,56 @@ impl CohortCacheManager {
/// If the cohorts are not present in the cache or have expired, it fetches them from the database,
/// caches the result upon successful retrieval, and then returns it.
pub async fn get_cohorts(&self, team_id: TeamId) -> Result<Vec<Cohort>, FlagError> {
    // First check cache before acquiring the fetch lock; a hit avoids both the
    // lock contention and the DB round-trip entirely.
    if let Some(cached_cohorts) = self.cache.get(&team_id).await {
        common_metrics::inc(
            COHORT_CACHE_HIT_COUNTER,
            &[("team_id".to_string(), team_id.to_string())],
            1,
        );
        // moka's `Cache::get` already returns an owned clone of the value,
        // so no extra `.clone()` is needed here.
        return Ok(cached_cohorts);
    }

    // Acquire the lock before fetching so that concurrent misses for the same
    // team collapse into a single DB read (the others wait here).
    let _lock = self.fetch_lock.lock().await;

    // Double-check the cache after acquiring the lock: another task may have
    // populated it while we were waiting.
    if let Some(cached_cohorts) = self.cache.get(&team_id).await {
        common_metrics::inc(
            COHORT_CACHE_HIT_COUNTER,
            &[("team_id".to_string(), team_id.to_string())],
            1,
        );
        return Ok(cached_cohorts);
    }

    // If we get here, we have a genuine cache miss.
    common_metrics::inc(
        COHORT_CACHE_MISS_COUNTER,
        &[("team_id".to_string(), team_id.to_string())],
        1,
    );

    // Attempt to fetch from the DB, recording reads and errors separately so
    // dashboards can distinguish load from failure rate.
    match Cohort::list_from_pg(self.reader.clone(), team_id).await {
        Ok(fetched_cohorts) => {
            common_metrics::inc(
                DB_COHORT_READS_COUNTER,
                &[("team_id".to_string(), team_id.to_string())],
                1,
            );
            // Cache the successful result; errors are intentionally NOT cached
            // so the next request retries the database.
            self.cache.insert(team_id, fetched_cohorts.clone()).await;
            Ok(fetched_cohorts)
        }
        Err(e) => {
            common_metrics::inc(
                DB_COHORT_ERRORS_COUNTER,
                &[("team_id".to_string(), team_id.to_string())],
                1,
            );
            Err(e)
        }
    }
}
}

Expand Down
6 changes: 3 additions & 3 deletions rust/feature-flags/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ pub struct Config {
#[envconfig(from = "TEAM_IDS_TO_TRACK", default = "all")]
pub team_ids_to_track: TeamIdsToTrack,

#[envconfig(from = "CACHE_MAX_ENTRIES", default = "100000")]
pub cache_max_entries: u64,
#[envconfig(from = "CACHE_MAX_COHORT_ENTRIES", default = "100000")]
pub cache_max_cohort_entries: u64,

#[envconfig(from = "CACHE_TTL_SECONDS", default = "300")]
pub cache_ttl_seconds: u64,
Expand All @@ -120,7 +120,7 @@ impl Config {
maxmind_db_path: "".to_string(),
enable_metrics: false,
team_ids_to_track: TeamIdsToTrack::All,
cache_max_entries: 100_000,
cache_max_cohort_entries: 100_000,
cache_ttl_seconds: 300,
}
}
Expand Down
103 changes: 90 additions & 13 deletions rust/feature-flags/src/flags/flag_matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ use crate::cohort::cohort_cache_manager::CohortCacheManager;
use crate::cohort::cohort_models::{Cohort, CohortId};
use crate::flags::flag_match_reason::FeatureFlagMatchReason;
use crate::flags::flag_models::{FeatureFlag, FeatureFlagList, FlagGroupType};
use crate::metrics::metrics_consts::{FLAG_EVALUATION_ERROR_COUNTER, FLAG_HASH_KEY_WRITES_COUNTER};
use crate::metrics::metrics_consts::{
DB_GROUP_PROPERTIES_READS_COUNTER, DB_PERSON_AND_GROUP_PROPERTIES_READS_COUNTER,
DB_PERSON_PROPERTIES_READS_COUNTER, FLAG_EVALUATION_ERROR_COUNTER,
FLAG_HASH_KEY_WRITES_COUNTER, PROPERTY_CACHE_HITS_COUNTER, PROPERTY_CACHE_MISSES_COUNTER,
};
use crate::metrics::metrics_utils::parse_exception_for_prometheus_label;
use crate::properties::property_matching::match_property;
use crate::properties::property_models::{OperatorType, PropertyFilter};
Expand Down Expand Up @@ -107,11 +111,22 @@ impl GroupTypeMappingCache {
Ok(mapping) if !mapping.is_empty() => mapping,
Ok(_) => {
self.failed_to_fetch_flags = true;
// TODO add the `"Failed to fetch group"` type of lable. See posthog/models/feature_flag/flag_matching.py:parse_exception_for_error_message
let reason = "no_group_type_mappings";
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
return Err(FlagError::NoGroupTypeMappings);
}
Err(e) => {
self.failed_to_fetch_flags = true;
let reason = parse_exception_for_prometheus_label(&e);
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
return Err(e);
}
};
Expand All @@ -135,7 +150,12 @@ impl GroupTypeMappingCache {
self.group_indexes_to_types.clone_from(&result);
Ok(result)
} else {
// TODO add the `"Failed to fetch group"` type of lable. See posthog/models/feature_flag/flag_matching.py:parse_exception_for_error_message
let reason = "no_group_type_mappings";
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
Err(FlagError::NoGroupTypeMappings)
}
}
Expand Down Expand Up @@ -164,7 +184,12 @@ impl GroupTypeMappingCache {
.collect();

if mapping.is_empty() {
// TODO add the `"Failed to fetch group"` type of lable. See posthog/models/feature_flag/flag_matching.py:parse_exception_for_error_message
let reason = "no_group_type_mappings";
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
Err(FlagError::NoGroupTypeMappings)
} else {
Ok(mapping)
Expand Down Expand Up @@ -328,7 +353,6 @@ impl FeatureFlagMatcher {
.await
{
error!("Failed to set feature flag hash key overrides: {:?}", e);
// Increment the counter for failed write
let reason = parse_exception_for_prometheus_label(&e);
inc(
FLAG_EVALUATION_ERROR_COUNTER,
Expand All @@ -340,7 +364,6 @@ impl FeatureFlagMatcher {
writing_hash_key_override = true;
}

// TODO I'm not sure if this is the right place to increment this counter
inc(
FLAG_HASH_KEY_WRITES_COUNTER,
&[
Expand Down Expand Up @@ -443,7 +466,13 @@ impl FeatureFlagMatcher {
)
.await
{
Ok(_) => {}
Ok(_) => {
inc(
DB_PERSON_AND_GROUP_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);
}
Err(e) => {
error_while_computing_flags = true;
// TODO add sentry exception tracking
Expand Down Expand Up @@ -806,7 +835,7 @@ impl FeatureFlagMatcher {
}
}

/// Get group properties from cache or database.
/// Get group properties from overrides, cache or database.
///
/// This function attempts to retrieve group properties either from a cache or directly from the database.
/// It first checks if there are any locally computable property overrides. If so, it returns those.
Expand All @@ -832,9 +861,26 @@ impl FeatureFlagMatcher {
/// and updates the cache accordingly.
async fn get_person_id(&mut self) -> Result<PersonId, FlagError> {
match self.properties_cache.person_id {
Some(id) => Ok(id),
Some(id) => {
inc(
PROPERTY_CACHE_HITS_COUNTER,
&[("type".to_string(), "person_id".to_string())],
1,
);
Ok(id)
}
None => {
inc(
PROPERTY_CACHE_MISSES_COUNTER,
&[("type".to_string(), "person_id".to_string())],
1,
);
let id = self.get_person_id_from_db().await?;
inc(
DB_PERSON_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), self.team_id.to_string())],
1,
);
self.properties_cache.person_id = Some(id);
Ok(id)
}
Expand All @@ -852,7 +898,7 @@ impl FeatureFlagMatcher {
.map(|(_, person_id)| person_id)
}

/// Get person properties from cache or database.
/// Get person properties from overrides, cache or database.
///
/// This function attempts to retrieve person properties either from a cache or directly from the database.
/// It first checks if there are any locally computable property overrides. If so, it returns those.
Expand Down Expand Up @@ -997,16 +1043,33 @@ impl FeatureFlagMatcher {
.group_properties
.get(&group_type_index)
{
inc(
PROPERTY_CACHE_HITS_COUNTER,
&[("type".to_string(), "group_properties".to_string())],
1,
);
let mut result = HashMap::new();
result.clone_from(properties);
return Ok(result);
}

inc(
PROPERTY_CACHE_MISSES_COUNTER,
&[("type".to_string(), "group_properties".to_string())],
1,
);

let reader = self.reader.clone();
let team_id = self.team_id;
let db_properties =
fetch_group_properties_from_db(reader, team_id, group_type_index).await?;

inc(
DB_GROUP_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);

// once the properties are fetched, cache them so we don't need to fetch again in a given request
self.properties_cache
.group_properties
Expand All @@ -1025,17 +1088,34 @@ impl FeatureFlagMatcher {
) -> Result<HashMap<String, Value>, FlagError> {
// check if the properties are already cached, if so return them
if let Some(properties) = &self.properties_cache.person_properties {
inc(
PROPERTY_CACHE_HITS_COUNTER,
&[("type".to_string(), "person_properties".to_string())],
1,
);
let mut result = HashMap::new();
result.clone_from(properties);
return Ok(result);
}

inc(
PROPERTY_CACHE_MISSES_COUNTER,
&[("type".to_string(), "person_properties".to_string())],
1,
);

let reader = self.reader.clone();
let distinct_id = self.distinct_id.clone();
let team_id = self.team_id;
let (db_properties, person_id) =
fetch_person_properties_from_db(reader, distinct_id, team_id).await?;

inc(
DB_PERSON_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);

// once the properties and person ID are fetched, cache them so we don't need to fetch again in a given request
self.properties_cache.person_properties = Some(db_properties.clone());
self.properties_cache.person_id = Some(person_id);
Expand Down Expand Up @@ -1555,9 +1635,6 @@ fn locally_computable_property_overrides(
property_filters: &[PropertyFilter],
) -> Option<HashMap<String, Value>> {
property_overrides.as_ref().and_then(|overrides| {
// TODO handle note from Neil: https://github.com/PostHog/posthog/pull/24589#discussion_r1735828561
// TL;DR – we'll need to handle cohort properties at the DB level, i.e. we'll need to adjust the cohort query
// to account for if a given person is an element of the cohort X, Y, Z, etc
let should_prefer_overrides = property_filters
.iter()
.all(|prop| overrides.contains_key(&prop.key) && prop.prop_type != "cohort");
Expand Down
Loading