Skip to content

Commit

Permalink
chore(flags): add more metrics for new flags service (#26447)
Browse files Browse the repository at this point in the history
  • Loading branch information
dmarticus authored Nov 27, 2024
1 parent 2f03369 commit 5da2d74
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 71 deletions.
46 changes: 42 additions & 4 deletions rust/feature-flags/src/cohort/cohort_cache_manager.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use crate::api::errors::FlagError;
use crate::cohort::cohort_models::Cohort;
use crate::flags::flag_matching::{PostgresReader, TeamId};
use crate::metrics::metrics_consts::{
COHORT_CACHE_HIT_COUNTER, COHORT_CACHE_MISS_COUNTER, DB_COHORT_ERRORS_COUNTER,
DB_COHORT_READS_COUNTER,
};
use moka::future::Cache;
use std::sync::Arc;
use std::time::Duration;
Expand Down Expand Up @@ -63,22 +67,56 @@ impl CohortCacheManager {
/// If the cohorts are not present in the cache or have expired, it fetches them from the database,
/// caches the result upon successful retrieval, and then returns it.
pub async fn get_cohorts(&self, team_id: TeamId) -> Result<Vec<Cohort>, FlagError> {
// First check cache before acquiring lock
if let Some(cached_cohorts) = self.cache.get(&team_id).await {
common_metrics::inc(
COHORT_CACHE_HIT_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);
return Ok(cached_cohorts.clone());
}

// Acquire the lock before fetching
let _lock = self.fetch_lock.lock().await;

// Double-check the cache after acquiring the lock
// Double-check the cache after acquiring lock
if let Some(cached_cohorts) = self.cache.get(&team_id).await {
common_metrics::inc(
COHORT_CACHE_HIT_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);
return Ok(cached_cohorts.clone());
}

let fetched_cohorts = Cohort::list_from_pg(self.reader.clone(), team_id).await?;
self.cache.insert(team_id, fetched_cohorts.clone()).await;
// If we get here, we have a cache miss
common_metrics::inc(
COHORT_CACHE_MISS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);

Ok(fetched_cohorts)
// Attempt to fetch from DB
match Cohort::list_from_pg(self.reader.clone(), team_id).await {
Ok(fetched_cohorts) => {
common_metrics::inc(
DB_COHORT_READS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);
self.cache.insert(team_id, fetched_cohorts.clone()).await;
Ok(fetched_cohorts)
}
Err(e) => {
common_metrics::inc(
DB_COHORT_ERRORS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);
Err(e)
}
}
}
}

Expand Down
6 changes: 3 additions & 3 deletions rust/feature-flags/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ pub struct Config {
#[envconfig(from = "TEAM_IDS_TO_TRACK", default = "all")]
pub team_ids_to_track: TeamIdsToTrack,

#[envconfig(from = "CACHE_MAX_ENTRIES", default = "100000")]
pub cache_max_entries: u64,
#[envconfig(from = "CACHE_MAX_COHORT_ENTRIES", default = "100000")]
pub cache_max_cohort_entries: u64,

#[envconfig(from = "CACHE_TTL_SECONDS", default = "300")]
pub cache_ttl_seconds: u64,
Expand All @@ -120,7 +120,7 @@ impl Config {
maxmind_db_path: "".to_string(),
enable_metrics: false,
team_ids_to_track: TeamIdsToTrack::All,
cache_max_entries: 100_000,
cache_max_cohort_entries: 100_000,
cache_ttl_seconds: 300,
}
}
Expand Down
103 changes: 90 additions & 13 deletions rust/feature-flags/src/flags/flag_matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ use crate::cohort::cohort_cache_manager::CohortCacheManager;
use crate::cohort::cohort_models::{Cohort, CohortId};
use crate::flags::flag_match_reason::FeatureFlagMatchReason;
use crate::flags::flag_models::{FeatureFlag, FeatureFlagList, FlagGroupType};
use crate::metrics::metrics_consts::{FLAG_EVALUATION_ERROR_COUNTER, FLAG_HASH_KEY_WRITES_COUNTER};
use crate::metrics::metrics_consts::{
DB_GROUP_PROPERTIES_READS_COUNTER, DB_PERSON_AND_GROUP_PROPERTIES_READS_COUNTER,
DB_PERSON_PROPERTIES_READS_COUNTER, FLAG_EVALUATION_ERROR_COUNTER,
FLAG_HASH_KEY_WRITES_COUNTER, PROPERTY_CACHE_HITS_COUNTER, PROPERTY_CACHE_MISSES_COUNTER,
};
use crate::metrics::metrics_utils::parse_exception_for_prometheus_label;
use crate::properties::property_matching::match_property;
use crate::properties::property_models::{OperatorType, PropertyFilter};
Expand Down Expand Up @@ -107,11 +111,22 @@ impl GroupTypeMappingCache {
Ok(mapping) if !mapping.is_empty() => mapping,
Ok(_) => {
self.failed_to_fetch_flags = true;
// TODO add the `"Failed to fetch group"` type of label. See posthog/models/feature_flag/flag_matching.py:parse_exception_for_error_message
let reason = "no_group_type_mappings";
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
return Err(FlagError::NoGroupTypeMappings);
}
Err(e) => {
self.failed_to_fetch_flags = true;
let reason = parse_exception_for_prometheus_label(&e);
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
return Err(e);
}
};
Expand All @@ -135,7 +150,12 @@ impl GroupTypeMappingCache {
self.group_indexes_to_types.clone_from(&result);
Ok(result)
} else {
// TODO add the `"Failed to fetch group"` type of label. See posthog/models/feature_flag/flag_matching.py:parse_exception_for_error_message
let reason = "no_group_type_mappings";
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
Err(FlagError::NoGroupTypeMappings)
}
}
Expand Down Expand Up @@ -164,7 +184,12 @@ impl GroupTypeMappingCache {
.collect();

if mapping.is_empty() {
// TODO add the `"Failed to fetch group"` type of label. See posthog/models/feature_flag/flag_matching.py:parse_exception_for_error_message
let reason = "no_group_type_mappings";
inc(
FLAG_EVALUATION_ERROR_COUNTER,
&[("reason".to_string(), reason.to_string())],
1,
);
Err(FlagError::NoGroupTypeMappings)
} else {
Ok(mapping)
Expand Down Expand Up @@ -328,7 +353,6 @@ impl FeatureFlagMatcher {
.await
{
error!("Failed to set feature flag hash key overrides: {:?}", e);
// Increment the counter for failed write
let reason = parse_exception_for_prometheus_label(&e);
inc(
FLAG_EVALUATION_ERROR_COUNTER,
Expand All @@ -340,7 +364,6 @@ impl FeatureFlagMatcher {
writing_hash_key_override = true;
}

// TODO I'm not sure if this is the right place to increment this counter
inc(
FLAG_HASH_KEY_WRITES_COUNTER,
&[
Expand Down Expand Up @@ -443,7 +466,13 @@ impl FeatureFlagMatcher {
)
.await
{
Ok(_) => {}
Ok(_) => {
inc(
DB_PERSON_AND_GROUP_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);
}
Err(e) => {
error_while_computing_flags = true;
// TODO add sentry exception tracking
Expand Down Expand Up @@ -806,7 +835,7 @@ impl FeatureFlagMatcher {
}
}

/// Get group properties from cache or database.
/// Get group properties from overrides, cache or database.
///
/// This function attempts to retrieve group properties either from a cache or directly from the database.
/// It first checks if there are any locally computable property overrides. If so, it returns those.
Expand All @@ -832,9 +861,26 @@ impl FeatureFlagMatcher {
/// and updates the cache accordingly.
async fn get_person_id(&mut self) -> Result<PersonId, FlagError> {
match self.properties_cache.person_id {
Some(id) => Ok(id),
Some(id) => {
inc(
PROPERTY_CACHE_HITS_COUNTER,
&[("type".to_string(), "person_id".to_string())],
1,
);
Ok(id)
}
None => {
inc(
PROPERTY_CACHE_MISSES_COUNTER,
&[("type".to_string(), "person_id".to_string())],
1,
);
let id = self.get_person_id_from_db().await?;
inc(
DB_PERSON_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), self.team_id.to_string())],
1,
);
self.properties_cache.person_id = Some(id);
Ok(id)
}
Expand All @@ -852,7 +898,7 @@ impl FeatureFlagMatcher {
.map(|(_, person_id)| person_id)
}

/// Get person properties from cache or database.
/// Get person properties from overrides, cache or database.
///
/// This function attempts to retrieve person properties either from a cache or directly from the database.
/// It first checks if there are any locally computable property overrides. If so, it returns those.
Expand Down Expand Up @@ -997,16 +1043,33 @@ impl FeatureFlagMatcher {
.group_properties
.get(&group_type_index)
{
inc(
PROPERTY_CACHE_HITS_COUNTER,
&[("type".to_string(), "group_properties".to_string())],
1,
);
let mut result = HashMap::new();
result.clone_from(properties);
return Ok(result);
}

inc(
PROPERTY_CACHE_MISSES_COUNTER,
&[("type".to_string(), "group_properties".to_string())],
1,
);

let reader = self.reader.clone();
let team_id = self.team_id;
let db_properties =
fetch_group_properties_from_db(reader, team_id, group_type_index).await?;

inc(
DB_GROUP_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);

// once the properties are fetched, cache them so we don't need to fetch again in a given request
self.properties_cache
.group_properties
Expand All @@ -1025,17 +1088,34 @@ impl FeatureFlagMatcher {
) -> Result<HashMap<String, Value>, FlagError> {
// check if the properties are already cached, if so return them
if let Some(properties) = &self.properties_cache.person_properties {
inc(
PROPERTY_CACHE_HITS_COUNTER,
&[("type".to_string(), "person_properties".to_string())],
1,
);
let mut result = HashMap::new();
result.clone_from(properties);
return Ok(result);
}

inc(
PROPERTY_CACHE_MISSES_COUNTER,
&[("type".to_string(), "person_properties".to_string())],
1,
);

let reader = self.reader.clone();
let distinct_id = self.distinct_id.clone();
let team_id = self.team_id;
let (db_properties, person_id) =
fetch_person_properties_from_db(reader, distinct_id, team_id).await?;

inc(
DB_PERSON_PROPERTIES_READS_COUNTER,
&[("team_id".to_string(), team_id.to_string())],
1,
);

// once the properties and person ID are fetched, cache them so we don't need to fetch again in a given request
self.properties_cache.person_properties = Some(db_properties.clone());
self.properties_cache.person_id = Some(person_id);
Expand Down Expand Up @@ -1555,9 +1635,6 @@ fn locally_computable_property_overrides(
property_filters: &[PropertyFilter],
) -> Option<HashMap<String, Value>> {
property_overrides.as_ref().and_then(|overrides| {
// TODO handle note from Neil: https://github.com/PostHog/posthog/pull/24589#discussion_r1735828561
// TL;DR – we'll need to handle cohort properties at the DB level, i.e. we'll need to adjust the cohort query
// to account for if a given person is an element of the cohort X, Y, Z, etc
let should_prefer_overrides = property_filters
.iter()
.all(|prop| overrides.contains_key(&prop.key) && prop.prop_type != "cohort");
Expand Down
Loading

0 comments on commit 5da2d74

Please sign in to comment.