diff --git a/src/errors.rs b/src/errors.rs index 610898f..8805778 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -7,3 +7,15 @@ use anyhow::Error; pub(crate) fn display_causes_and_backtrace(err: &Error) { eprintln!("{:?}", err); } + +/// Given a [`hyper::Error`], return a human-readable description. +/// +/// This description _should_ be "low-arity", i.e., limited to only a handful of +/// values. We use it when reporting error metrics. +pub(crate) fn hyper_error_description_for_metrics(err: &hyper::Error) -> String { + // HACK: We know that the current version of `hyper` puts a low-arity + // description before ":", but follows it up with variable text. + let display_err = err.to_string(); + let cut_at = display_err.find(':').unwrap_or_else(|| display_err.len()); + display_err[..cut_at].to_owned() +} diff --git a/src/geocoders/smarty/client.rs b/src/geocoders/smarty/client.rs index 08a4437..eb62402 100644 --- a/src/geocoders/smarty/client.rs +++ b/src/geocoders/smarty/client.rs @@ -6,12 +6,13 @@ use std::{env, str}; use anyhow::{format_err, Context}; use futures::stream::StreamExt; use hyper::{Body, Request}; -use metrics::{describe_histogram, histogram, Unit}; +use metrics::{counter, describe_histogram, histogram, Unit}; use serde::{Deserialize, Serialize}; use tracing::instrument; use url::Url; use crate::addresses::Address; +use crate::errors::hyper_error_description_for_metrics; use crate::geocoders::{MatchStrategy, SharedHttpClient}; use crate::unpack_vec::unpack_vec; use crate::Result; @@ -130,7 +131,16 @@ async fn street_addresses_impl( .uri(url.as_str()) .header("Content-Type", "application/json; charset=utf-8") .body(Body::from(serde_json::to_string(&requests)?))?; - let res = client.request(req).await?; + let res = match client.request(req).await { + Ok(res) => res, + Err(err) => { + // Errors that occur here are being reported by our local HTTP + // stack, not the remote server. + let desc = hyper_error_description_for_metrics(&err); + counter!("geocodecsv.selected_errors.count", 1, "component" => "smarty", "cause" => desc); + return Err(err.into()); + } + }; let status = res.status(); let mut body = res.into_body(); let mut body_data = vec![]; @@ -149,6 +159,8 @@ async fn street_addresses_impl( let resps: Vec = serde_json::from_slice(&body_data)?; Ok(unpack_vec(resps, requests.len(), |resp| resp.input_index)?) } else { + // This error was reported by the remote server. + counter!("geocodecsv.selected_errors.count", 1, "component" => "smarty", "cause" => status.to_string()); Err(format_err!( "geocoding error: {}\n{}", status, diff --git a/src/key_value_stores/bigtable.rs b/src/key_value_stores/bigtable.rs index 7669ec7..c691d14 100644 --- a/src/key_value_stores/bigtable.rs +++ b/src/key_value_stores/bigtable.rs @@ -1,6 +1,7 @@ //! Support for using BigTable as a key/value store. use std::{ + borrow::Cow, collections::HashMap, time::{Duration, Instant}, }; @@ -8,7 +9,7 @@ use std::{ use anyhow::{format_err, Context}; use async_trait::async_trait; use bigtable_rs::{ - bigtable::{BigTable as BigTableClient, BigTableConnection}, + bigtable::{self, BigTable as BigTableClient, BigTableConnection}, google::bigtable::v2::{ mutate_rows_request::Entry, mutation::{self, SetCell}, @@ -16,7 +17,7 @@ use bigtable_rs::{ MutateRowsRequest, Mutation, ReadRowsRequest, RowFilter, RowSet, }, }; -use metrics::{describe_histogram, histogram, Unit}; +use metrics::{counter, describe_histogram, histogram, Unit}; use tracing::{instrument, trace}; use url::Url; @@ -205,7 +206,14 @@ impl<'store> PipelinedGet<'store> for BigTablePipelinedGet<'store> { ..ReadRowsRequest::default() }; trace!("bigtable request: {:?}", request); - let response = client.read_rows(request).await?; + let response = match client.read_rows(request).await { + Ok(response) => response, + Err(err) => { + let cause = bigtable_error_cause_for_metrics(&err); + counter!("geocodecsv.selected_errors.count", 1, "component" => "bigtable", "cause" => cause); + return Err(err).context("error checking BigTable for cached values"); + } + }; histogram!( "geocodecsv.bigtable.get_request.duration_seconds", @@ -300,10 +308,11 @@ impl<'store> PipelinedSet<'store> for BigTablePipelinedSet<'store> { app_profile_id: "".to_owned(), // Dave says this is what we want. entries: self.entries.clone(), }; - client - .mutate_rows(request) - .await - .context("error writing cached values to BigTable")?; + if let Err(err) = client.mutate_rows(request).await { + let cause = bigtable_error_cause_for_metrics(&err); + counter!("geocodecsv.selected_errors.count", 1, "component" => "bigtable", "cause" => cause); + return Err(err).context("error writing cached values to BigTable"); + } histogram!( "geocodecsv.bigtable.set_request.duration_seconds", @@ -312,3 +321,24 @@ impl<'store> PipelinedSet<'store> for BigTablePipelinedSet<'store> { Ok(()) } } + +/// Convert a BigTable error to a "low-arity" string describing what went wrong. +/// +/// For reporting with metrics. This does not replace detailed logs, but it +/// should give some quick ideas about what to look for. +fn bigtable_error_cause_for_metrics(err: &bigtable::Error) -> Cow<'static, str> { + match err { + bigtable::Error::AccessTokenError(_) => Cow::Borrowed("access token"), + bigtable::Error::CertificateError(_) => Cow::Borrowed("certificate"), + // `std:io::ErrorKind` is low-arity, and Rust can at least turn it into + // a `Debug` string. + bigtable::Error::IoError(err) => Cow::Owned(format!("{:?}", err.kind())), + bigtable::Error::TransportError(_) => Cow::Borrowed("transport"), + bigtable::Error::RowNotFound => Cow::Borrowed("row not found"), + bigtable::Error::RowWriteFailed => Cow::Borrowed("row write failed"), + bigtable::Error::ObjectNotFound(_) => Cow::Borrowed("object not found"), + bigtable::Error::ObjectCorrupt(_) => Cow::Borrowed("object corrupt"), + bigtable::Error::RpcError(_) => Cow::Borrowed("rpc"), + bigtable::Error::TimeoutError(_) => Cow::Borrowed("timeout"), + } +} diff --git a/src/main.rs b/src/main.rs index 66cad90..0f2614b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,7 @@ use geocoders::normalizer::Normalizer; use geocoders::smarty::Smarty; use geocoders::{shared_http_client, Geocoder}; use key_value_stores::KeyValueStore; +use metrics::describe_counter; use opinionated_metrics::Mode; use std::path::PathBuf; use std::str::FromStr; @@ -158,6 +159,13 @@ async fn main() -> Result<()> { } let metrics_handle = metrics_builder.install()?; + // Describe our global metrics. Other metrics are described in the modules + // that use them. + describe_counter!( + "geocodecsv.selected_errors.count", + "Particularly interesting errors, by component and cause" + ); + // Choose our main geocoding client. let mut geocoder: Box = match opt.geocoder { GeocoderName::Smarty => Box::new(Smarty::new(