Skip to content

Commit

Permalink
feat: Add new histogram metric type (#386)
Browse files Browse the repository at this point in the history
  • Loading branch information
yinggeh authored and mc-nv committed Aug 19, 2024
1 parent 9ed1544 commit dffc026
Show file tree
Hide file tree
Showing 7 changed files with 502 additions and 14 deletions.
78 changes: 75 additions & 3 deletions include/triton/core/tritonserver.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct TRITONSERVER_Server;
struct TRITONSERVER_ServerOptions;
struct TRITONSERVER_Metric;
struct TRITONSERVER_MetricFamily;
struct TRITONSERVER_MetricArgs;

///
/// TRITONSERVER API Version
Expand Down Expand Up @@ -91,7 +92,7 @@ struct TRITONSERVER_MetricFamily;
/// }
///
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 33
#define TRITONSERVER_API_VERSION_MINOR 34

/// Get the TRITONBACKEND API version supported by the Triton shared
/// library. This value can be compared against the
Expand Down Expand Up @@ -2615,7 +2616,8 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerInferAsync(
///
typedef enum TRITONSERVER_metrickind_enum {
TRITONSERVER_METRIC_KIND_COUNTER,
TRITONSERVER_METRIC_KIND_GAUGE
TRITONSERVER_METRIC_KIND_GAUGE,
/// Histogram metrics are updated with TRITONSERVER_MetricObserve.
TRITONSERVER_METRIC_KIND_HISTOGRAM
} TRITONSERVER_MetricKind;

/// Create a new metric family object. The caller takes ownership of the
Expand Down Expand Up @@ -2644,6 +2646,44 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricFamilyNew(
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_MetricFamilyDelete(struct TRITONSERVER_MetricFamily* family);

/// Get the TRITONSERVER_MetricKind of the metric family.
///
/// \param family The metric family object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of the metric family.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_GetMetricFamilyKind(
struct TRITONSERVER_MetricFamily* family, TRITONSERVER_MetricKind* kind);

/// Create a new metric args object. The caller takes ownership of the
/// TRITONSERVER_MetricArgs object and must call TRITONSERVER_MetricArgsDelete
/// to release the object.
/// The args are populated with a kind-specific setter such as
/// TRITONSERVER_MetricArgsSetHistogram and then passed to
/// TRITONSERVER_MetricNewWithArgs.
///
/// \param args Returns the new metric args object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsNew(
struct TRITONSERVER_MetricArgs** args);

/// Set metric args with the parameters for a histogram metric.
///
/// \param args The metric args object to set.
/// \param buckets The array of bucket boundaries for the expected range of
/// observed values.
/// \param buckets_count The number of bucket boundaries in 'buckets'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_MetricArgsSetHistogram(
struct TRITONSERVER_MetricArgs* args, const double* buckets,
const uint64_t buckets_count);

/// Delete a metric args object previously created with
/// TRITONSERVER_MetricArgsNew.
///
/// \param args The metric args object to delete.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsDelete(
struct TRITONSERVER_MetricArgs* args);

/// Create a new metric object. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
Expand All @@ -2661,6 +2701,28 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNew(
struct TRITONSERVER_MetricFamily* family,
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count);

/// Create a new metric object, optionally supplying kind-specific
/// arguments. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
/// responsible for ownership of the labels passed in.
/// Each label can be deleted immediately after creating the metric with
/// TRITONSERVER_ParameterDelete if not re-using the labels.
/// Metric args can be deleted immediately after creating the metric with
/// TRITONSERVER_MetricArgsDelete if not re-using the metric args.
///
/// \param metric Returns the new metric object.
/// \param family The metric family to add this new metric to.
/// \param labels The array of labels to associate with this new metric.
/// \param label_count The number of labels.
/// \param args Metric args that store additional arguments to construct
/// particular metric types, e.g. histogram. Must be null for counter and
/// gauge families; for histogram families it must be non-null and set up
/// with TRITONSERVER_MetricArgsSetHistogram.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNewWithArgs(
struct TRITONSERVER_Metric** metric,
struct TRITONSERVER_MetricFamily* family,
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count,
const struct TRITONSERVER_MetricArgs* args);

/// Delete a metric object.
/// All TRITONSERVER_Metric* objects should be deleted BEFORE their
/// corresponding TRITONSERVER_MetricFamily* objects have been deleted.
Expand Down Expand Up @@ -2705,7 +2767,17 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricIncrement(
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricSet(
struct TRITONSERVER_Metric* metric, double value);

/// Get the TRITONSERVER_MetricKind of metric and its corresponding family.
/// Record an observed value, counting it in the appropriate bucket of the
/// metric.
/// Supports only metrics of kind TRITONSERVER_METRIC_KIND_HISTOGRAM and
/// returns TRITONSERVER_ERROR_UNSUPPORTED for any other
/// TRITONSERVER_MetricKind.
///
/// \param metric The metric object to update.
/// \param value The observed value to record.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricObserve(
struct TRITONSERVER_Metric* metric, double value);

/// Get the TRITONSERVER_MetricKind of the metric, i.e. the kind of its
/// corresponding family.
///
/// \param metric The metric object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of metric.
Expand Down
98 changes: 94 additions & 4 deletions src/metric_family.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights
// reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -54,6 +55,12 @@ MetricFamily::MetricFamily(
.Help(description)
.Register(*registry));
break;
case TRITONSERVER_METRIC_KIND_HISTOGRAM:
family_ = reinterpret_cast<void*>(&prometheus::BuildHistogram()
.Name(name)
.Help(description)
.Register(*registry));
break;
default:
throw std::invalid_argument(
"Unsupported kind passed to MetricFamily constructor.");
Expand All @@ -63,24 +70,49 @@ MetricFamily::MetricFamily(
}

void*
MetricFamily::Add(std::map<std::string, std::string> label_map, Metric* metric)
MetricFamily::Add(
std::map<std::string, std::string> label_map, Metric* metric,
const TritonServerMetricArgs* args)
{
void* prom_metric = nullptr;
switch (kind_) {
case TRITONSERVER_METRIC_KIND_COUNTER: {
if (args != nullptr) {
throw std::invalid_argument(
"Unexpected args found in counter Metric constructor.");
}
auto counter_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
auto counter_ptr = &counter_family_ptr->Add(label_map);
prom_metric = reinterpret_cast<void*>(counter_ptr);
break;
}
case TRITONSERVER_METRIC_KIND_GAUGE: {
if (args != nullptr) {
throw std::invalid_argument(
"Unexpected args found in gauge Metric constructor.");
}
auto gauge_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
auto gauge_ptr = &gauge_family_ptr->Add(label_map);
prom_metric = reinterpret_cast<void*>(gauge_ptr);
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
if (args == nullptr) {
throw std::invalid_argument(
"Bucket boundaries not found in Metric args.");
}
if (args->kind() != TRITONSERVER_METRIC_KIND_HISTOGRAM) {
throw std::invalid_argument("Metric args not set to histogram kind.");
}
auto histogram_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Histogram>*>(family_);
auto histogram_ptr =
&histogram_family_ptr->Add(label_map, args->buckets());
prom_metric = reinterpret_cast<void*>(histogram_ptr);
break;
}
default:
throw std::invalid_argument(
"Unsupported family kind passed to Metric constructor.");
Expand Down Expand Up @@ -134,6 +166,14 @@ MetricFamily::Remove(void* prom_metric, Metric* metric)
gauge_family_ptr->Remove(gauge_ptr);
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
auto histogram_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Histogram>*>(family_);
auto histogram_ptr =
reinterpret_cast<prometheus::Histogram*>(prom_metric);
histogram_family_ptr->Remove(histogram_ptr);
break;
}
default:
// Invalid kind should be caught in constructor
LOG_ERROR << "Unsupported kind in Metric destructor.";
Expand Down Expand Up @@ -169,7 +209,8 @@ MetricFamily::~MetricFamily()
//
Metric::Metric(
TRITONSERVER_MetricFamily* family,
std::vector<const InferenceParameter*> labels)
std::vector<const InferenceParameter*> labels,
const TritonServerMetricArgs* args)
{
family_ = reinterpret_cast<MetricFamily*>(family);
kind_ = family_->Kind();
Expand All @@ -188,7 +229,7 @@ Metric::Metric(
std::string(reinterpret_cast<const char*>(param->ValuePointer()));
}

metric_ = family_->Add(label_map, this);
metric_ = family_->Add(label_map, this, args);
}

Metric::~Metric()
Expand Down Expand Up @@ -235,6 +276,11 @@ Metric::Value(double* value)
*value = gauge_ptr->Value();
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Value");
}
default:
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
Expand Down Expand Up @@ -279,6 +325,11 @@ Metric::Increment(double value)
}
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Increment");
}
default:
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
Expand Down Expand Up @@ -308,6 +359,45 @@ Metric::Set(double value)
gauge_ptr->Set(value);
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Set");
}
default:
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"Unsupported TRITONSERVER_MetricKind");
}

return nullptr; // Success
}

TRITONSERVER_Error*
Metric::Observe(double value)
{
if (metric_ == nullptr) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
"Could not set metric value. Metric has been invalidated.");
}

switch (kind_) {
case TRITONSERVER_METRIC_KIND_COUNTER: {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"TRITONSERVER_METRIC_KIND_COUNTER does not support Observe");
}
case TRITONSERVER_METRIC_KIND_GAUGE: {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"TRITONSERVER_METRIC_KIND_GAUGE does not support Observe");
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
auto histogram_ptr = reinterpret_cast<prometheus::Histogram*>(metric_);
histogram_ptr->Observe(value);
break;
}
default:
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
Expand Down
35 changes: 32 additions & 3 deletions src/metric_family.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights
// reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -27,6 +28,7 @@

#ifdef TRITON_ENABLE_METRICS

#include <cstring>
#include <mutex>
#include <set>
#include <unordered_map>
Expand All @@ -37,6 +39,29 @@

namespace triton { namespace core {

//
// TritonServerMetricArgs
//
// Implementation for TRITONSERVER_MetricArgs.
//
// Holds the optional, kind-specific arguments used when constructing a
// metric. At present the only such arguments are the bucket boundaries of a
// histogram.
class TritonServerMetricArgs {
 public:
  TritonServerMetricArgs() = default;

  // Configure these args for a histogram metric.
  //
  // \param buckets Array of bucket boundaries. The values are copied into
  // this object. A null pointer is treated as an empty set of boundaries
  // instead of being dereferenced.
  // \param bucket_count Number of entries in 'buckets'.
  // \return Always nullptr.
  void* SetHistogramArgs(const double* buckets, uint64_t bucket_count)
  {
    kind_ = TRITONSERVER_METRIC_KIND_HISTOGRAM;
    if (buckets == nullptr) {
      buckets_.clear();
    } else {
      buckets_.assign(buckets, buckets + bucket_count);
    }
    return nullptr;
  }

  // Kind these args were configured for. Never
  // TRITONSERVER_METRIC_KIND_HISTOGRAM unless SetHistogramArgs was called.
  TRITONSERVER_MetricKind kind() const { return kind_; }

  // Histogram bucket boundaries; empty until SetHistogramArgs is called.
  const std::vector<double>& buckets() const { return buckets_; }

 private:
  // The enum has no "unset" value, so default to a non-histogram kind. This
  // keeps kind() well-defined on a freshly constructed object: args that were
  // never configured fail a "kind() == HISTOGRAM" check deterministically
  // instead of reading an indeterminate value.
  TRITONSERVER_MetricKind kind_{TRITONSERVER_METRIC_KIND_COUNTER};
  std::vector<double> buckets_;
};

//
// Implementation for TRITONSERVER_MetricFamily.
//
Expand All @@ -50,7 +75,9 @@ class MetricFamily {
void* Family() const { return family_; }
TRITONSERVER_MetricKind Kind() const { return kind_; }

void* Add(std::map<std::string, std::string> label_map, Metric* metric);
void* Add(
std::map<std::string, std::string> label_map, Metric* metric,
const TritonServerMetricArgs* args);
void Remove(void* prom_metric, Metric* metric);

int NumMetrics()
Expand Down Expand Up @@ -86,7 +113,8 @@ class Metric {
public:
Metric(
TRITONSERVER_MetricFamily* family,
std::vector<const InferenceParameter*> labels);
std::vector<const InferenceParameter*> labels,
const TritonServerMetricArgs* args);
~Metric();

MetricFamily* Family() const { return family_; }
Expand All @@ -95,6 +123,7 @@ class Metric {
TRITONSERVER_Error* Value(double* value);
TRITONSERVER_Error* Increment(double value);
TRITONSERVER_Error* Set(double value);
TRITONSERVER_Error* Observe(double value);

// If a MetricFamily is deleted before its dependent Metric, we want to
// invalidate the references so we don't access invalid memory.
Expand Down
3 changes: 2 additions & 1 deletion src/metrics.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -35,6 +35,7 @@

#include "prometheus/counter.h"
#include "prometheus/gauge.h"
#include "prometheus/histogram.h"
#include "prometheus/registry.h"
#include "prometheus/serializer.h"
#include "prometheus/summary.h"
Expand Down
Loading

0 comments on commit dffc026

Please sign in to comment.