diff --git a/nvidia_nim/README.md b/nvidia_nim/README.md
index 67a37424ad96b..7c033d3ca9951 100644
--- a/nvidia_nim/README.md
+++ b/nvidia_nim/README.md
@@ -16,7 +16,7 @@ The NVIDIA NIM check is included in the [Datadog Agent][2] package. No additiona
 
 ### Configuration
 
-NVIDIA NIM provides Prometheus metrics indicating request statistics. By default, these metrics are available at http://localhost:8000/metrics. The Datadog Agent can collect the exposed metrics using this integration. Follow the instructions below to configure data collection from any or all of the components.
+NVIDIA NIM provides Prometheus [metrics][1] indicating request statistics. By default, these metrics are available at http://localhost:8000/metrics. The Datadog Agent can collect the exposed metrics using this integration. Follow the instructions below to configure data collection from any or all of the components.
 
 **Note**: This check uses [OpenMetrics][10] for metric collection, which requires Python 3.
 
diff --git a/nvidia_nim/datadog_checks/nvidia_nim/metrics.py b/nvidia_nim/datadog_checks/nvidia_nim/metrics.py
index ab8f82df1f84d..f140776d35b3d 100644
--- a/nvidia_nim/datadog_checks/nvidia_nim/metrics.py
+++ b/nvidia_nim/datadog_checks/nvidia_nim/metrics.py
@@ -5,7 +5,7 @@
 METRIC_MAP = {
     'process_virtual_memory_bytes': 'process.virtual_memory_bytes',
     'process_resident_memory_bytes': 'process.resident_memory_bytes',
-    'process_start_time_seconds': 'process.start_time_seconds',
+    'process_start_time_seconds': {'name': 'process.start_time_seconds', 'type': 'time_elapsed'},
     'process_cpu_seconds': 'process.cpu_seconds',
     'process_open_fds': 'process.open_fds',
     'process_max_fds': 'process.max_fds',
diff --git a/nvidia_nim/metadata.csv b/nvidia_nim/metadata.csv
index 4df6866670704..9c23d73811b85 100644
--- a/nvidia_nim/metadata.csv
+++ b/nvidia_nim/metadata.csv
@@ -2,30 +2,30 @@ metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation
 nvidia_nim.e2e_request_latency.seconds.bucket,count,,,,The observations of end to end request latency bucketed by seconds.,0,nvidia_nim,,,
 nvidia_nim.e2e_request_latency.seconds.count,count,,,,The total number of observations of end to end request latency.,0,nvidia_nim,,,
 nvidia_nim.e2e_request_latency.seconds.sum,count,,second,,The sum of end to end request latency in seconds.,0,nvidia_nim,,,
-nvidia_nim.generation_tokens.count,count,,,,Number of generation tokens processed.,0,nvidia_nim,,,
-nvidia_nim.gpu_cache_usage_percent,gauge,,percent,,GPU KV-cache usage. 1 means 100 percent usage,0,nvidia_nim,,,
-nvidia_nim.num_request.max,gauge,,,,The max number of concurrently running requests.,0,nvidia_nim,,,
-nvidia_nim.num_requests.running,gauge,,,,Number of requests currently running on GPU.,0,nvidia_nim,,,
-nvidia_nim.num_requests.waiting,gauge,,,,Number of requests waiting.,0,nvidia_nim,,,
+nvidia_nim.generation_tokens.count,count,,token,,Number of generation tokens processed.,0,nvidia_nim,,,
+nvidia_nim.gpu_cache_usage_percent,gauge,,fraction,,GPU KV-cache usage. 1 means 100 percent usage,0,nvidia_nim,,,
+nvidia_nim.num_request.max,gauge,,request,,The max number of concurrently running requests.,0,nvidia_nim,,,
+nvidia_nim.num_requests.running,gauge,,request,,Number of requests currently running on GPU.,0,nvidia_nim,,,
+nvidia_nim.num_requests.waiting,gauge,,request,,Number of requests waiting.,0,nvidia_nim,,,
 nvidia_nim.process.cpu_seconds.count,count,,second,,Total user and system CPU time spent in seconds.,0,nvidia_nim,,,
 nvidia_nim.process.max_fds,gauge,,file,,Maximum number of open file descriptors.,0,nvidia_nim,,,
 nvidia_nim.process.open_fds,gauge,,file,,Number of open file descriptors.,0,nvidia_nim,,,
 nvidia_nim.process.resident_memory_bytes,gauge,,byte,,Resident memory size in bytes.,0,nvidia_nim,,,
 nvidia_nim.process.start_time_seconds,gauge,,second,,Start time of the process since unix epoch in seconds.,0,nvidia_nim,,,
 nvidia_nim.process.virtual_memory_bytes,gauge,,byte,,Virtual memory size in bytes.,0,nvidia_nim,,,
-nvidia_nim.prompt_tokens.count,count,,,,Number of prefill tokens processed.,0,nvidia_nim,,,
+nvidia_nim.prompt_tokens.count,count,,token,,Number of prefill tokens processed.,0,nvidia_nim,,,
 nvidia_nim.python.gc.collections.count,count,,,,Number of times this generation was collected,0,nvidia_nim,,,
 nvidia_nim.python.gc.objects.collected.count,count,,,,Objects collected during gc,0,nvidia_nim,,,
 nvidia_nim.python.gc.objects.uncollectable.count,count,,,,Uncollectable objects found during GC,0,nvidia_nim,,,
 nvidia_nim.python.info,gauge,,,,Python platform information,0,nvidia_nim,,,
-nvidia_nim.request.failure.count,count,,,,The count of failed requests.,0,nvidia_nim,,,
-nvidia_nim.request.finish.count,count,,,,The count of finished requests.,0,nvidia_nim,,,
+nvidia_nim.request.failure.count,count,,request,,The count of failed requests.,0,nvidia_nim,,,
+nvidia_nim.request.finish.count,count,,request,,The count of finished requests.,0,nvidia_nim,,,
 nvidia_nim.request.generation_tokens.bucket,count,,,,Number of generation tokens processed.,0,nvidia_nim,,,
 nvidia_nim.request.generation_tokens.count,count,,,,Number of generation tokens processed.,0,nvidia_nim,,,
-nvidia_nim.request.generation_tokens.sum,count,,,,Number of generation tokens processed.,0,nvidia_nim,,,
+nvidia_nim.request.generation_tokens.sum,count,,token,,Number of generation tokens processed.,0,nvidia_nim,,,
 nvidia_nim.request.prompt_tokens.bucket,count,,,,Number of prefill tokens processed.,0,nvidia_nim,,,
 nvidia_nim.request.prompt_tokens.count,count,,,,Number of prefill tokens processed.,0,nvidia_nim,,,
-nvidia_nim.request.prompt_tokens.sum,count,,,,Number of prefill tokens processed.,0,nvidia_nim,,,
+nvidia_nim.request.prompt_tokens.sum,count,,token,,Number of prefill tokens processed.,0,nvidia_nim,,,
 nvidia_nim.request.success.count,count,,,,Count of successfully processed requests.,0,nvidia_nim,,,
 nvidia_nim.time_per_output_token.seconds.bucket,count,,,,The observations of time per output token bucketed by seconds.,0,nvidia_nim,,,
 nvidia_nim.time_per_output_token.seconds.count,count,,,,The total number of observations of time per output token.,0,nvidia_nim,,,
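For context on the `metrics.py` change above: in the Agent's OpenMetrics-based checks, a `METRIC_MAP` entry can be either a plain rename string or a dict that also overrides the submission type. With the `time_elapsed` type, the check reports how long ago the scraped timestamp was rather than the raw Unix epoch value. The sketch below illustrates that behavior with a made-up sample value; it is not the check's actual transformer code.

```python
import time

# Dict form used in the patch: rename the raw Prometheus metric and
# override its submission type in a single METRIC_MAP entry.
METRIC_MAP = {
    'process_start_time_seconds': {'name': 'process.start_time_seconds', 'type': 'time_elapsed'},
}

# Illustrative sample (assumed value): process start time as a Unix epoch timestamp,
# which is what the raw process_start_time_seconds metric exposes.
scraped_start_time = 1_700_000_000.0

# A 'time_elapsed' metric reports the difference between "now" and the scraped
# timestamp, so nvidia_nim.process.start_time_seconds grows with process uptime.
elapsed = time.time() - scraped_start_time
print(f"process.start_time_seconds ~= {elapsed:.0f} seconds since process start")
```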