diff --git a/nvidia_nim/README.md b/nvidia_nim/README.md index 2341d3fc79047..939367f24e6a3 100644 --- a/nvidia_nim/README.md +++ b/nvidia_nim/README.md @@ -2,7 +2,7 @@ ## Overview -This check monitors [Nvidia NIM][1] through the Datadog Agent. +This check monitors [NVIDIA NIM][1] through the Datadog Agent. ## Setup @@ -12,15 +12,15 @@ Follow the instructions below to install and configure this check for an Agent r ### Installation -The Nvidia NIM check is included in the [Datadog Agent][2] package. No additional installation is needed on your server. +The NVIDIA NIM check is included in the [Datadog Agent][2] package. No additional installation is needed on your server. ### Configuration -Nvidia NIM provides Prometheus metrics indicating request statistics. By default, these metrics are available at http://localhost:8000/metrics. The Datadog Agent can collect the exposed metrics using this integration. Follow the instructions below to configure data collection from any or all of the components. +NVIDIA NIM provides Prometheus metrics indicating request statistics. By default, these metrics are available at http://localhost:8000/metrics. The Datadog Agent can collect the exposed metrics using this integration. Follow the instructions below to configure data collection from any or all of the components. **Note**: This check uses [OpenMetrics][10] for metric collection, which requires Python 3. -1. Edit the `nvidia_nim.d/conf.yaml` file, in the `conf.d/` folder at the root of your Agent's configuration directory to start collecting your Nvidia NIM performance data. See the [sample nvidia_nim.d/conf.yaml][4] for all available configuration options. +1. Edit the `nvidia_nim.d/conf.yaml` file, in the `conf.d/` folder at the root of your Agent's configuration directory to start collecting your NVIDIA NIM performance data. See the [sample nvidia_nim.d/conf.yaml][4] for all available configuration options. 2. [Restart the Agent][5]. 
@@ -36,11 +36,11 @@ See [metadata.csv][7] for a list of metrics provided by this integration. ### Events -The Nvidia NIM integration does not include any events. +The NVIDIA NIM integration does not include any events. ### Service Checks -The Nvidia NIM integration does not include any service checks. +The NVIDIA NIM integration does not include any service checks. See [service_checks.json][8] for a list of service checks provided by this integration. diff --git a/nvidia_nim/assets/configuration/spec.yaml b/nvidia_nim/assets/configuration/spec.yaml index c381578d9bd37..6f739175a5acc 100644 --- a/nvidia_nim/assets/configuration/spec.yaml +++ b/nvidia_nim/assets/configuration/spec.yaml @@ -12,5 +12,5 @@ files: openmetrics_endpoint.required: true openmetrics_endpoint.value.example: http://localhost:8000/metrics openmetrics_endpoint.description: | - Endpoint exposing the Nvidia NIM's Prometheus metrics. For more information refer to: + Endpoint exposing the NVIDIA NIM's Prometheus metrics. For more information refer to: https://docs.nvidia.com/nim/large-language-models/latest/observability.html \ No newline at end of file diff --git a/nvidia_nim/assets/dashboards/nvidia_nim_overview.json b/nvidia_nim/assets/dashboards/nvidia_nim_overview.json index 544b7b4ddde01..ee9a6dc7af428 100644 --- a/nvidia_nim/assets/dashboards/nvidia_nim_overview.json +++ b/nvidia_nim/assets/dashboards/nvidia_nim_overview.json @@ -1,3 +1,1163 @@ { - + "title": "NVIDIA NIM Overview", + "description": "## NVIDIA NIM\n\nThis dashboard provides observability for your NIM deployments with the NVIDIA NIM Integration.\n\nIt shows information about how many tokens your model is generating per second as well as exposing low-level details such as GPU usage and Python memory management.\n\n# Useful Links\n- [NVIDIA NIM Integration ↗](https://docs.datadoghq.com/integrations/nvidia_nim)\n- [NVIDIA NIM Metrics ↗](https://docs.nvidia.com/nim/large-language-models/latest/observability.html)\n- [vLLM 
Documentation ↗](https://docs.nvidia.com/nim/large-language-models/latest/introduction.html)", + "widgets": [ + { + "id": 4717263751542750, + "definition": { + "title": "", + "banner_img": "/static/images/logos/nvidia-nim_large.svg", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5685022835071772, + "definition": { + "type": "note", + "content": "## NVIDIA NIM\n\nThis dashboard provides observability for your NIM deployments with the NVIDIA NIM Integration.\n\nIt shows information about how many tokens your model is generating per second as well as exposing low-level details such as GPU usage and Python memory management.", + "background_color": "white", + "font_size": "14", + "text_align": "left", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 3, + "height": 3 + } + }, + { + "id": 8921963557059570, + "definition": { + "type": "note", + "content": "# Useful Links\n- [NVIDIA NIM Integration ↗](https://docs.datadoghq.com/integrations/nvidia_nim)\n- [NVIDIA NIM Metrics ↗](https://docs.nvidia.com/nim/large-language-models/latest/observability.html)\n- [NVIDIA NIM Documentation ↗](https://docs.nvidia.com/nim/large-language-models/latest/introduction.html)", + "background_color": "white", + "font_size": "14", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 3, + "y": 0, + "width": 3, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 6 + } + }, + { + "id": 2737008660122334, + "definition": { + "title": "Overview", + "background_color": "vivid_green", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 4528647613111842, + "definition": { + "type": "note", + "content": "Here you can see an overview of 
your LLM and your system activity and any NIM alerts. The service checks on the left speak to the health of your NVIDIA NIM environment, while the ones on the right report on the readiness of your dependencies.\n", + "background_color": "green", + "font_size": "14", + "text_align": "center", + "vertical_align": "top", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 1 + } + }, + { + "id": 2166067869769356, + "definition": { + "title": "NVIDIA NIM Health Check", + "title_size": "16", + "title_align": "left", + "type": "check_status", + "check": "nvidia_nim.openmetrics.health", + "grouping": "cluster", + "group_by": [ + "endpoint" + ], + "tags": [] + }, + "layout": { + "x": 0, + "y": 1, + "width": 2, + "height": 2 + } + }, + { + "id": 3037068311385910, + "definition": { + "title": "Successful Requests", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "aggregator": "sum", + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.request.success.count{$model_name}.as_count()" + } + ], + "response_format": "scalar" + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "bars" + } + }, + "layout": { + "x": 2, + "y": 1, + "width": 2, + "height": 2 + } + }, + { + "id": 5175941643906344, + "definition": { + "title": "Average Request Latency", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "formula": "query2 / query1" + } + ], + "queries": [ + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query2", + "query": "sum:nvidia_nim.e2e_request_latency.seconds.sum{$model_name}.as_count()" + }, + { + "aggregator": "avg", + "data_source": "metrics", + "name": "query1", + "query": 
"sum:nvidia_nim.e2e_request_latency.seconds.count{$model_name}.as_count()" + } + ], + "response_format": "scalar" + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "bars" + } + }, + "layout": { + "x": 4, + "y": 1, + "width": 2, + "height": 2 + } + }, + { + "id": 7873059155305294, + "definition": { + "title": "Monitor Summary", + "type": "manage_status", + "display_format": "countsAndList", + "color_preference": "text", + "hide_zero_counts": true, + "show_status": true, + "last_triggered_format": "relative", + "query": "tag:(integration:vllm)", + "sort": "status,asc", + "count": 50, + "start": 0, + "summary_type": "monitors", + "show_priority": false, + "show_last_triggered": false + }, + "layout": { + "x": 0, + "y": 3, + "width": 6, + "height": 2 + } + } + ] + }, + "layout": { + "x": 6, + "y": 0, + "width": 6, + "height": 6 + } + }, + { + "id": 2300381400792284, + "definition": { + "title": "K/V Cache Utilization and Request Metrics", + "background_color": "vivid_green", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 995705405594846, + "definition": { + "type": "note", + "content": "The GPU is the workhorse of any LLM. It is also expensive to run. 
See here how many requests your GPU is running and how much you are taking advantage of its caching mechanisms.", + "background_color": "green", + "font_size": "18", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 1 + } + }, + { + "id": 1818057086692970, + "definition": { + "title": "Requests Waiting", + "title_size": "16", + "title_align": "left", + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "name": "query1", + "data_source": "metrics", + "query": "avg:nvidia_nim.num_requests.waiting{$model_name} by {model_name}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 1, + "width": 8, + "height": 3 + } + }, + { + "id": 6975549889095854, + "definition": { + "title": "Requests Waiting", + "title_size": "16", + "title_align": "left", + "time": {}, + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "name": "query1", + "data_source": "metrics", + "query": "avg:nvidia_nim.num_requests.waiting{$model_name}", + "aggregator": "last" + } + ], + "conditional_formats": [ + { + "comparator": "=", + "value": 0, + "palette": "white_on_green" + }, + { + "comparator": ">", + "value": 5, + "palette": "white_on_yellow" + }, + { + "comparator": ">", + "value": 15, + "palette": "white_on_red" + } + ], + "formulas": [ + { + "formula": "query1", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "request" + } + } + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "yaxis": { + "include_zero": false + }, + "type": "area" + } + }, + "layout": { + "x": 8, + "y": 1, + 
"width": 4, + "height": 3 + } + }, + { + "id": 1084963586222678, + "definition": { + "title": "Requests Failed", + "title_size": "16", + "title_align": "left", + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.request.failure.count{$model_name} by {model_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 4, + "width": 8, + "height": 3 + } + }, + { + "id": 2525646835263004, + "definition": { + "title": "Requests Failed", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "formula": "query1", + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "request" + } + } + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.request.failure.count{$model_name}.as_count()", + "aggregator": "last" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": "<=", + "value": 0, + "palette": "white_on_green" + }, + { + "comparator": ">", + "value": 0, + "palette": "white_on_red" + } + ] + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "type": "bars" + } + }, + "layout": { + "x": 8, + "y": 4, + "width": 4, + "height": 3 + } + }, + { + "id": 6776207665378710, + "definition": { + "title": "Requests per second", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "requests", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": 
"query1", + "query": "sum:nvidia_nim.request.success.count{$model_name} by {model_name}.as_rate()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 7, + "width": 8, + "height": 3 + } + }, + { + "id": 3747999506353878, + "definition": { + "title": "Requests Running", + "title_size": "16", + "title_align": "left", + "time": {}, + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:nvidia_nim.num_requests.running{$model_name}", + "aggregator": "sum" + } + ], + "response_format": "scalar" + } + ], + "autoscale": true, + "precision": 0, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 8, + "y": 7, + "width": 4, + "height": 3 + } + }, + { + "id": 2448557456884510, + "definition": { + "title": "K/V Cache Utilization", + "title_size": "16", + "title_align": "left", + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:nvidia_nim.gpu_cache_usage_percent{$model_name}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 10, + "width": 8, + "height": 3 + } + }, + { + "id": 5942456558543848, + "definition": { + "title": "K/V Cache Utilization", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "percent" + } + }, + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": 
"avg:nvidia_nim.gpu_cache_usage_percent{$model_name}", + "aggregator": "last" + } + ], + "response_format": "scalar", + "conditional_formats": [ + { + "comparator": "<", + "value": 60, + "palette": "white_on_green" + }, + { + "comparator": "<=", + "value": 80, + "palette": "white_on_yellow" + }, + { + "comparator": ">", + "value": 80, + "palette": "white_on_red" + } + ] + } + ], + "autoscale": false, + "precision": 2, + "timeseries_background": { + "yaxis": { + "include_zero": false + }, + "type": "area" + } + }, + "layout": { + "x": 8, + "y": 10, + "width": 4, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 6, + "width": 12, + "height": 14 + } + }, + { + "id": 880646291321010, + "definition": { + "title": "Text Generation", + "background_color": "vivid_green", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5193429521650892, + "definition": { + "type": "note", + "content": "These metrics measure response latency, input-output token balance, and token generation efficiency to ensure performance and scalability.", + "background_color": "green", + "font_size": "18", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 1 + } + }, + { + "id": 7057133142091754, + "definition": { + "title": "Average Time to First Token (TFTT)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1 / query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.time_to_first_token.seconds.sum{$model_name} by {model_name}.as_count()" + }, + { + "data_source": "metrics", + "name": "query2", + "query": 
"sum:nvidia_nim.time_to_first_token.seconds.count{$model_name} by {model_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 1, + "width": 6, + "height": 4 + } + }, + { + "id": 1276907480965038, + "definition": { + "title": "Context vs Generated Tokens", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "Context", + "formula": "(query2 / (query2 + query1)) * 100" + }, + { + "alias": "Generated", + "formula": "(query1 / (query2 + query1)) * 100" + } + ], + "queries": [ + { + "query": "avg:nvidia_nim.request.prompt_tokens.sum{$model_name} by {model_name}.as_count()", + "data_source": "metrics", + "name": "query2" + }, + { + "query": "avg:nvidia_nim.request.generation_tokens.sum{$model_name} by {model_name}.as_count()", + "data_source": "metrics", + "name": "query1" + } + ], + "response_format": "timeseries", + "style": { + "palette": "cool", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { + "x": 6, + "y": 1, + "width": 6, + "height": 4 + } + }, + { + "id": 1973749730991538, + "definition": { + "title": "Average Inter Token Latency (ITL)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1 / query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.time_per_output_token.seconds.sum{$model_name} by {model_name}.as_count()" 
+ }, + { + "data_source": "metrics", + "name": "query2", + "query": "sum:nvidia_nim.time_per_output_token.seconds.count{$model_name} by {model_name}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "cool", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 5, + "width": 12, + "height": 4 + } + } + ] + }, + "layout": { + "x": 0, + "y": 20, + "width": 12, + "height": 10, + "is_column_break": true + } + }, + { + "id": 3331850504686986, + "definition": { + "title": "Python Garbage Collector", + "background_color": "vivid_green", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5960991703038874, + "definition": { + "type": "note", + "content": "This section helps explore how NVIDIA NIM uses memory. The garbage collector collects objects in generations. You can see how each generation of objects gets processed.", + "background_color": "green", + "font_size": "18", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 1 + } + }, + { + "id": 2577004928803106, + "definition": { + "title": "Resident Memory", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:nvidia_nim.process.resident_memory_bytes{$model_name}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { + "x": 0, + "y": 1, + "width": 4, + "height": 3 + } + }, + 
{ + "id": 4400803113146958, + "definition": { + "title": "Total Runs", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.python.gc.collections.count{$model_name} by {generation}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { + "x": 4, + "y": 1, + "width": 4, + "height": 6 + } + }, + { + "id": 5270613800707436, + "definition": { + "title": "Uncollectable Objects", + "title_size": "16", + "title_align": "left", + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.python.gc.objects.uncollectable.count{$host}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 8, + "y": 1, + "width": 4, + "height": 3 + } + }, + { + "id": 289938027327656, + "definition": { + "title": "Virtual Memory", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:nvidia_nim.process.virtual_memory_bytes{$host} by {host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": 
"dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 4, + "width": 4, + "height": 3 + } + }, + { + "id": 5699420889371520, + "definition": { + "title": "Collected Objects", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "sum:nvidia_nim.python.gc.objects.collected.count{$model_name} by {generation}.as_count()" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "bars" + } + ] + }, + "layout": { + "x": 8, + "y": 4, + "width": 4, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 30, + "width": 12, + "height": 8 + } + } + ], + "template_variables": [ + { + "name": "model_name", + "prefix": "model_name", + "available_values": [], + "default": "*" + }, + { + "name": "process", + "prefix": "process", + "available_values": [], + "default": "*" + }, + { + "name": "host", + "prefix": "host", + "available_values": [], + "default": "*" + } + ], + "layout_type": "ordered", + "notify_list": [], + "reflow_type": "fixed" } \ No newline at end of file diff --git a/nvidia_nim/assets/monitors/latency.json b/nvidia_nim/assets/monitors/latency.json index a65e134cadd44..5ad93751d27bd 100644 --- a/nvidia_nim/assets/monitors/latency.json +++ b/nvidia_nim/assets/monitors/latency.json @@ -3,7 +3,7 @@ "created_at": "2024-07-02", "last_updated_at": "2024-07-02", "title": "Average Request Latency is High", - "description": "This monitor alerts you if Nvidia NIM request latency is too high. High latency means requests are waiting long to be processed. 
This results in clients having to wait longer for their requests to complete. It also indicates your Nvidia NIM server is receiving more requests than it can comfortably handle.", + "description": "This monitor alerts you if NVIDIA NIM request latency is too high. High latency means requests are waiting long to be processed. This results in clients having to wait longer for their requests to complete. It also indicates your NVIDIA NIM server is receiving more requests than it can comfortably handle.", "tags": [ "integration:nvidia-nim" ], @@ -11,7 +11,7 @@ "name": "Average request latency is high", "type": "query alert", "query": "sum(last_15m):sum:nvidia_nim.e2e_request_latency.seconds.sum{*}.as_count() / sum:nvidia_nim.e2e_request_latency.seconds.count{*}.as_count() > 0.3", - "message": "The average latency for requests coming into your Nvidia NIM instance is higher than the threshold. This means requests are waiting too long to be processed.", + "message": "The average latency for requests coming into your NVIDIA NIM instance is higher than the threshold. This means requests are waiting too long to be processed.", "tags": [ "integration:nvidia_nim" ], diff --git a/nvidia_nim/assets/service_checks.json b/nvidia_nim/assets/service_checks.json index 67f8403194413..d0f0c79071ec4 100644 --- a/nvidia_nim/assets/service_checks.json +++ b/nvidia_nim/assets/service_checks.json @@ -1,7 +1,7 @@ [ { "agent_version": "7.61.0", - "integration": "Nvidia NIM", + "integration": "nvidia_nim", "check": "nvidia_nim.openmetrics.health", "statuses": [ "ok", @@ -11,7 +11,7 @@ "host", "endpoint" ], - "name": "Nvidia NIM OpenMetrics endpoint health", - "description": "Returns `CRITICAL` if the Agent is unable to connect to the Nvidia NIM OpenMetrics endpoint, otherwise returns `OK`." + "name": "NVIDIA NIM OpenMetrics endpoint health", + "description": "Returns `CRITICAL` if the Agent is unable to connect to the NVIDIA NIM OpenMetrics endpoint, otherwise returns `OK`." 
} ] \ No newline at end of file diff --git a/nvidia_nim/datadog_checks/nvidia_nim/check.py b/nvidia_nim/datadog_checks/nvidia_nim/check.py index 268fc0e46ea28..fe48310886506 100644 --- a/nvidia_nim/datadog_checks/nvidia_nim/check.py +++ b/nvidia_nim/datadog_checks/nvidia_nim/check.py @@ -42,7 +42,7 @@ def _submit_version_metadata(self): } self.set_metadata('version', version_raw, scheme='semver', part_map=version_parts) else: - self.log.debug("Invalid Nvidia NIM release format: %s", version) + self.log.debug("Invalid NVIDIA NIM release format: %s", version) def check(self, instance): super().check(instance) diff --git a/nvidia_nim/datadog_checks/nvidia_nim/data/conf.yaml.example b/nvidia_nim/datadog_checks/nvidia_nim/data/conf.yaml.example index 25e2383ed7b6f..c5e8d23aa4e1b 100644 --- a/nvidia_nim/datadog_checks/nvidia_nim/data/conf.yaml.example +++ b/nvidia_nim/datadog_checks/nvidia_nim/data/conf.yaml.example @@ -46,7 +46,7 @@ init_config: instances: ## @param openmetrics_endpoint - string - required - ## Endpoint exposing the Nvidia NIM's Prometheus metrics. For more information refer to: + ## Endpoint exposing the NVIDIA NIM's Prometheus metrics. 
For more information refer to: ## https://docs.nvidia.com/nim/large-language-models/latest/observability.html # - openmetrics_endpoint: http://localhost:8000/metrics diff --git a/nvidia_nim/datadog_checks/nvidia_nim/metrics.py b/nvidia_nim/datadog_checks/nvidia_nim/metrics.py index d733a111f3ff4..ab8f82df1f84d 100644 --- a/nvidia_nim/datadog_checks/nvidia_nim/metrics.py +++ b/nvidia_nim/datadog_checks/nvidia_nim/metrics.py @@ -27,7 +27,6 @@ 'request_prompt_tokens': 'request.prompt_tokens', 'request_success': 'request.success', 'request_failure': 'request.failure', - } RENAME_LABELS_MAP = { diff --git a/nvidia_nim/manifest.json b/nvidia_nim/manifest.json index 2222e4e69f2e7..08d8d0cadcf29 100644 --- a/nvidia_nim/manifest.json +++ b/nvidia_nim/manifest.json @@ -43,6 +43,9 @@ "vllm_nvext.entrypoints.openai.api_server" ] }, + "dashboards": { + "NVIDIA NIM Overview": "assets/dashboards/nvidia_nim_overview.json" + }, "monitors": { "Average Request Latency is High": "assets/monitors/latency.json" }