Add NIM saved views and logs pipeline #19113

Open · wants to merge 3 commits into kyle.neale/nvidia-nim-integration
47 changes: 47 additions & 0 deletions nvidia_nim/assets/logs/nvidia_nim.yaml
@@ -0,0 +1,47 @@
id: nvidia_nim
metric_id: nvidia_nim
backend_only: false
facets:
pipeline:
type: pipeline
name: 'NVIDIA NIM'
enabled: true
filter:
query: source:nvidia_nim
processors:
- type: grok-parser
name: Parse timestamp, level, logger, and message
enabled: true
source: message
samples:
- "2024-10-30 21:56:25,295 [INFO] PyTorch version 2.3.1 available."
- "2024-10-30 21:58:26,914 [WARNING] [TRT-LLM] [W] Logger level already set from environment. Discard new verbosity: error"
- "INFO 2024-10-30 21:56:28.831 ngc_injector.py:152] Valid profile: e45b4b991bbc51d0df3ce53e87060fc3a7f76555406ed534a8479c6faa706987 (tensorrt_llm-a10g-bf16-tp4-latency) on GPUs [0, 1, 2, 3]"
- "WARNING 2024-10-30 21:58:27.670 arg_utils.py:775] Chunked prefill is enabled by default for models with max_model_len > 32K. Currently, chunked prefill might not work with some features or models. If you encounter any issues, please disable chunked prefill by setting --enable-chunked-prefill=False."
- "[1730325496.647520] [dd317ab0670e:126 :0] parser.c:2305 UCX WARN (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)"
grok:
matchRules: |
nvidia_nim %{date("yyyy-MM-dd HH:mm:ss,SSS"):timestamp} \[%{_level}\] \[%{data:component_name}\] \[%{word}\] %{_msg}
nvidia_nim_logger %{_level} %{date("yyyy-MM-dd HH:mm:ss.SSS"):timestamp} %{_logger_name}:%{_logger_line}\] %{_msg}
generic_log %{date("yyyy-MM-dd HH:mm:ss,SSS"):timestamp} \[%{_level}\] %{_msg}
component_log \[%{number:timestamp}\]\W+\[%{data:container_id}:%{number:pid}\W+:%{number:thread_id}\W+%{_logger_name}:%{_logger_line} %{word:component_name}\W+%{_level}\W+\(%{_msg}\)
supportRules: |
_logger_line %{data:logger.line}
_logger_name %{data:logger.name}
_level %{word:level}
_msg %{data:msg}
- type: message-remapper
name: Define `msg` as the official message of the log
enabled: true
sources:
- msg
- type: date-remapper
name: Define `timestamp` as the official date of the log
enabled: true
sources:
- timestamp
- type: status-remapper
name: Define `level` as the official status of the log
enabled: true
sources:
- level
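For reference, here is a rough Python-regex approximation of the first match rule (`nvidia_nim`), run against the second log sample above. This is an illustrative sketch, not Datadog's grok engine: the `%{date(...)}` matcher and `%{data}` semantics differ in detail, but the field extraction follows the same idea.

    import re

    # Approximates the grok rule:
    # nvidia_nim %{date("yyyy-MM-dd HH:mm:ss,SSS"):timestamp} \[%{_level}\] \[%{data:component_name}\] \[%{word}\] %{_msg}
    pattern = re.compile(
        r"(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) "
        r"\[(?P<level>\w+)\] \[(?P<component_name>.*?)\] \[\w+\] (?P<msg>.*)"
    )

    line = ("2024-10-30 21:58:26,914 [WARNING] [TRT-LLM] [W] "
            "Logger level already set from environment. Discard new verbosity: error")
    print(pattern.match(line).groupdict())
    # {'timestamp': '2024-10-30 21:58:26,914', 'level': 'WARNING',
    #  'component_name': 'TRT-LLM',
    #  'msg': 'Logger level already set from environment. Discard new verbosity: error'}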
58 changes: 58 additions & 0 deletions nvidia_nim/assets/logs/nvidia_nim_tests.yaml
@@ -0,0 +1,58 @@
id: "nvidia_nim"
tests:
# This log sample satisfies the validation.
-
sample: |-
"2024-10-30 21:56:25,295 [INFO] PyTorch version 2.3.1 available."
result:
custom:
level: "INFO"
timestamp: 1730325385295
message: "PyTorch version 2.3.1 available."
status: "info"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325385295
-
sample: |-
"2024-10-30 21:58:26,914 [WARNING] [TRT-LLM] [W] Logger level already set from environment. Discard new verbosity: error"
result:
custom:
level: "WARNING"
timestamp: 1730325506914
component_name: "TRT-LLM"
message: "Logger level already set from environment. Discard new verbosity: error"
status: "warning"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325506914
-
sample: |-
"INFO 2024-10-30 21:56:28.831 ngc_injector.py:152] Valid profile: e45b4b991bbc51d0df3ce53e87060fc3a7f76555406ed534a8479c6faa706987 (tensorrt_llm-a10g-bf16-tp4-latency) on GPUs [0, 1, 2, 3]"
result:
custom:
level: "INFO"
timestamp: 1730325388831
logger:
line: 152
name: "ngc_injector.py"
message: "Valid profile: e45b4b991bbc51d0df3ce53e87060fc3a7f76555406ed534a8479c6faa706987 (tensorrt_llm-a10g-bf16-tp4-latency) on GPUs [0, 1, 2, 3]"
status: "info"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325388831
-
sample: |-
"WARNING 2024-10-30 21:58:27.670 arg_utils.py:775] Chunked prefill is enabled by default for models with max_model_len > 32K. Currently, chunked prefill might not work with some features or models. If you encounter any issues, please disable chunked prefill by setting --enable-chunked-prefill=False."
result:
custom:
level: "WARNING"
timestamp: 1730325507670
logger:
line: 775
name: "arg_utils.py"
message: "Chunked prefill is enabled by default for models with max_model_len > 32K. Currently, chunked prefill might not work with some features or models. If you encounter any issues, please disable chunked prefill by setting --enable-chunked-prefill=False."
status: "warning"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325507670
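The expected `timestamp` values in these tests are Unix epoch milliseconds. As a quick sanity check of the first one, assuming the pipeline interprets the raw log timestamps as UTC (a sketch, not part of the test harness):

    from datetime import datetime, timezone

    # "2024-10-30 21:56:25,295" as UTC epoch milliseconds
    dt = datetime(2024, 10, 30, 21, 56, 25, 295_000, tzinfo=timezone.utc)
    print(round(dt.timestamp() * 1000))  # 1730325385295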
20 changes: 20 additions & 0 deletions nvidia_nim/assets/saved_views/nim_errors.json
@@ -0,0 +1,20 @@
{
"name": "NVIDIA NIM Errors",
"options": {
"columns": [
"host",
"service"
],
"message_display": "inline",
"show_date_column": true,
"show_message_column": true,
"show_timeline": true
},
"page": "stream",
"query": "source:nvidia_nim status:error",
"timerange": {
"interval_ms": 900000
},
"type": "logs",
"visible_facets": []
}
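The `interval_ms` of 900000 appears to set the saved view's default time window; converted, that is 15 minutes:

    # 900000 ms expressed in minutes
    print(900_000 / 1000 / 60)  # 15.0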
3 changes: 3 additions & 0 deletions nvidia_nim/manifest.json
@@ -49,6 +49,9 @@
},
"monitors": {
"Average Request Latency is High": "assets/monitors/latency.json"
},
"saved_views": {
"NVIDIA NIM Errors": "assets/saved_views/nim_errors.json"
}
},
"author": {