diff --git a/anthropic/assets/dashboards/llm_observability_overview_dashboard.json b/anthropic/assets/dashboards/llm_observability_overview_dashboard.json index 1ae13c94c991d..ca4edc0635d0c 100644 --- a/anthropic/assets/dashboards/llm_observability_overview_dashboard.json +++ b/anthropic/assets/dashboards/llm_observability_overview_dashboard.json @@ -1 +1 @@ -{"title":"LLM Observability Operational Insights Overview","description":"Track LLM model usage, cost, latency, and performance of your LLM applications.\n\n[LLM application traces ↗](/llm/traces) | [Docs ↗](https://docs.datadoghq.com/tracing/llm_observability/) | [Instrument your LLM application ↗](https://docs.datadoghq.com/tracing/llm_observability/trace_an_llm_application/)\n\nEmpty dashboard? Start by annotating your LLM application’s LLM call: [Python SDK doc ↗](https://docs.datadoghq.com/tracing/llm_observability/sdk/#annotating-a-span) | [API doc ↗](https://docs.datadoghq.com/tracing/llm_observability/api/?tab=model#spans-api)\n\nFor insights into LLM chains and spans, go to [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights) Dashboard \n\nFor insights into out-of-the-box, user sessions, and custom evaluations, go to [LLM Observability Evaluation](/dash/integration/llm_evaluations) Dashboard ","widgets":[{"id":3444086504619146,"definition":{"title":"","banner_img":"/static/images/integration_dashboard/llm-observability_hero-1.jpeg","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":2984486576169188,"definition":{"type":"note","content":"Track LLM model usage, cost, latency, and performance of your LLM applications.\n\n[LLM application traces ↗](/llm/traces) | [Docs ↗](https://docs.datadoghq.com/tracing/llm_observability/) | [Instrument your LLM application ↗](https://docs.datadoghq.com/tracing/llm_observability/trace_an_llm_application/)\n\nEmpty dashboard? 
Start by annotating your LLM application’s LLM call: [Python SDK doc ↗](https://docs.datadoghq.com/tracing/llm_observability/sdk/#annotating-a-span) | [API doc ↗](https://docs.datadoghq.com/tracing/llm_observability/api/?tab=model#spans-api)\n\nFor insights into LLM chains and spans, go to [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights) Dashboard \n\nFor insights into out-of-the-box, user sessions, and custom evaluations, go to [LLM Observability Evaluation](/dash/integration/llm_evaluations) Dashboard ","background_color":"white","font_size":"14","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":6,"height":3}}]},"layout":{"x":0,"y":0,"width":6,"height":6}},{"id":2291155106556328,"definition":{"title":"Monitors","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6165725382794250,"definition":{"type":"note","content":"Create an LLM Observability recommended monitor [here](/llm/traces?llmPanels=%5B%7B\"t\"%3A\"monitorPanel\"%7D%5D)!","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":true,"tick_pos":"50%","tick_edge":"bottom","has_padding":true},"layout":{"x":0,"y":0,"width":6,"height":1}},{"id":7575597858473386,"definition":{"title":"LLM Observability Monitors","type":"manage_status","display_format":"countsAndList","color_preference":"background","hide_zero_counts":true,"show_status":true,"last_triggered_format":"relative","query":"metric:ml_obs.trace.error $env $service","sort":"status,asc","count":50,"start":0,"summary_type":"monitors","show_priority":false,"show_last_triggered":false},"layout":{"x":0,"y":1,"width":6,"height":4}}]},"layout":{"x":6,"y":0,"width":6,"height":6}},{"id":1310295051024350,"definition":{"type":"note","content":"Tip: select an **ml_app** and **version** at the top of the dashboard for more granular querying and 
better context links!","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":false,"tick_pos":"50%","tick_edge":"right","has_padding":true},"layout":{"x":0,"y":0,"width":12,"height":1}},{"id":4149521700572012,"definition":{"title":"Overview","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6428476651943710,"definition":{"title":"Active ML Apps with LLM Calls","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query7"}],"queries":[{"name":"query7","data_source":"metrics","query":"sum:ml_obs.span{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_by":"values","line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":0,"width":6,"height":3}},{"id":5975795986461220,"definition":{"title":"Active Versions with LLM Calls","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query7"}],"queries":[{"name":"query7","data_source":"metrics","query":"sum:ml_obs.span{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {version}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_by":"values","line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":0,"width":6,"height":3}},{"id":3198293231078300,"definition":{"title":"Trace Success Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"fraction"}},"formula":"query1 / query2"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @status:ok"}},{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","conditional_formats":[{"comparator":">","value":0.95,"palette":"black_on_light_green"},{"comparator":">=","value":0.5,"palette":"black_on_light_yellow"},{"comparator":"<=","value":0.5,"palette":"black_on_light_red"}]}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} @status:ok {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"yaxis":{"include_zero":true},"type":"area"}},"layout":{"x":0,"y":3,"width":4,"height":3}},{"id":246716173430280,"definition":{"title":"Total Number of Traces","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"traces"}},"formula":"query7"}],"queries":[{"data_source":"llm_observability","name":"query7","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service $ml_app 
@meta.model_provider:$model_provider.value"}}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":4,"y":3,"width":4,"height":3}},{"id":4055455815898662,"definition":{"title":"Total Number of Spans","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"spans"}},"formula":"query7"}],"queries":[{"data_source":"llm_observability","name":"query7","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service $ml_app @meta.model_provider:$model_provider.value"}}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":8,"y":3,"width":4,"height":3}},{"id":2941224801013446,"definition":{"title":"Trace Error Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"fraction"}},"formula":"1 - (query1 / query2)"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @status:ok"}},{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service 
@meta.model_provider:$model_provider.value"}}],"response_format":"scalar","conditional_formats":[{"comparator":">","value":0,"palette":"black_on_light_red"}]}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} @status:error {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"yaxis":{"include_zero":true},"type":"area"}},"layout":{"x":0,"y":6,"width":4,"height":4}},{"id":327704243712338,"definition":{"title":"Completion Token Generation Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/sec"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service,$model_provider}.as_count()","aggregator":"sum"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span.duration{$ml_app,$version,$env,$service,$model_provider}.as_count()","aggregator":"sum"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":4,"y":6,"width":4,"height":4}},{"id":8807433954813968,"definition":{"title":"Time To First Token","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:ml_obs.span.time_to_first_token{$ml_app,$version,$env,$service, $model_provider}","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":8,"y":6,"width":4,"height":3}},{"id":4628057133288278,"definition":{"type":"note","content":"[Annotate](https://docs.datadoghq.com/llm_observability/trace_an_llm_application/?tab=decorators#annotating-spans) your root span with `time_to_first_token` in seconds to populate the widget above.","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":true,"tick_pos":"50%","tick_edge":"top","has_padding":true},"layout":{"x":8,"y":9,"width":4,"height":1}}]},"layout":{"x":0,"y":7,"width":12,"height":11}},{"id":6925805930371330,"definition":{"title":"LLM Calls","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":7368854359822672,"definition":{"type":"note","content":"Break down your **LLM spans**, which represent an invocation call to an LLM, by model and model provider, and track tokens and errors across these calls.","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":3,"height":2}},{"id":7403444364106132,"definition":{"title":"Total LLM Requests","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @meta.span.kind:llm"}}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"yaxis":{},"type":"bars"}},"layout":{"x":3,"y":0,"width":3,"height":2}},{"id":5240968989960074,"definition":{"title":"LLM Call Response Time 
(p50)","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"aggregator":"percentile","data_source":"metrics","name":"query1","query":"p50:ml_obs.span.duration{$env,$service,$version,$ml_app, $model_provider, span_kind:llm}"}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":6,"y":0,"width":3,"height":2}},{"id":3482460852692924,"definition":{"title":"LLM Call Response Time (p95)","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"aggregator":"percentile","data_source":"metrics","name":"query1","query":"p95:ml_obs.span.duration{$env,$service,$version,$model_provider, $ml_app, span_kind:llm}"}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":9,"y":0,"width":3,"height":2}},{"id":3151345683848596,"definition":{"title":"Model Usage","title_size":"16","title_align":"left","requests":[{"formulas":[{"formula":"query2"}],"queries":[{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[{"facet":"@meta.model_provider","limit":10,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@meta.model_name","limit":10,"sort":{"order":"desc","aggregation":"count"}}],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @meta.span.kind:llm"}}],"response_format":"scalar","style":{"palette":"datadog16"},"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"type":"sunburst","hide_total":false,"legend":{"type":"table"},"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* 
{{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":2,"width":12,"height":4}},{"id":2688839849605810,"definition":{"title":"LLM Span Error Rate","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.error{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}.as_count()"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"0 < y < 0.05","display_type":"ok dashed"},{"value":"0.05 < y < 0.2","display_type":"warning dashed"},{"value":"0.2 < y < 1","display_type":"error dashed"}],"custom_links":[{"label":"View error traces in LLM Observability","link":"/llm/traces?query=@status:error {{$version}} {{$ml_app}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":6,"width":6,"height":3}},{"id":8491486279677864,"definition":{"title":"Total Token Usage by Prompt & Completion","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Prompt Tokens","formula":"query1"},{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Completion Tokens","formula":"query2"},{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Total 
Tokens","formula":"query3"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"},{"data_source":"metrics","name":"query3","query":"sum:ml_obs.span.llm.total.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}}%20{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":6,"width":6,"height":3}},{"id":523785459773502,"definition":{"title":"LLM Span Error Rate by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.error{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"0 < y < 0.05","display_type":"ok dashed"},{"value":"0.05 < y < 0.2","display_type":"warning dashed"},{"value":"0.2 < y < 1","display_type":"error dashed"}],"custom_links":[{"label":"View error traces in LLM Observability","link":"/llm/traces?query=@status:error {{$version}} 
{{$ml_app}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":9,"width":6,"height":3}},{"id":4007270851222420,"definition":{"title":"Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_table","requests":[{"queries":[{"data_source":"metrics","name":"query1","query":"avg:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"avg:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}","aggregator":"avg"}],"response_format":"scalar","text_formats":[[],[{"match":{"type":"is","value":""},"palette":"white_on_green"}]],"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]},"formulas":[{"cell_display_mode":"trend","alias":"Prompt Tokens ","cell_display_mode_options":{"trend_type":"area","y_scale":"independent"},"formula":"query1"},{"cell_display_mode":"trend","cell_display_mode_options":{"trend_type":"area","y_scale":"independent"},"alias":"Completion Tokens","formula":"query2"}]}],"has_search_bar":"auto","custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":9,"width":6,"height":3}},{"id":3075505179106388,"definition":{"title":"LLM Call Response Time","title_size":"16","title_align":"left","type":"query_table","requests":[{"queries":[{"data_source":"metrics","name":"query1","query":"p50:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query2","query":"p75:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by 
{model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query3","query":"p95:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query5","query":"p99:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"}],"response_format":"scalar","sort":{"count":500,"order_by":[{"type":"formula","index":3,"order":"desc"}]},"formulas":[{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p50","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p75","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query2"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p95","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query3"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p99","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query5"}]}],"has_search_bar":"auto","custom_links":[{"label":"View related 
traces in LLM Observability","link":"/llm/traces?query=@event_type:span @parent_id:* @{{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":12,"width":6,"height":3}},{"id":3835649873165936,"definition":{"title":"Avg Input Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/call"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{$ml_app,span_kind:llm,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type%3Aspan%20@parent_id%3A*%20@meta.span.kind%3Allm&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}],"precision":2},"layout":{"x":6,"y":12,"width":3,"height":3}},{"id":3960916621633432,"definition":{"title":"Avg Output Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/call"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{$ml_app,span_kind:llm,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM 
Observability","link":"/llm/traces?query=@type%3Aspan%20@parent_id%3A*%20@meta.span.kind%3Allm&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}],"precision":2},"layout":{"x":9,"y":12,"width":3,"height":3}},{"id":8614866922979184,"definition":{"title":"LLM Call Response Time by ML App (p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"seconds"}},"alias":"p95","formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"p95:ml_obs.span.duration{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":15,"width":6,"height":3}},{"id":8678869962893096,"definition":{"title":"LLM Call Response Time by ML App (p99)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"seconds"}},"alias":"p99","formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"p99:ml_obs.span.duration{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM 
Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":15,"width":6,"height":3}}]},"layout":{"x":0,"y":18,"width":12,"height":19,"is_column_break":true}},{"id":3126135057967266,"definition":{"title":"Traces","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":8485748560763588,"definition":{"type":"note","content":"A **trace** represents the entire execution flow of your LLM Application, spanning from the moment a request is first received to when your agent responds to a user. Trace metrics provide insight into your LLM application’s performance end-to-end.\n\nYou can view a detailed breakdown of your LLM chains and all of your spans in [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights)\n","background_color":"blue","font_size":"16","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":5,"height":3}},{"id":6387614402128510,"definition":{"title":"Total Traces by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"traces"}},"formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"sum:ml_obs.trace{$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":0,"width":7,"height":3}},{"id":2819887241075640,"definition":{"title":"P95 Trace Execution Time by Span Kind","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"nanosecond"}},"formula":"query1"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"pc95","metric":"@duration"},"group_by":[{"facet":"@meta.span.kind","limit":10,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}}],"search":{"query":"@event_type:span -@parent_id:undefined $ml_app $version $env $service $model_provider"}}],"response_format":"scalar","style":{"palette":"datadog16"},"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"type":"sunburst","legend":{"type":"inline"},"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}} @meta.span.kind:{{@meta.span.kind.value}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":3,"width":2,"height":3}},{"id":2413005551640098,"definition":{"title":"Trace Error Rate by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Error Rate","number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * (query1 / query2)"}],"queries":[{"name":"query1","data_source":"metrics","query":"sum:ml_obs.trace.error{$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"},{"name":"query2","data_source":"metrics","query":"sum:ml_obs.trace{$ml_app,$version,$env,$service, $model_provider} by 
{ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"red","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"y > 20","display_type":"error dashed"},{"value":"5 < y < 20","display_type":"warning dashed"},{"value":"0 < y < 5","display_type":"ok dashed"},{"value":"y = 0","display_type":"error dashed"}],"custom_links":[{"label":"View related LLM Observability spans","link":"/llm/traces?query={{$ml_app}} @status:error {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":2,"y":3,"width":3,"height":3}},{"id":3306729246543562,"definition":{"title":"Trace Duration (p50, p75, p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95","formula":"query5"},{"alias":"p75","formula":"query1"},{"alias":"p50","formula":"query2"}],"queries":[{"name":"query5","data_source":"metrics","query":"p95:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"},{"name":"query1","data_source":"metrics","query":"p75:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"},{"name":"query2","data_source":"metrics","query":"p50:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}}%20{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":3,"width":7,"height":3}},{"id":2208916880196188,"definition":{"title":"Trace Execution Time by Span 
(p95)","type":"toplist","requests":[{"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"pc95","metric":"@duration"},"group_by":[{"facet":"@meta.span.kind","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}},{"facet":"@name","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}},{"facet":"@ml_app","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}}],"search":{"query":"@event_type:span -@parent_id:undefined $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"nanosecond"}},"formula":"query1"}],"sort":{"count":3375,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"custom_links":[{"label":"View related LLM Observability Spans","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.span.kind:{{@meta.span.kind.value}} @name:{{@name.value}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"style":{"display":{"type":"stacked","legend":"inline"},"palette":"datadog16"}},"layout":{"x":0,"y":6,"width":5,"height":3}},{"id":3301474291795902,"definition":{"title":"Trace Total Duration by ML App (p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95","formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"p95:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider} by {ml_app}"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":6,"width":7,"height":3}},{"id":5506235694494168,"definition":{"title":"Spans with Errors by ML App","type":"toplist","requests":[{"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[{"facet":"@meta.span.kind","limit":15,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@name","limit":15,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@ml_app","limit":15,"sort":{"order":"desc","aggregation":"count"}}],"search":{"query":"@event_type:span -@parent_id:undefined @status:error $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"errors"}},"formula":"query1"}],"sort":{"count":3375,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"custom_links":[{"label":"View related LLM Observability Spans","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.span.kind:{{@meta.span.kind.value}} @name:{{@name.value}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"style":{"display":{"type":"stacked","legend":"inline"},"palette":"red"}},"layout":{"x":0,"y":9,"width":5,"height":3}},{"id":8691399239758710,"definition":{"title":"Trace Total Duration by ML App (p99)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p99","formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"p99:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider} by 
{ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":9,"width":7,"height":3}}]},"layout":{"x":0,"y":37,"width":12,"height":13}}],"template_variables":[{"name":"ml_app","prefix":"ml_app","available_values":[],"default":"*"},{"name":"version","prefix":"version","available_values":[],"default":"*"},{"name":"env","prefix":"env","available_values":[],"default":"*"},{"name":"service","prefix":"service","available_values":[],"default":"*"},{"name":"model_provider","prefix":"model_provider","available_values":[],"default":"*"}],"layout_type":"ordered","notify_list":[],"reflow_type":"fixed"} \ No newline at end of file +{"title":"LLM Observability Operational Insights Overview","description":"Track LLM model usage, cost, latency, and performance of your LLM applications.\n\n[LLM application traces ↗](/llm/traces) | [Docs ↗](https://docs.datadoghq.com/tracing/llm_observability/) | [Instrument your LLM application ↗](https://docs.datadoghq.com/tracing/llm_observability/trace_an_llm_application/)\n\nEmpty dashboard? 
Start by annotating your LLM application’s LLM call: [Python SDK doc ↗](https://docs.datadoghq.com/llm_observability/setup/sdk/python/) | [API doc ↗](https://docs.datadoghq.com/tracing/llm_observability/api/?tab=model#spans-api)\n\nFor insights into LLM chains and spans, go to [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights) Dashboard \n\nFor insights into out-of-the-box, user sessions, and custom evaluations, go to [LLM Observability Evaluation](/dash/integration/llm_evaluations) Dashboard ","widgets":[{"id":3444086504619146,"definition":{"title":"","banner_img":"/static/images/integration_dashboard/llm-observability_hero-1.jpeg","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":2984486576169188,"definition":{"type":"note","content":"Track LLM model usage, cost, latency, and performance of your LLM applications.\n\n[LLM application traces ↗](/llm/traces) | [Docs ↗](https://docs.datadoghq.com/tracing/llm_observability/) | [Instrument your LLM application ↗](https://docs.datadoghq.com/tracing/llm_observability/trace_an_llm_application/)\n\nEmpty dashboard? 
Start by annotating your LLM application’s LLM call: [Python SDK doc ↗](https://docs.datadoghq.com/llm_observability/setup/sdk/python/) | [API doc ↗](https://docs.datadoghq.com/tracing/llm_observability/api/?tab=model#spans-api)\n\nFor insights into LLM chains and spans, go to [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights) Dashboard \n\nFor insights into out-of-the-box, user sessions, and custom evaluations, go to [LLM Observability Evaluation](/dash/integration/llm_evaluations) Dashboard ","background_color":"white","font_size":"14","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":6,"height":3}}]},"layout":{"x":0,"y":0,"width":6,"height":6}},{"id":2291155106556328,"definition":{"title":"Monitors","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6165725382794250,"definition":{"type":"note","content":"Create an LLM Observability recommended monitor [here](llm/traces?llmPanels=%5B%7B\"t\"%3A\"monitorPanel\"%7D%5D)!","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":true,"tick_pos":"50%","tick_edge":"bottom","has_padding":true},"layout":{"x":0,"y":0,"width":6,"height":1}},{"id":7575597858473386,"definition":{"title":"LLM Observability Monitors","type":"manage_status","display_format":"countsAndList","color_preference":"background","hide_zero_counts":true,"show_status":true,"last_triggered_format":"relative","query":"metric:ml_obs.trace.error $env $service","sort":"status,asc","count":50,"start":0,"summary_type":"monitors","show_priority":false,"show_last_triggered":false},"layout":{"x":0,"y":1,"width":6,"height":4}}]},"layout":{"x":6,"y":0,"width":6,"height":6}},{"id":1310295051024350,"definition":{"type":"note","content":"Tip: select an **ml_app** and **version** at the top of the dashboard for more granular querying and 
better context links!","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":false,"tick_pos":"50%","tick_edge":"right","has_padding":true},"layout":{"x":0,"y":0,"width":12,"height":1}},{"id":4149521700572012,"definition":{"title":"Overview","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6428476651943710,"definition":{"title":"Active ML Apps with LLM Calls","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query7"}],"queries":[{"name":"query7","data_source":"metrics","query":"sum:ml_obs.span{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_by":"values","line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":0,"width":6,"height":3}},{"id":5975795986461220,"definition":{"title":"Active Versions with LLM Calls","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query7"}],"queries":[{"name":"query7","data_source":"metrics","query":"sum:ml_obs.span{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {version}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_by":"values","line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":0,"width":6,"height":3}},{"id":3198293231078300,"definition":{"title":"Trace Success Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"fraction"}},"formula":"query1 / query2"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @status:ok"}},{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","conditional_formats":[{"comparator":">","value":0.95,"palette":"black_on_light_green"},{"comparator":">=","value":0.5,"palette":"black_on_light_yellow"},{"comparator":"<=","value":0.5,"palette":"black_on_light_red"}]}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} @status:ok {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"yaxis":{"include_zero":true},"type":"area"}},"layout":{"x":0,"y":3,"width":4,"height":3}},{"id":246716173430280,"definition":{"title":"Total Number of Traces","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"traces"}},"formula":"query7"}],"queries":[{"data_source":"llm_observability","name":"query7","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service $ml_app 
@meta.model_provider:$model_provider.value"}}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":4,"y":3,"width":4,"height":3}},{"id":4055455815898662,"definition":{"title":"Total Number of Spans","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"spans"}},"formula":"query7"}],"queries":[{"data_source":"llm_observability","name":"query7","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service $ml_app @meta.model_provider:$model_provider.value"}}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":8,"y":3,"width":4,"height":3}},{"id":2941224801013446,"definition":{"title":"Trace Error Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"fraction"}},"formula":"1 - (query1 / query2)"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @status:ok"}},{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service 
@meta.model_provider:$model_provider.value"}}],"response_format":"scalar","conditional_formats":[{"comparator":">","value":0,"palette":"black_on_light_red"}]}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} @status:error {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"yaxis":{"include_zero":true},"type":"area"}},"layout":{"x":0,"y":6,"width":4,"height":4}},{"id":327704243712338,"definition":{"title":"Completion Token Generation Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/sec"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service,$model_provider}.as_count()","aggregator":"sum"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span.duration{$ml_app,$version,$env,$service,$model_provider}.as_count()","aggregator":"sum"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":4,"y":6,"width":4,"height":4}},{"id":8807433954813968,"definition":{"title":"Time To First Token","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:ml_obs.span.time_to_first_token{$ml_app,$version,$env,$service, $model_provider}","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":8,"y":6,"width":4,"height":3}},{"id":4628057133288278,"definition":{"type":"note","content":"[Annotate](https://docs.datadoghq.com/llm_observability/trace_an_llm_application/?tab=decorators#annotating-spans) your root span with `time_to_first_token` in seconds to populate the widget above.","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":true,"tick_pos":"50%","tick_edge":"top","has_padding":true},"layout":{"x":8,"y":9,"width":4,"height":1}}]},"layout":{"x":0,"y":7,"width":12,"height":11}},{"id":6925805930371330,"definition":{"title":"LLM Calls","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":7368854359822672,"definition":{"type":"note","content":"Break down your **LLM spans**, which represent an invocation call to an LLM, by model and model provider, and track tokens and errors across these calls.","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":3,"height":2}},{"id":7403444364106132,"definition":{"title":"Total LLM Requests","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @meta.span.kind:llm"}}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"yaxis":{},"type":"bars"}},"layout":{"x":3,"y":0,"width":3,"height":2}},{"id":5240968989960074,"definition":{"title":"LLM Call Response Time 
(p50)","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"aggregator":"percentile","data_source":"metrics","name":"query1","query":"p50:ml_obs.span.duration{$env,$service,$version,$ml_app, $model_provider, span_kind:llm}"}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":6,"y":0,"width":3,"height":2}},{"id":3482460852692924,"definition":{"title":"LLM Call Response Time (p95)","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"aggregator":"percentile","data_source":"metrics","name":"query1","query":"p95:ml_obs.span.duration{$env,$service,$version,$model_provider, $ml_app, span_kind:llm}"}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":9,"y":0,"width":3,"height":2}},{"id":3151345683848596,"definition":{"title":"Model Usage","title_size":"16","title_align":"left","requests":[{"formulas":[{"formula":"query2"}],"queries":[{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[{"facet":"@meta.model_provider","limit":10,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@meta.model_name","limit":10,"sort":{"order":"desc","aggregation":"count"}}],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @meta.span.kind:llm"}}],"response_format":"scalar","style":{"palette":"datadog16"},"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"type":"sunburst","hide_total":false,"legend":{"type":"table"},"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* 
{{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":2,"width":12,"height":4}},{"id":2688839849605810,"definition":{"title":"LLM Span Error Rate","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.error{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}.as_count()"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"0 < y < 0.05","display_type":"ok dashed"},{"value":"0.05 < y < 0.2","display_type":"warning dashed"},{"value":"0.2 < y < 1","display_type":"error dashed"}],"custom_links":[{"label":"View error traces in LLM Observability","link":"/llm/traces?query=@status:error {{$version}} {{$ml_app}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":6,"width":6,"height":3}},{"id":8491486279677864,"definition":{"title":"Total Token Usage by Prompt & Completion","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Prompt Tokens","formula":"query1"},{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Completion Tokens","formula":"query2"},{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Total 
Tokens","formula":"query3"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"},{"data_source":"metrics","name":"query3","query":"sum:ml_obs.span.llm.total.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}}%20{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":6,"width":6,"height":3}},{"id":523785459773502,"definition":{"title":"LLM Span Error Rate by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.error{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"0 < y < 0.05","display_type":"ok dashed"},{"value":"0.05 < y < 0.2","display_type":"warning dashed"},{"value":"0.2 < y < 1","display_type":"error dashed"}],"custom_links":[{"label":"View error traces in LLM Observability","link":"/llm/traces?query=@status:error {{$version}} 
{{$ml_app}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":9,"width":6,"height":3}},{"id":4007270851222420,"definition":{"title":"Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_table","requests":[{"queries":[{"data_source":"metrics","name":"query1","query":"avg:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"avg:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}","aggregator":"avg"}],"response_format":"scalar","text_formats":[[],[{"match":{"type":"is","value":""},"palette":"white_on_green"}]],"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]},"formulas":[{"cell_display_mode":"trend","alias":"Prompt Tokens ","cell_display_mode_options":{"trend_type":"area","y_scale":"independent"},"formula":"query1"},{"cell_display_mode":"trend","cell_display_mode_options":{"trend_type":"area","y_scale":"independent"},"alias":"Completion Tokens","formula":"query2"}]}],"has_search_bar":"auto","custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":9,"width":6,"height":3}},{"id":3075505179106388,"definition":{"title":"LLM Call Response Time","title_size":"16","title_align":"left","type":"query_table","requests":[{"queries":[{"data_source":"metrics","name":"query1","query":"p50:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query2","query":"p75:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by 
{model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query3","query":"p95:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query5","query":"p99:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"}],"response_format":"scalar","sort":{"count":500,"order_by":[{"type":"formula","index":3,"order":"desc"}]},"formulas":[{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p50","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p75","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query2"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p95","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query3"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p99","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query5"}]}],"has_search_bar":"auto","custom_links":[{"label":"View related 
traces in LLM Observability","link":"/llm/traces?query=@event_type:span @parent_id:* @{{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":12,"width":6,"height":3}},{"id":3835649873165936,"definition":{"title":"Avg Input Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/call"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{$ml_app,span_kind:llm,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type%3Aspan%20@parent_id%3A*%20@meta.span.kind%3Allm&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}],"precision":2},"layout":{"x":6,"y":12,"width":3,"height":3}},{"id":3960916621633432,"definition":{"title":"Avg Output Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/call"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{$ml_app,span_kind:llm,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM 
Observability","link":"/llm/traces?query=@type%3Aspan%20@parent_id%3A*%20@meta.span.kind%3Allm&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}],"precision":2},"layout":{"x":9,"y":12,"width":3,"height":3}},{"id":8614866922979184,"definition":{"title":"LLM Call Response Time by ML App (p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"seconds"}},"alias":"p95","formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"p95:ml_obs.span.duration{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":15,"width":6,"height":3}},{"id":8678869962893096,"definition":{"title":"LLM Call Response Time by ML App (p99)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"seconds"}},"alias":"p99","formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"p99:ml_obs.span.duration{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM 
Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":15,"width":6,"height":3}}]},"layout":{"x":0,"y":18,"width":12,"height":19,"is_column_break":true}},{"id":3126135057967266,"definition":{"title":"Traces","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":8485748560763588,"definition":{"type":"note","content":"A **trace** represents the entire execution flow of your LLM Application, spanning from the moment a request is first received to when your agent responds to a user. Trace metrics provide insight into your LLM application’s performance end-to-end.\n\nYou can view a detailed breakdown of your LLM chains and all of your spans in the [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights) dashboard.\n","background_color":"blue","font_size":"16","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":5,"height":3}},{"id":6387614402128510,"definition":{"title":"Total Traces by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"traces"}},"formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"sum:ml_obs.trace{$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":0,"width":7,"height":3}},{"id":2819887241075640,"definition":{"title":"P95 Trace Execution Time by Span Kind","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"nanosecond"}},"formula":"query1"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"pc95","metric":"@duration"},"group_by":[{"facet":"@meta.span.kind","limit":10,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}}],"search":{"query":"@event_type:span -@parent_id:undefined $ml_app $version $env $service $model_provider"}}],"response_format":"scalar","style":{"palette":"datadog16"},"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"type":"sunburst","legend":{"type":"inline"},"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}} @meta.span.kind:{{@meta.span.kind.value}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":3,"width":2,"height":3}},{"id":2413005551640098,"definition":{"title":"Trace Error Rate by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Error Rate","number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * (query1 / query2)"}],"queries":[{"name":"query1","data_source":"metrics","query":"sum:ml_obs.trace.error{$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"},{"name":"query2","data_source":"metrics","query":"sum:ml_obs.trace{$ml_app,$version,$env,$service, $model_provider} by 
{ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"red","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"y > 20","display_type":"error dashed"},{"value":"5 < y < 20","display_type":"warning dashed"},{"value":"0 < y < 5","display_type":"ok dashed"},{"value":"y = 0","display_type":"error dashed"}],"custom_links":[{"label":"View related LLM Observability spans","link":"/llm/traces?query={{$ml_app}} @status:error {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":2,"y":3,"width":3,"height":3}},{"id":3306729246543562,"definition":{"title":"Trace Duration (p50, p75, p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95","formula":"query5"},{"alias":"p75","formula":"query1"},{"alias":"p50","formula":"query2"}],"queries":[{"name":"query5","data_source":"metrics","query":"p95:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"},{"name":"query1","data_source":"metrics","query":"p75:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"},{"name":"query2","data_source":"metrics","query":"p50:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}}%20{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":3,"width":7,"height":3}},{"id":2208916880196188,"definition":{"title":"Trace Execution Time by Span 
(p95)","type":"toplist","requests":[{"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"pc95","metric":"@duration"},"group_by":[{"facet":"@meta.span.kind","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}},{"facet":"@name","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}},{"facet":"@ml_app","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}}],"search":{"query":"@event_type:span -@parent_id:undefined $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"nanosecond"}},"formula":"query1"}],"sort":{"count":3375,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"custom_links":[{"label":"View related LLM Observability Spans","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.span.kind:{{@meta.span.kind.value}} @name:{{@name.value}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"style":{"display":{"type":"stacked","legend":"inline"},"palette":"datadog16"}},"layout":{"x":0,"y":6,"width":5,"height":3}},{"id":3301474291795902,"definition":{"title":"Trace Total Duration by ML App (p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95","formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"p95:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider} by {ml_app}"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":6,"width":7,"height":3}},{"id":5506235694494168,"definition":{"title":"Spans with Errors by ML App","type":"toplist","requests":[{"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[{"facet":"@meta.span.kind","limit":15,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@name","limit":15,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@ml_app","limit":15,"sort":{"order":"desc","aggregation":"count"}}],"search":{"query":"@event_type:span -@parent_id:undefined @status:error $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"errors"}},"formula":"query1"}],"sort":{"count":3375,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"custom_links":[{"label":"View related LLM Observability Spans","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.span.kind:{{@meta.span.kind.value}} @name:{{@name.value}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"style":{"display":{"type":"stacked","legend":"inline"},"palette":"red"}},"layout":{"x":0,"y":9,"width":5,"height":3}},{"id":8691399239758710,"definition":{"title":"Trace Total Duration by ML App (p99)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p99","formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"p99:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider} by 
{ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":9,"width":7,"height":3}}]},"layout":{"x":0,"y":37,"width":12,"height":13}}],"template_variables":[{"name":"ml_app","prefix":"ml_app","available_values":[],"default":"*"},{"name":"version","prefix":"version","available_values":[],"default":"*"},{"name":"env","prefix":"env","available_values":[],"default":"*"},{"name":"service","prefix":"service","available_values":[],"default":"*"},{"name":"model_provider","prefix":"model_provider","available_values":[],"default":"*"}],"layout_type":"ordered","notify_list":[],"reflow_type":"fixed"}