diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7d760a9ef9174..cfcaedeab7410 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -200,7 +200,7 @@ datadog_checks_base/tests/**/test_db_statements.py @DataDog/database-monitoring # APM Integrations /langchain/ @DataDog/ml-observability @DataDog/agent-integrations @DataDog/documentation /openai/ @DataDog/ml-observability @DataDog/agent-integrations @DataDog/documentation - +/anthropic/ @DataDog/ml-observability @DataDog/agent-integrations @DataDog/documentation # Windows agent datadog_checks_base/datadog_checks/base/checks/win/ @DataDog/windows-agent @DataDog/agent-integrations diff --git a/.github/workflows/config/labeler.yml b/.github/workflows/config/labeler.yml index 580742a589edf..157f9d601aebc 100644 --- a/.github/workflows/config/labeler.yml +++ b/.github/workflows/config/labeler.yml @@ -317,6 +317,8 @@ integration/kyverno: - kyverno/**/* integration/langchain: - langchain/**/* +integration/anthropic: +- anthropic/**/* integration/lastpass: - lastpass/**/* integration/lighttpd: diff --git a/anthropic/CHANGELOG.md b/anthropic/CHANGELOG.md new file mode 100644 index 0000000000000..3de181c7e0654 --- /dev/null +++ b/anthropic/CHANGELOG.md @@ -0,0 +1,7 @@ +# CHANGELOG - Anthropic + +## 1.0.0 / 2024-11-08 + +***Added***: + +* Initial Release diff --git a/anthropic/README.md b/anthropic/README.md new file mode 100644 index 0000000000000..c15b4d9be6d33 --- /dev/null +++ b/anthropic/README.md @@ -0,0 +1,127 @@ +# Anthropic + +## Overview +Use the Anthropic integration to monitor, troubleshoot, and evaluate your LLM-powered applications, such as chatbots or data extraction tools, using Anthropic's models. + +If you are building LLM applications, use LLM Observability to investigate the root cause of issues, +monitor operational performance, and evaluate the quality, privacy, and safety of your LLM applications. 
+ +See the [LLM Observability tracing view video](https://imgix.datadoghq.com/video/products/llm-observability/expedite-troubleshooting.mp4?fm=webm&fit=max) for an example of how you can investigate a trace. + +## Setup + +### LLM Observability: Get end-to-end visibility into your LLM application using Anthropic +You can enable LLM Observability in different environments. Follow the appropriate setup based on your scenario: + +#### Installation for Python + +##### If you do not have the Datadog Agent: +1. Install the `ddtrace` package: + + ```shell + pip install ddtrace + ``` + +2. Start your application using the following command to enable Agentless mode: + + ```shell + DD_SITE=<YOUR_DATADOG_SITE> DD_API_KEY=<YOUR_API_KEY> DD_LLMOBS_ENABLED=1 DD_LLMOBS_AGENTLESS_ENABLED=1 DD_LLMOBS_ML_APP=<YOUR_ML_APP_NAME> ddtrace-run python <YOUR_APP>.py + ``` + +##### If you already have the Datadog Agent installed: +1. Make sure the Agent is running and that APM and StatsD are enabled. For example, use the following command with Docker: + + ```shell + docker run -d \ + --cgroupns host \ + --pid host \ + -v /var/run/docker.sock:/var/run/docker.sock:ro \ + -v /proc/:/host/proc/:ro \ + -v /sys/fs/cgroup/:/host/sys/fs/cgroup:ro \ + -e DD_API_KEY=<DATADOG_API_KEY> \ + -p 127.0.0.1:8126:8126/tcp \ + -p 127.0.0.1:8125:8125/udp \ + -e DD_DOGSTATSD_NON_LOCAL_TRAFFIC=true \ + -e DD_APM_ENABLED=true \ + gcr.io/datadoghq/agent:latest + ``` + +2. If you haven't already, install the `ddtrace` package: + + ```shell + pip install ddtrace + ``` + +3. To automatically enable tracing, start your application using the `ddtrace-run` command: + + ```shell + DD_SITE=<YOUR_DATADOG_SITE> DD_API_KEY=<YOUR_API_KEY> DD_LLMOBS_ENABLED=1 DD_LLMOBS_ML_APP=<YOUR_ML_APP_NAME> ddtrace-run python <YOUR_APP>.py + ``` + +**Note**: If the Agent is running on a custom host or port, set `DD_AGENT_HOST` and `DD_TRACE_AGENT_PORT` accordingly. + +##### If you are running LLM Observability in a serverless environment (AWS Lambda): +1. Install the **Datadog-Python** and **Datadog-Extension** Lambda layers as part of your AWS Lambda setup. +2.
Enable LLM Observability by setting the following environment variables: + + ```shell + DD_SITE=<YOUR_DATADOG_SITE> DD_API_KEY=<YOUR_API_KEY> DD_LLMOBS_ENABLED=1 DD_LLMOBS_ML_APP=<YOUR_ML_APP_NAME> + ``` + +**Note**: In serverless environments, Datadog automatically flushes spans at the end of the Lambda function. + +##### Automatic Anthropic tracing + +The Anthropic integration allows for automatic tracing of chat message calls made by the Anthropic Python SDK, capturing latency, errors, input/output messages, and token usage during Anthropic operations. + +The following methods are traced for both synchronous and asynchronous Anthropic operations: +- Chat messages (including streamed calls): `Anthropic().messages.create()`, `AsyncAnthropic().messages.create()` +- Streamed chat messages: `Anthropic().messages.stream()`, `AsyncAnthropic().messages.stream()` + +No additional setup is required for these methods. + +##### Validation + +Validate that LLM Observability is properly capturing spans by checking your application logs for successful span creation. You can also run the following command to check the status of the `dd-trace` integration: + + ```shell + ddtrace-run --info + ``` + +Look for the following message to confirm the setup: + + ```shell + Agent error: None + ``` + +##### Debugging + +If you encounter issues during setup, enable debug logging by passing the `--debug` flag: + + ```shell + ddtrace-run --debug + ``` + +This displays any errors related to data transmission or instrumentation, including issues with Anthropic traces. + +## Data Collected + +### Metrics + +The Anthropic integration does not include any custom metrics. + +### Service Checks + +The Anthropic integration does not include any service checks. + +### Events + +The Anthropic integration does not include any events. + +## Troubleshooting + +Need help? Contact [Datadog support][2].
+ +[1]: https://docs.datadoghq.com/integrations/anthropic/ +[2]: https://docs.datadoghq.com/help/ + diff --git a/anthropic/assets/dashboards/llm_observability_overview_dashboard.json b/anthropic/assets/dashboards/llm_observability_overview_dashboard.json new file mode 100644 index 0000000000000..ca4edc0635d0c --- /dev/null +++ b/anthropic/assets/dashboards/llm_observability_overview_dashboard.json @@ -0,0 +1 @@ +{"title":"LLM Observability Operational Insights Overview","description":"Track LLM model usage, cost, latency, and performance of your LLM applications.\n\n[LLM application traces ↗](/llm/traces) | [Docs ↗](https://docs.datadoghq.com/tracing/llm_observability/) | [Instrument your LLM application ↗](https://docs.datadoghq.com/tracing/llm_observability/trace_an_llm_application/)\n\nEmpty dashboard? Start by annotating your LLM application’s LLM call: [Python SDK doc ↗](https://docs.datadoghq.com/llm_observability/setup/sdk/python/) | [API doc ↗](https://docs.datadoghq.com/tracing/llm_observability/api/?tab=model#spans-api)\n\nFor insights into LLM chains and spans, go to [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights) Dashboard \n\nFor insights into out-of-the-box, user sessions, and custom evaluations, go to [LLM Observability Evaluation](/dash/integration/llm_evaluations) Dashboard ","widgets":[{"id":3444086504619146,"definition":{"title":"","banner_img":"/static/images/integration_dashboard/llm-observability_hero-1.jpeg","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":2984486576169188,"definition":{"type":"note","content":"Track LLM model usage, cost, latency, and performance of your LLM applications.\n\n[LLM application traces ↗](/llm/traces) | [Docs ↗](https://docs.datadoghq.com/tracing/llm_observability/) | [Instrument your LLM application ↗](https://docs.datadoghq.com/tracing/llm_observability/trace_an_llm_application/)\n\nEmpty dashboard? 
Start by annotating your LLM application’s LLM call: [Python SDK doc ↗](https://docs.datadoghq.com/tracing/llm_observability/sdk/#annotating-a-span) | [API doc ↗](https://docs.datadoghq.com/tracing/llm_observability/api/?tab=model#spans-api)\n\nFor insights into LLM chains and spans, go to [LLM Observability LLM Chain Insights](/dash/integration/llm_chain_insights) Dashboard \n\nFor insights into out-of-the-box, user sessions, and custom evaluations, go to [LLM Observability Evaluation](/dash/integration/llm_evaluations) Dashboard ","background_color":"white","font_size":"14","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":6,"height":3}}]},"layout":{"x":0,"y":0,"width":6,"height":6}},{"id":2291155106556328,"definition":{"title":"Monitors","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6165725382794250,"definition":{"type":"note","content":"Create an LLM Observability recommended monitor [here](llm/traces?llmPanels=%5B%7B\"t\"%3A\"monitorPanel\"%7D%5D)!","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":true,"tick_pos":"50%","tick_edge":"bottom","has_padding":true},"layout":{"x":0,"y":0,"width":6,"height":1}},{"id":7575597858473386,"definition":{"title":"LLM Observability Monitors","type":"manage_status","display_format":"countsAndList","color_preference":"background","hide_zero_counts":true,"show_status":true,"last_triggered_format":"relative","query":"metric:ml_obs.trace.error $env $service","sort":"status,asc","count":50,"start":0,"summary_type":"monitors","show_priority":false,"show_last_triggered":false},"layout":{"x":0,"y":1,"width":6,"height":4}}]},"layout":{"x":6,"y":0,"width":6,"height":6}},{"id":1310295051024350,"definition":{"type":"note","content":"Tip: select an **ml_app** and **version** at the top of the dashboard for more granular querying and 
better context links!","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":false,"tick_pos":"50%","tick_edge":"right","has_padding":true},"layout":{"x":0,"y":0,"width":12,"height":1}},{"id":4149521700572012,"definition":{"title":"Overview","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6428476651943710,"definition":{"title":"Active ML Apps with LLM Calls","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query7"}],"queries":[{"name":"query7","data_source":"metrics","query":"sum:ml_obs.span{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_by":"values","line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":0,"width":6,"height":3}},{"id":5975795986461220,"definition":{"title":"Active Versions with LLM Calls","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query7"}],"queries":[{"name":"query7","data_source":"metrics","query":"sum:ml_obs.span{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {version}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_by":"values","line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":0,"width":6,"height":3}},{"id":3198293231078300,"definition":{"title":"Trace Success Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"fraction"}},"formula":"query1 / query2"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @status:ok"}},{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","conditional_formats":[{"comparator":">","value":0.95,"palette":"black_on_light_green"},{"comparator":">=","value":0.5,"palette":"black_on_light_yellow"},{"comparator":"<=","value":0.5,"palette":"black_on_light_red"}]}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} @status:ok {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"yaxis":{"include_zero":true},"type":"area"}},"layout":{"x":0,"y":3,"width":4,"height":3}},{"id":246716173430280,"definition":{"title":"Total Number of Traces","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"traces"}},"formula":"query7"}],"queries":[{"data_source":"llm_observability","name":"query7","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service $ml_app 
@meta.model_provider:$model_provider.value"}}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":4,"y":3,"width":4,"height":3}},{"id":4055455815898662,"definition":{"title":"Total Number of Spans","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"spans"}},"formula":"query7"}],"queries":[{"data_source":"llm_observability","name":"query7","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service $ml_app @meta.model_provider:$model_provider.value"}}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":8,"y":3,"width":4,"height":3}},{"id":2941224801013446,"definition":{"title":"Trace Error Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"fraction"}},"formula":"1 - (query1 / query2)"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @status:ok"}},{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"cardinality","metric":"@trace_id"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service 
@meta.model_provider:$model_provider.value"}}],"response_format":"scalar","conditional_formats":[{"comparator":">","value":0,"palette":"black_on_light_red"}]}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} @status:error {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"yaxis":{"include_zero":true},"type":"area"}},"layout":{"x":0,"y":6,"width":4,"height":4}},{"id":327704243712338,"definition":{"title":"Completion Token Generation Rate","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/sec"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service,$model_provider}.as_count()","aggregator":"sum"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span.duration{$ml_app,$version,$env,$service,$model_provider}.as_count()","aggregator":"sum"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2},"layout":{"x":4,"y":6,"width":4,"height":4}},{"id":8807433954813968,"definition":{"title":"Time To First Token","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:ml_obs.span.time_to_first_token{$ml_app,$version,$env,$service, $model_provider}","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":8,"y":6,"width":4,"height":3}},{"id":4628057133288278,"definition":{"type":"note","content":"[Annotate](https://docs.datadoghq.com/llm_observability/trace_an_llm_application/?tab=decorators#annotating-spans) your root span with `time_to_first_token` in seconds to populate the widget above.","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"center","show_tick":true,"tick_pos":"50%","tick_edge":"top","has_padding":true},"layout":{"x":8,"y":9,"width":4,"height":1}}]},"layout":{"x":0,"y":7,"width":12,"height":11}},{"id":6925805930371330,"definition":{"title":"LLM Calls","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":7368854359822672,"definition":{"type":"note","content":"Break down your **LLM spans**, which represent an invocation call to an LLM, by model and model provider, and track tokens and errors across these calls.","background_color":"blue","font_size":"14","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":3,"height":2}},{"id":7403444364106132,"definition":{"title":"Total LLM Requests","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @meta.span.kind:llm"}}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"yaxis":{},"type":"bars"}},"layout":{"x":3,"y":0,"width":3,"height":2}},{"id":5240968989960074,"definition":{"title":"LLM Call Response Time 
(p50)","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"aggregator":"percentile","data_source":"metrics","name":"query1","query":"p50:ml_obs.span.duration{$env,$service,$version,$ml_app, $model_provider, span_kind:llm}"}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":6,"y":0,"width":3,"height":2}},{"id":3482460852692924,"definition":{"title":"LLM Call Response Time (p95)","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"}],"queries":[{"aggregator":"percentile","data_source":"metrics","name":"query1","query":"p95:ml_obs.span.duration{$env,$service,$version,$model_provider, $ml_app, span_kind:llm}"}],"response_format":"scalar"}],"autoscale":true,"precision":2,"timeseries_background":{"type":"area"}},"layout":{"x":9,"y":0,"width":3,"height":2}},{"id":3151345683848596,"definition":{"title":"Model Usage","title_size":"16","title_align":"left","requests":[{"formulas":[{"formula":"query2"}],"queries":[{"data_source":"llm_observability","name":"query2","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[{"facet":"@meta.model_provider","limit":10,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@meta.model_name","limit":10,"sort":{"order":"desc","aggregation":"count"}}],"search":{"query":"@event_type:span $ml_app $version $env $service @meta.model_provider:$model_provider.value @meta.span.kind:llm"}}],"response_format":"scalar","style":{"palette":"datadog16"},"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"type":"sunburst","hide_total":false,"legend":{"type":"table"},"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* 
{{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":2,"width":12,"height":4}},{"id":2688839849605810,"definition":{"title":"LLM Span Error Rate","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.error{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}.as_count()"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"0 < y < 0.05","display_type":"ok dashed"},{"value":"0.05 < y < 0.2","display_type":"warning dashed"},{"value":"0.2 < y < 1","display_type":"error dashed"}],"custom_links":[{"label":"View error traces in LLM Observability","link":"/llm/traces?query=@status:error {{$version}} {{$ml_app}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":6,"width":6,"height":3}},{"id":8491486279677864,"definition":{"title":"Total Token Usage by Prompt & Completion","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Prompt Tokens","formula":"query1"},{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Completion Tokens","formula":"query2"},{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens"}},"alias":"Total 
Tokens","formula":"query3"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"},{"data_source":"metrics","name":"query3","query":"sum:ml_obs.span.llm.total.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}}%20{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":6,"width":6,"height":3}},{"id":523785459773502,"definition":{"title":"LLM Span Error Rate by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.error{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{span_kind:llm,$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"0 < y < 0.05","display_type":"ok dashed"},{"value":"0.05 < y < 0.2","display_type":"warning dashed"},{"value":"0.2 < y < 1","display_type":"error dashed"}],"custom_links":[{"label":"View error traces in LLM Observability","link":"/llm/traces?query=@status:error {{$version}} 
{{$ml_app}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":9,"width":6,"height":3}},{"id":4007270851222420,"definition":{"title":"Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_table","requests":[{"queries":[{"data_source":"metrics","name":"query1","query":"avg:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"avg:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider} by {model_name,model_provider}","aggregator":"avg"}],"response_format":"scalar","text_formats":[[],[{"match":{"type":"is","value":""},"palette":"white_on_green"}]],"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]},"formulas":[{"cell_display_mode":"trend","alias":"Prompt Tokens ","cell_display_mode_options":{"trend_type":"area","y_scale":"independent"},"formula":"query1"},{"cell_display_mode":"trend","cell_display_mode_options":{"trend_type":"area","y_scale":"independent"},"alias":"Completion Tokens","formula":"query2"}]}],"has_search_bar":"auto","custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":9,"width":6,"height":3}},{"id":3075505179106388,"definition":{"title":"LLM Call Response Time","title_size":"16","title_align":"left","type":"query_table","requests":[{"queries":[{"data_source":"metrics","name":"query1","query":"p50:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query2","query":"p75:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by 
{model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query3","query":"p95:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"},{"data_source":"metrics","name":"query5","query":"p99:ml_obs.span.duration{span_kind:llm,$ml_app,$env,$service,$version,$model_provider} by {model_name,model_provider}","aggregator":"percentile"}],"response_format":"scalar","sort":{"count":500,"order_by":[{"type":"formula","index":3,"order":"desc"}]},"formulas":[{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p50","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query1"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p75","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query2"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p95","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query3"},{"conditional_formats":[{"comparator":">","value":3,"palette":"black_on_light_red"},{"comparator":">","value":2,"palette":"black_on_light_yellow"},{"comparator":">","value":0,"palette":"black_on_light_green"}],"cell_display_mode":"bar","alias":"p99","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}},"formula":"query5"}]}],"has_search_bar":"auto","custom_links":[{"label":"View related 
traces in LLM Observability","link":"/llm/traces?query=@event_type:span @parent_id:* @{{$ml_app}} @meta.{{model_provider}} {{$version}} @meta.{{model_name}} &start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":12,"width":6,"height":3}},{"id":3835649873165936,"definition":{"title":"Avg Input Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/call"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.input.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{$ml_app,span_kind:llm,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related spans in LLM Observability","link":"/llm/traces?query=@type%3Aspan%20@parent_id%3A*%20@meta.span.kind%3Allm&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}],"precision":2},"layout":{"x":6,"y":12,"width":3,"height":3}},{"id":3960916621633432,"definition":{"title":"Avg Output Tokens per LLM Call","title_size":"16","title_align":"left","type":"query_value","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"tokens/call"}},"formula":"query1 / query2"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:ml_obs.span.llm.output.tokens{$ml_app,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"},{"data_source":"metrics","name":"query2","query":"sum:ml_obs.span{$ml_app,span_kind:llm,$version,$env,$service, $model_provider}.as_count()","aggregator":"avg"}],"response_format":"scalar"}],"autoscale":true,"custom_links":[{"label":"View related traces in LLM 
Observability","link":"/llm/traces?query=@type%3Aspan%20@parent_id%3A*%20@meta.span.kind%3Allm&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}],"precision":2},"layout":{"x":9,"y":12,"width":3,"height":3}},{"id":8614866922979184,"definition":{"title":"LLM Call Response Time by ML App (p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"seconds"}},"alias":"p95","formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"p95:ml_obs.span.duration{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":15,"width":6,"height":3}},{"id":8678869962893096,"definition":{"title":"LLM Call Response Time by ML App (p99)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"seconds"}},"alias":"p99","formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"p99:ml_obs.span.duration{$ml_app,$version,span_kind:llm,$env,$service, $model_provider} by {ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[],"custom_links":[{"label":"View related traces in LLM 
Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":6,"y":15,"width":6,"height":3}}]},"layout":{"x":0,"y":18,"width":12,"height":19,"is_column_break":true}},{"id":3126135057967266,"definition":{"title":"Traces","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":8485748560763588,"definition":{"type":"note","content":"A **trace** represents the entire execution flow of your LLM Application, spanning from the moment a request is first received to when your agent responds to a user. Trace metrics provide insight into your LLM application’s performance end-to-end.\n\nYou can view detailed breakdown of your LLM chains and all of your spans in [LLM Observability LLM Chain Analytics]()\n","background_color":"blue","font_size":"16","text_align":"left","vertical_align":"top","show_tick":false,"tick_pos":"50%","tick_edge":"left","has_padding":true},"layout":{"x":0,"y":0,"width":5,"height":3}},{"id":6387614402128510,"definition":{"title":"Total Traces by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"traces"}},"formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"sum:ml_obs.trace{$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":0,"width":7,"height":3}},{"id":2819887241075640,"definition":{"title":"P95 Trace Execution Time by Span Kind","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"nanosecond"}},"formula":"query1"}],"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"pc95","metric":"@duration"},"group_by":[{"facet":"@meta.span.kind","limit":10,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}}],"search":{"query":"@event_type:span -@parent_id:undefined $ml_app $version $env $service $model_provider"}}],"response_format":"scalar","style":{"palette":"datadog16"},"sort":{"count":500,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"type":"sunburst","legend":{"type":"inline"},"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} {{$version}} @meta.span.kind:{{@meta.span.kind.value}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":0,"y":3,"width":2,"height":3}},{"id":2413005551640098,"definition":{"title":"Trace Error Rate by ML App","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Error Rate","number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * (query1 / query2)"}],"queries":[{"name":"query1","data_source":"metrics","query":"sum:ml_obs.trace.error{$ml_app,$version,$env,$service, $model_provider} by {ml_app}.as_count()"},{"name":"query2","data_source":"metrics","query":"sum:ml_obs.trace{$ml_app,$version,$env,$service, $model_provider} by 
{ml_app}.as_count()"}],"response_format":"timeseries","style":{"palette":"red","line_type":"solid","line_width":"normal"},"display_type":"line"}],"markers":[{"value":"y > 20","display_type":"error dashed"},{"value":"5 < y < 20","display_type":"warning dashed"},{"value":"0 < y < 5","display_type":"ok dashed"},{"value":"y = 0","display_type":"ok dashed"}],"custom_links":[{"label":"View related LLM Observability spans","link":"/llm/traces?query={{$ml_app}} @status:error {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":2,"y":3,"width":3,"height":3}},{"id":3306729246543562,"definition":{"title":"Trace Duration (p50, p75, p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95","formula":"query5"},{"alias":"p75","formula":"query1"},{"alias":"p50","formula":"query2"}],"queries":[{"name":"query5","data_source":"metrics","query":"p95:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"},{"name":"query1","data_source":"metrics","query":"p75:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"},{"name":"query2","data_source":"metrics","query":"p50:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider}"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}}%20{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":3,"width":7,"height":3}},{"id":2208916880196188,"definition":{"title":"Trace Execution Time by Span 
(p95)","type":"toplist","requests":[{"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"pc95","metric":"@duration"},"group_by":[{"facet":"@meta.span.kind","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}},{"facet":"@name","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}},{"facet":"@ml_app","limit":15,"sort":{"order":"desc","aggregation":"pc95","metric":"@duration"}}],"search":{"query":"@event_type:span -@parent_id:undefined $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"nanosecond"}},"formula":"query1"}],"sort":{"count":3375,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"custom_links":[{"label":"View related LLM Observability Spans","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.span.kind:{{@meta.span.kind.value}} @name:{{@name.value}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"style":{"display":{"type":"stacked","legend":"inline"},"palette":"datadog16"}},"layout":{"x":0,"y":6,"width":5,"height":3}},{"id":3301474291795902,"definition":{"title":"Trace Total Duration by ML App (p95)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95","formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"p95:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider} by {ml_app}"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} 
{{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":6,"width":7,"height":3}},{"id":5506235694494168,"definition":{"title":"Spans with Errors by ML App","type":"toplist","requests":[{"queries":[{"data_source":"llm_observability","name":"query1","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[{"facet":"@meta.span.kind","limit":15,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@name","limit":15,"sort":{"order":"desc","aggregation":"count"}},{"facet":"@ml_app","limit":15,"sort":{"order":"desc","aggregation":"count"}}],"search":{"query":"@event_type:span -@parent_id:undefined @status:error $ml_app $version $env $service @meta.model_provider:$model_provider.value"}}],"response_format":"scalar","formulas":[{"number_format":{"unit":{"type":"custom_unit_label","label":"errors"}},"formula":"query1"}],"sort":{"count":3375,"order_by":[{"type":"formula","index":0,"order":"desc"}]}}],"custom_links":[{"label":"View related LLM Observability Spans","link":"/llm/traces?query=@type:span @parent_id:* {{$ml_app}} @meta.span.kind:{{@meta.span.kind.value}} @name:{{@name.value}} {{$version}}&start={{timestamp_start}}&end={{timestamp_end}}&paused=false"}],"style":{"display":{"type":"stacked","legend":"inline"},"palette":"red"}},"layout":{"x":0,"y":9,"width":5,"height":3}},{"id":8691399239758710,"definition":{"title":"Trace Total Duration by ML App (p99)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p99","formula":"query5"}],"queries":[{"name":"query5","data_source":"metrics","query":"p99:ml_obs.trace.duration{$ml_app,$version,$env,$service, $model_provider} by 
{ml_app}","aggregator":"percentile"}],"response_format":"timeseries","style":{"palette":"datadog16","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"custom_links":[{"label":"View related traces in LLM Observability","link":"/llm/traces?query={{$ml_app}} {{$version}}&start={{timestamp_widget_start}}&end={{timestamp_widget_end}}&paused=false"}]},"layout":{"x":5,"y":9,"width":7,"height":3}}]},"layout":{"x":0,"y":37,"width":12,"height":13}}],"template_variables":[{"name":"ml_app","prefix":"ml_app","available_values":[],"default":"*"},{"name":"version","prefix":"version","available_values":[],"default":"*"},{"name":"env","prefix":"env","available_values":[],"default":"*"},{"name":"service","prefix":"service","available_values":[],"default":"*"},{"name":"model_provider","prefix":"model_provider","available_values":[],"default":"*"}],"layout_type":"ordered","notify_list":[],"reflow_type":"fixed"} diff --git a/anthropic/assets/service_checks.json b/anthropic/assets/service_checks.json new file mode 100644 index 0000000000000..fe51488c7066f --- /dev/null +++ b/anthropic/assets/service_checks.json @@ -0,0 +1 @@ +[] diff --git a/anthropic/manifest.json b/anthropic/manifest.json new file mode 100644 index 0000000000000..d6beb394485f6 --- /dev/null +++ b/anthropic/manifest.json @@ -0,0 +1,46 @@ +{ + "manifest_version": "2.0.0", + "app_uuid": "53fe7c3e-57eb-42ca-8e43-ec92c04b6160", + "app_id": "anthropic", + "display_on_public_website": true, + "tile": { + "overview": "README.md#Overview", + "configuration": "README.md#Setup", + "support": "README.md#Support", + "changelog": "CHANGELOG.md", + "description": "Monitor Anthropic usage and health at the application level", + "title": "Anthropic", + "media": [], + "classifier_tags": [ + "Category::AI/ML", + "Category::Metrics", + "Submitted Data Type::Traces", + "Supported OS::Linux", + "Supported OS::Windows", + "Supported OS::macOS", + "Offering::Integration" + ] + }, + "assets": { + 
"integration": { + "auto_install": false, + "source_type_id": 31102434, + "source_type_name": "Anthropic", + "events": { + "creates_events": false + }, + "service_checks": { + "metadata_path": "assets/service_checks.json" + } + }, + "dashboards": { + "LLM Observability Overview Dashboard": "assets/dashboards/llm_observability_overview_dashboard.json" + } + }, + "author": { + "support_email": "help@datadoghq.com", + "name": "Datadog", + "homepage": "https://www.datadoghq.com", + "sales_email": "info@datadoghq.com" + } +}