From d305721e166e08af884ca6068a89b8eef8bb1b84 Mon Sep 17 00:00:00 2001
From: Jeel-mehta <72543735+Jeel-mehta@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:05:51 -0800
Subject: [PATCH] Gen-AI python implementation (#290)

*Issue #, if available:*

*Description of changes:*

By submitting this pull request, I confirm that you can use, modify, copy, and
redistribute this contribution, under the terms of your choice.

---------

Co-authored-by: Jeel Mehta
Co-authored-by: Michael He <53622546+yiyuan-he@users.noreply.github.com>
Co-authored-by: Min Xia
---
 .../distro/_aws_span_processing_util.py      |   6 +
 .../distro/patches/_bedrock_patches.py       | 188 ++++++++++++++-
 .../distro/test_instrumentation_patch.py     | 222 +++++++++++++++++-
 3 files changed, 404 insertions(+), 12 deletions(-)

diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py
index 082c2de5c..24aaa68dc 100644
--- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py
@@ -29,6 +29,12 @@
 # TODO: Use Semantic Conventions once upgrade to 0.47b0
 GEN_AI_REQUEST_MODEL: str = "gen_ai.request.model"
 GEN_AI_SYSTEM: str = "gen_ai.system"
+GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens"
+GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature"
+GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p"
+GEN_AI_RESPONSE_FINISH_REASONS: str = "gen_ai.response.finish_reasons"
+GEN_AI_USAGE_INPUT_TOKENS: str = "gen_ai.usage.input_tokens"
+GEN_AI_USAGE_OUTPUT_TOKENS: str = "gen_ai.usage.output_tokens"
 
 # Get dialect keywords retrieved from dialect_keywords.json file.
diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py
index 581ca36f4..4a6eb10f5 100644
--- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py
@@ -2,7 +2,13 @@
 # SPDX-License-Identifier: Apache-2.0
 import abc
 import inspect
-from typing import Dict, Optional
+import io
+import json
+import logging
+import math
+from typing import Any, Dict, Optional
+
+from botocore.response import StreamingBody
 
 from amazon.opentelemetry.distro._aws_attribute_keys import (
     AWS_BEDROCK_AGENT_ID,
@@ -11,7 +17,16 @@
     AWS_BEDROCK_GUARDRAIL_ID,
     AWS_BEDROCK_KNOWLEDGE_BASE_ID,
 )
-from amazon.opentelemetry.distro._aws_span_processing_util import GEN_AI_REQUEST_MODEL, GEN_AI_SYSTEM
+from amazon.opentelemetry.distro._aws_span_processing_util import (
+    GEN_AI_REQUEST_MAX_TOKENS,
+    GEN_AI_REQUEST_MODEL,
+    GEN_AI_REQUEST_TEMPERATURE,
+    GEN_AI_REQUEST_TOP_P,
+    GEN_AI_RESPONSE_FINISH_REASONS,
+    GEN_AI_SYSTEM,
+    GEN_AI_USAGE_INPUT_TOKENS,
+    GEN_AI_USAGE_OUTPUT_TOKENS,
+)
 from opentelemetry.instrumentation.botocore.extensions.types import (
     _AttributeMapT,
     _AwsSdkCallContext,
@@ -28,6 +43,10 @@
 _MODEL_ID: str = "modelId"
 _AWS_BEDROCK_SYSTEM: str = "aws_bedrock"
 
+_logger = logging.getLogger(__name__)
+# Set logger level to DEBUG
+_logger.setLevel(logging.DEBUG)
+
 
 class _BedrockAgentOperation(abc.ABC):
     """
@@ -240,3 +259,168 @@ def extract_attributes(self, attributes: _AttributeMapT):
         model_id = self._call_context.params.get(_MODEL_ID)
         if model_id:
             attributes[GEN_AI_REQUEST_MODEL] = model_id
+
+            # Get the request body if it exists
+            body = self._call_context.params.get("body")
+            if body:
+                try:
+                    request_body = json.loads(body)
+
+                    if "amazon.titan" in model_id:
+                        self._extract_titan_attributes(attributes, request_body)
+                    elif "anthropic.claude" in model_id:
+                        self._extract_claude_attributes(attributes, request_body)
+                    elif "meta.llama" in model_id:
+                        self._extract_llama_attributes(attributes, request_body)
+                    elif "cohere.command" in model_id:
+                        self._extract_cohere_attributes(attributes, request_body)
+                    elif "ai21.jamba" in model_id:
+                        self._extract_ai21_attributes(attributes, request_body)
+                    elif "mistral" in model_id:
+                        self._extract_mistral_attributes(attributes, request_body)
+
+                except json.JSONDecodeError:
+                    _logger.debug("Error: Unable to parse the body as JSON")
+
+    def _extract_titan_attributes(self, attributes, request_body):
+        config = request_body.get("textGenerationConfig", {})
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount"))
+
+    def _extract_claude_attributes(self, attributes, request_body):
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
+
+    def _extract_cohere_attributes(self, attributes, request_body):
+        prompt = request_body.get("message")
+        if prompt:
+            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p"))
+
+    def _extract_ai21_attributes(self, attributes, request_body):
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
+
+    def _extract_llama_attributes(self, attributes, request_body):
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
+
+    def _extract_mistral_attributes(self, attributes, request_body):
+        prompt = request_body.get("prompt")
+        if prompt:
+            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
+
+    @staticmethod
+    def _set_if_not_none(attributes, key, value):
+        if value is not None:
+            attributes[key] = value
+
+    def on_success(self, span: Span, result: Dict[str, Any]):
+        model_id = self._call_context.params.get(_MODEL_ID)
+
+        if not model_id:
+            return
+
+        if "body" in result and isinstance(result["body"], StreamingBody):
+            original_body = None
+            try:
+                original_body = result["body"]
+                body_content = original_body.read()
+
+                # Use one stream for telemetry
+                stream = io.BytesIO(body_content)
+                telemetry_content = stream.read()
+                response_body = json.loads(telemetry_content.decode("utf-8"))
+                if "amazon.titan" in model_id:
+                    self._handle_amazon_titan_response(span, response_body)
+                elif "anthropic.claude" in model_id:
+                    self._handle_anthropic_claude_response(span, response_body)
+                elif "meta.llama" in model_id:
+                    self._handle_meta_llama_response(span, response_body)
+                elif "cohere.command" in model_id:
+                    self._handle_cohere_command_response(span, response_body)
+                elif "ai21.jamba" in model_id:
+                    self._handle_ai21_jamba_response(span, response_body)
+                elif "mistral" in model_id:
+                    self._handle_mistral_mistral_response(span, response_body)
+                # Replenish stream for downstream application use
+                new_stream = io.BytesIO(body_content)
+                result["body"] = StreamingBody(new_stream, len(body_content))
+
+            except json.JSONDecodeError:
+                _logger.debug("Error: Unable to parse the response body as JSON")
+            except Exception as e:  # pylint: disable=broad-exception-caught, invalid-name
+                _logger.debug("Error processing response: %s", e)
+            finally:
+                if original_body is not None:
+                    original_body.close()
+
+    # pylint: disable=no-self-use
+    def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
+        if "inputTextTokenCount" in response_body:
+            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"])
+        if "results" in response_body and response_body["results"]:
+            result = response_body["results"][0]
+            if "tokenCount" in result:
+                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"])
+            if "completionReason" in result:
+                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]])
+
+    # pylint: disable=no-self-use
+    def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
+        if "usage" in response_body:
+            usage = response_body["usage"]
+            if "input_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"])
+            if "output_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"])
+        if "stop_reason" in response_body:
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
+
+    # pylint: disable=no-self-use
+    def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
+        # Output tokens: Approximate from the response text
+        if "text" in response_body:
+            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6))
+        if "finish_reason" in response_body:
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]])
+
+    # pylint: disable=no-self-use
+    def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]):
+        if "usage" in response_body:
+            usage = response_body["usage"]
+            if "prompt_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"])
+            if "completion_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"])
+        if "choices" in response_body:
+            choices = response_body["choices"][0]
+            if "finish_reason" in choices:
+                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]])
+
+    # pylint: disable=no-self-use
+    def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
+        if "prompt_token_count" in response_body:
+            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"])
+        if "generation_token_count" in response_body:
+            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"])
+        if "stop_reason" in response_body:
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
+
+    # pylint: disable=no-self-use
+    def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]):
+        if "outputs" in response_body:
+            outputs = response_body["outputs"][0]
+            if "text" in outputs:
+                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6))
+            if "stop_reason" in outputs:
+                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py
index b27d5e799..86c6bc39f 100644
--- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py
+++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py
@@ -1,12 +1,16 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+import json
+import math
 import os
+from io import BytesIO
 from typing import Any, Dict
 from unittest import TestCase
 from unittest.mock import MagicMock, patch
 
 import gevent.monkey
 import pkg_resources
+from botocore.response import StreamingBody
 
 from amazon.opentelemetry.distro.patches._instrumentation_patch import (
     AWS_GEVENT_PATCH_MODULES,
@@ -173,7 +177,7 @@ def _test_unpatched_gevent_instrumentation(self):
         self.assertFalse(gevent.monkey.is_module_patched("queue"), "gevent queue module has been patched")
         self.assertFalse(gevent.monkey.is_module_patched("contextvars"), "gevent contextvars module has been patched")
 
-    # pylint: disable=too-many-statements
+    # pylint: disable=too-many-statements, too-many-locals
     def _test_patched_botocore_instrumentation(self):
         # Kinesis
         self.assertTrue("kinesis" in _KNOWN_EXTENSIONS)
@@ -211,12 +215,209 @@ def _test_patched_botocore_instrumentation(self):
         bedrock_agent_runtime_sucess_attributes: Dict[str, str] = _do_on_success_bedrock("bedrock-agent-runtime")
         self.assertEqual(len(bedrock_agent_runtime_sucess_attributes), 0)
 
-        # BedrockRuntime
+        # BedrockRuntime - Amazon Titan Models
         self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
-        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock("bedrock-runtime")
-        self.assertEqual(len(bedrock_runtime_attributes), 2)
+        request_body = {
+            "textGenerationConfig": {
+                "maxTokenCount": 512,
+                "temperature": 0.9,
+                "topP": 0.75,
+            }
+        }
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime", model_id="amazon.titan", request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
         self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
-        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], _GEN_AI_REQUEST_MODEL)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "amazon.titan")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.9)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.75)
+        response_body = {
+            "inputTextTokenCount": 123,
+            "results": [
+                {
+                    "tokenCount": 456,
+                    "outputText": "testing",
+                    "completionReason": "FINISH",
+                }
+            ],
+        }
+        json_bytes = json.dumps(response_body).encode("utf-8")
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime", model_id="amazon.titan", streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 123)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 456)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["FINISH"])
+
+        # BedrockRuntime - Anthropic Claude Models
+
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            "anthropic_version": "bedrock-2023-05-31",
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "top_p": 0.999,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime", model_id="anthropic.claude", request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "anthropic.claude")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.999)
+        response_body = {
+            "stop_reason": "end_turn",
+            "stop_sequence": None,
+            "usage": {"input_tokens": 23, "output_tokens": 36},
+        }
+        json_bytes = json.dumps(response_body).encode("utf-8")
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime", model_id="anthropic.claude", streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 23)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["end_turn"])
+
+        # BedrockRuntime - Cohere Command Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            "message": "Hello, world",
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "p": 0.75,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime", model_id="cohere.command", request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 6)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "cohere.command")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.75)
+        self.assertEqual(
+            bedrock_runtime_attributes["gen_ai.usage.input_tokens"], math.ceil(len(request_body["message"]) / 6)
+        )
+        response_body = {
+            "text": "Goodbye, world",
+            "finish_reason": "COMPLETE",
+        }
+        json_bytes = json.dumps(response_body).encode("utf-8")
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime", model_id="cohere.command", streaming_body=streaming_body
+        )
+        self.assertEqual(
+            bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], math.ceil(len(response_body["text"]) / 6)
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["COMPLETE"])
+
+        # BedrockRuntime - AI21 Jamba Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "top_p": 0.9,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime", model_id="ai21.jamba", request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "ai21.jamba")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.9)
+        response_body = {
+            "choices": [{"finish_reason": "stop"}],
+            "usage": {"prompt_tokens": 24, "completion_tokens": 31, "total_tokens": 55},
+        }
+        json_bytes = json.dumps(response_body).encode("utf-8")
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime", model_id="ai21.jamba", streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 24)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 31)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["stop"])
+
+        # BedrockRuntime - Meta LLama Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            "max_gen_len": 512,
+            "temperature": 0.5,
+            "top_p": 0.9,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime", model_id="meta.llama", request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "meta.llama")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.9)
+        response_body = {"prompt_token_count": 31, "generation_token_count": 36, "stop_reason": "stop"}
+        json_bytes = json.dumps(response_body).encode("utf-8")
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime", model_id="meta.llama", streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 31)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["stop"])
+
+        # BedrockRuntime - Mistral Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        msg = "Hello, World"
+        formatted_prompt = f"[INST] {msg} [/INST]"
+        request_body = {
+            "prompt": formatted_prompt,
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "top_p": 0.9,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime", model_id="mistral", request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 6)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "mistral")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.9)
+        self.assertEqual(
+            bedrock_runtime_attributes["gen_ai.usage.input_tokens"], math.ceil(len(request_body["prompt"]) / 6)
+        )
+        response_body = {"outputs": [{"text": "Goodbye, World", "stop_reason": "stop"}]}
+        json_bytes = json.dumps(response_body).encode("utf-8")
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime", model_id="mistral", streaming_body=streaming_body
+        )
+
+        self.assertEqual(
+            bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"],
+            math.ceil(len(response_body["outputs"][0]["text"]) / 6),
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["stop"])
 
         # SecretsManager
         self.assertTrue("secretsmanager" in _KNOWN_EXTENSIONS)
@@ -385,26 +586,27 @@ def _do_extract_sqs_attributes() -> Dict[str, str]:
     return _do_extract_attributes(service_name, params)
 
 
-def _do_extract_attributes_bedrock(service, operation=None) -> Dict[str, str]:
+def _do_extract_attributes_bedrock(service, operation=None, model_id=None, request_body=None) -> Dict[str, str]:
     params: Dict[str, Any] = {
         "agentId": _BEDROCK_AGENT_ID,
         "dataSourceId": _BEDROCK_DATASOURCE_ID,
        "knowledgeBaseId": _BEDROCK_KNOWLEDGEBASE_ID,
         "guardrailId": _BEDROCK_GUARDRAIL_ID,
-        "modelId": _GEN_AI_REQUEST_MODEL,
+        "modelId": model_id,
+        "body": request_body,
     }
     return _do_extract_attributes(service, params, operation)
 
 
-def _do_on_success_bedrock(service, operation=None) -> Dict[str, str]:
+def _do_on_success_bedrock(service, operation=None, model_id=None, streaming_body=None) -> Dict[str, str]:
     result: Dict[str, Any] = {
         "agentId": _BEDROCK_AGENT_ID,
         "dataSourceId": _BEDROCK_DATASOURCE_ID,
         "knowledgeBaseId": _BEDROCK_KNOWLEDGEBASE_ID,
         "guardrailId": _BEDROCK_GUARDRAIL_ID,
-        "modelId": _GEN_AI_REQUEST_MODEL,
+        "body": streaming_body,
     }
-    return _do_on_success(service, result, operation)
+    return _do_on_success(service, result, operation, params={"modelId": model_id})
 
 
 def _do_extract_secretsmanager_attributes() -> Dict[str, str]:
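
Reviewer note (not part of the patch): a minimal usage sketch of how the new attributes are expected to surface once this change is applied and botocore is instrumented. The region, prompt, and model ID below are placeholder assumptions; the gen_ai.* names map to the constants added in _aws_span_processing_util.py, and invoke_model / BotocoreInstrumentor are the standard boto3 and opentelemetry-instrumentation-botocore entry points.

# Illustrative sketch only; requires AWS credentials and Bedrock model access.
import json

import boto3
from opentelemetry.instrumentation.botocore import BotocoreInstrumentor

BotocoreInstrumentor().instrument()

client = boto3.client("bedrock-runtime", region_name="us-west-2")  # placeholder region
request_body = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 512,     # expected to appear as gen_ai.request.max_tokens
    "temperature": 0.5,    # expected to appear as gen_ai.request.temperature
    "top_p": 0.999,        # expected to appear as gen_ai.request.top_p
    "messages": [{"role": "user", "content": "Hello, world"}],
}
response = client.invoke_model(
    # Placeholder model ID; the "anthropic.claude" prefix selects the Claude handlers above.
    modelId="anthropic.claude-3-haiku-20240307-v1:0",
    body=json.dumps(request_body),
)
# on_success() reads the response StreamingBody to set gen_ai.usage.* and
# gen_ai.response.finish_reasons, then replaces the stream, so the caller can
# still consume the body normally.
print(json.loads(response["body"].read()))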