aws-observability · yiyuan-he · Nov 1, 2024 · Oct 28, 2024 · Oct 29, 2024 · Oct 29, 2024
@@ -47,6 +47,12 @@ export class AwsSpanProcessingUtil {
   // TODO: Use Semantic Conventions once upgraded
   static GEN_AI_REQUEST_MODEL: string = 'gen_ai.request.model';
   static GEN_AI_SYSTEM: string = 'gen_ai.system';
+  static GEN_AI_REQUEST_MAX_TOKENS: string = 'gen_ai.request.max_tokens';
+  static GEN_AI_REQUEST_TEMPERATURE: string = 'gen_ai.request.temperature';
+  static GEN_AI_REQUEST_TOP_P: string = 'gen_ai.request.top_p';
+  static GEN_AI_RESPONSE_FINISH_REASONS: string = 'gen_ai.response.finish_reasons';
+  static GEN_AI_USAGE_INPUT_TOKENS: string = 'gen_ai.usage.input_tokens';
+  static GEN_AI_USAGE_OUTPUT_TOKENS: string = 'gen_ai.usage.output_tokens';
 
   static getDialectKeywords(): string[] {
     return SQL_DIALECT_KEYWORDS_JSON.keywords;

@@ -211,11 +211,173 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL] = modelId;
     }
 
+    if (request.commandInput?.body) {
+      const requestBody = JSON.parse(request.commandInput.body);
+      if (modelId.includes('amazon.titan')) {
+        if (requestBody.textGenerationConfig?.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] =
+            requestBody.textGenerationConfig.temperature;
+        }
+        if (requestBody.textGenerationConfig?.topP !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.textGenerationConfig.topP;
+        }
+        if (requestBody.textGenerationConfig?.maxTokenCount !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] =
+            requestBody.textGenerationConfig.maxTokenCount;
+        }
+      } else if (modelId.includes('anthropic.claude')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('meta.llama')) {
+        if (requestBody.max_gen_len !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_gen_len;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('cohere.command')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
+        }
+      } else if (modelId.includes('ai21.jamba')) {
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      } else if (modelId.includes('mistral.mistral')) {
+        if (requestBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body
+          // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6);
+        }
+        if (requestBody.max_tokens !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+        }
+        if (requestBody.temperature !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+        }
+        if (requestBody.top_p !== undefined) {
+          spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
+        }
+      }
+    }
+
     return {
       isIncoming,
       spanAttributes,
       spanKind,
       spanName,
     };
   }
+
+  responseHook(response: NormalizedResponse, span: Span, tracer: Tracer, config: AwsSdkInstrumentationConfig): void {
+    const currentModelId = response.request.commandInput?.modelId;
+    if (response.data?.body) {
+      const decodedResponseBody = new TextDecoder().decode(response.data.body);
+      const responseBody = JSON.parse(decodedResponseBody);
+      if (currentModelId.includes('amazon.titan')) {
+        if (responseBody.inputTextTokenCount !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.inputTextTokenCount);
+        }
+        if (responseBody.results?.[0]?.tokenCount !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.results[0].tokenCount);
+        }
+        if (responseBody.results?.[0]?.completionReason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.results[0].completionReason,
+          ]);
+        }
+      } else if (currentModelId.includes('anthropic.claude')) {
+        if (responseBody.usage?.input_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.input_tokens);
+        }
+        if (responseBody.usage?.output_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.output_tokens);
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
+        }
+      } else if (currentModelId.includes('meta.llama')) {
+        if (responseBody.prompt_token_count !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.prompt_token_count);
+        }
+        if (responseBody.generation_token_count !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.generation_token_count);
+        }
+        if (responseBody.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
+        }
+      } else if (currentModelId.includes('cohere.command')) {
+        if (responseBody.prompt !== undefined) {
+          // NOTE: We approximate the token count since this value is not directly available in the body
+          // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+          // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6));
+        }
+        if (responseBody.generations?.[0]?.text !== undefined) {
+          span.setAttribute(
+            AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
+            // NOTE: We approximate the token count since this value is not directly available in the body
+            // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+            // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+            Math.ceil(responseBody.generations[0].text.length / 6)
+          );
+        }
+        if (responseBody.generations?.[0]?.finish_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.generations[0].finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('ai21.jamba')) {
+        if (responseBody.usage?.prompt_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.prompt_tokens);
+        }
+        if (responseBody.usage?.completion_tokens !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.completion_tokens);
+        }
+        if (responseBody.choices?.[0]?.finish_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.choices[0].finish_reason,
+          ]);
+        }
+      } else if (currentModelId.includes('mistral.mistral')) {
+        if (responseBody.outputs?.[0]?.text !== undefined) {
+          span.setAttribute(
+            AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
+            // NOTE: We approximate the token count since this value is not directly available in the body
+            // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+            // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+            Math.ceil(responseBody.outputs[0].text.length / 6)
+          );
+        }
+        if (responseBody.outputs?.[0]?.stop_reason !== undefined) {
+          span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
+            responseBody.outputs[0].stop_reason,
+          ]);
+        }
+      }
+    }
+  }
 }