diff --git a/aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts b/aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts index 69a3a28..2ac562d 100644 --- a/aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts +++ b/aws-distro-opentelemetry-node-autoinstrumentation/src/patches/aws/services/bedrock.ts @@ -245,6 +245,22 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension { if (requestBody.top_p !== undefined) { spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p; } + } else if (modelId.includes('cohere.command-r')) { + if (requestBody.max_tokens !== undefined) { + spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens; + } + if (requestBody.temperature !== undefined) { + spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature; + } + if (requestBody.p !== undefined) { + spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p; + } + if (requestBody.message !== undefined) { + // NOTE: We approximate the token count since this value is not directly available in the body + // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing. 
+ // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html + spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.message.length / 6); + } } else if (modelId.includes('cohere.command')) { if (requestBody.max_tokens !== undefined) { spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens; @@ -255,6 +271,9 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension { if (requestBody.p !== undefined) { spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p; } + if (requestBody.prompt !== undefined) { + spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6); + } } else if (modelId.includes('ai21.jamba')) { if (requestBody.max_tokens !== undefined) { spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens; @@ -265,7 +284,7 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension { if (requestBody.top_p !== undefined) { spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p; } - } else if (modelId.includes('mistral.mistral')) { + } else if (modelId.includes('mistral')) { if (requestBody.prompt !== undefined) { // NOTE: We approximate the token count since this value is not directly available in the body // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing. 
@@ -329,13 +348,17 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension { if (responseBody.stop_reason !== undefined) { span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]); } - } else if (currentModelId.includes('cohere.command')) { - if (responseBody.prompt !== undefined) { + } else if (currentModelId.includes('cohere.command-r')) { + if (responseBody.text !== undefined) { // NOTE: We approximate the token count since this value is not directly available in the body // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing. // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html - span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6)); + span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, Math.ceil(responseBody.text.length / 6)); } + if (responseBody.finish_reason !== undefined) { + span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.finish_reason]); + } + } else if (currentModelId.includes('cohere.command')) { if (responseBody.generations?.[0]?.text !== undefined) { span.setAttribute( AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, @@ -362,7 +385,7 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension { responseBody.choices[0].finish_reason, ]); } - } else if (currentModelId.includes('mistral.mistral')) { + } else if (currentModelId.includes('mistral')) { if (responseBody.outputs?.[0]?.text !== undefined) { span.setAttribute( AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, diff --git a/aws-distro-opentelemetry-node-autoinstrumentation/test/patches/aws/services/bedrock.test.ts b/aws-distro-opentelemetry-node-autoinstrumentation/test/patches/aws/services/bedrock.test.ts index 97c63a6..8cc5a2b 100644 --- 
a/aws-distro-opentelemetry-node-autoinstrumentation/test/patches/aws/services/bedrock.test.ts +++ b/aws-distro-opentelemetry-node-autoinstrumentation/test/patches/aws/services/bedrock.test.ts @@ -517,6 +517,60 @@ describe('BedrockRuntime', () => { expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT); }); + it('Add Cohere Command R model attributes to span', async () => { + const modelId: string = 'cohere.command-r-v1:0'; + const prompt: string = "Describe the purpose of a 'hello world' program in one line"; + const nativeRequest: any = { + message: prompt, + max_tokens: 512, + temperature: 0.5, + p: 0.65, + }; + const mockRequestBody: string = JSON.stringify(nativeRequest); + const mockResponseBody: any = { + finish_reason: 'COMPLETE', + text: 'test-generation-text', + prompt: prompt, + request: { + commandInput: { + modelId: modelId, + }, + }, + }; + + nock(`https://bedrock-runtime.${region}.amazonaws.com`) + .post(`/model/${encodeURIComponent(modelId)}/invoke`) + .reply(200, mockResponseBody); + + await bedrock + .invokeModel({ + modelId: modelId, + body: mockRequestBody, + }) + .catch((err: any) => { + console.log('error', err); + }); + + const testSpans: ReadableSpan[] = getTestSpans(); + const invokeModelSpans: ReadableSpan[] = testSpans.filter((s: ReadableSpan) => { + return s.name === 'BedrockRuntime.InvokeModel'; + }); + expect(invokeModelSpans.length).toBe(1); + const invokeModelSpan = invokeModelSpans[0]; + expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_AGENT_ID]).toBeUndefined(); + expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_KNOWLEDGE_BASE_ID]).toBeUndefined(); + expect(invokeModelSpan.attributes[AWS_ATTRIBUTE_KEYS.AWS_BEDROCK_DATA_SOURCE_ID]).toBeUndefined(); + expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_SYSTEM]).toBe('aws_bedrock'); + expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL]).toBe(modelId); + 
expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS]).toBe(512); + expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE]).toBe(0.5); + expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P]).toBe(0.65); + expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS]).toBe(10); + expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS]).toBe(4); + expect(invokeModelSpan.attributes[AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS]).toEqual(['COMPLETE']); + expect(invokeModelSpan.kind).toBe(SpanKind.CLIENT); + }); + it('Add Meta Llama model attributes to span', async () => { const modelId: string = 'meta.llama2-13b-chat-v1'; const prompt: string = 'Describe the purpose of an interpreter program in one line.';