feat: add support for new cohere command r models #118
base: main
@@ -245,6 +245,22 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (requestBody.top_p !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
       }
+    } else if (modelId.includes('cohere.command-r')) {
+      if (requestBody.max_tokens !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
+      }
+      if (requestBody.temperature !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
+      }
+      if (requestBody.p !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
+      }
+      if (requestBody.message !== undefined) {
+        // NOTE: We approximate the token count since this value is not directly available in the body.
+        // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
+        // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.message.length / 6);
+      }
     } else if (modelId.includes('cohere.command')) {
       if (requestBody.max_tokens !== undefined) {

Review comment: I'm not sure if we should go ahead and remove support for the old `cohere.command` model here. According to the docs, EOL should not be until 2025, but we are already getting 404s from this model.

        spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
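
Note (illustrative, not part of the diff): the `(total_chars / 6)` heuristic now appears in several branches of this method, so it may help to see it in isolation. The helper name `approximateTokenCount` below is hypothetical, purely for illustration of the calculation the PR inlines:

```typescript
// Hypothetical helper showing the character-based heuristic used throughout this PR.
// Bedrock's docs use (total_chars / 6) to approximate token count for pricing:
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
function approximateTokenCount(text: string): number {
  return Math.ceil(text.length / 6);
}

// e.g. a 120-character Command R message is reported as ceil(120 / 6) = 20 input tokens.
console.log(approximateTokenCount('x'.repeat(120))); // 20
```
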
@@ -255,6 +271,9 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (requestBody.p !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
       }
+      if (requestBody.prompt !== undefined) {
+        spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6);
+      }
     } else if (modelId.includes('ai21.jamba')) {
       if (requestBody.max_tokens !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
@@ -265,7 +284,7 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (requestBody.top_p !== undefined) {
         spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
       }
-    } else if (modelId.includes('mistral.mistral')) {
+    } else if (modelId.includes('mistral')) {

Review comment: We loosen this conditional because, out of the list of Mistral model IDs on Bedrock, one of them (the Mixtral model, `mistral.mixtral-8x7b-instruct-v0:1`) starts with `mistral.mixtral` rather than `mistral.mistral`.

       if (requestBody.prompt !== undefined) {
         // NOTE: We approximate the token count since this value is not directly available in the body.
         // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
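
For clarity, here is a quick illustrative snippet (not part of the diff) showing how the old and new conditionals classify the two Mistral-family model IDs on Bedrock:

```typescript
// Illustrative only: how the old and new checks classify Bedrock Mistral model IDs.
const modelIds = [
  'mistral.mistral-7b-instruct-v0:2',   // matched by both checks
  'mistral.mixtral-8x7b-instruct-v0:1', // missed by includes('mistral.mistral')
];

for (const id of modelIds) {
  console.log(id, {
    oldCheck: id.includes('mistral.mistral'), // true, false
    newCheck: id.includes('mistral'),         // true, true
  });
}
```

Because this check sits at the end of an else-if chain keyed on vendor prefixes, the looser `includes('mistral')` should in practice still fire only for Mistral-family IDs.
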
@@ -329,13 +348,18 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
       if (responseBody.stop_reason !== undefined) {
         span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
       }
-    } else if (currentModelId.includes('cohere.command')) {
-      if (responseBody.prompt !== undefined) {
+    } else if (currentModelId.includes('cohere.command-r')) {
+      console.log('Response Body:', responseBody);
+      if (responseBody.text !== undefined) {
         // NOTE: We approximate the token count since this value is not directly available in the body.
         // According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
         // https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
-        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6));
+        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, Math.ceil(responseBody.text.length / 6));
       }
+      if (responseBody.finish_reason !== undefined) {
+        span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.finish_reason]);
+      }
+    } else if (currentModelId.includes('cohere.command')) {
       if (responseBody.generations?.[0]?.text !== undefined) {
         span.setAttribute(
           AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,

Review comment: The prompt is only available in the JavaScript implementation because of a special data model defined in an upstream OTel package, which makes it possible to approximate the input token usage from the response body. This is not possible in the Java implementation, as there is no special data model wrapping the inputs into the response body. As a result, I decided to move this approximation logic strictly to the request body, to keep the implementation logic consistent between the languages.

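
For context, here is a sketch (not part of the diff) of an InvokeModel call whose request and response bodies the new `cohere.command-r` branch inspects. The region and model version are assumptions; the field names match what the instrumentation reads:

```typescript
import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime';

async function main(): Promise<void> {
  // Region and model version are assumptions; adjust for your environment.
  const client = new BedrockRuntimeClient({ region: 'us-east-1' });

  const response = await client.send(
    new InvokeModelCommand({
      modelId: 'cohere.command-r-v1:0',
      contentType: 'application/json',
      accept: 'application/json',
      // Request fields the new branch reads: max_tokens, temperature, p, and message
      // (message.length / 6 feeds the input-token approximation).
      body: JSON.stringify({
        message: 'Summarize the plot of Hamlet in two sentences.',
        max_tokens: 200,
        temperature: 0.5,
        p: 0.9,
      }),
    })
  );

  // Response fields the new branch reads: text (output-token approximation)
  // and finish_reason.
  const responseBody = JSON.parse(new TextDecoder().decode(response.body));
  console.log(responseBody.text, responseBody.finish_reason);
}

main().catch(console.error);
```
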
@@ -362,7 +386,7 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
           responseBody.choices[0].finish_reason,
         ]);
       }
-    } else if (currentModelId.includes('mistral.mistral')) {
+    } else if (currentModelId.includes('mistral')) {
       if (responseBody.outputs?.[0]?.text !== undefined) {
         span.setAttribute(
           AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,

Review comment: According to the docs, this data should be available in the response body. However, when logging the response body in the implementation, it seems the data is not actually there, so I decided to stay with this token approximation approach.

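
If the approximation stays, the expected span attributes for a given response are easy to pin down. A minimal worked example (illustrative only; the sample response values are made up, not from the PR's test suite):

```typescript
// For a 'cohere.command-r' response like this, the instrumentation should emit:
const responseBody = { text: 'Hi there!', finish_reason: 'COMPLETE' };

const expectedOutputTokens = Math.ceil(responseBody.text.length / 6); // ceil(9 / 6) = 2
const expectedFinishReasons = [responseBody.finish_reason];           // ['COMPLETE']

console.log(expectedOutputTokens, expectedFinishReasons);
```
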