Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: set up gen ai inference attributes for foundational text models #111

Merged
merged 10 commits into from
Nov 1, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ export class AwsSpanProcessingUtil {
// TODO: Use Semantic Conventions once upgraded
static GEN_AI_REQUEST_MODEL: string = 'gen_ai.request.model';
static GEN_AI_SYSTEM: string = 'gen_ai.system';
static GEN_AI_REQUEST_MAX_TOKENS: string = 'gen_ai.request.max_tokens';
static GEN_AI_REQUEST_TEMPERATURE: string = 'gen_ai.request.temperature';
static GEN_AI_REQUEST_TOP_P: string = 'gen_ai.request.top_p';
static GEN_AI_RESPONSE_FINISH_REASONS: string = 'gen_ai.response.finish_reasons';
static GEN_AI_USAGE_INPUT_TOKENS: string = 'gen_ai.usage.input_tokens';
static GEN_AI_USAGE_OUTPUT_TOKENS: string = 'gen_ai.usage.output_tokens';

static getDialectKeywords(): string[] {
return SQL_DIALECT_KEYWORDS_JSON.keywords;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,173 @@ export class BedrockRuntimeServiceExtension implements ServiceExtension {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MODEL] = modelId;
}

if (request.commandInput?.body) {
const requestBody = JSON.parse(request.commandInput.body);
if (modelId.includes('amazon.titan')) {
if (requestBody.textGenerationConfig?.temperature !== undefined) {
mxiamxia marked this conversation as resolved.
Show resolved Hide resolved
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] =
requestBody.textGenerationConfig.temperature;
}
if (requestBody.textGenerationConfig?.topP !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.textGenerationConfig.topP;
}
if (requestBody.textGenerationConfig?.maxTokenCount !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] =
requestBody.textGenerationConfig.maxTokenCount;
}
} else if (modelId.includes('anthropic.claude')) {
if (requestBody.max_tokens !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
}
if (requestBody.temperature !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
}
if (requestBody.top_p !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
}
} else if (modelId.includes('meta.llama')) {
if (requestBody.max_gen_len !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_gen_len;
}
if (requestBody.temperature !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
}
if (requestBody.top_p !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
}
} else if (modelId.includes('cohere.command')) {
if (requestBody.max_tokens !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
}
if (requestBody.temperature !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
}
if (requestBody.p !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.p;
}
} else if (modelId.includes('ai21.jamba')) {
mxiamxia marked this conversation as resolved.
Show resolved Hide resolved
if (requestBody.max_tokens !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
}
if (requestBody.temperature !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
}
if (requestBody.top_p !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
}
} else if (modelId.includes('mistral.mistral')) {
if (requestBody.prompt !== undefined) {
// NOTE: We approximate the token count since this value is not directly available in the body
// According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
spanAttributes[AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS] = Math.ceil(requestBody.prompt.length / 6);
}
if (requestBody.max_tokens !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_MAX_TOKENS] = requestBody.max_tokens;
}
if (requestBody.temperature !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TEMPERATURE] = requestBody.temperature;
}
if (requestBody.top_p !== undefined) {
spanAttributes[AwsSpanProcessingUtil.GEN_AI_REQUEST_TOP_P] = requestBody.top_p;
}
}
}

return {
isIncoming,
spanAttributes,
spanKind,
spanName,
};
}

responseHook(response: NormalizedResponse, span: Span, tracer: Tracer, config: AwsSdkInstrumentationConfig): void {
const currentModelId = response.request.commandInput?.modelId;
if (response.data?.body) {
const decodedResponseBody = new TextDecoder().decode(response.data.body);
const responseBody = JSON.parse(decodedResponseBody);
if (currentModelId.includes('amazon.titan')) {
if (responseBody.inputTextTokenCount !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.inputTextTokenCount);
}
if (responseBody.results?.[0]?.tokenCount !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.results[0].tokenCount);
}
if (responseBody.results?.[0]?.completionReason !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
responseBody.results[0].completionReason,
]);
}
} else if (currentModelId.includes('anthropic.claude')) {
if (responseBody.usage?.input_tokens !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.input_tokens);
}
if (responseBody.usage?.output_tokens !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.output_tokens);
}
if (responseBody.stop_reason !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
}
} else if (currentModelId.includes('meta.llama')) {
if (responseBody.prompt_token_count !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.prompt_token_count);
}
if (responseBody.generation_token_count !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.generation_token_count);
}
if (responseBody.stop_reason !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [responseBody.stop_reason]);
}
} else if (currentModelId.includes('cohere.command')) {
if (responseBody.prompt !== undefined) {
// NOTE: We approximate the token count since this value is not directly available in the body
// According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, Math.ceil(responseBody.prompt.length / 6));
mxiamxia marked this conversation as resolved.
Show resolved Hide resolved
}
if (responseBody.generations?.[0]?.text !== undefined) {
span.setAttribute(
AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
// NOTE: We approximate the token count since this value is not directly available in the body
// According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
Math.ceil(responseBody.generations[0].text.length / 6)
);
}
if (responseBody.generations?.[0]?.finish_reason !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
responseBody.generations[0].finish_reason,
]);
}
} else if (currentModelId.includes('ai21.jamba')) {
if (responseBody.usage?.prompt_tokens !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_INPUT_TOKENS, responseBody.usage.prompt_tokens);
}
if (responseBody.usage?.completion_tokens !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS, responseBody.usage.completion_tokens);
}
if (responseBody.choices?.[0]?.finish_reason !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
responseBody.choices[0].finish_reason,
]);
}
} else if (currentModelId.includes('mistral.mistral')) {
if (responseBody.outputs?.[0]?.text !== undefined) {
span.setAttribute(
AwsSpanProcessingUtil.GEN_AI_USAGE_OUTPUT_TOKENS,
// NOTE: We approximate the token count since this value is not directly available in the body
// According to Bedrock docs they use (total_chars / 6) to approximate token count for pricing.
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
Math.ceil(responseBody.outputs[0].text.length / 6)
);
}
if (responseBody.outputs?.[0]?.stop_reason !== undefined) {
span.setAttribute(AwsSpanProcessingUtil.GEN_AI_RESPONSE_FINISH_REASONS, [
responseBody.outputs[0].stop_reason,
]);
}
}
}
}
}
Loading
Loading