Skip to content

Commit

Permalink
feat: Add CFN Primary ID for Existing Supported AWS Resources (#264)
Browse files Browse the repository at this point in the history
### *Description of changes:*

These changes support AWS Resources with different CFN Identifier
formats. This new attribute will have the key
`aws.remote.resource.cfn.primary.identifier`.

More context can be found in the comments of this closed PR:
#261

In most cases, this new attribute should have the same value as the
existing attribute `aws.remote.resource.identifier`.

However, there are some edge cases we must handle as seen in this PR:
- `AWS::SQS::Queue`
-
https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sqs-queue.html
  - The CFN Id should be the URL of the Queue.
- `AWS::Bedrock::DataSource`
-
https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-bedrock-datasource.html
- The CFN Id should be a compound value where the knowledge base Id and
the data source Id are separated by `|`

Also updated Bedrock Agent request parameters to also include Knowledge
Base ID:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent/client/get_data_source.html

### *Test Plan:*
Set up a client-server with auto-instrumentation to verify that the
correct span data is being generated.


![bedrock_cfn_primary_id_verification](https://github.com/user-attachments/assets/9a375751-a70c-41ab-896a-00e26a5fcc76)

![sqs_cfn_primary_id_verification](https://github.com/user-attachments/assets/a8ee28b0-4344-4f48-82b7-a100da939860)

Unit tests for instrumentation

![metric_generator_unit_test_verification](https://github.com/user-attachments/assets/52c26651-068d-4d40-a020-69714feab17c)

![instrumentation_patch_unit_test_verification](https://github.com/user-attachments/assets/3c23031d-52c2-49e6-9285-da679351ae0d)


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
  • Loading branch information
yiyuan-he authored Sep 25, 2024
1 parent b1fcdb3 commit 41e0987
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
AWS_SDK_DESCENDANT: str = "aws.sdk.descendant"
AWS_CONSUMER_PARENT_SPAN_KIND: str = "aws.consumer.parent.span.kind"
AWS_TRACE_FLAG_SAMPLED: str = "aws.trace.flag.sampled"
AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER: str = "aws.remote.resource.cfn.primary.identifier"

# AWS_#_NAME attributes are not supported in python as they are not part of the Semantic Conventions.
# TODO:Move to Semantic Conventions when these attributes are added.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
AWS_BEDROCK_DATA_SOURCE_ID,
AWS_BEDROCK_GUARDRAIL_ID,
AWS_BEDROCK_KNOWLEDGE_BASE_ID,
AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER,
AWS_KINESIS_STREAM_NAME,
AWS_LOCAL_OPERATION,
AWS_LOCAL_SERVICE,
Expand Down Expand Up @@ -372,6 +373,7 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri
"""
remote_resource_type: Optional[str] = None
remote_resource_identifier: Optional[str] = None
cloudformation_primary_identifier: Optional[str] = None

if is_aws_sdk_span(span):
# Only extract the table name when _AWS_TABLE_NAMES has size equals to one
Expand All @@ -387,17 +389,24 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri
elif is_key_present(span, AWS_SQS_QUEUE_NAME):
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_SQS_QUEUE_NAME))
cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_SQS_QUEUE_URL))
elif is_key_present(span, AWS_SQS_QUEUE_URL):
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
remote_resource_identifier = _escape_delimiters(
SqsUrlParser.get_queue_name(span.attributes.get(AWS_SQS_QUEUE_URL))
)
cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_SQS_QUEUE_URL))
elif is_key_present(span, AWS_BEDROCK_AGENT_ID):
remote_resource_type = _NORMALIZED_BEDROCK_SERVICE_NAME + "::Agent"
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_BEDROCK_AGENT_ID))
elif is_key_present(span, AWS_BEDROCK_DATA_SOURCE_ID):
remote_resource_type = _NORMALIZED_BEDROCK_SERVICE_NAME + "::DataSource"
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_BEDROCK_DATA_SOURCE_ID))
cloudformation_primary_identifier = (
_escape_delimiters(span.attributes.get(AWS_BEDROCK_KNOWLEDGE_BASE_ID))
+ "|"
+ remote_resource_identifier
)
elif is_key_present(span, AWS_BEDROCK_GUARDRAIL_ID):
remote_resource_type = _NORMALIZED_BEDROCK_SERVICE_NAME + "::Guardrail"
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_BEDROCK_GUARDRAIL_ID))
Expand All @@ -411,9 +420,19 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri
remote_resource_type = _DB_CONNECTION_STRING_TYPE
remote_resource_identifier = _get_db_connection(span)

if remote_resource_type is not None and remote_resource_identifier is not None:
# If the CFN Primary Id is still None here, that means it is not an edge case.
# Then, we can just assign it the same value as remote_resource_identifier
if cloudformation_primary_identifier is None:
cloudformation_primary_identifier = remote_resource_identifier

if (
remote_resource_type is not None
and remote_resource_identifier is not None
and cloudformation_primary_identifier is not None
):
attributes[AWS_REMOTE_RESOURCE_TYPE] = remote_resource_type
attributes[AWS_REMOTE_RESOURCE_IDENTIFIER] = remote_resource_identifier
attributes[AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER] = cloudformation_primary_identifier


def _get_db_connection(span: ReadableSpan) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class _DataSourceOperation(_BedrockAgentOperation):
"""

request_attributes = {
AWS_BEDROCK_KNOWLEDGE_BASE_ID: _KNOWLEDGE_BASE_ID,
AWS_BEDROCK_DATA_SOURCE_ID: _DATA_SOURCE_ID,
}
response_attributes = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1044,14 +1044,24 @@ def test_sdk_client_span_with_remote_resource_attributes(self):
self._mock_attribute([AWS_BEDROCK_AGENT_ID], [None])

# Validate behaviour of AWS_BEDROCK_DATA_SOURCE_ID attribute, then remove it.
self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID], ["test_datasource_id"], keys, values)
self._mock_attribute(
[AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID],
["test_datasource_id", "test_knowledge_base_id"],
keys,
values,
)
self._validate_remote_resource_attributes("AWS::Bedrock::DataSource", "test_datasource_id")
self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID], [None])
self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID], [None, None])

# Validate behaviour of AWS_BEDROCK_DATA_SOURCE_ID attribute with special chars(^), then remove it.
self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID], ["test_datasource_^id"], keys, values)
self._mock_attribute(
[AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID],
["test_datasource_^id", "test_knowledge_base_^id"],
keys,
values,
)
self._validate_remote_resource_attributes("AWS::Bedrock::DataSource", "test_datasource_^^id")
self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID], [None])
self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID], [None, None])

# Validate behaviour of AWS_BEDROCK_GUARDRAIL_ID attribute, then remove it.
self._mock_attribute([AWS_BEDROCK_GUARDRAIL_ID], ["test_guardrail_id"], keys, values)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,23 @@ def _test_patched_bedrock_agent_instrumentation(self):
"UpdateDataSource": ("aws.bedrock.data_source.id", _BEDROCK_DATASOURCE_ID),
}

data_source_operations = ["DeleteDataSource", "GetDataSource", "UpdateDataSource"]

for operation, attribute_tuple in operation_to_expected_attribute.items():
bedrock_agent_extract_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
"bedrock-agent", operation
)
self.assertEqual(len(bedrock_agent_extract_attributes), 1)
self.assertEqual(bedrock_agent_extract_attributes[attribute_tuple[0]], attribute_tuple[1])

if operation in data_source_operations:
self.assertEqual(len(bedrock_agent_extract_attributes), 2)
self.assertEqual(bedrock_agent_extract_attributes[attribute_tuple[0]], attribute_tuple[1])
self.assertEqual(
bedrock_agent_extract_attributes["aws.bedrock.knowledge_base.id"], _BEDROCK_KNOWLEDGEBASE_ID
)
else:
self.assertEqual(len(bedrock_agent_extract_attributes), 1)
self.assertEqual(bedrock_agent_extract_attributes[attribute_tuple[0]], attribute_tuple[1])

bedrock_agent_success_attributes: Dict[str, str] = _do_on_success_bedrock("bedrock-agent", operation)
self.assertEqual(len(bedrock_agent_success_attributes), 1)
self.assertEqual(bedrock_agent_success_attributes[attribute_tuple[0]], attribute_tuple[1])
Expand Down

0 comments on commit 41e0987

Please sign in to comment.