diff --git a/.github/workflows/test_frameworks.yml b/.github/workflows/test_frameworks.yml index e56256b0d57..6ea2513a763 100644 --- a/.github/workflows/test_frameworks.yml +++ b/.github/workflows/test_frameworks.yml @@ -123,6 +123,7 @@ jobs: name: Sanic 24.6 (with ${{ matrix.suffix }}) runs-on: ubuntu-20.04 needs: needs-run + timeout-minutes: 15 env: DD_PROFILING_ENABLED: ${{ matrix.profiling }} DD_IAST_ENABLED: ${{ matrix.iast }} @@ -625,6 +626,7 @@ jobs: name: Starlette 0.38.4 (with ${{ matrix.suffix }}) runs-on: "ubuntu-latest" needs: needs-run + timeout-minutes: 30 env: DD_TESTING_RAISE: true DD_PROFILING_ENABLED: ${{ matrix.profiling }} diff --git a/ddtrace/_trace/trace_handlers.py b/ddtrace/_trace/trace_handlers.py index 7ddffd12588..b4e1f7b922e 100644 --- a/ddtrace/_trace/trace_handlers.py +++ b/ddtrace/_trace/trace_handlers.py @@ -9,6 +9,7 @@ import wrapt +from ddtrace import config from ddtrace._trace._span_pointer import _SpanPointerDescription from ddtrace._trace.utils import extract_DD_context_from_messages from ddtrace._trace.utils_botocore.span_pointers import extract_span_pointers_from_successful_botocore_response @@ -487,6 +488,7 @@ def _on_botocore_patched_api_call_success(ctx, response): set_botocore_response_metadata_tags(span, response) for span_pointer_description in extract_span_pointers_from_successful_botocore_response( + dynamodb_primary_key_names_for_tables=config.botocore.dynamodb_primary_key_names_for_tables, endpoint_name=ctx.get_item("endpoint_name"), operation_name=ctx.get_item("operation"), request_parameters=ctx.get_item("params"), diff --git a/ddtrace/_trace/utils_botocore/span_pointers.py b/ddtrace/_trace/utils_botocore/span_pointers.py index 811c4281d8b..4f7b42b2715 100644 --- a/ddtrace/_trace/utils_botocore/span_pointers.py +++ b/ddtrace/_trace/utils_botocore/span_pointers.py @@ -3,6 +3,7 @@ from typing import Dict from typing import List from typing import NamedTuple +from typing import Set from ddtrace._trace._span_pointer 
import _SpanPointerDescription from ddtrace._trace._span_pointer import _SpanPointerDirection @@ -13,7 +14,19 @@ log = get_logger(__name__) +_DynamoDBTableName = str +_DynamoDBItemFieldName = str +_DynamoDBItemTypeTag = str + +_DynamoDBItemValue = Dict[_DynamoDBItemTypeTag, Any] +_DynamoDBItem = Dict[_DynamoDBItemFieldName, _DynamoDBItemValue] + +_DynamoDBItemPrimaryKeyValue = Dict[_DynamoDBItemTypeTag, str] # must be length 1 +_DynamoDBItemPrimaryKey = Dict[_DynamoDBItemFieldName, _DynamoDBItemPrimaryKeyValue] + + def extract_span_pointers_from_successful_botocore_response( + dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], endpoint_name: str, operation_name: str, request_parameters: Dict[str, Any], @@ -22,9 +35,148 @@ def extract_span_pointers_from_successful_botocore_response( if endpoint_name == "s3": return _extract_span_pointers_for_s3_response(operation_name, request_parameters, response) + if endpoint_name == "dynamodb": + return _extract_span_pointers_for_dynamodb_response( + dynamodb_primary_key_names_for_tables, operation_name, request_parameters + ) + + return [] + + +def _extract_span_pointers_for_dynamodb_response( + dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], + operation_name: str, + request_parameters: Dict[str, Any], +) -> List[_SpanPointerDescription]: + if operation_name == "PutItem": + return _extract_span_pointers_for_dynamodb_putitem_response( + dynamodb_primary_key_names_for_tables, request_parameters + ) + return [] +def _extract_span_pointers_for_dynamodb_putitem_response( + dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], + request_parameters: Dict[str, Any], +) -> List[_SpanPointerDescription]: + try: + table_name = request_parameters["TableName"] + item = request_parameters["Item"] + + return [ + _aws_dynamodb_item_span_pointer_description( + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + 
table_name=table_name, + primary_key=_aws_dynamodb_item_primary_key_from_item( + dynamodb_primary_key_names_for_tables[table_name], item + ), + ) + ] + + except Exception as e: + log.warning( + "failed to generate DynamoDB.PutItem span pointer: %s", + str(e), + ) + return [] + + +def _aws_dynamodb_item_primary_key_from_item( + primary_key_field_names: Set[_DynamoDBItemFieldName], + item: _DynamoDBItem, +) -> _DynamoDBItemPrimaryKey: + if len(primary_key_field_names) not in (1, 2): + raise ValueError(f"unexpected number of primary key fields: {len(primary_key_field_names)}") + + return { + primary_key_field_name: _aws_dynamodb_extract_and_verify_primary_key_field_value_item( + item, primary_key_field_name + ) + for primary_key_field_name in primary_key_field_names + } + + +def _aws_dynamodb_item_span_pointer_description( + pointer_direction: _SpanPointerDirection, + table_name: _DynamoDBTableName, + primary_key: _DynamoDBItemPrimaryKey, +) -> _SpanPointerDescription: + return _SpanPointerDescription( + pointer_kind="aws.dynamodb.item", + pointer_direction=pointer_direction, + pointer_hash=_aws_dynamodb_item_span_pointer_hash(table_name, primary_key), + extra_attributes={}, + ) + + +def _aws_dynamodb_extract_and_verify_primary_key_field_value_item( + item: _DynamoDBItem, + primary_key_field_name: _DynamoDBItemFieldName, +) -> _DynamoDBItemPrimaryKeyValue: + if primary_key_field_name not in item: + raise ValueError(f"missing primary key field: {primary_key_field_name}") + + value_object = item[primary_key_field_name] + + if len(value_object) != 1: + raise ValueError(f"primary key field {primary_key_field_name} must have exactly one value: {len(value_object)}") + + value_type, value_data = next(iter(value_object.items())) + if value_type not in ("S", "N", "B"): + raise ValueError(f"unexpected primary key field {primary_key_field_name} value type: {value_type}") + + if not isinstance(value_data, str): + raise ValueError(f"unexpected primary key field 
{primary_key_field_name} value data type: {type(value_data)}") + + return {value_type: value_data} + + +def _aws_dynamodb_item_span_pointer_hash(table_name: _DynamoDBTableName, primary_key: _DynamoDBItemPrimaryKey) -> str: + if len(primary_key) == 1: + key, value_object = next(iter(primary_key.items())) + encoded_key_1 = key.encode("utf-8") + encoded_value_1 = _aws_dynamodb_item_encode_primary_key_value(value_object) + encoded_key_2 = b"" + encoded_value_2 = b"" + + elif len(primary_key) == 2: + (key_1, value_object_1), (key_2, value_object_2) = sorted( + primary_key.items(), key=lambda x: x[0].encode("utf-8") + ) + encoded_key_1 = key_1.encode("utf-8") + encoded_value_1 = _aws_dynamodb_item_encode_primary_key_value(value_object_1) + encoded_key_2 = key_2.encode("utf-8") + encoded_value_2 = _aws_dynamodb_item_encode_primary_key_value(value_object_2) + + else: + raise ValueError(f"unexpected number of primary key fields: {len(primary_key)}") + + return _standard_hashing_function( + table_name.encode("utf-8"), + encoded_key_1, + encoded_value_1, + encoded_key_2, + encoded_value_2, + ) + + +def _aws_dynamodb_item_encode_primary_key_value(value_object: _DynamoDBItemPrimaryKeyValue) -> bytes: + if len(value_object) != 1: + raise ValueError(f"primary key value object must have exactly one field: {len(value_object)}") + + value_type, value = next(iter(value_object.items())) + + if value_type == "S": + return value.encode("utf-8") + + if value_type in ("N", "B"): + # these should already be here as ASCII strings + return value.encode("ascii") + + raise ValueError(f"unknown primary key value type: {value_type}") + + def _extract_span_pointers_for_s3_response( operation_name: str, request_parameters: Dict[str, Any], diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectStr.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectStr.cpp new file mode 100644 index 00000000000..8f6252d8983 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectStr.cpp 
@@ -0,0 +1,215 @@ +#include +#include + +static void +set_lengthupdated_ranges(PyObject* result, + Py_ssize_t result_len, + const TaintRangeRefs& ranges, + const TaintRangeMapTypePtr& tx_map) +{ + if (!tx_map || tx_map->empty()) { + return; + } + + TaintRangeRefs copy_ranges(ranges); + for (auto& range : copy_ranges) { + range->length = result_len; + } + + set_ranges(result, copy_ranges, tx_map); +} + +static PyObject* +call_original_function(PyObject* orig_function, + int nargs, + int flag_added_args, + PyObject* const* args, + PyObject* kwnames) +{ + int skip_args = 2 + flag_added_args; + + // convert ** args to *args + py::list py_args_list; + for (Py_ssize_t i = skip_args; i < nargs; ++i) { + py_args_list.append(py::reinterpret_borrow(args[i])); + } + py::args py_args(py_args_list); + + PyObject* kwargs = kwnames_to_kwargs(args, nargs, kwnames); // built once: new reference, or nullptr with an exception set + PyObject* res = kwargs ? PyObject_Call(orig_function, py_args.ptr(), kwargs) : nullptr; // reuse the dict instead of building (and leaking) a second one + Py_XDECREF(kwargs); // kwargs may be nullptr when the dict could not be built + return res; +} + +static std::tuple +get_args(PyObject* const* args, const Py_ssize_t nargs, PyObject* kwnames) +{ + int flag_added_args = -1; + + PyObject* orig_function = args[0]; + + if (nargs > 1) { + if (PyLong_Check(args[1])) { + flag_added_args = PyLong_AsLong(args[1]); + } + } + + PyObject* text = nullptr; + if (nargs > 2) { + text = args[2]; + } + + PyObject* pyo_encoding = nullptr; + PyObject* pyo_errors = nullptr; + int effective_args = 1; + + if (nargs > 3) { + pyo_encoding = args[3]; + effective_args = 2; + } + if (nargs > 4) { + pyo_errors = args[4]; + effective_args = 3; + } + + // Not using kwnames to kwargs here for performance + if (kwnames and PyTuple_Check(kwnames)) { + for (Py_ssize_t i = 0; i < PyTuple_Size(kwnames); i++) { + if (effective_args > 3) { + // Will produce an error, so end here + break; + } + PyObject* key = PyTuple_GET_ITEM(kwnames, i); // Keyword name + PyObject* value = args[nargs + i]; // Keyword value + if (PyUnicode_CompareWithASCIIString(key, "encoding") == 
0) { + pyo_encoding = value; + ++effective_args; + continue; + } + if (PyUnicode_CompareWithASCIIString(key, "errors") == 0) { + ++effective_args; + pyo_errors = value; + } + + if (pyo_encoding and pyo_errors) { + break; + } + } + } + + return { effective_args, flag_added_args, orig_function, text, pyo_encoding, pyo_errors }; +} + +PyObject* +api_str_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs, PyObject* kwnames) +{ + if (nargs < 2 or nargs > 5) { + py::set_error(PyExc_ValueError, MSG_ERROR_N_PARAMS); + return nullptr; + } + + auto [effective_args, flag_added_args, orig_function, text, pyo_encoding, pyo_errors] = + get_args(args, nargs, kwnames); + + // This is a flag that the function was not the original + if (flag_added_args == -1 or not is_pointer_this_builtin(orig_function, "str")) { + return call_original_function(orig_function, nargs, flag_added_args, args, kwnames); + } + + if (nargs == 2 or (nargs > 2 and PyUnicode_Check(args[2]) and PyObject_Length(args[2]) == 0)) { + // builtin str() without parameters or empty parameter, just return an empty string + return PyUnicode_FromString(""); + } + + if (effective_args > 3) { + string error_msg = "str() takes at most 3 arguments (" + to_string(effective_args) + " given)"; + py::set_error(PyExc_TypeError, error_msg.c_str()); + return nullptr; + } + + const bool has_encoding = pyo_encoding != nullptr and PyUnicode_GetLength(pyo_encoding) > 0; + const bool has_errors = pyo_errors != nullptr and PyUnicode_GetLength(pyo_errors) > 0; + + // If it has encoding, then the text object must be a bytes or bytearray object; if not, call the original + // function so the error is raised + if (has_encoding and (not PyByteArray_Check(text) and not PyBytes_Check(text))) { + return call_original_function(orig_function, nargs, flag_added_args, args, kwnames); + } + + // Call the original if not a text type and has no encoding + if (not is_text(text)) { + PyObject* as_str = PyObject_Str(text); + return 
as_str; + } + + PyObject* result_o = nullptr; + + // With no encoding or errors arguments we can directly call PyObject_Str, which is faster + if (!has_encoding and !has_errors) { + result_o = PyObject_Str(text); + if (result_o == nullptr) { + return nullptr; + } + } else { + // Oddly enough, the presence of just the "errors" argument is enough to trigger the decoding + // behaviour of str() even if "encoding" is empty (but then it will take the default utf-8 value) + char* text_raw_bytes = nullptr; + Py_ssize_t text_raw_bytes_size; + + if (PyByteArray_Check(text)) { + text_raw_bytes = PyByteArray_AS_STRING(text); + text_raw_bytes_size = PyByteArray_GET_SIZE(text); + } else if (PyBytes_AsStringAndSize(text, &text_raw_bytes, &text_raw_bytes_size) == -1) { + if (has_pyerr()) { + return nullptr; + } + throw py::error_already_set(); + } + + const char* encoding = has_encoding ? PyUnicode_AsUTF8(pyo_encoding) : "utf-8"; + const char* errors = has_errors ? PyUnicode_AsUTF8(pyo_errors) : "strict"; + result_o = PyUnicode_Decode(text_raw_bytes, text_raw_bytes_size, encoding, errors); + + if (PyErr_Occurred()) { + return nullptr; + } + if (result_o == nullptr) { + Py_RETURN_NONE; + } + } + + TRY_CATCH_ASPECT("str_aspect", return result_o, , { + const auto tx_map = Initializer::get_tainting_map(); + if (!tx_map || tx_map->empty()) { + return result_o; + } + + auto [ranges, ranges_error] = get_ranges(text, tx_map); + if (ranges_error || ranges.empty()) { + return result_o; + } + + if (PyUnicode_Check(text)) { + set_ranges(result_o, ranges, tx_map); + } else { + // Encoding on Bytes or Bytearray: size could change + const auto len_result_o = PyObject_Length(result_o); + PyObject* check_offset = PyObject_Str(text); + + if (check_offset == nullptr) { + PyErr_Clear(); + set_lengthupdated_ranges(result_o, len_result_o, ranges, tx_map); + } else { + Py_ssize_t offset = PyUnicode_Find(result_o, check_offset, 0, len_result_o, 1); + if (offset < 0) { // -1: not found, -2: error (exception set) + PyErr_Clear(); + 
set_lengthupdated_ranges(result_o, len_result_o, ranges, tx_map); + } else { + copy_and_shift_ranges_from_strings(text, result_o, offset, len_result_o, tx_map); + } + } + Py_XDECREF(check_offset); // check_offset is nullptr when PyObject_Str(text) failed above + } + return result_o; + }); +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectStr.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectStr.h new file mode 100644 index 00000000000..6f5cab1e5c9 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectStr.h @@ -0,0 +1,5 @@ +#pragma once +#include "Aspects/Helpers.h" + +PyObject* +api_str_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs, PyObject* kwnames); diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp index 1a9b2c879ec..ef915ab5d94 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.cpp @@ -461,7 +461,7 @@ pyexport_aspect_helpers(py::module& m) "taint_escaped_text"_a, "ranges_orig"_a, py::return_value_policy::move); - m.def("parse_params", &parse_params); + m.def("parse_params", &parse_param); m.def("has_pyerr", &has_pyerr); m.def("has_pyerr_as_string", &has_pyerr_as_string); } diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h index cb4a449b9ac..70e878dd1bb 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/Helpers.h @@ -263,11 +263,11 @@ split_taints(const string& str_to_split) * // In this case, the result will be 42 (the positional argument). 
*/ inline py::object -parse_params(size_t position, - const char* keyword_name, - const py::object& default_value, - const py::args& args, - const py::kwargs& kwargs) +parse_param(size_t position, + const char* keyword_name, + const py::object& default_value, + const py::args& args, + const py::kwargs& kwargs) { if (args.size() >= position + 1) { return args[position]; @@ -278,6 +278,37 @@ parse_params(size_t position, return default_value; } +// Convert the kwnames of a function with METH_FASTCALL | METH_KEYWORDS to a classic kwargs dictionary +// so it can be used for other normal functions +inline PyObject* +kwnames_to_kwargs(PyObject* const* args, int nargs, PyObject* kwnames) +{ + PyObject* kwargs = PyDict_New(); + if (kwargs == nullptr) { + return nullptr; // Memory allocation failed + } + + if (kwnames == nullptr || nargs == 0 || args == nullptr) { + return kwargs; // Return empty dictionary + } + + Py_ssize_t nkwargs = PyTuple_Size(kwnames); + + // Iterate over the keyword arguments + for (Py_ssize_t i = 0; i < nkwargs; ++i) { + PyObject* key = PyTuple_GetItem(kwnames, i); + PyObject* value = args[nargs + i]; + + if (PyDict_SetItem(kwargs, key, value) < 0) { + Py_DECREF(kwargs); + return nullptr; + } + } + + // Return the kwargs dictionary (new reference, must be decref by the caller) + return kwargs; +} + void pyexport_aspect_helpers(py::module& m); diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/_aspects_exports.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/_aspects_exports.h index e36b68c7dd6..f5a58552ecb 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Aspects/_aspects_exports.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/_aspects_exports.h @@ -1,6 +1,5 @@ #pragma once #include "AspectFormat.h" -#include "AspectModulo.h" #include "AspectSplit.h" #include "AspectsOsPath.h" #include "Helpers.h" diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.cpp b/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.cpp index 
80026e778fd..ed44426dcde 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.cpp @@ -110,3 +110,28 @@ safe_import(const char* module_name, const char* symbol_name) return ret; } + +bool +is_pointer_this_builtin(PyObject* orig_function, const char* builtin_name) +{ + if (!orig_function) { + return false; + } + + static PyObject* builtin = nullptr; + if (builtin == nullptr) { + PyObject* builtins = PyImport_ImportModule("builtins"); + if (!builtins) { + return false; + } + + builtin = PyObject_GetAttrString(builtins, builtin_name); + Py_DECREF(builtins); + + if (!builtin) { + return false; + } + } + + return orig_function == builtin; +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.h b/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.h index c88349fc4cc..ae7e9530d4f 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.h +++ b/ddtrace/appsec/_iast/_taint_tracking/Utils/GenericUtils.h @@ -35,4 +35,7 @@ is_some_number(PyObject* obj) } py::object -safe_import(const char*, const char*); \ No newline at end of file +safe_import(const char*, const char*); + +bool +is_pointer_this_builtin(PyObject* orig_function, const char* builtin_name); diff --git a/ddtrace/appsec/_iast/_taint_tracking/__init__.py b/ddtrace/appsec/_iast/_taint_tracking/__init__.py index 949972fd725..89603d0711a 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/__init__.py +++ b/ddtrace/appsec/_iast/_taint_tracking/__init__.py @@ -80,6 +80,7 @@ "_aspect_rsplit", "_aspect_split", "_aspect_splitlines", + "_aspect_str", "_convert_escaped_text_to_tainted_text", "_format_aspect", "active_map_addreses_size", diff --git a/ddtrace/appsec/_iast/_taint_tracking/_native.cpp b/ddtrace/appsec/_iast/_taint_tracking/_native.cpp index ee647fed162..170c12d8429 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/_native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/_native.cpp @@ -14,6 +14,7 @@ 
#include "Aspects/AspectModulo.h" #include "Aspects/AspectOperatorAdd.h" #include "Aspects/AspectSlice.h" +#include "Aspects/AspectStr.h" #include "Aspects/_aspects_exports.h" #include "Constants.h" #include "Initializer/_initializer.h" @@ -30,6 +31,7 @@ namespace py = pybind11; static PyMethodDef AspectsMethods[] = { { "add_aspect", ((PyCFunction)api_add_aspect), METH_FASTCALL, "aspect add" }, + { "str_aspect", ((PyCFunction)api_str_aspect), METH_FASTCALL | METH_KEYWORDS, "aspect str" }, { "add_inplace_aspect", ((PyCFunction)api_add_inplace_aspect), METH_FASTCALL, "aspect add" }, { "extend_aspect", ((PyCFunction)api_extend_aspect), METH_FASTCALL, "aspect extend" }, { "index_aspect", ((PyCFunction)api_index_aspect), METH_FASTCALL, "aspect index" }, diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects.py b/ddtrace/appsec/_iast/_taint_tracking/aspects.py index da7f6fb8843..f9eeae32eaf 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects.py +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects.py @@ -1,6 +1,5 @@ from builtins import bytearray as builtin_bytearray from builtins import bytes as builtin_bytes -from builtins import str as builtin_str import codecs from re import Match from re import Pattern @@ -64,6 +63,7 @@ split_aspect = _aspect_split rsplit_aspect = _aspect_rsplit splitlines_aspect = _aspect_splitlines +str_aspect = aspects.str_aspect ospathjoin_aspect = _aspect_ospathjoin ospathbasename_aspect = _aspect_ospathbasename ospathdirname_aspect = _aspect_ospathdirname @@ -137,31 +137,6 @@ def bytesio_aspect(orig_function: Optional[Callable], flag_added_args: int, *arg return result -def str_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> str: - if orig_function is not None: - if orig_function != builtin_str: - if flag_added_args > 0: - args = args[flag_added_args:] - return orig_function(*args, **kwargs) - result = builtin_str(*args, **kwargs) - else: - result = args[0].str(*args[1:], **kwargs) - - if 
args and is_pyobject_tainted(args[0]): - try: - if isinstance(args[0], (bytes, bytearray)): - encoding = parse_params(1, "encoding", "utf-8", *args, **kwargs) - errors = parse_params(2, "errors", "strict", *args, **kwargs) - check_offset = args[0].decode(encoding, errors) - else: - check_offset = args[0] - offset = result.index(check_offset) - copy_and_shift_ranges_from_strings(args[0], result, offset) - except Exception as e: - iast_taint_log_error("str_aspect. {}".format(e)) - return result - - def bytes_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> bytes: if orig_function is not None: if orig_function != builtin_bytes: @@ -440,7 +415,7 @@ def format_value_aspect( element: Any, options: int = 0, format_spec: Optional[str] = None, -) -> str: +) -> Union[str, bytes, bytearray]: if options == 115: new_text = str_aspect(str, 0, element) elif options == 114: @@ -463,7 +438,7 @@ def format_value_aspect( try: new_ranges = list() for text_range in text_ranges: - new_ranges.append(shift_taint_range(text_range, new_new_text.index(new_text))) + new_ranges.append(shift_taint_range(text_range, new_new_text.index(new_text))) # type: ignore if new_ranges: taint_pyobject_with_ranges(new_new_text, tuple(new_ranges)) return new_new_text diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp index b7338e3d683..c55da152cd7 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_common.hpp @@ -48,3 +48,55 @@ class PyEnvWithContext : public ::testing::Test return res_string; } }; + +inline void +EXPECT_RANGESEQ(const TaintRangeRefs& r1, const TaintRangeRefs& r2) +{ + if (r1.size() != r2.size()) { + FAIL() << "Ranges have different sizes: " << r1.size() << " != " << r2.size(); + } + + if (r1.empty() and r2.empty()) { + return; + } + + if (&r1 == &r2) { + return; + } + + // Iterate over the 
ranges at r1 and check that they are the same as the range in the same position at r2 + for (size_t i = 0; i < r1.size(); i++) { + if (r1[i]->start != r2[i]->start) { + FAIL() << "Ranges have different start values at position " << i << ": " << r1[i]->start + << " != " << r2[i]->start; + } + + if (r1[i]->length != r2[i]->length) { + FAIL() << "Ranges have different length values at position " << i << ": " << r1[i]->length + << " != " << r2[i]->length; + } + + if (r1[i]->source.name != r2[i]->source.name) { + FAIL() << "Ranges have different source names at position " << i << ": " << r1[i]->source.name + << " != " << r2[i]->source.name; + } + + if (r1[i]->source.value != r2[i]->source.value) { + FAIL() << "Ranges have different source values at position " << i << ": " << r1[i]->source.value + << " != " << r2[i]->source.value; + } + + if (r1[i]->source.origin != r2[i]->source.origin) { + FAIL() << "Ranges have different source origins at position " << i << ": " + << origin_to_str(r1[i]->source.origin) << " != " << origin_to_str(r2[i]->source.origin); + } + } +} + +inline void +EXPECT_RANGESEQ(py::handle o1, py::handle o2) +{ + auto r1 = api_get_ranges(o1); + auto r2 = api_get_ranges(o2); + EXPECT_RANGESEQ(r1, r2); +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index 825a03ba787..43144c70f68 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -445,7 +445,7 @@ TEST_F(ParseParamsCheck, PositionalArgumentPresent) py::kwargs kwargs; py::object default_value = py::int_(0); - py::object result = parse_params(0, "key", default_value, args, kwargs); + py::object result = parse_param(0, "key", default_value, args, kwargs); EXPECT_EQ(result.cast(), 42); } @@ -456,7 +456,7 @@ TEST_F(ParseParamsCheck, KeywordArgumentPresent) kwargs["key"] = py::int_(42); py::object default_value = py::int_(0); - 
py::object result = parse_params(0, "key", default_value, args, kwargs); + py::object result = parse_param(0, "key", default_value, args, kwargs); EXPECT_EQ(result.cast(), 42); } @@ -466,7 +466,7 @@ TEST_F(ParseParamsCheck, NoArgumentUsesDefault) py::kwargs kwargs; py::object default_value = py::int_(42); - py::object result = parse_params(0, "key", default_value, args, kwargs); + py::object result = parse_param(0, "key", default_value, args, kwargs); EXPECT_EQ(result.cast(), 42); } @@ -477,7 +477,7 @@ TEST_F(ParseParamsCheck, PositionalOverridesKeyword) kwargs["key"] = py::int_(42); py::object default_value = py::int_(0); - py::object result = parse_params(0, "key", default_value, args, kwargs); + py::object result = parse_param(0, "key", default_value, args, kwargs); EXPECT_EQ(result.cast(), 100); } @@ -487,7 +487,7 @@ TEST_F(ParseParamsCheck, HandlesMissingKeyword) py::kwargs kwargs; py::object default_value = py::str("default_value"); - py::object result = parse_params(0, "missing_key", default_value, args, kwargs); + py::object result = parse_param(0, "missing_key", default_value, args, kwargs); EXPECT_STREQ(result.cast().c_str(), "default_value"); } diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_str_aspect.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_str_aspect.cpp new file mode 100644 index 00000000000..609a1e971d5 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_str_aspect.cpp @@ -0,0 +1,246 @@ +#include +#include +#include +#include + +using CheckAspectStr = PyEnvWithContext; + +static py::object +str_func() +{ + return safe_import("builtins", "str"); +} + +static PyObject** +one_arg(py::object value) +{ + PyObject** args = new PyObject*[3]; + args[0] = str_func().release().ptr(); + args[1] = PyLong_FromLong(0); + args[2] = value.ptr(); + return args; +} + +static PyObject** +two_args(py::object value, py::object value2) +{ + PyObject** args = new PyObject*[4]; + args[0] = str_func().release().ptr(); + args[1] = 
PyLong_FromLong(0); + args[2] = value.ptr(); + args[3] = value2.ptr(); + return args; +} + +static PyObject** +three_args(py::object value, py::object value2, py::object value3) +{ + PyObject** args = new PyObject*[5]; + args[0] = str_func().release().ptr(); + args[1] = PyLong_FromLong(0); + args[2] = value.ptr(); + args[3] = value2.ptr(); + args[4] = value3.ptr(); + return args; +} + +TEST_F(CheckAspectStr, StrWithStr) +{ + // auto result = api_str_aspect(py::none().ptr(), one_arg(py::str("test")), 3, py::none().ptr()); + auto result = api_str_aspect(py::none().ptr(), one_arg(py::str("test")), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "test"); +} + +TEST_F(CheckAspectStr, StrWithInteger) +{ + auto result = api_str_aspect(py::none().ptr(), one_arg(py::int_(42)), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "42"); +} + +TEST_F(CheckAspectStr, StrWithFloat) +{ + auto result = api_str_aspect(py::none().ptr(), one_arg(py::float_(42.42)), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "42.42"); +} + +TEST_F(CheckAspectStr, StrWithBytesNoEncoding) +{ + auto result = api_str_aspect(py::none().ptr(), one_arg(py::bytes("test")), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "b'test'"); +} + +TEST_F(CheckAspectStr, StrWithBytesAndEncoding) +{ + auto result = api_str_aspect(py::none().ptr(), two_args(py::bytes("test"), py::str("utf-8")), 4, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "test"); +} + +TEST_F(CheckAspectStr, StrWithBytesAndErrorStrictButNoError) +{ + auto result = api_str_aspect( + py::none().ptr(), three_args(py::bytes("test"), py::str("utf-8"), py::str("strict")), 5, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "test"); +} + 
+TEST_F(CheckAspectStr, StrWithBytesAndErrorStrictAndErrorRaisesUnicodeDecodeError) +{ + auto result = api_str_aspect( + py::none().ptr(), three_args(py::bytes("test\244"), py::str("ascii"), py::str("strict")), 5, py::none().ptr()); + EXPECT_EQ(result, nullptr); + auto error = has_pyerr_as_string(); + EXPECT_STREQ(error.c_str(), "'ascii' codec can't decode byte 0xa4 in position 4: ordinal not in range(128)"); +} + +TEST_F(CheckAspectStr, StrWithBytesAndErrorIgnoreAndErrorDontRaiseUnicodeDecodeError) +{ + auto result = api_str_aspect( + py::none().ptr(), three_args(py::bytes("test\244"), py::str("ascii"), py::str("ignore")), 5, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + auto presult = py::reinterpret_borrow(result); + EXPECT_STREQ(presult[py::slice(0, 4, 1)].cast().c_str(), "test"); + // No exception should be thrown +} + +TEST_F(CheckAspectStr, StrWithList) +{ + // create a py::list with an integer and a string + auto list = py::list(); + list.append(py::int_(42)); + list.append(py::str("foobar")); + + // auto list = py::list(py::int_(1), py::str("foobar")); + auto result = api_str_aspect(py::none().ptr(), one_arg(list), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "[42, 'foobar']"); +} + +TEST_F(CheckAspectStr, StrWithNone) +{ + auto result = api_str_aspect(py::none().ptr(), one_arg(py::none()), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "None"); +} + +TEST_F(CheckAspectStr, StrWithDict) +{ + auto dict = py::dict(); + dict["key1"] = py::int_(42); + dict["key2"] = py::str("foobar"); + + auto result = api_str_aspect(py::none().ptr(), one_arg(dict), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "{'key1': 42, 'key2': 'foobar'}"); +} + +TEST_F(CheckAspectStr, StrWithTuple) +{ + auto tuple = py::tuple(py::make_tuple(py::str("foo"), py::str("bar"))); + + auto result = 
api_str_aspect(py::none().ptr(), one_arg(tuple), 3, py::none().ptr()); + EXPECT_TRUE(PyUnicode_Check(result)); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "('foo', 'bar')"); +} + +TEST_F(CheckAspectStr, StrWithTaintedStringNoEncoding) +{ + auto str = py::str("example"); + TaintRangePtr taint_range = std::make_shared(); + taint_range->start = 0; + taint_range->length = 7; + taint_range->source.name = "example"; + api_set_ranges(str, TaintRangeRefs{ taint_range }); + auto ranges = api_get_ranges(str); + EXPECT_EQ(ranges.size(), 1); + + auto result = api_str_aspect(py::none().ptr(), one_arg(str), 3, py::none().ptr()); + auto ranges2 = api_get_ranges(result); + EXPECT_RANGESEQ(ranges, ranges2); +} + +TEST_F(CheckAspectStr, StrWithTaintedBytesNoEncoding) +{ + auto bytes = py::bytes("example"); + TaintRangePtr taint_range = std::make_shared(); + taint_range->start = 0; + taint_range->length = 7; + taint_range->source.name = "example"; + api_set_ranges(bytes, TaintRangeRefs{ taint_range }); + auto ranges = api_get_ranges(bytes); + EXPECT_EQ(ranges.size(), 1); + auto result = api_str_aspect(py::none().ptr(), one_arg(bytes), 3, py::none().ptr()); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "b'example'"); + auto ranges2 = api_get_ranges(result); + EXPECT_EQ(ranges2.size(), 1); + EXPECT_EQ(ranges2[0]->start, 0); + EXPECT_EQ(ranges2[0]->length, 10); +} + +TEST_F(CheckAspectStr, StrWithTaintedByteArrayNoEncoding) +{ + auto bytearray = py::bytearray("example"); + TaintRangePtr taint_range = std::make_shared(); + taint_range->start = 0; + taint_range->length = 7; + taint_range->source.name = "example"; + api_set_ranges(bytearray, TaintRangeRefs{ taint_range }); + auto ranges = api_get_ranges(bytearray); + EXPECT_EQ(ranges.size(), 1); + + auto result = api_str_aspect(py::none().ptr(), one_arg(bytearray), 3, py::none().ptr()); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "bytearray(b'example')"); + auto ranges2 = api_get_ranges(result); + EXPECT_EQ(ranges2.size(), 1); + 
EXPECT_EQ(ranges2[0]->start, 0); + EXPECT_EQ(ranges2[0]->length, 21); +} + +TEST_F(CheckAspectStr, StrWithTaintedBytesAndEncodingSameSize) +{ + auto bytes = py::bytes("example"); + TaintRangePtr taint_range = std::make_shared(); + taint_range->start = 0; + taint_range->length = 7; + taint_range->source.name = "example"; + api_set_ranges(bytes, TaintRangeRefs{ taint_range }); + auto ranges = api_get_ranges(bytes); + EXPECT_EQ(ranges.size(), 1); + + auto result = api_str_aspect(py::none().ptr(), two_args(bytes, py::str("utf-8")), 4, py::none().ptr()); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "example"); + auto ranges2 = api_get_ranges(result); + EXPECT_RANGESEQ(ranges, ranges2); +} + +TEST_F(CheckAspectStr, StrWithTaintedBytesAndEncodingDifferentSize) +{ + const char utf16le_example[] = { + '\x65', '\x00', // 'e' + '\x78', '\x00', // 'x' + '\x61', '\x00', // 'a' + '\x6D', '\x00', // 'm' + '\x70', '\x00', // 'p' + '\x6C', '\x00', // 'l' + '\x65', '\x00' // 'e' + }; + auto bytes = py::bytes(utf16le_example, sizeof(utf16le_example)); + TaintRangePtr taint_range = std::make_shared(); + taint_range->start = 0; + taint_range->length = 14; + taint_range->source.name = "example"; + api_set_ranges(bytes, TaintRangeRefs{ taint_range }); + auto ranges = api_get_ranges(bytes); + EXPECT_EQ(ranges.size(), 1); + + auto result = api_str_aspect(py::none().ptr(), two_args(bytes, py::str("utf-16")), 4, py::none().ptr()); + EXPECT_STREQ(PyUnicode_AsUTF8(result), "example"); + auto ranges2 = api_get_ranges(result); + EXPECT_EQ(ranges2.size(), 1); + EXPECT_EQ(ranges2[0]->start, 0); + EXPECT_EQ(ranges2[0]->length, 7); +} \ No newline at end of file diff --git a/ddtrace/contrib/botocore/__init__.py b/ddtrace/contrib/botocore/__init__.py index 95628054570..11462e4ec78 100644 --- a/ddtrace/contrib/botocore/__init__.py +++ b/ddtrace/contrib/botocore/__init__.py @@ -108,6 +108,37 @@ Default: ``128`` + +.. 
py:data:: ddtrace.config.botocore['dynamodb_primary_key_names_for_tables'] + + This enables DynamoDB API calls to be instrumented with span pointers. Many + DynamoDB API calls do not include the Item's Primary Key fields as separate + values, so they need to be provided to the tracer separately. This field + should be structured as a ``dict`` keyed by the table names as ``str``. + Each value should be the ``set`` of primary key field names (as ``str``) + for the associated table. The set may have exactly one or two elements, + depending on the Table's Primary Key schema. + + In python this would look like:: + + ddtrace.config.botocore['dynamodb_primary_key_names_for_tables'] = { + 'table_name': {'key1', 'key2'}, + 'other_table': {'other_key'}, + } + + Can also be enabled with the ``DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS`` + environment variable which is parsed as a JSON object with strings for keys + and lists of strings for values. + + This would look something like:: + + export DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS='{ + "table_name": ["key1", "key2"], + "other_table": ["other_key"] + }' + + Default: ``{}`` + """ diff --git a/ddtrace/contrib/internal/botocore/patch.py b/ddtrace/contrib/internal/botocore/patch.py index de280f2ae60..6e7ecf23c62 100644 --- a/ddtrace/contrib/internal/botocore/patch.py +++ b/ddtrace/contrib/internal/botocore/patch.py @@ -2,7 +2,9 @@ Trace queries to aws api done via botocore client """ import collections +import json import os +from typing import Dict # noqa:F401 from typing import List # noqa:F401 from typing import Set # noqa:F401 from typing import Union # noqa:F401 @@ -59,6 +61,32 @@ log = get_logger(__name__) +def _load_dynamodb_primary_key_names_for_tables() -> Dict[str, Set[str]]: + try: + encoded_table_primary_keys = os.getenv("DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS", "{}") + raw_table_primary_keys = json.loads(encoded_table_primary_keys) + + table_primary_keys = {} + for table, primary_keys in 
raw_table_primary_keys.items(): + if not isinstance(table, str): + raise ValueError(f"expected string table name: {table}") + + if not isinstance(primary_keys, list): + raise ValueError(f"expected list of primary keys: {primary_keys}") + + unique_primary_keys = set(primary_keys) + if not len(unique_primary_keys) == len(primary_keys): + raise ValueError(f"expected unique primary keys: {primary_keys}") + + table_primary_keys[table] = unique_primary_keys + + return table_primary_keys + + except Exception as e: + log.warning("failed to load DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS: %s", e) + return {} + + # Botocore default settings config._add( "botocore", @@ -73,6 +101,7 @@ "instrument_internals": asbool(os.getenv("DD_BOTOCORE_INSTRUMENT_INTERNALS", default=False)), "propagation_enabled": asbool(os.getenv("DD_BOTOCORE_PROPAGATION_ENABLED", default=False)), "empty_poll_enabled": asbool(os.getenv("DD_BOTOCORE_EMPTY_POLL_ENABLED", default=True)), + "dynamodb_primary_key_names_for_tables": _load_dynamodb_primary_key_names_for_tables(), }, ) diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt index 46977361e1f..5dce448919f 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt @@ -21,6 +21,13 @@ include(FindCppcheck) include(FindInfer) include(CheckSymbolExists) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +if(NOT Threads_FOUND OR NOT CMAKE_USE_PTHREADS_INIT) + message(FATAL_ERROR "pthread compatible library not found") +endif() + # Library sources add_library( dd_wrapper SHARED @@ -44,7 +51,7 @@ target_compile_features(dd_wrapper PUBLIC cxx_std_17) target_include_directories(dd_wrapper PRIVATE include ${Datadog_INCLUDE_DIRS}) -target_link_libraries(dd_wrapper PRIVATE ${Datadog_LIBRARIES}) +target_link_libraries(dd_wrapper PRIVATE ${Datadog_LIBRARIES} Threads::Threads) # Figure 
out the suffix. Try to approximate the cpython way of doing things. C library check_symbol_exists(__GLIBC__ "features.h" HAVE_GLIBC) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt index 610d08ca473..6c330c9c970 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -22,11 +22,19 @@ include(FindCppcheck) add_subdirectory(../dd_wrapper ${CMAKE_CURRENT_BINARY_DIR}/../dd_wrapper_build) find_package(Python3 COMPONENTS Interpreter Development) + # Make sure we have necessary Python variables if(NOT Python3_INCLUDE_DIRS) message(FATAL_ERROR "Python3_INCLUDE_DIRS not found") endif() +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +if(NOT Threads_FOUND OR NOT CMAKE_USE_PTHREADS_INIT) + message(FATAL_ERROR "pthread compatible library not found") +endif() + # Add echion set(ECHION_COMMIT "9d5bcc5867d7aefff73c837adcba4ef46eecebc6" @@ -70,9 +78,9 @@ target_compile_definitions(${EXTENSION_NAME} PRIVATE UNWIND_NATIVE_DISABLE) # warning(push, 0 then pop for the same effect. target_include_directories( ${EXTENSION_NAME} PRIVATE .. # include dd_wrapper from the root in order to make its paths transparent in the code - include) + include) target_include_directories(${EXTENSION_NAME} SYSTEM PRIVATE ${echion_SOURCE_DIR} ${Python3_INCLUDE_DIRS} - include/vendored include/util) + include/vendored include/util) # Echion sources need to be given the current platform if(WIN32) @@ -92,11 +100,10 @@ set_target_properties(${EXTENSION_NAME} PROPERTIES SUFFIX "") # typical. 
set_target_properties(${EXTENSION_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/..") +target_link_libraries(${EXTENSION_NAME} PRIVATE dd_wrapper Threads::Threads) + if(Python3_LIBRARIES) - target_link_libraries(${EXTENSION_NAME} PRIVATE dd_wrapper ${Python3_LIBRARIES}) -else() - # for manylinux builds - target_link_libraries(${EXTENSION_NAME} PRIVATE dd_wrapper) + target_link_libraries(${EXTENSION_NAME} PRIVATE ${Python3_LIBRARIES}) endif() # Extensions are built as dynamic libraries, so PIC is required. diff --git a/ddtrace/internal/encoding.py b/ddtrace/internal/encoding.py index 13084cea1fc..0cf31f9f7ed 100644 --- a/ddtrace/internal/encoding.py +++ b/ddtrace/internal/encoding.py @@ -83,6 +83,9 @@ def _span_to_dict(span): if span.span_type: d["type"] = span.span_type + if span._links: + d["span_links"] = [link.to_dict() for link in span._links] + return d diff --git a/releasenotes/notes/profiling-pthread-aarch64-d4548fd1842d0665.yaml b/releasenotes/notes/profiling-pthread-aarch64-d4548fd1842d0665.yaml new file mode 100644 index 00000000000..cd7eeb08900 --- /dev/null +++ b/releasenotes/notes/profiling-pthread-aarch64-d4548fd1842d0665.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + profiling: fixes an issue where stack v2 couldn't be enabled as pthread + was not properly linked on some debian based images for aarch64 architecture. + diff --git a/releasenotes/notes/span-pointers-aws-dynamodb-putitem-f4be08418bc99093.yaml b/releasenotes/notes/span-pointers-aws-dynamodb-putitem-f4be08418bc99093.yaml new file mode 100644 index 00000000000..99986cc8e37 --- /dev/null +++ b/releasenotes/notes/span-pointers-aws-dynamodb-putitem-f4be08418bc99093.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + botocore: Adds span pointers for successful DynamoDB ``PutItem`` spans. Table Primary Keys need to be provided with the ``ddtrace.config.botocore.dynamodb_primary_key_names_for_tables`` option or the ``DD_BOTOCORE_DYNAMODB_TABLE_PRIMARY_KEYS`` environment variable. 
diff --git a/tests/appsec/contrib_appsec/utils.py b/tests/appsec/contrib_appsec/utils.py index e1f5e31d9dd..4b792cc3de6 100644 --- a/tests/appsec/contrib_appsec/utils.py +++ b/tests/appsec/contrib_appsec/utils.py @@ -128,6 +128,20 @@ def test_simple_attack(self, interface: Interface, root_span, get_tag): query = dict(root_span()._get_ctx_item("http.request.query")) assert query == {"q": "1"} or query == {"q": ["1"]} + @pytest.mark.parametrize("asm_enabled", [True, False]) + @pytest.mark.parametrize( + ("user_agent", "priority"), + [("Mozilla/5.0", False), ("Arachni/v1.5.1", True), ("dd-test-scanner-log-block", True)], + ) + def test_priority(self, interface: Interface, root_span, get_tag, asm_enabled, user_agent, priority): + """Check that we only set manual keep for traces with appsec events.""" + with override_global_config(dict(_asm_enabled=asm_enabled)): + self.update_tracer(interface) + response = interface.client.get("/", headers={"User-Agent": user_agent}) + assert response.status_code == (403 if user_agent == "dd-test-scanner-log-block" and asm_enabled else 200) + span_priority = root_span()._span.context.sampling_priority + assert (span_priority == 2) if asm_enabled and priority else (span_priority < 2) + def test_querystrings(self, interface: Interface, root_span): with override_global_config(dict(_asm_enabled=True)): self.update_tracer(interface) diff --git a/tests/appsec/iast/aspects/test_str_aspect.py b/tests/appsec/iast/aspects/test_str_aspect.py index 4c515c40e63..a8b2dd29509 100644 --- a/tests/appsec/iast/aspects/test_str_aspect.py +++ b/tests/appsec/iast/aspects/test_str_aspect.py @@ -61,11 +61,11 @@ def test_str_aspect_objs(obj): @pytest.mark.parametrize( "args", [ - ("utf-8", "strict"), - ("latin1", "strict"), - ("iso-8859-8", "strict"), - ("sjis", "strict"), - ("utf-8", "replace"), + ("utf-8",), + ("latin1",), + ("iso-8859-8",), + ("sjis",), + ("utf-8",), ], ) @pytest.mark.parametrize("kwargs", [{}, {"errors": "ignore"}, {"errors": "replace"}]) 
diff --git a/tests/contrib/botocore/test.py b/tests/contrib/botocore/test.py index ab26b667b7c..e51b99c90ca 100644 --- a/tests/contrib/botocore/test.py +++ b/tests/contrib/botocore/test.py @@ -266,6 +266,127 @@ def test_dynamodb_put_get(self): assert span.service == "test-botocore-tracing.dynamodb" assert span.resource == "botocore.parsers.parse" + span = spans[2] + assert span.get_tag("aws.operation") == "PutItem" + # Since the dynamodb_primary_key_names_for_tables isn't configured, we + # cannot create span pointers for this item. + assert not span._links + + @pytest.mark.skipif( + PYTHON_VERSION_INFO < (3, 8), + reason="Skipping for older py versions whose latest supported moto versions don't have the right dynamodb api", + ) + @mock_dynamodb + def test_dynamodb_put_get_with_table_primary_key_mapping(self): + ddb = self.session.create_client("dynamodb", region_name="us-west-2") + Pin(service=self.TEST_SERVICE, tracer=self.tracer).onto(ddb) + + with self.override_config( + "botocore", + dict( + instrument_internals=True, + dynamodb_primary_key_names_for_tables={ + "foobar": {"myattr"}, + }, + ), + ): + ddb.create_table( + TableName="foobar", + AttributeDefinitions=[{"AttributeName": "myattr", "AttributeType": "S"}], + KeySchema=[{"AttributeName": "myattr", "KeyType": "HASH"}], + BillingMode="PAY_PER_REQUEST", + ) + ddb.put_item(TableName="foobar", Item={"myattr": {"S": "baz"}}) + ddb.get_item(TableName="foobar", Key={"myattr": {"S": "baz"}}) + + spans = self.get_spans() + assert spans + span = spans[0] + assert len(spans) == 6 + assert_is_measured(span) + assert span.get_tag("aws.operation") == "CreateTable" + assert span.get_tag("component") == "botocore" + assert span.get_tag("span.kind"), "client" + assert_span_http_status_code(span, 200) + assert span.service == "test-botocore-tracing.dynamodb" + assert span.resource == "dynamodb.createtable" + + span = spans[1] + assert span.name == "botocore.parsers.parse" + assert span.get_tag("component") == "botocore" + 
assert span.get_tag("span.kind"), "client" + assert span.service == "test-botocore-tracing.dynamodb" + assert span.resource == "botocore.parsers.parse" + + span = spans[2] + assert span.get_tag("aws.operation") == "PutItem" + # This span pointer is only available if the + # dynamodb_primary_key_names_for_tables is properly configured with the + # table and its primary key field names. + assert span._links == [ + _SpanPointer( + pointer_kind="aws.dynamodb.item", + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + # We have more detailed tests for the hashing behavior + # elsewhere. Here we just want to make sure that the pointer is + # correctly attached to the span. + pointer_hash="de960284e8cba01c46f87b102ab1c9cb", + ), + ] + + @pytest.mark.skipif( + PYTHON_VERSION_INFO < (3, 8), + reason="Skipping for older py versions whose latest supported moto versions don't have the right dynamodb api", + ) + @mock_dynamodb + def test_dynamodb_put_get_with_broken_table_primary_key_mapping(self): + ddb = self.session.create_client("dynamodb", region_name="us-west-2") + Pin(service=self.TEST_SERVICE, tracer=self.tracer).onto(ddb) + + with self.override_config( + "botocore", + dict( + instrument_internals=True, + dynamodb_primary_key_names_for_tables={ + "foobar": {"myattr", "other_attr", "impossible_third_attr"}, + }, + ), + ): + ddb.create_table( + TableName="foobar", + AttributeDefinitions=[{"AttributeName": "myattr", "AttributeType": "S"}], + KeySchema=[{"AttributeName": "myattr", "KeyType": "HASH"}], + BillingMode="PAY_PER_REQUEST", + ) + ddb.put_item(TableName="foobar", Item={"myattr": {"S": "baz"}}) + ddb.get_item(TableName="foobar", Key={"myattr": {"S": "baz"}}) + + spans = self.get_spans() + assert spans + span = spans[0] + assert len(spans) == 6 + assert_is_measured(span) + assert span.get_tag("aws.operation") == "CreateTable" + assert span.get_tag("component") == "botocore" + assert span.get_tag("span.kind"), "client" + assert_span_http_status_code(span, 200) + 
assert span.service == "test-botocore-tracing.dynamodb" + assert span.resource == "dynamodb.createtable" + + span = spans[1] + assert span.name == "botocore.parsers.parse" + assert span.get_tag("component") == "botocore" + assert span.get_tag("span.kind"), "client" + assert span.service == "test-botocore-tracing.dynamodb" + assert span.resource == "botocore.parsers.parse" + + span = spans[2] + assert span.get_tag("aws.operation") == "PutItem" + # The rest of the logic should have worked but since the config is + # malformed with unexpectedly three key attributes, we cannot actually + # create the span pointers. + assert not span._links + @mock_s3 def test_s3_client(self): s3 = self.session.create_client("s3", region_name="us-west-2") diff --git a/tests/integration/test_encoding.py b/tests/integration/test_encoding.py index f5efc03d163..43c47ac4840 100644 --- a/tests/integration/test_encoding.py +++ b/tests/integration/test_encoding.py @@ -63,3 +63,22 @@ def test_trace_with_metrics_accepted_by_agent(self, metrics): tracer.shutdown() log.warning.assert_not_called() log.error.assert_not_called() + + @pytest.mark.parametrize( + "span_links_kwargs", + [ + {"trace_id": 12345, "span_id": 67890}, + ], + ) + def test_trace_with_links_accepted_by_agent(self, span_links_kwargs): + """Links should not break things.""" + tracer = Tracer() + with mock.patch("ddtrace.internal.writer.writer.log") as log: + with tracer.trace("root", service="test_encoding", resource="test_resource") as root: + root.set_link(**span_links_kwargs) + for _ in range(10): + with tracer.trace("child") as child: + child.set_link(**span_links_kwargs) + tracer.shutdown() + log.warning.assert_not_called() + log.error.assert_not_called() diff --git a/tests/tracer/test_encoders.py b/tests/tracer/test_encoders.py index cd14c439e89..213027a7103 100644 --- a/tests/tracer/test_encoders.py +++ b/tests/tracer/test_encoders.py @@ -162,7 +162,7 @@ def test_encode_traces_json(self): # test encoding for JSON format 
traces = [ [ - Span(name="client.testing"), + Span(name="client.testing", links=[SpanLink(trace_id=12345, span_id=678990)]), Span(name="client.testing"), ], [ @@ -184,6 +184,7 @@ def test_encode_traces_json(self): assert isinstance(spans, str) assert len(items) == 3 assert len(items[0]) == 2 + assert len(items[0][0]["span_links"]) == 1 assert len(items[1]) == 2 assert len(items[2]) == 2 for i in range(3): @@ -194,7 +195,7 @@ def test_encode_traces_json_v2(self): # test encoding for JSON format traces = [ [ - Span(name="client.testing", span_id=0xAAAAAA), + Span(name="client.testing", span_id=0xAAAAAA, links=[SpanLink(trace_id=12345, span_id=67890)]), Span(name="client.testing", span_id=0xAAAAAA), ], [ @@ -215,6 +216,7 @@ def test_encode_traces_json_v2(self): assert isinstance(spans, str) assert len(items) == 3 assert len(items[0]) == 2 + assert len(items[0][0]["span_links"]) == 1 assert len(items[1]) == 2 assert len(items[2]) == 2 for i in range(3): diff --git a/tests/tracer/utils_botocore/test_span_pointers.py b/tests/tracer/utils_botocore/test_span_pointers.py index 4963d882b9f..01513f15f4a 100644 --- a/tests/tracer/utils_botocore/test_span_pointers.py +++ b/tests/tracer/utils_botocore/test_span_pointers.py @@ -1,5 +1,6 @@ import logging import re +from typing import Dict from typing import List from typing import NamedTuple from typing import Optional @@ -9,6 +10,7 @@ from ddtrace._trace._span_pointer import _SpanPointerDescription from ddtrace._trace._span_pointer import _SpanPointerDirection +from ddtrace._trace.utils_botocore.span_pointers import _aws_dynamodb_item_span_pointer_hash from ddtrace._trace.utils_botocore.span_pointers import _aws_s3_object_span_pointer_hash from ddtrace._trace.utils_botocore.span_pointers import extract_span_pointers_from_successful_botocore_response @@ -59,6 +61,65 @@ def test_hashing(self, hashing_case: HashingCase) -> None: ) +class TestDynamodbItemPointer: + class HashingCase(NamedTuple): + name: str + table_name: str + 
primary_key: Dict[str, Dict[str, str]] + pointer_hash: str + + @pytest.mark.parametrize( + "hashing_case", + [ + HashingCase( + name="one string primary key", + table_name="some-table", + primary_key={"some-key": {"S": "some-value"}}, + pointer_hash="7f1aee721472bcb48701d45c7c7f7821", + ), + HashingCase( + name="one binary primary key", + table_name="some-table", + primary_key={"some-key": {"B": "c29tZS12YWx1ZQo="}}, + pointer_hash="cc789e5ea89c317ac58af92d7a1ba2c2", + ), + HashingCase( + name="one number primary key", + table_name="some-table", + primary_key={"some-key": {"N": "123.456"}}, + pointer_hash="434a6dba3997ce4dbbadc98d87a0cc24", + ), + HashingCase( + name="string and number primary key", + table_name="some-table", + primary_key={ + "some-key": {"S": "some-value"}, + "other-key": {"N": "123"}, + }, + pointer_hash="7aa1b80b0e49bd2078a5453399f4dd67", + ), + HashingCase( + name="string and number primary key reversed", + table_name="some-table", + primary_key={ + "other-key": {"N": "123"}, + "some-key": {"S": "some-value"}, + }, + pointer_hash="7aa1b80b0e49bd2078a5453399f4dd67", + ), + ], + ids=lambda case: case.name, + ) + def test_hashing(self, hashing_case: HashingCase) -> None: + assert ( + _aws_dynamodb_item_span_pointer_hash( + table_name=hashing_case.table_name, + primary_key=hashing_case.primary_key, + ) + == hashing_case.pointer_hash + ) + + class TestBotocoreSpanPointers: class PointersCase(NamedTuple): name: str @@ -274,6 +335,85 @@ class PointersCase(NamedTuple): ], expected_warning_regex=None, ), + PointersCase( + name="dynamodb.PutItem", + endpoint_name="dynamodb", + operation_name="PutItem", + request_parameters={ + "TableName": "some-table", + "Item": { + "some-key": {"S": "some-value"}, + }, + }, + response={ + # things we do not care about + }, + expected_pointers=[ + _SpanPointerDescription( + pointer_kind="aws.dynamodb.item", + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + pointer_hash="7f1aee721472bcb48701d45c7c7f7821", + 
extra_attributes={}, + ), + ], + expected_warning_regex=None, + ), + PointersCase( + name="dynamodb.PutItem with extra data", + endpoint_name="dynamodb", + operation_name="PutItem", + request_parameters={ + "TableName": "some-table", + "Item": { + "some-key": {"S": "some-value"}, + "otehr-key": {"N": "123"}, + }, + }, + response={ + # things we do not care about + }, + expected_pointers=[ + _SpanPointerDescription( + pointer_kind="aws.dynamodb.item", + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + pointer_hash="7f1aee721472bcb48701d45c7c7f7821", + extra_attributes={}, + ), + ], + expected_warning_regex=None, + ), + PointersCase( + name="dynamodb.PutItem unknown table", + endpoint_name="dynamodb", + operation_name="PutItem", + request_parameters={ + "TableName": "unknown-table", + "Item": { + "some-key": {"S": "some-value"}, + }, + }, + response={ + # things we do not care about + }, + expected_pointers=[], + expected_warning_regex=".*unknown-table.*", + ), + PointersCase( + name="dynamodb.PutItem missing primary key", + endpoint_name="dynamodb", + operation_name="PutItem", + request_parameters={ + "TableName": "some-table", + "Item": { + "other-key": {"S": "some-value"}, + }, + }, + response={ + # things we do not care about + }, + expected_pointers=[], + expected_warning_regex=".*missing primary key field: some-key", + ), ], ids=lambda case: case.name, ) @@ -284,6 +424,9 @@ def test_pointers(self, pointers_case: PointersCase) -> None: with mock.patch.object(logging.Logger, "warning") as mock_logger: assert ( extract_span_pointers_from_successful_botocore_response( + dynamodb_primary_key_names_for_tables={ + "some-table": {"some-key"}, + }, endpoint_name=pointers_case.endpoint_name, operation_name=pointers_case.operation_name, request_parameters=pointers_case.request_parameters,