diff --git a/ddtrace/_trace/_inferred_proxy.py b/ddtrace/_trace/_inferred_proxy.py index 1cacd600844..5b3cedf09d7 100644 --- a/ddtrace/_trace/_inferred_proxy.py +++ b/ddtrace/_trace/_inferred_proxy.py @@ -1,9 +1,12 @@ +from dataclasses import dataclass import logging +from typing import Callable from typing import Dict -from typing import Union +from typing import Optional from ddtrace import config from ddtrace._trace.span import Span +from ddtrace.ext import SpanKind from ddtrace.ext import SpanTypes from ddtrace.ext import http from ddtrace.internal.constants import COMPONENT @@ -13,18 +16,64 @@ log = logging.getLogger(__name__) + +@dataclass +class ProxyHeaderContext: + system_name: str + request_time: str + method: Optional[str] + path: Optional[str] + resource_path: Optional[str] + domain_name: Optional[str] + stage: Optional[str] + account_id: Optional[str] + api_id: Optional[str] + region: Optional[str] + user: Optional[str] + useragent: Optional[str] + + +@dataclass +class ProxyInfo: + span_name: str + component: str + resource_arn_builder: Optional[Callable[[ProxyHeaderContext], Optional[str]]] = None + + +def _api_gateway_rest_api_arn(proxy_context: ProxyHeaderContext) -> Optional[str]: + if proxy_context.region and proxy_context.api_id: + return f"arn:aws:apigateway:{proxy_context.region}::/restapis/{proxy_context.api_id}" + return None + + +def _api_gateway_http_api_arn(proxy_context: ProxyHeaderContext) -> Optional[str]: + if proxy_context.region and proxy_context.api_id: + return f"arn:aws:apigateway:{proxy_context.region}::/apis/{proxy_context.api_id}" + return None + + +supported_proxies: Dict[str, ProxyInfo] = { + "aws-apigateway": ProxyInfo("aws.apigateway", "aws-apigateway", _api_gateway_rest_api_arn), + "aws-httpapi": ProxyInfo("aws.httpapi", "aws-httpapi", _api_gateway_http_api_arn), +} + +SUPPORTED_PROXY_SPAN_NAMES = {info.span_name for info in supported_proxies.values()} + # Checking lower case and upper case versions per WSGI spec following ddtrace/propagation/http.py's # logic to extract http headers POSSIBLE_PROXY_HEADER_SYSTEM = _possible_header("x-dd-proxy") POSSIBLE_PROXY_HEADER_START_TIME_MS = _possible_header("x-dd-proxy-request-time-ms") POSSIBLE_PROXY_HEADER_PATH = _possible_header("x-dd-proxy-path") +POSSIBLE_PROXY_HEADER_RESOURCE_PATH = _possible_header("x-dd-proxy-resource-path") POSSIBLE_PROXY_HEADER_HTTPMETHOD = _possible_header("x-dd-proxy-httpmethod") POSSIBLE_PROXY_HEADER_DOMAIN = _possible_header("x-dd-proxy-domain-name") POSSIBLE_PROXY_HEADER_STAGE = _possible_header("x-dd-proxy-stage") +POSSIBLE_PROXY_HEADER_ACCOUNT_ID = _possible_header("x-dd-proxy-account-id") +POSSIBLE_PROXY_HEADER_API_ID = _possible_header("x-dd-proxy-api-id") +POSSIBLE_PROXY_HEADER_REGION = _possible_header("x-dd-proxy-region") +POSSIBLE_PROXY_HEADER_USER = _possible_header("x-dd-proxy-user") -supported_proxies: Dict[str, Dict[str, str]] = { - "aws-apigateway": {"span_name": "aws.apigateway", "component": "aws-apigateway"} -} +HEADER_USERAGENT = _possible_header("user-agent") def create_inferred_proxy_span_if_headers_exist(ctx, headers, child_of, tracer) -> None: @@ -38,19 +87,23 @@ def create_inferred_proxy_span_if_headers_exist(ctx, headers, child_of, tracer) if not proxy_context: return None - proxy_span_info = supported_proxies[proxy_context["proxy_system_name"]] + proxy_info = supported_proxies[proxy_context.system_name] + + method = proxy_context.method + route_or_path = proxy_context.resource_path or proxy_context.path + resource = f"{method or ''} {route_or_path or ''}" span = tracer.start_span( - proxy_span_info["span_name"], - service=proxy_context.get("domain_name", config._get_service()), - resource=proxy_context["method"] + " " + proxy_context["path"], + proxy_info.span_name, + service=proxy_context.domain_name or config._get_service(), + resource=resource, span_type=SpanTypes.WEB, activate=True, child_of=child_of, ) - span.start_ns = int(proxy_context["request_time"]) * 1000000 + span.start_ns = int(proxy_context.request_time) * 1000000 - set_inferred_proxy_span_tags(span, proxy_context) + set_inferred_proxy_span_tags(span, proxy_context, proxy_info) # we need a callback to finish the api gateway span, this callback will be added to the child spans finish callbacks def finish_callback(_): @@ -62,24 +115,61 @@ def finish_callback(_): ctx.set_item("headers", headers) -def set_inferred_proxy_span_tags(span, proxy_context) -> Span: - span._set_tag_str(COMPONENT, supported_proxies[proxy_context["proxy_system_name"]]["component"]) +def set_inferred_proxy_span_tags(span: Span, proxy_context: ProxyHeaderContext, proxy_info: ProxyInfo) -> Span: + span._set_tag_str(COMPONENT, proxy_info.component) + span._set_tag_str("span.kind", SpanKind.SERVER) - span._set_tag_str(http.METHOD, proxy_context["method"]) - span._set_tag_str(http.URL, f"{proxy_context['domain_name']}{proxy_context['path']}") - span._set_tag_str("stage", proxy_context["stage"]) + span._set_tag_str(http.URL, f"https://{proxy_context.domain_name or ''}{proxy_context.path or ''}") + + if proxy_context.method: + span._set_tag_str(http.METHOD, proxy_context.method) + + if proxy_context.resource_path: + span._set_tag_str(http.ROUTE, proxy_context.resource_path) + + if proxy_context.useragent: + span._set_tag_str(http.USER_AGENT, proxy_context.useragent) + + if proxy_context.stage: + span._set_tag_str("stage", proxy_context.stage) + + if proxy_context.account_id: + span._set_tag_str("account_id", proxy_context.account_id) + + if proxy_context.api_id: + span._set_tag_str("apiid", proxy_context.api_id) + + if proxy_context.region: + span._set_tag_str("region", proxy_context.region) + + if proxy_context.user: + span._set_tag_str("aws_user", proxy_context.user) + + if proxy_info.resource_arn_builder: + resource_arn = proxy_info.resource_arn_builder(proxy_context) + if resource_arn: + span._set_tag_str("dd_resource_key", resource_arn) span.set_metric("_dd.inferred_span", 1) return span -def extract_inferred_proxy_context(headers) -> Union[None, Dict[str, str]]: - proxy_header_system = str(_extract_header_value(POSSIBLE_PROXY_HEADER_SYSTEM, headers)) - proxy_header_start_time_ms = str(_extract_header_value(POSSIBLE_PROXY_HEADER_START_TIME_MS, headers)) - proxy_header_path = str(_extract_header_value(POSSIBLE_PROXY_HEADER_PATH, headers)) - proxy_header_httpmethod = str(_extract_header_value(POSSIBLE_PROXY_HEADER_HTTPMETHOD, headers)) - proxy_header_domain = str(_extract_header_value(POSSIBLE_PROXY_HEADER_DOMAIN, headers)) - proxy_header_stage = str(_extract_header_value(POSSIBLE_PROXY_HEADER_STAGE, headers)) +def extract_inferred_proxy_context(headers) -> Optional[ProxyHeaderContext]: + proxy_header_system = _extract_header_value(POSSIBLE_PROXY_HEADER_SYSTEM, headers) + proxy_header_start_time_ms = _extract_header_value(POSSIBLE_PROXY_HEADER_START_TIME_MS, headers) + proxy_header_path = _extract_header_value(POSSIBLE_PROXY_HEADER_PATH, headers) + proxy_header_resource_path = _extract_header_value(POSSIBLE_PROXY_HEADER_RESOURCE_PATH, headers) + + proxy_header_httpmethod = _extract_header_value(POSSIBLE_PROXY_HEADER_HTTPMETHOD, headers) + proxy_header_domain = _extract_header_value(POSSIBLE_PROXY_HEADER_DOMAIN, headers) + proxy_header_stage = _extract_header_value(POSSIBLE_PROXY_HEADER_STAGE, headers) + + proxy_header_account_id = _extract_header_value(POSSIBLE_PROXY_HEADER_ACCOUNT_ID, headers) + proxy_header_api_id = _extract_header_value(POSSIBLE_PROXY_HEADER_API_ID, headers) + proxy_header_region = _extract_header_value(POSSIBLE_PROXY_HEADER_REGION, headers) + proxy_header_user = _extract_header_value(POSSIBLE_PROXY_HEADER_USER, headers) + + header_user_agent = _extract_header_value(HEADER_USERAGENT, headers) # Exit if start time header is not present if proxy_header_start_time_ms is None: @@ -92,14 +182,20 @@ def extract_inferred_proxy_context(headers) -> Union[None, Dict[str, str]]: ) return None - return { - "request_time": proxy_header_start_time_ms, - "method": proxy_header_httpmethod, - "path": proxy_header_path, - "stage": proxy_header_stage, - "domain_name": proxy_header_domain, - "proxy_system_name": proxy_header_system, - } + return ProxyHeaderContext( + proxy_header_system, + proxy_header_start_time_ms, + proxy_header_httpmethod, + proxy_header_path, + proxy_header_resource_path, + proxy_header_domain, + proxy_header_stage, + proxy_header_account_id, + proxy_header_api_id, + proxy_header_region, + proxy_header_user, + header_user_agent, + ) def normalize_headers(headers) -> Dict[str, str]: diff --git a/ddtrace/_trace/trace_handlers.py b/ddtrace/_trace/trace_handlers.py index 408889c669c..1aad3646a11 100644 --- a/ddtrace/_trace/trace_handlers.py +++ b/ddtrace/_trace/trace_handlers.py @@ -14,6 +14,7 @@ import ddtrace from ddtrace import config +from ddtrace._trace._inferred_proxy import SUPPORTED_PROXY_SPAN_NAMES from ddtrace._trace._inferred_proxy import create_inferred_proxy_span_if_headers_exist from ddtrace._trace._span_link import SpanLinkKind as _SpanLinkKind from ddtrace._trace._span_pointer import _SpanPointerDescription @@ -244,7 +245,7 @@ def _on_web_framework_finish_request( def _set_inferred_proxy_tags(span, status_code): - if span._parent and span._parent.name == "aws.apigateway": + if span._parent and span._parent.name in SUPPORTED_PROXY_SPAN_NAMES: inferred_span = span._parent status_code = status_code if status_code else span.get_tag("http.status_code") if status_code: diff --git a/releasenotes/notes/feat-inferred-proxy-httpapi-headers-33a0d4d4c1d6c5a1.yaml b/releasenotes/notes/feat-inferred-proxy-httpapi-headers-33a0d4d4c1d6c5a1.yaml new file mode 100644 index 00000000000..7c1f07fbb40 --- /dev/null +++ b/releasenotes/notes/feat-inferred-proxy-httpapi-headers-33a0d4d4c1d6c5a1.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + tracing: Ensures inferred proxy spans for AWS API Gateway HTTP APIs are created when the + ``x-dd-proxy`` header reports ``aws-httpapi``. + AAP: Update inferred proxy span tags to ensure that inferred services are discovered by the App and API Protection API Catalog. diff --git a/tests/contrib/aiohttp/test_middleware.py b/tests/contrib/aiohttp/test_middleware.py index 0cf312f8718..a2d1b6512b7 100644 --- a/tests/contrib/aiohttp/test_middleware.py +++ b/tests/contrib/aiohttp/test_middleware.py @@ -619,7 +619,7 @@ async def test_inferred_spans_api_gateway(app_tracer, aiohttp_client, test_app, api_gateway_resource="GET /", method="GET", status_code=str(test_app["status_code"]), - url="local/", + url="https://local/", start=1736973768, is_distributed=test_headers["type"] == "distributed", distributed_trace_id=1, diff --git a/tests/contrib/asgi/test_asgi.py b/tests/contrib/asgi/test_asgi.py index 11159233547..91a439cde33 100644 --- a/tests/contrib/asgi/test_asgi.py +++ b/tests/contrib/asgi/test_asgi.py @@ -808,7 +808,7 @@ async def test_inferred_spans_api_gateway_default(scope, tracer, test_spans, app api_gateway_resource="GET /", method="GET", status_code=app_type["status_code"], - url="local/", + url="https://local/", start=1736973768, is_distributed=headers == distributed_headers, distributed_trace_id=1, diff --git a/tests/contrib/bottle/test.py b/tests/contrib/bottle/test.py index d74ceb4935d..a0ada118626 100644 --- a/tests/contrib/bottle/test.py +++ b/tests/contrib/bottle/test.py @@ -558,7 +558,7 @@ def handled_error_endpoint(): api_gateway_resource="GET /", method="GET", status_code=str(test_endpoint["status"]), - url="local/", + url="https://local/", start=1736973768, is_distributed=False, distributed_trace_id=1, diff --git a/tests/contrib/bottle/test_distributed.py b/tests/contrib/bottle/test_distributed.py index 553d3c2072b..a38e6755e12 100644 --- a/tests/contrib/bottle/test_distributed.py +++ b/tests/contrib/bottle/test_distributed.py @@ -169,7 +169,7 @@ def default_endpoint(): api_gateway_resource="GET /", method="GET", status_code=200, - url="local/", + url="https://local/", start=1736973768, is_distributed=True, distributed_trace_id=1, diff --git a/tests/contrib/cherrypy/test_middleware.py b/tests/contrib/cherrypy/test_middleware.py index 43357cf5150..f665b23bc4e 100644 --- a/tests/contrib/cherrypy/test_middleware.py +++ b/tests/contrib/cherrypy/test_middleware.py @@ -542,7 +542,7 @@ def test_inferred_spans_api_gateway_default(self): api_gateway_resource="GET /", method="GET", status_code=test_endpoint["status"], - url="local/", + url="https://local/", start=1736973768, is_distributed=test_headers == distributed_headers, distributed_trace_id=1, diff --git a/tests/contrib/django/test_django.py b/tests/contrib/django/test_django.py index d9287703ad1..30bee527616 100644 --- a/tests/contrib/django/test_django.py +++ b/tests/contrib/django/test_django.py @@ -1952,7 +1952,7 @@ def test_inferred_spans_api_gateway_default(client, test_spans): api_gateway_resource="GET /", method="GET", status_code="200", - url="local/", + url="https://local/", start=1736973768.0, ) @@ -1975,7 +1975,7 @@ def test_inferred_spans_api_gateway_default(client, test_spans): api_gateway_resource="GET /", method="GET", status_code="500", - url="local/", + url="https://local/", start=1736973768.0, ) @@ -2035,7 +2035,7 @@ def test_inferred_spans_api_gateway_distributed_tracing(client, test_spans): api_gateway_resource="GET /", method="GET", status_code="200", - url="local/", + url="https://local/", start=1736973768.0, is_distributed=True, distributed_trace_id=1, diff --git a/tests/contrib/djangorestframework/test_djangorestframework.py b/tests/contrib/djangorestframework/test_djangorestframework.py index 866327e297d..be59e232a41 100644 --- a/tests/contrib/djangorestframework/test_djangorestframework.py +++ b/tests/contrib/djangorestframework/test_djangorestframework.py @@ -107,7 +107,7 @@ def test_inferred_spans_api_gateway_default(client, test_spans, test_endpoint, i api_gateway_resource="GET /", method="GET", status_code=test_endpoint["status_code"], - url="local/", + url="https://local/", start=1736973768, is_distributed=headers == distributed_headers, distributed_trace_id=1, diff --git a/tests/contrib/falcon/test_distributed_tracing.py b/tests/contrib/falcon/test_distributed_tracing.py index 66e2a865f6d..5192ae1744e 100644 --- a/tests/contrib/falcon/test_distributed_tracing.py +++ b/tests/contrib/falcon/test_distributed_tracing.py @@ -122,7 +122,7 @@ def test_inferred_spans_api_gateway_distributed_tracing_enabled(self): api_gateway_resource="GET /", method="GET", status_code="200", - url="local/", + url="https://local/", start=1736973768.0, is_distributed=True, distributed_trace_id=1, diff --git a/tests/contrib/falcon/test_suite.py b/tests/contrib/falcon/test_suite.py index 9771158e34d..e1099d155d4 100644 --- a/tests/contrib/falcon/test_suite.py +++ b/tests/contrib/falcon/test_suite.py @@ -301,7 +301,7 @@ def test_inferred_spans_api_gateway_default(self): api_gateway_resource="GET /", method="GET", status_code=test_endpoint["status"], - url="local/", + url="https://local/", start=1736973768.0, is_distributed=False, distributed_trace_id=1, diff --git a/tests/contrib/fastapi/test_fastapi.py b/tests/contrib/fastapi/test_fastapi.py index 15c29246314..669820e88bd 100644 --- a/tests/contrib/fastapi/test_fastapi.py +++ b/tests/contrib/fastapi/test_fastapi.py @@ -913,7 +913,7 @@ def test_inferred_spans_api_gateway(client, tracer, test_spans, test, inferred_p api_gateway_resource="GET /", method="GET", status_code=test["status_code"], - url="local/", + url="https://local/", start=1736973768, is_distributed=test_headers["type"] == "distributed", distributed_trace_id=1, diff --git a/tests/contrib/flask/test_request.py b/tests/contrib/flask/test_request.py index 2b04522ba41..73c40e701cc 100644 --- a/tests/contrib/flask/test_request.py +++ b/tests/contrib/flask/test_request.py @@ -309,7 +309,7 @@ def error_status_code(): api_gateway_resource="GET /", method="GET", status_code="200", - url="local/", + url="https://local/", start=1736973768, ) @@ -330,7 +330,7 @@ def error_status_code(): api_gateway_resource="GET /", method="GET", status_code="500", - url="local/", + url="https://local/", start=1736973768, ) @@ -351,7 +351,7 @@ def error_status_code(): api_gateway_resource="GET /", method="GET", status_code="599", - url="local/", + url="https://local/", start=1736973768, ) @@ -405,7 +405,7 @@ def index(): api_gateway_resource="GET /", method="GET", status_code="200", - url="local/", + url="https://local/", start=1736973768, is_distributed=True, distributed_trace_id=1, diff --git a/tests/contrib/molten/test_molten.py b/tests/contrib/molten/test_molten.py index 33a15ef50d9..6fda3ce3711 100644 --- a/tests/contrib/molten/test_molten.py +++ b/tests/contrib/molten/test_molten.py @@ -494,7 +494,7 @@ def test_inferred_spans_api_gateway_default(self): api_gateway_resource="GET /", method="GET", status_code=test_endpoint["status"], - url="local/", + url="https://local/", start=1736973768, is_distributed=test_headers == distributed_headers, distributed_trace_id=1, diff --git a/tests/contrib/pyramid/test_pyramid.py b/tests/contrib/pyramid/test_pyramid.py index dc67027ec22..2ef34973c26 100644 --- a/tests/contrib/pyramid/test_pyramid.py +++ b/tests/contrib/pyramid/test_pyramid.py @@ -365,7 +365,7 @@ def test_inferred_spans_api_gateway_default(self): api_gateway_resource="GET /", method="GET", status_code=test_endpoint["status"], - url="local/", + url="https://local/", start=1736973768, ) else: diff --git a/tests/contrib/sanic/test_sanic.py b/tests/contrib/sanic/test_sanic.py index 5be8c8bb01c..62c6ccf1630 100644 --- a/tests/contrib/sanic/test_sanic.py +++ b/tests/contrib/sanic/test_sanic.py @@ -588,7 +588,7 @@ async def test_inferred_spans_api_gateway_default( api_gateway_resource="GET /", method="GET", status_code=test["status_code"], - url="local/", + url="https://local/", start=1736973768, is_distributed=test_headers["type"] == "distributed", distributed_trace_id=1, diff --git a/tests/contrib/starlette/test_starlette.py b/tests/contrib/starlette/test_starlette.py index 8047106f4a5..c5d73e93036 100644 --- a/tests/contrib/starlette/test_starlette.py +++ b/tests/contrib/starlette/test_starlette.py @@ -631,6 +631,6 @@ def test_inferred_spans_api_gateway(client, test_spans): api_gateway_resource="GET /", method="GET", status_code="200", - url="local/", + url="https://local/", start=1736973768, ) diff --git a/tests/contrib/tornado/test_tornado_web.py b/tests/contrib/tornado/test_tornado_web.py index cbb4d2c7785..4b0ba204e17 100644 --- a/tests/contrib/tornado/test_tornado_web.py +++ b/tests/contrib/tornado/test_tornado_web.py @@ -689,7 +689,7 @@ def test_inferred_spans_api_gateway(self): api_gateway_resource="GET /", method="GET", status_code=test_endpoint["status"], - url="local/", + url="https://local/", start=1736973768, is_distributed=test_headers == distributed_headers, distributed_trace_id=1, diff --git a/tests/tracer/test_inferred_proxy.py b/tests/tracer/test_inferred_proxy.py new file mode 100644 index 00000000000..17722f236dc --- /dev/null +++ b/tests/tracer/test_inferred_proxy.py @@ -0,0 +1,52 @@ +import pytest + +from ddtrace._trace._inferred_proxy import SUPPORTED_PROXY_SPAN_NAMES +from ddtrace._trace._inferred_proxy import create_inferred_proxy_span_if_headers_exist +from ddtrace.internal.core import ExecutionContext +from tests.utils import DummyTracer + + +@pytest.mark.parametrize( + "proxy_header,span_name", [("aws-httpapi", "aws.httpapi"), ("aws-apigateway", "aws.apigateway")] +) +def test_create_inferred_proxy_span_for_apigateway(proxy_header, span_name): + tracer = DummyTracer() + ctx = ExecutionContext("test") + headers = { + "x-dd-proxy": proxy_header, + "x-dd-proxy-request-time-ms": "1736973768000", + "x-dd-proxy-path": "/http-api-path", + "x-dd-proxy-resource-path": "/{Path}", + "x-dd-proxy-httpmethod": "POST", + "x-dd-proxy-domain-name": "id.execute-api.us-east-1.amazonaws.com", + "x-dd-proxy-stage": "prod", + "x-dd-proxy-account-id": "123456789012", + "x-dd-proxy-api-id": "abcdef123456", + "x-dd-proxy-region": "us-east-1", + "x-dd-proxy-user": "apigw-user", + } + + create_inferred_proxy_span_if_headers_exist(ctx, headers, child_of=None, tracer=tracer) + + span = ctx.get_item("inferred_proxy_span") + assert span is not None + assert span.name == span_name + assert span.name in SUPPORTED_PROXY_SPAN_NAMES + assert span.resource == "POST /{Path}" + assert span.service == "id.execute-api.us-east-1.amazonaws.com" + assert span.start_ns == 1736973768000 * 1000000 + assert span.get_tag("component") == proxy_header + assert span.get_tag("http.method") == "POST" + assert span.get_tag("http.url") == "https://id.execute-api.us-east-1.amazonaws.com/http-api-path" + assert span.get_tag("http.route") == "/{Path}" + assert span.get_tag("stage") == "prod" + assert span.get_tag("account_id") == "123456789012" + assert span.get_tag("apiid") == "abcdef123456" + assert span.get_tag("region") == "us-east-1" + assert span.get_tag("aws_user") == "apigw-user" + if proxy_header == "aws-httpapi": + assert span.get_tag("dd_resource_key") == "arn:aws:apigateway:us-east-1::/apis/abcdef123456" + elif proxy_header == "aws-apigateway": + assert span.get_tag("dd_resource_key") == "arn:aws:apigateway:us-east-1::/restapis/abcdef123456" + + assert ctx.get_item("inferred_proxy_finish_callback") is not None