Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ce5f3ff
feat(llma): Add OpenTelemetry traces ingestion foundation
andrewm4894 Nov 12, 2025
285563d
feat(llma): Implement OTLP protobuf parser for OTel traces ingestion
andrewm4894 Nov 12, 2025
fd62595
feat(llma): Implement span-to-AI-event transformer for OTel traces
andrewm4894 Nov 12, 2025
c9b0e0f
feat(llma): Route OTel AI events to PostHog capture pipeline
andrewm4894 Nov 12, 2025
0806fef
feat(llma): add Django proxy for OTel logs ingestion at /i/v1/logs
andrewm4894 Nov 12, 2025
8cca85f
feat(llm-analytics): implement OTEL logs ingestion endpoint
andrewm4894 Nov 20, 2025
1076cfa
fix(llma): Remove conflicting opentelemetry-proto version pin
andrewm4894 Nov 20, 2025
eaf55ef
feat(llm-analytics): Improve OTEL ingestion authentication and data s…
andrewm4894 Nov 20, 2025
a0fad6c
Remove unused OTEL logs endpoint
andrewm4894 Nov 20, 2025
0061052
Revert "Remove unused OTEL logs endpoint"
andrewm4894 Nov 20, 2025
d014111
Restore OTEL logs endpoint to support both v1 and v2 instrumentation
andrewm4894 Nov 20, 2025
bf048c6
Fix v2 log event parsing to extract message content
andrewm4894 Nov 20, 2025
e311ee9
fix(otel): align transformers with PostHog LLM Analytics schema
andrewm4894 Nov 21, 2025
c3211d1
fix(otel): accumulate all v2 logs before merging to prevent race cond…
andrewm4894 Nov 21, 2025
b3d58f3
fix(llm-analytics): Skip event merger for v1 OTEL spans
andrewm4894 Nov 21, 2025
28e12d3
fix(llm-analytics): Fix v1 OTEL span detection
andrewm4894 Nov 21, 2025
3093228
docs(llm-analytics): Add comprehensive README for OTEL ingestion
andrewm4894 Nov 21, 2025
1d6759b
docs(llm-analytics): Refactor OTEL README to be standalone
andrewm4894 Nov 21, 2025
239b146
fix(llm-analytics): Rewrite event_merger tests for Redis-backed imple…
andrewm4894 Nov 21, 2025
ce69feb
chore(llm-analytics): Remove unused TypeScript OTEL transformer
andrewm4894 Nov 21, 2025
d8374fa
docs(llm-analytics): Remove OTEL_QUICKSTART.md
andrewm4894 Nov 21, 2025
d16f5a2
docs(llm-analytics): Fix README inaccuracies
andrewm4894 Nov 21, 2025
4a69244
docs(llm-analytics): Expand architecture diagram to show full event flow
andrewm4894 Nov 21, 2025
05380d0
fix(otel): Add gen_ai.tool.message handler for OTEL v2 ingestion
andrewm4894 Nov 21, 2025
fbf810b
test(otel): Add test coverage for tool message handling
andrewm4894 Nov 21, 2025
36cf748
fix(otel): Add Mastra OTEL ingestion support with provider transformers
andrewm4894 Nov 21, 2025
e9f3cff
docs(otel): Update README with provider transformers and Mastra support
andrewm4894 Nov 21, 2025
1e0264d
refactor(otel): Add OtelInstrumentationPattern enum for provider patt…
andrewm4894 Nov 27, 2025
dc7eb6b
chore(otel): Remove legacy proxy_logs_to_capture_service
andrewm4894 Nov 27, 2025
3b12436
chore(otel): Remove dead code and reduce verbose logging
andrewm4894 Nov 27, 2025
e694228
refactor(otel): Remove verbose debug logging
andrewm4894 Nov 27, 2025
6f0bf26
fix(otel): Send embedding spans immediately without waiting for logs
andrewm4894 Nov 27, 2025
3c7822c
fix(otel): Preserve per-span resource/scope instead of flattening to …
andrewm4894 Nov 27, 2025
80f1a80
docs(otel): Update README with embedding span handling and per-item p…
andrewm4894 Nov 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions posthog/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from posthog.temporal.codec_server import decode_payloads

from products.early_access_features.backend.api import early_access_features
from products.llm_analytics.backend.api.otel.ingestion import otel_logs_endpoint, otel_traces_endpoint

from .utils import opt_slash_path, render_template
from .views import (
Expand Down Expand Up @@ -168,6 +169,10 @@ def authorize_and_redirect(request: HttpRequest) -> HttpResponse:
# ee
*ee_urlpatterns,
# api
# OpenTelemetry traces ingestion for LLM Analytics
path("api/projects/<int:project_id>/ai/otel/traces", csrf_exempt(otel_traces_endpoint)),
# OpenTelemetry logs ingestion for LLM Analytics
path("api/projects/<int:project_id>/ai/otel/logs", csrf_exempt(otel_logs_endpoint)),
path("api/environments/<int:team_id>/progress/", progress),
path("api/environments/<int:team_id>/query/<str:query_uuid>/progress/", progress),
path("api/environments/<int:team_id>/query/<str:query_uuid>/progress", progress),
Expand Down
463 changes: 463 additions & 0 deletions products/llm_analytics/backend/api/otel/README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions products/llm_analytics/backend/api/otel/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# OpenTelemetry traces ingestion for PostHog LLM Analytics
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""
OpenTelemetry semantic conventions for LLM traces.

Supports:
- PostHog native: posthog.ai.* attributes (highest priority)
- GenAI: gen_ai.* attributes (fallback)
"""
187 changes: 187 additions & 0 deletions products/llm_analytics/backend/api/otel/conventions/genai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""
GenAI semantic conventions for OpenTelemetry.

Implements the GenAI semantic conventions (gen_ai.*) as fallback
when PostHog-native attributes are not present.

Supports provider-specific transformations for frameworks like Mastra
that use custom OTEL formats.

Reference: https://opentelemetry.io/docs/specs/semconv/gen-ai/
"""

from collections import defaultdict
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from .providers.base import ProviderTransformer


def has_genai_attributes(span: dict[str, Any]) -> bool:
    """Return True when any attribute on *span* follows the GenAI (gen_ai.*) convention."""
    span_attrs = span.get("attributes", {})
    return any(name.startswith("gen_ai.") for name in span_attrs)


def _extract_indexed_messages(attributes: dict[str, Any], prefix: str) -> list[dict[str, Any]] | None:
"""
Extract indexed message attributes like gen_ai.prompt.{N}.{field} into a list of message dicts.

Args:
attributes: Span attributes dictionary
prefix: Message prefix (e.g., "gen_ai.prompt" or "gen_ai.completion")

Returns:
List of message dicts with role, content, etc., or None if no messages found
"""
# Group attributes by index
messages_by_index: dict[int, dict[str, Any]] = defaultdict(dict)

for key, value in attributes.items():
if not key.startswith(f"{prefix}."):
continue

# Parse: gen_ai.prompt.0.role -> index=0, field=role
parts = key[len(prefix) + 1 :].split(".", 1)
if len(parts) != 2:
continue

try:
index = int(parts[0])
field = parts[1]
messages_by_index[index][field] = value
except (ValueError, IndexError):
continue

if not messages_by_index:
return None

# Convert to sorted list of messages
messages = []
for index in sorted(messages_by_index.keys()):
msg = messages_by_index[index]
if msg: # Only include non-empty messages
messages.append(msg)

return messages if messages else None


def detect_provider(span: dict[str, Any], scope: dict[str, Any] | None = None) -> "ProviderTransformer | None":
    """
    Detect which provider transformer handles this span.

    Iterates the registered transformers in order and returns the first one
    whose ``can_handle`` accepts the span.

    Args:
        span: Parsed OTEL span
        scope: Instrumentation scope info

    Returns:
        Matching ProviderTransformer instance, or None if no provider matches
    """
    # Imported lazily to avoid a circular import with the providers package.
    from .providers import PROVIDER_TRANSFORMERS

    scope_info = scope if scope is not None else {}
    for transformer_cls in PROVIDER_TRANSFORMERS:
        candidate = transformer_cls()
        if candidate.can_handle(span, scope_info):
            return candidate
    return None


# Scalar attributes copied through verbatim, as (source attribute, result key)
# pairs. Kept in two groups so the result dict preserves the original
# insertion order around the prompt/completion content.
_GENAI_PRE_CONTENT_ATTRS: tuple[tuple[str, str], ...] = (
    # Operation name
    ("gen_ai.operation.name", "operation_name"),
    # Token usage
    ("gen_ai.usage.input_tokens", "input_tokens"),
    ("gen_ai.usage.output_tokens", "output_tokens"),
)

_GENAI_POST_CONTENT_ATTRS: tuple[tuple[str, str], ...] = (
    # Model parameters
    ("gen_ai.request.temperature", "temperature"),
    ("gen_ai.request.max_tokens", "max_tokens"),
    ("gen_ai.request.top_p", "top_p"),
    ("gen_ai.request.frequency_penalty", "frequency_penalty"),
    ("gen_ai.request.presence_penalty", "presence_penalty"),
    # Response metadata
    ("gen_ai.response.finish_reasons", "finish_reasons"),
    ("gen_ai.response.id", "response_id"),
)


def _transform_content(raw, transform_fn):
    """
    Apply a provider-specific transform to raw content.

    Falls back to the raw value when no transformer is available or when the
    transformer declines (returns None).
    """
    if transform_fn is None:
        return raw
    transformed = transform_fn(raw)
    return transformed if transformed is not None else raw


def extract_genai_attributes(span: dict[str, Any], scope: dict[str, Any] | None = None) -> dict[str, Any]:
    """
    Extract GenAI semantic convention attributes from span.

    GenAI conventions use `gen_ai.*` prefix and are fallback
    when PostHog-native attributes are not present.

    Supports provider-specific transformations for frameworks that use
    custom OTEL formats (e.g., Mastra).

    Args:
        span: Parsed OTEL span
        scope: Instrumentation scope info (for provider detection)

    Returns:
        Extracted attributes dict
    """
    attributes = span.get("attributes", {})
    scope = scope or {}
    result: dict[str, Any] = {}

    # Detect provider-specific transformer up front; it only affects how
    # direct (non-indexed) prompt/completion payloads are interpreted below.
    provider_transformer = detect_provider(span, scope)

    # Model (prefer request model, then response model, then bare gen_ai.model)
    model = (
        attributes.get("gen_ai.request.model")
        or attributes.get("gen_ai.response.model")
        or attributes.get("gen_ai.model")
    )
    if model is not None:
        result["model"] = model

    # Provider (from gen_ai.system)
    if (system := attributes.get("gen_ai.system")) is not None:
        result["provider"] = system

    # Operation name and token usage
    for attr_key, result_key in _GENAI_PRE_CONTENT_ATTRS:
        if (value := attributes.get(attr_key)) is not None:
            result[result_key] = value

    # Content: indexed messages (gen_ai.prompt.0.role, ...) take precedence;
    # otherwise fall back to the direct attribute, run through the provider
    # transformer when one matched.
    prompts = _extract_indexed_messages(attributes, "gen_ai.prompt")
    if prompts:
        result["prompt"] = prompts
    elif (prompt := attributes.get("gen_ai.prompt")) is not None:
        result["prompt"] = _transform_content(
            prompt,
            provider_transformer.transform_prompt if provider_transformer else None,
        )

    completions = _extract_indexed_messages(attributes, "gen_ai.completion")
    if completions:
        result["completion"] = completions
    elif (completion := attributes.get("gen_ai.completion")) is not None:
        result["completion"] = _transform_content(
            completion,
            provider_transformer.transform_completion if provider_transformer else None,
        )

    # Model parameters and response metadata
    for attr_key, result_key in _GENAI_POST_CONTENT_ATTRS:
        if (value := attributes.get(attr_key)) is not None:
            result[result_key] = value

    return result
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
PostHog-native OpenTelemetry conventions.

Attributes with `posthog.ai.*` prefix have highest priority in the waterfall.
"""

from typing import Any


def has_posthog_attributes(span: dict[str, Any]) -> bool:
    """Return True when any attribute on *span* uses the PostHog-native posthog.ai.* prefix."""
    span_attrs = span.get("attributes", {})
    return any(name.startswith("posthog.ai.") for name in span_attrs)


def extract_posthog_native_attributes(span: dict[str, Any]) -> dict[str, Any]:
    """
    Extract PostHog-native attributes from span.

    PostHog-native convention uses `posthog.ai.*` prefix.
    This takes highest priority in the waterfall pattern.

    Each recognized `posthog.ai.<key>` attribute is copied into the result
    under `<key>`; attributes that are absent (None) are omitted.
    """
    # Recognized keys, listed in output order; the result key is identical to
    # the attribute suffix after the "posthog.ai." prefix.
    recognized_keys = (
        # Core identifiers
        "model",
        "provider",
        "trace_id",
        "span_id",
        "parent_id",
        "session_id",
        "generation_id",
        # Token usage
        "input_tokens",
        "output_tokens",
        "cache_read_tokens",
        "cache_write_tokens",
        # Cost
        "input_cost_usd",
        "output_cost_usd",
        "total_cost_usd",
        # Operation
        "operation_name",
        # Content
        "input",
        "output",
        # Model parameters
        "temperature",
        "max_tokens",
        "stream",
        # Error tracking
        "is_error",
        "error_message",
    )

    span_attrs = span.get("attributes", {})
    extracted: dict[str, Any] = {}
    for key in recognized_keys:
        value = span_attrs.get(f"posthog.ai.{key}")
        if value is not None:
            extracted[key] = value
    return extracted
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Provider-specific OTEL transformers.

Each provider (Mastra, Langchain, LlamaIndex, etc.) handles their
specific OTEL format quirks and normalizes to PostHog format.
"""

from .base import OtelInstrumentationPattern, ProviderTransformer
from .mastra import MastraTransformer

# Registry of all available provider transformers
# Add new providers here as they're implemented
PROVIDER_TRANSFORMERS: list[type[ProviderTransformer]] = [
MastraTransformer,
# Future: LangchainTransformer, LlamaIndexTransformer, etc.
]

__all__ = [
"OtelInstrumentationPattern",
"ProviderTransformer",
"MastraTransformer",
"PROVIDER_TRANSFORMERS",
]
Loading
Loading