Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions backend/db/clickhouse/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def insert_traces_batch(self, traces: list[dict[str, Any]]) -> None:
t.get("release"),
t.get("input"),
t.get("output"),
t.get("metadata"),
now, # ch_create_time
now, # ch_update_time
]
Expand All @@ -67,6 +68,7 @@ def insert_traces_batch(self, traces: list[dict[str, Any]]) -> None:
"release",
"input",
"output",
"metadata",
"ch_create_time",
"ch_update_time",
],
Expand Down Expand Up @@ -100,6 +102,7 @@ def insert_spans_batch(self, spans: list[dict[str, Any]]) -> None:
s.get("input"),
s.get("output"),
s.get("environment", "default"),
s.get("metadata"),
now, # ch_create_time
now, # ch_update_time
]
Expand Down Expand Up @@ -127,6 +130,7 @@ def insert_spans_batch(self, spans: list[dict[str, Any]]) -> None:
"input",
"output",
"environment",
"metadata",
"ch_create_time",
"ch_update_time",
],
Expand Down
1 change: 1 addition & 0 deletions backend/db/clickhouse/migrations/001_create_traces.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ CREATE TABLE IF NOT EXISTS traces
release Nullable(String),
input Nullable(String) CODEC(ZSTD(3)),
output Nullable(String) CODEC(ZSTD(3)),
metadata Nullable(String) CODEC(ZSTD(3)),
ch_create_time DateTime64(3) DEFAULT now64(3),
ch_update_time DateTime64(3) DEFAULT now64(3)
)
Expand Down
1 change: 1 addition & 0 deletions backend/db/clickhouse/migrations/002_create_spans.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ CREATE TABLE IF NOT EXISTS spans
input Nullable(String) CODEC(ZSTD(3)),
output Nullable(String) CODEC(ZSTD(3)),
environment String DEFAULT 'default',
metadata Nullable(String) CODEC(ZSTD(3)),
ch_create_time DateTime64(3) DEFAULT now64(3),
ch_update_time DateTime64(3) DEFAULT now64(3)
)
Expand Down
2 changes: 2 additions & 0 deletions backend/rest/config/traces.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class SpanResponse(BaseModel):
total_tokens: int | None
input: str | None
output: str | None
metadata: str | None


class TraceListItem(BaseModel):
Expand Down Expand Up @@ -70,4 +71,5 @@ class TraceDetailResponse(BaseModel):
release: str | None
input: str | None
output: str | None
metadata: str | None
spans: list[SpanResponse]
6 changes: 4 additions & 2 deletions backend/rest/services/trace_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def get_trace(self, project_id: str, trace_id: str) -> dict | None:
trace_query = """
SELECT
trace_id, project_id, name, trace_start_time,
user_id, session_id, environment, release, input, output
user_id, session_id, environment, release, input, output, metadata
FROM traces FINAL
WHERE project_id = {project_id:String} AND trace_id = {trace_id:String}
LIMIT 1
Expand All @@ -160,6 +160,7 @@ def get_trace(self, project_id: str, trace_id: str) -> dict | None:
"release": row[7],
"input": row[8],
"output": row[9],
"metadata": row[10],
}

# Fetch spans
Expand All @@ -168,7 +169,7 @@ def get_trace(self, project_id: str, trace_id: str) -> dict | None:
span_id, trace_id, parent_span_id, name, span_kind,
span_start_time, span_end_time, status, status_message,
model_name, cost, input_tokens, output_tokens, total_tokens,
input, output
input, output, metadata
FROM spans FINAL
WHERE project_id = {project_id:String} AND trace_id = {trace_id:String}
ORDER BY span_start_time ASC
Expand Down Expand Up @@ -198,6 +199,7 @@ def get_trace(self, project_id: str, trace_id: str) -> dict | None:
"total_tokens": int(row[13]) if row[13] is not None else None,
"input": row[14],
"output": row[15],
"metadata": row[16],
}
)

Expand Down
56 changes: 56 additions & 0 deletions backend/worker/otel_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,35 @@

logger = logging.getLogger(__name__)

# Attribute keys that are already extracted into dedicated fields.
# Every entry is matched as a *prefix*: entries ending in "." cover a whole
# namespace (e.g. "gen_ai."), and the remaining entries also cover any longer
# key that starts with them (e.g. "llm.input_messages" covers
# "llm.input_messages.0.message.role").
_KNOWN_ATTRIBUTE_PREFIXES = {
    "traceroot.span.input",
    "traceroot.span.output",
    "traceroot.span.type",
    "traceroot.span.metadata",
    "traceroot.span.tags",
    "traceroot.llm.",
    "traceroot.trace.",
    "traceroot.environment",
    "traceroot.version",
    "openinference.span.kind",
    "session.id",
    "session.user_id",
    "user.id",
    "input.value",
    "output.value",
    "gen_ai.",
    "llm.token_count.",
    "llm.model_name",
    "llm.input_messages",
    "llm.output_messages",
}

# ``str.startswith`` accepts a tuple of candidates, so snapshot the set once at
# import time instead of looping over it in Python for every attribute.
_KNOWN_ATTRIBUTE_PREFIX_TUPLE = tuple(_KNOWN_ATTRIBUTE_PREFIXES)


def _is_known_attribute(key: str) -> bool:
    """Check if an attribute key is already extracted into a dedicated field.

    Args:
        key: The attribute key to classify.

    Returns:
        True when ``key`` equals, or starts with, any entry of
        ``_KNOWN_ATTRIBUTE_PREFIXES``; False otherwise.

    Note:
        Matching is purely prefix-based with no separator check, so a key
        such as ``session.identity`` is also reported as known because it
        starts with ``session.id``.
    """
    # An exact match is a special case of a prefix match, so one call suffices.
    return key.startswith(_KNOWN_ATTRIBUTE_PREFIX_TUPLE)


def decode_otel_id(b64_value: str | None) -> str | None:
"""Decode base64-encoded OTEL trace/span ID to hex string.
Expand Down Expand Up @@ -367,6 +396,24 @@ def transform_otel_to_clickhouse(
if usage["cost"] is not None:
span_record["cost"] = usage["cost"]

# Extract metadata
# Priority: explicit traceroot.span.metadata > remaining attributes
explicit_metadata = span_attrs.get("traceroot.span.metadata")
if explicit_metadata is not None:
if isinstance(explicit_metadata, str):
span_record["metadata"] = explicit_metadata
else:
span_record["metadata"] = json.dumps(explicit_metadata)
else:
# Collect non-internal attributes as metadata
extra_attrs = {
k: v
for k, v in span_attrs.items()
if not _is_known_attribute(k) and v is not None
}
if extra_attrs:
span_record["metadata"] = json.dumps(extra_attrs)

# Check span status for errors
status = otel_span.get("status", {})
status_code = status.get("code", 0)
Expand Down Expand Up @@ -416,6 +463,15 @@ def transform_otel_to_clickhouse(
"environment": environment,
}

# Extract trace-level metadata
trace_metadata = span_attrs.get("traceroot.trace.metadata")
if trace_metadata is not None:
traces[trace_id]["metadata"] = (
json.dumps(trace_metadata)
if not isinstance(trace_metadata, str)
else trace_metadata
)

# Root span input/output becomes trace input/output
if span_input is not None:
traces[trace_id]["input"] = (
Expand Down
Empty file.
162 changes: 162 additions & 0 deletions backend/worker/tests/test_otel_transform_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""Tests for metadata extraction in otel_transform."""

import base64
import json

from worker.otel_transform import transform_otel_to_clickhouse


def _make_trace_id() -> str:
    """Return the base64 text for a fixed 16-byte trace ID (every byte is 0x01)."""
    raw_id = bytes([0x01]) * 16
    encoded = base64.b64encode(raw_id)
    return encoded.decode()


def _make_span_id(byte: int = 0x02) -> str:
    """Return the base64 text for an 8-byte span ID made of *byte* repeated 8 times."""
    raw_id = bytes([byte]) * 8
    encoded = base64.b64encode(raw_id)
    return encoded.decode()


def _attr(key: str, value: object) -> dict:
    """Build an OTEL attribute entry of the form ``{"key": ..., "value": {...}}``.

    The wrapper used inside ``"value"`` depends on the Python type of *value*:

    * ``str``   -> ``{"stringValue": value}``
    * ``bool``  -> ``{"boolValue": value}``
    * ``int``   -> ``{"intValue": str(value)}`` (emitted as a decimal string)
    * ``float`` -> ``{"doubleValue": value}``
    * anything else (dicts, lists, ``None``, ...) -> ``{"stringValue": json.dumps(value)}``

    Args:
        key: Attribute key.
        value: Attribute value to wrap.

    Returns:
        A dict with the ``"key"`` and typed ``"value"`` entries.

    Raises:
        TypeError: If *value* falls through to the JSON branch and is not
            JSON-serialisable.
    """
    if isinstance(value, str):
        return {"key": key, "value": {"stringValue": value}}
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return {"key": key, "value": {"boolValue": value}}
    if isinstance(value, int):
        return {"key": key, "value": {"intValue": str(value)}}
    if isinstance(value, float):
        return {"key": key, "value": {"doubleValue": value}}
    # Strings already returned above, so everything reaching this point is
    # serialised as JSON unconditionally.
    return {"key": key, "value": {"stringValue": json.dumps(value)}}


def _otel_payload(span_attributes: list[dict], *, parent_span_id: str | None = None) -> dict:
    """Wrap *span_attributes* in a minimal OTEL payload: one resource span holding one span.

    The ``parentSpanId`` field is only present on the span when
    *parent_span_id* is not None.
    """
    # Appended last so the key order matches a span built and then amended.
    optional_fields = {} if parent_span_id is None else {"parentSpanId": parent_span_id}
    otel_span = {
        "traceId": _make_trace_id(),
        "spanId": _make_span_id(),
        "name": "test-span",
        "kind": "SPAN_KIND_INTERNAL",
        "startTimeUnixNano": "1700000000000000000",
        "endTimeUnixNano": "1700000001000000000",
        "attributes": span_attributes,
        "status": {},
        **optional_fields,
    }
    scope_entry = {"scope": {"name": "test"}, "spans": [otel_span]}
    resource_entry = {"resource": {"attributes": []}, "scopeSpans": [scope_entry]}
    return {"resourceSpans": [resource_entry]}


# ── Tests ──────────────────────────────────────────────────────────


def test_explicit_metadata_extracted():
    """An explicit ``traceroot.span.metadata`` attribute ends up as the span's metadata."""
    expected = {"custom_key": "custom_value", "run_id": 42}
    attributes = [_attr("traceroot.span.metadata", json.dumps(expected))]

    _, span_rows = transform_otel_to_clickhouse(_otel_payload(attributes), project_id="proj-1")

    assert len(span_rows) == 1
    span_row = span_rows[0]
    assert "metadata" in span_row
    assert json.loads(span_row["metadata"]) == expected


def test_extra_attributes_become_metadata():
    """Attributes outside the known set are collected into the span's metadata."""
    custom_attrs = {"my.custom.attr": "hello", "another.thing": "world"}
    payload = _otel_payload([_attr(name, text) for name, text in custom_attrs.items()])

    _, span_rows = transform_otel_to_clickhouse(payload, project_id="proj-1")

    assert len(span_rows) == 1
    decoded = json.loads(span_rows[0]["metadata"])
    assert decoded["my.custom.attr"] == "hello"
    assert decoded["another.thing"] == "world"


def test_known_attributes_excluded_from_metadata():
    """Known attributes (traceroot.span.input, gen_ai.*, etc.) do NOT leak into metadata."""
    # Single source of truth for both the payload and the exclusion checks.
    known_attrs = {
        "traceroot.span.input": "some input",
        "gen_ai.system": "openai",
        "llm.model_name": "gpt-4",
        "input.value": "hi",
        "openinference.span.kind": "LLM",
        "session.id": "s-1",
        "user.id": "u-1",
        "llm.input_messages.0.message.role": "user",
        "llm.input_messages.0.message.content": "hello",
        "llm.output_messages.0.message.role": "assistant",
        "llm.output_messages.0.message.content": "hi there",
    }
    attributes = [_attr(name, text) for name, text in known_attrs.items()]
    # One unknown attribute guarantees a metadata dict exists to inspect.
    attributes.append(_attr("my.custom.flag", "yes"))

    _, span_rows = transform_otel_to_clickhouse(_otel_payload(attributes), project_id="proj-1")

    decoded = json.loads(span_rows[0]["metadata"])
    assert "my.custom.flag" in decoded
    # None of the known keys should be present
    for key in known_attrs:
        assert key not in decoded, f"{key} should not appear in metadata"


def test_trace_metadata_extracted():
    """``traceroot.trace.metadata`` on a parentless span is copied onto the trace record."""
    expected = {"experiment": "v2", "dataset": "eval-100"}
    attributes = [_attr("traceroot.trace.metadata", json.dumps(expected))]

    trace_rows, _ = transform_otel_to_clickhouse(_otel_payload(attributes), project_id="proj-1")

    assert len(trace_rows) == 1
    trace_row = trace_rows[0]
    assert "metadata" in trace_row
    assert json.loads(trace_row["metadata"]) == expected


def test_no_metadata_when_no_extra_attributes():
    """A span carrying only known attributes gets no ``metadata`` key at all."""
    known_only = (
        ("traceroot.span.input", "hello"),
        ("traceroot.span.output", "world"),
        ("traceroot.span.type", "LLM"),
    )
    payload = _otel_payload([_attr(name, text) for name, text in known_only])

    _, span_rows = transform_otel_to_clickhouse(payload, project_id="proj-1")

    assert len(span_rows) == 1
    assert "metadata" not in span_rows[0]
11 changes: 11 additions & 0 deletions frontend/ui/src/features/traces/components/JsonRenderer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ export function JsonRenderer({ value, depth = 0 }: JsonRendererProps) {
}

if (typeof value === "string") {
// Try to parse JSON strings and render them as structured objects
if (value.startsWith("{") || value.startsWith("[")) {
try {
const parsed = JSON.parse(value);
if (typeof parsed === "object" && parsed !== null && depth < 10) {
return <JsonRenderer value={parsed} depth={depth} />;
}
} catch {
// Not valid JSON, render as plain string
}
}
return (
<span className="whitespace-pre-wrap break-words text-green-700 dark:text-green-400">
&quot;{value}&quot;
Expand Down
10 changes: 10 additions & 0 deletions frontend/ui/src/features/traces/components/SpanInfoPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export function SpanInfoPanel({ projectId, trace, selection, onClose }: SpanInfo
const timestamp = isTrace ? trace.trace_start_time : selection.span.span_start_time;
const input = isTrace ? trace.input : selection.span.input;
const output = isTrace ? trace.output : selection.span.output;
const metadata = isTrace ? trace.metadata : selection.span.metadata;

// Trace-level aggregates
const traceTotalCost = isTrace ? getTraceTotalCost(trace) : null;
Expand Down Expand Up @@ -198,6 +199,15 @@ export function SpanInfoPanel({ projectId, trace, selection, onClose }: SpanInfo
>
<ContentRenderer content={output} />
</ExpandableSection>

{/* Metadata */}
<ExpandableSection
title="Metadata"
defaultOpen={true}
onCopy={metadata ? () => copyToClipboard(metadata) : undefined}
>
<ContentRenderer content={metadata} />
</ExpandableSection>
</div>
</div>
);
Expand Down
2 changes: 2 additions & 0 deletions frontend/ui/src/types/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ export interface Span {
total_tokens: number | null;
input: string | null;
output: string | null;
metadata: string | null;
}

export interface TraceDetail {
Expand All @@ -121,6 +122,7 @@ export interface TraceDetail {
release: string | null;
input: string | null;
output: string | null;
metadata: string | null;
spans: Span[];
}

Expand Down
Loading
Loading