-
Notifications
You must be signed in to change notification settings - Fork 77
Expand file tree
/
Copy pathtest_otel_transform_metadata.py
More file actions
162 lines (130 loc) · 5.48 KB
/
test_otel_transform_metadata.py
File metadata and controls
162 lines (130 loc) · 5.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""Tests for metadata extraction in otel_transform."""
import base64
import json
from worker.otel_transform import transform_otel_to_clickhouse
def _make_trace_id() -> str:
    """Produce the fixed trace ID used by these tests.

    The ID is sixteen ``0x01`` bytes, base64-encoded and returned as text.
    """
    raw_trace_id = b"\x01" * 16
    encoded = base64.b64encode(raw_trace_id)
    return encoded.decode()
def _make_span_id(byte: int = 0x02) -> str:
    """Produce a span ID made of ``byte`` repeated eight times.

    The eight raw bytes are base64-encoded and returned as text.
    """
    raw_span_id = bytes([byte] * 8)
    encoded = base64.b64encode(raw_span_id)
    return encoded.decode()
def _attr(key: str, value) -> dict:
    """Build an OTEL attribute entry for ``key`` holding ``value``.

    The wrapper field is selected from the Python type of ``value``:

    * ``str``   -> ``stringValue``
    * ``bool``  -> ``boolValue``
    * ``int``   -> ``intValue`` (rendered as a decimal string)
    * ``float`` -> ``doubleValue``
    * anything else (e.g. a dict) -> ``stringValue`` holding ``json.dumps(value)``

    Returns:
        A dict of the form ``{"key": key, "value": {<field>: <payload>}}``.

    Raises:
        TypeError: propagated from ``json.dumps`` when a fallback value is
            not JSON-serialisable.
    """
    if isinstance(value, str):
        wrapped = {"stringValue": value}
    elif isinstance(value, bool):
        # Must be tested before int: bool is a subclass of int, so True/False
        # would otherwise be emitted as intValue "True"/"False".
        wrapped = {"boolValue": value}
    elif isinstance(value, int):
        wrapped = {"intValue": str(value)}
    elif isinstance(value, float):
        wrapped = {"doubleValue": value}
    else:
        # Fall back to stringValue for dicts (and other values) serialised as
        # JSON. Strings never reach this branch, so no str re-check is needed.
        wrapped = {"stringValue": json.dumps(value)}
    return {"key": key, "value": wrapped}
def _otel_payload(span_attributes: list[dict], *, parent_span_id: str | None = None) -> dict:
    """Assemble a minimal OTEL payload: one resource span wrapping one span.

    ``span_attributes`` is used as the span's ``attributes`` list. A
    ``parentSpanId`` field is added to the span only when ``parent_span_id``
    is not ``None``.
    """
    single_span = {
        "traceId": _make_trace_id(),
        "spanId": _make_span_id(),
        "name": "test-span",
        "kind": "SPAN_KIND_INTERNAL",
        "startTimeUnixNano": "1700000000000000000",
        "endTimeUnixNano": "1700000001000000000",
        "attributes": span_attributes,
        "status": {},
    }
    if parent_span_id is not None:
        single_span["parentSpanId"] = parent_span_id

    scope_span = {"scope": {"name": "test"}, "spans": [single_span]}
    resource_span = {
        "resource": {"attributes": []},
        "scopeSpans": [scope_span],
    }
    return {"resourceSpans": [resource_span]}
# ── Tests ──────────────────────────────────────────────────────────
def test_explicit_metadata_extracted():
    """A JSON-encoded traceroot.span.metadata attribute round-trips into span metadata."""
    expected_meta = {"custom_key": "custom_value", "run_id": 42}
    metadata_attr = _attr("traceroot.span.metadata", json.dumps(expected_meta))
    payload = _otel_payload([metadata_attr])

    _traces, spans = transform_otel_to_clickhouse(payload, project_id="proj-1")

    assert len(spans) == 1
    span = spans[0]
    assert "metadata" in span
    # The stored metadata is a JSON string; decode it before comparing.
    assert json.loads(span["metadata"]) == expected_meta
def test_extra_attributes_become_metadata():
    """Custom attributes outside the known set show up in the span's metadata."""
    custom_attrs = [
        _attr("my.custom.attr", "hello"),
        _attr("another.thing", "world"),
    ]
    payload = _otel_payload(custom_attrs)

    _traces, spans = transform_otel_to_clickhouse(payload, project_id="proj-1")

    assert len(spans) == 1
    decoded = json.loads(spans[0]["metadata"])
    assert decoded["my.custom.attr"] == "hello"
    assert decoded["another.thing"] == "world"
def test_known_attributes_excluded_from_metadata():
    """Known attributes (traceroot.span.input, gen_ai.*, etc.) do NOT leak into metadata."""
    # Known key -> value pairs, listed once and reused both to build the
    # payload and to check that none of the keys end up in the metadata.
    known_attrs = {
        "traceroot.span.input": "some input",
        "gen_ai.system": "openai",
        "llm.model_name": "gpt-4",
        "input.value": "hi",
        "openinference.span.kind": "LLM",
        "session.id": "s-1",
        "user.id": "u-1",
        "llm.input_messages.0.message.role": "user",
        "llm.input_messages.0.message.content": "hello",
        "llm.output_messages.0.message.role": "assistant",
        "llm.output_messages.0.message.content": "hi there",
    }
    attributes = [_attr(name, text) for name, text in known_attrs.items()]
    # One unknown attribute so we can verify metadata dict exists but excludes known keys
    attributes.append(_attr("my.custom.flag", "yes"))

    _traces, spans = transform_otel_to_clickhouse(
        _otel_payload(attributes), project_id="proj-1"
    )
    meta = json.loads(spans[0]["metadata"])

    assert "my.custom.flag" in meta
    # None of the known keys should be present
    for key in known_attrs:
        assert key not in meta, f"{key} should not appear in metadata"
def test_trace_metadata_extracted():
    """traceroot.trace.metadata on a span built without a parent ID populates the trace record."""
    expected_meta = {"experiment": "v2", "dataset": "eval-100"}
    trace_meta_attr = _attr("traceroot.trace.metadata", json.dumps(expected_meta))
    payload = _otel_payload([trace_meta_attr])

    traces, _spans = transform_otel_to_clickhouse(payload, project_id="proj-1")

    assert len(traces) == 1
    trace = traces[0]
    assert "metadata" in trace
    # The stored metadata is a JSON string; decode it before comparing.
    assert json.loads(trace["metadata"]) == expected_meta
def test_no_metadata_when_no_extra_attributes():
    """With only input/output/type traceroot attributes, the span carries no metadata key."""
    known_only = [
        _attr("traceroot.span.input", "hello"),
        _attr("traceroot.span.output", "world"),
        _attr("traceroot.span.type", "LLM"),
    ]
    payload = _otel_payload(known_only)

    _traces, spans = transform_otel_to_clickhouse(payload, project_id="proj-1")

    assert len(spans) == 1
    assert "metadata" not in spans[0]