Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consider spans with exception spanEvents as errors #30064

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion pkg/trace/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -686,13 +686,24 @@ func (a *Agent) runSamplers(now time.Time, ts *info.TagStats, pt traceutil.Proce

func traceContainsError(trace pb.Trace) bool {
for _, span := range trace {
if span.Error != 0 {
if spanIsError(span) {
return true
}
}
return false
}

// spanIsError reports whether span counts as an error span. A span qualifies
// when its Error field is non-zero, or when spanContainsExceptionSpanEvent
// finds the "_dd.span_events.has_exception" meta tag set to "true".
func spanIsError(span *pb.Span) bool {
	// Explicit error flag wins; no need to inspect the meta tags.
	if span.Error != 0 {
		return true
	}
	return spanContainsExceptionSpanEvent(span)
}

func spanContainsExceptionSpanEvent(span *pb.Span) bool {
if hasExceptionSpanEvents, ok := span.Meta["_dd.span_events.has_exception"]; ok && hasExceptionSpanEvents == "true" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When will this tag be set?
When:

I ask because, if the logic is consistent, it's best to implement it here instead of letting every library implement it individually.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Marco! The tag is set for any exception (see otlp.go).

return true
}
return false
}

func filteredByTags(root *pb.Span, require, reject []*config.Tag, requireRegex, rejectRegex []*config.TagRegex) bool {
for _, tag := range reject {
if v, ok := root.Meta[tag.K]; ok && (tag.V == "" || v == tag.V) {
Expand Down
26 changes: 17 additions & 9 deletions pkg/trace/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1273,10 +1273,10 @@ func TestSampling(t *testing.T) {
}
}

func TestSample(t *testing.T) {
func TestSampleTrace(t *testing.T) {
now := time.Now()
cfg := &config.AgentConfig{TargetTPS: 5, ErrorTPS: 1000, Features: make(map[string]struct{})}
genSpan := func(decisionMaker string, priority sampler.SamplingPriority, err int32) traceutil.ProcessedTrace {
genSpan := func(decisionMaker string, priority sampler.SamplingPriority, err int32, exceptionInSpanEvent bool) traceutil.ProcessedTrace {
root := &pb.Span{
Service: "serv1",
Start: now.UnixNano(),
Expand All @@ -1285,6 +1285,9 @@ func TestSample(t *testing.T) {
Error: err, // If 1, the Error Sampler will keep the trace, if 0, it will not be sampled
Meta: map[string]string{},
}
if exceptionInSpanEvent {
root.Meta["_dd.span_events.has_exception"] = "true" // the Error Sampler will keep the trace
}
chunk := testutil.TraceChunkWithSpan(root)
if decisionMaker != "" {
chunk.Tags["_dd.p.dm"] = decisionMaker
Expand All @@ -1300,37 +1303,42 @@ func TestSample(t *testing.T) {
keepWithFeature bool
}{
"userdrop-error-no-dm-sampled": {
trace: genSpan("", sampler.PriorityUserDrop, 1),
trace: genSpan("", sampler.PriorityUserDrop, 1, false),
keep: false,
keepWithFeature: true,
},
"userdrop-error-manual-dm-unsampled": {
trace: genSpan("-4", sampler.PriorityUserDrop, 1),
trace: genSpan("-4", sampler.PriorityUserDrop, 1, false),
keep: false,
keepWithFeature: false,
},
"userdrop-error-agent-dm-sampled": {
trace: genSpan("-1", sampler.PriorityUserDrop, 1),
trace: genSpan("-1", sampler.PriorityUserDrop, 1, false),
keep: false,
keepWithFeature: true,
},
"userkeep-error-no-dm-sampled": {
trace: genSpan("", sampler.PriorityUserKeep, 1),
trace: genSpan("", sampler.PriorityUserKeep, 1, false),
keep: true,
keepWithFeature: true,
},
"userkeep-error-agent-dm-sampled": {
trace: genSpan("-1", sampler.PriorityUserKeep, 1),
trace: genSpan("-1", sampler.PriorityUserKeep, 1, false),
keep: true,
keepWithFeature: true,
},
"autodrop-error-sampled": {
trace: genSpan("", sampler.PriorityAutoDrop, 1),
trace: genSpan("", sampler.PriorityAutoDrop, 1, false),
keep: true,
keepWithFeature: true,
},
"autodrop-errorspanevent-sampled": {
trace: genSpan("", sampler.PriorityAutoDrop, 0, true),
keep: true,
keepWithFeature: true,
},
"autodrop-not-sampled": {
trace: genSpan("", sampler.PriorityAutoDrop, 0),
trace: genSpan("", sampler.PriorityAutoDrop, 0, false),
keep: false,
keepWithFeature: false,
},
Expand Down
6 changes: 6 additions & 0 deletions pkg/trace/api/otlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,12 @@ func (o *OTLPReceiver) convertSpan(rattr map[string]string, lib pcommon.Instrume
if in.Events().Len() > 0 {
transform.SetMetaOTLP(span, "events", transform.MarshalEvents(in.Events()))
}
for i := range in.Events().Len() {
if in.Events().At(i).Name() == "exception" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use a "contains" check here instead of an exact match?

Copy link
Author

@Guillaume-Barrier Guillaume-Barrier Oct 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I can see in the OTel specs, the name of an exception span event should always be "exception". Is there any reason not to use the built-in string comparison operator ==?

span.Meta["_dd.span_events.has_exception"] = "true"
break
}
}
if in.Links().Len() > 0 {
transform.SetMetaOTLP(span, "_dd.span_links", transform.MarshalLinks(in.Links()))
}
Expand Down
111 changes: 57 additions & 54 deletions pkg/trace/api/otlp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1206,22 +1206,23 @@ func testOTLPConvertSpan(enableReceiveResourceSpansV2 bool, t *testing.T) {
Duration: 200000000,
Error: 1,
Meta: map[string]string{
"name": "john",
"otel.trace_id": "72df520af2bde7a5240031ead750e5f3",
"env": "staging",
"otel.status_code": "Error",
"otel.status_description": "Error",
"otel.library.name": "ddtracer",
"otel.library.version": "v2",
"service.version": "v1.2.3",
"w3c.tracestate": "state",
"version": "v1.2.3",
"events": `[{"time_unix_nano":123,"name":"boom","attributes":{"key":"Out of memory","accuracy":"2.4"},"dropped_attributes_count":2},{"time_unix_nano":456,"name":"exception","attributes":{"exception.message":"Out of memory","exception.type":"mem","exception.stacktrace":"1/2/3"},"dropped_attributes_count":2}]`,
"_dd.span_links": `[{"trace_id":"fedcba98765432100123456789abcdef","span_id":"abcdef0123456789","trace_state":"dd=asdf256,ee=jkl;128", "attributes":{"a1":"v1","a2":"v2"},"dropped_attributes_count":24},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","attributes":{"a3":"v2","a4":"v4"}},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","dropped_attributes_count":2},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210"}]`,
"error.msg": "Out of memory",
"error.type": "mem",
"error.stack": "1/2/3",
"span.kind": "server",
"name": "john",
"otel.trace_id": "72df520af2bde7a5240031ead750e5f3",
"env": "staging",
"otel.status_code": "Error",
"otel.status_description": "Error",
"otel.library.name": "ddtracer",
"otel.library.version": "v2",
"service.version": "v1.2.3",
"w3c.tracestate": "state",
"version": "v1.2.3",
"events": `[{"time_unix_nano":123,"name":"boom","attributes":{"key":"Out of memory","accuracy":"2.4"},"dropped_attributes_count":2},{"time_unix_nano":456,"name":"exception","attributes":{"exception.message":"Out of memory","exception.type":"mem","exception.stacktrace":"1/2/3"},"dropped_attributes_count":2}]`,
"_dd.span_links": `[{"trace_id":"fedcba98765432100123456789abcdef","span_id":"abcdef0123456789","trace_state":"dd=asdf256,ee=jkl;128", "attributes":{"a1":"v1","a2":"v2"},"dropped_attributes_count":24},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","attributes":{"a3":"v2","a4":"v4"}},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","dropped_attributes_count":2},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210"}]`,
"error.msg": "Out of memory",
"error.type": "mem",
"error.stack": "1/2/3",
"span.kind": "server",
"_dd.span_events.has_exception": "true",
},
Metrics: map[string]float64{
"approx": 1.2,
Expand Down Expand Up @@ -1331,26 +1332,27 @@ func testOTLPConvertSpan(enableReceiveResourceSpansV2 bool, t *testing.T) {
Duration: 200000000,
Error: 1,
Meta: map[string]string{
"name": "john",
"env": "prod",
"deployment.environment": "prod",
"otel.trace_id": "72df520af2bde7a5240031ead750e5f3",
"otel.status_code": "Error",
"otel.status_description": "Error",
"otel.library.name": "ddtracer",
"otel.library.version": "v2",
"service.version": "v1.2.3",
"w3c.tracestate": "state",
"version": "v1.2.3",
"events": "[{\"time_unix_nano\":123,\"name\":\"boom\",\"attributes\":{\"message\":\"Out of memory\",\"accuracy\":\"2.4\"},\"dropped_attributes_count\":2},{\"time_unix_nano\":456,\"name\":\"exception\",\"attributes\":{\"exception.message\":\"Out of memory\",\"exception.type\":\"mem\",\"exception.stacktrace\":\"1/2/3\"},\"dropped_attributes_count\":2}]",
"_dd.span_links": `[{"trace_id":"fedcba98765432100123456789abcdef","span_id":"abcdef0123456789","trace_state":"dd=asdf256,ee=jkl;128","attributes":{"a1":"v1","a2":"v2"},"dropped_attributes_count":24},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","attributes":{"a3":"v2","a4":"v4"}},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","dropped_attributes_count":2},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210"}]`,
"error.msg": "Out of memory",
"error.type": "mem",
"error.stack": "1/2/3",
"http.method": "GET",
"http.route": "/path",
"peer.service": "userbase",
"span.kind": "server",
"name": "john",
"env": "prod",
"deployment.environment": "prod",
"otel.trace_id": "72df520af2bde7a5240031ead750e5f3",
"otel.status_code": "Error",
"otel.status_description": "Error",
"otel.library.name": "ddtracer",
"otel.library.version": "v2",
"service.version": "v1.2.3",
"w3c.tracestate": "state",
"version": "v1.2.3",
"events": "[{\"time_unix_nano\":123,\"name\":\"boom\",\"attributes\":{\"message\":\"Out of memory\",\"accuracy\":\"2.4\"},\"dropped_attributes_count\":2},{\"time_unix_nano\":456,\"name\":\"exception\",\"attributes\":{\"exception.message\":\"Out of memory\",\"exception.type\":\"mem\",\"exception.stacktrace\":\"1/2/3\"},\"dropped_attributes_count\":2}]",
"_dd.span_links": `[{"trace_id":"fedcba98765432100123456789abcdef","span_id":"abcdef0123456789","trace_state":"dd=asdf256,ee=jkl;128","attributes":{"a1":"v1","a2":"v2"},"dropped_attributes_count":24},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","attributes":{"a3":"v2","a4":"v4"}},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","dropped_attributes_count":2},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210"}]`,
"error.msg": "Out of memory",
"error.type": "mem",
"error.stack": "1/2/3",
"http.method": "GET",
"http.route": "/path",
"peer.service": "userbase",
"span.kind": "server",
"_dd.span_events.has_exception": "true",
},
Metrics: map[string]float64{
"approx": 1.2,
Expand Down Expand Up @@ -1459,24 +1461,25 @@ func testOTLPConvertSpan(enableReceiveResourceSpansV2 bool, t *testing.T) {
Duration: 200000000,
Error: 1,
Meta: map[string]string{
"name": "john",
"env": "staging",
"otel.status_code": "Error",
"otel.status_description": "Error",
"otel.library.name": "ddtracer",
"otel.library.version": "v2",
"service.version": "v1.2.3",
"w3c.tracestate": "state",
"version": "v1.2.3",
"otel.trace_id": "72df520af2bde7a5240031ead750e5f3",
"events": "[{\"time_unix_nano\":123,\"name\":\"boom\",\"attributes\":{\"message\":\"Out of memory\",\"accuracy\":\"2.4\"},\"dropped_attributes_count\":2},{\"time_unix_nano\":456,\"name\":\"exception\",\"attributes\":{\"exception.message\":\"Out of memory\",\"exception.type\":\"mem\",\"exception.stacktrace\":\"1/2/3\"},\"dropped_attributes_count\":2}]",
"_dd.span_links": `[{"trace_id":"fedcba98765432100123456789abcdef","span_id":"abcdef0123456789","trace_state":"dd=asdf256,ee=jkl;128","attributes":{"a1":"v1","a2":"v2"},"dropped_attributes_count":24},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","attributes":{"a3":"v2","a4":"v4"}},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","dropped_attributes_count":2},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210"}]`,
"error.msg": "Out of memory",
"error.type": "mem",
"error.stack": "1/2/3",
"http.method": "GET",
"http.route": "/path",
"span.kind": "server",
"name": "john",
"env": "staging",
"otel.status_code": "Error",
"otel.status_description": "Error",
"otel.library.name": "ddtracer",
"otel.library.version": "v2",
"service.version": "v1.2.3",
"w3c.tracestate": "state",
"version": "v1.2.3",
"otel.trace_id": "72df520af2bde7a5240031ead750e5f3",
"events": "[{\"time_unix_nano\":123,\"name\":\"boom\",\"attributes\":{\"message\":\"Out of memory\",\"accuracy\":\"2.4\"},\"dropped_attributes_count\":2},{\"time_unix_nano\":456,\"name\":\"exception\",\"attributes\":{\"exception.message\":\"Out of memory\",\"exception.type\":\"mem\",\"exception.stacktrace\":\"1/2/3\"},\"dropped_attributes_count\":2}]",
"_dd.span_links": `[{"trace_id":"fedcba98765432100123456789abcdef","span_id":"abcdef0123456789","trace_state":"dd=asdf256,ee=jkl;128","attributes":{"a1":"v1","a2":"v2"},"dropped_attributes_count":24},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","attributes":{"a3":"v2","a4":"v4"}},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210","dropped_attributes_count":2},{"trace_id":"abcdef0123456789abcdef0123456789","span_id":"fedcba9876543210"}]`,
"error.msg": "Out of memory",
"error.type": "mem",
"error.stack": "1/2/3",
"http.method": "GET",
"http.route": "/path",
"span.kind": "server",
"_dd.span_events.has_exception": "true",
},
Metrics: map[string]float64{
"approx": 1.2,
Expand Down
6 changes: 6 additions & 0 deletions pkg/trace/transform/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ func OtelSpanToDDSpan(
if otelspan.Events().Len() > 0 {
ddspan.Meta["events"] = MarshalEvents(otelspan.Events())
}
for i := range otelspan.Events().Len() {
if otelspan.Events().At(i).Name() == "exception" {
ddspan.Meta["_dd.span_events.has_exception"] = "true"
break
}
}
if otelspan.Links().Len() > 0 {
ddspan.Meta["_dd.span_links"] = MarshalLinks(otelspan.Links())
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# The sections from every release note are combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
enhancements:
- |
APM: Consider spans with OpenTelemetry exception Span Events as error spans.
Loading