Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardize tracing span tags with proper error annotation #938

Merged
merged 1 commit into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions runtime/client_http_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"time"

"github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/ext"
"github.com/pkg/errors"
"github.com/uber/zanzibar/v2/runtime/jsonwrapper"

Expand Down Expand Up @@ -187,9 +188,9 @@ func (req *ClientHTTPRequest) WriteBytes(
// Do will send the request out.
func (req *ClientHTTPRequest) Do() (*ClientHTTPResponse, error) {
opName := fmt.Sprintf("%s.%s(%s)", req.ClientID, req.MethodName, req.ClientTargetEndpoint)
urlTag := opentracing.Tag{Key: "URL", Value: req.httpReq.URL}
methodTag := opentracing.Tag{Key: "Method", Value: req.httpReq.Method}
span, ctx := opentracing.StartSpanFromContext(req.ctx, opName, urlTag, methodTag)
urlTag := opentracing.Tag{Key: string(ext.HTTPUrl), Value: req.httpReq.URL}
methodTag := opentracing.Tag{Key: string(ext.HTTPMethod), Value: req.httpReq.Method}
span, ctx := opentracing.StartSpanFromContext(req.ctx, opName, urlTag, methodTag, ext.SpanKindRPCClient, tracingComponentTag)
err := req.InjectSpanToHeader(span, opentracing.HTTPHeaders)
if err != nil {
/* coverage ignore next line */
Expand All @@ -207,6 +208,7 @@ func (req *ClientHTTPRequest) Do() (*ClientHTTPResponse, error) {
res, retryCount, err = req.executeDoWithRetry(ctx) // new code for retry and timeout per ep level
}

updateClientSpanWithError(span, res, err)
span.Finish()

AppendLogFieldsToContext(req.ctx, zap.Int64(fmt.Sprintf(logFieldClientAttempts, req.ClientID), retryCount))
Expand Down
9 changes: 5 additions & 4 deletions runtime/server_http_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ func (req *ServerHTTPRequest) start() {

if req.tracer != nil {
opName := fmt.Sprintf("%s.%s", req.EndpointName, req.HandlerName)
urlTag := opentracing.Tag{Key: "URL", Value: req.URL}
MethodTag := opentracing.Tag{Key: "Method", Value: req.Method}
urlTag := opentracing.Tag{Key: string(ext.HTTPUrl), Value: req.URL}
MethodTag := opentracing.Tag{Key: string(ext.HTTPMethod), Value: req.Method}
carrier := opentracing.HTTPHeadersCarrier(req.httpRequest.Header)
spanContext, err := req.tracer.Extract(opentracing.HTTPHeaders, carrier)
var span opentracing.Span
Expand All @@ -201,9 +201,10 @@ func (req *ServerHTTPRequest) start() {
/* coverage ignore next line */
req.contextLogger.WarnZ(req.Context(), "Error Extracting Trace Headers", zap.Error(err))
}
span = req.tracer.StartSpan(opName, urlTag, MethodTag)
span = req.tracer.StartSpan(opName, urlTag, MethodTag, tracingComponentTag, ext.SpanKindRPCServer)
ext.LogError(span, err)
} else {
span = req.tracer.StartSpan(opName, urlTag, MethodTag, ext.RPCServerOption(spanContext))
span = req.tracer.StartSpan(opName, urlTag, MethodTag, tracingComponentTag, ext.RPCServerOption(spanContext))
}
req.span = span
}
Expand Down
7 changes: 6 additions & 1 deletion runtime/server_http_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"time"

"github.com/buger/jsonparser"
"github.com/opentracing/opentracing-go/ext"
"github.com/pkg/errors"
"github.com/uber-go/tally"
"github.com/uber/zanzibar/v2/runtime/jsonwrapper"
Expand Down Expand Up @@ -126,14 +127,18 @@ func (res *ServerHTTPResponse) finish(ctx context.Context) {
tagged.Counter(endpointStatus).Inc(1)
}

span := res.Request.GetSpan()
logFn := res.contextLogger.Debug
if !known || res.StatusCode >= 400 && res.StatusCode < 600 {
tagged.Counter(endpointAppErrors).Inc(1)
logFn = res.contextLogger.WarnZ
if span != nil {
ext.Error.Set(span, true)
}
}

span := res.Request.GetSpan()
if span != nil {
ext.HTTPStatusCode.Set(span, uint16(res.StatusCode))
span.Finish()
}

Expand Down
25 changes: 25 additions & 0 deletions runtime/tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,19 @@
package zanzibar

import (
"net/http"

"github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/ext"
"github.com/uber/jaeger-client-go"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)

var (
tracingComponentTag = opentracing.Tag{Key: string(ext.Component), Value: "zanzibar"}
)

// The span context struct in otel is not exportable, an this is the way to access fields in span context
// https://github.com/open-telemetry/opentelemetry-go/blob/v1.16.0/bridge/opentracing/README.md#extended-functionality
type otelSpanContextProvider interface {
Expand Down Expand Up @@ -55,3 +62,21 @@ func extractSpanLogFields(span opentracing.Span) []zap.Field {
}
return fields
}

// updateClientSpanWithError updates a client span with the error tags and logs
func updateClientSpanWithError(span opentracing.Span, res *http.Response, err error) {
if span == nil {
return
}

if res != nil {
ext.HTTPStatusCode.Set(span, uint16(res.StatusCode))
if res.StatusCode >= 400 {
ext.Error.Set(span, true)
}
}
if err != nil {
ext.Error.Set(span, true)
ext.LogError(span, err)
}
}
94 changes: 87 additions & 7 deletions runtime/tracing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@
package zanzibar

import (
"io"
"net/http"
"testing"

"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/uber/jaeger-client-go"
Expand All @@ -36,13 +40,7 @@ func TestExtractSpanLogFields(t *testing.T) {
})

t.Run("jaeger span", func(t *testing.T) {
tracer, closer, err := config.Configuration{
ServiceName: "test",
Sampler: &config.SamplerConfig{
Type: "const",
Param: 1,
}}.NewTracer(config.Reporter(jaeger.NewInMemoryReporter()))
require.NoError(t, err)
tracer, closer := createTestTracer(t)
defer closer.Close()

span := tracer.StartSpan("op")
Expand All @@ -58,3 +56,85 @@ func TestExtractSpanLogFields(t *testing.T) {
}, fields)
})
}

func TestUpdateClientSpanWithError(t *testing.T) {
tests := []struct {
name string
res *http.Response
err error
errTagExpected bool
statusCodeTagExpected bool
}{
{
name: "nil response; no err",
},
{
name: "nil response; err",
err: errors.New("some error"),
errTagExpected: true,
},
{
name: "200 response; no err",
res: &http.Response{StatusCode: http.StatusOK},
statusCodeTagExpected: true,
},
{
name: "200 response; err",
res: &http.Response{StatusCode: http.StatusOK},
err: errors.New("some error"),
errTagExpected: true,
statusCodeTagExpected: true,
},
{
name: "400 response; no err",
res: &http.Response{StatusCode: http.StatusBadRequest},
errTagExpected: true,
statusCodeTagExpected: true,
},
{
name: "400 response; err",
res: &http.Response{StatusCode: http.StatusBadRequest},
err: errors.New("some error"),
errTagExpected: true,
statusCodeTagExpected: true,
},
}

tracer, closer := createTestTracer(t)
defer closer.Close()

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
span := tracer.StartSpan("test")
updateClientSpanWithError(span, test.res, test.err)
jSpan, ok := span.(*jaeger.Span)
require.True(t, ok)

_, exist := jSpan.Tags()["error"]
assert.Equal(t, test.errTagExpected, exist)
statusCode, exist := jSpan.Tags()["http.status_code"]
assert.Equal(t, test.statusCodeTagExpected, exist)
if test.statusCodeTagExpected {
assert.Equal(t, uint16(test.res.StatusCode), statusCode)
}
if test.err != nil {
assert.Len(t, jSpan.Logs(), 1)
}
})
}
}

func TestUpdateClientSpanWithErrorNilSpan(t *testing.T) {

}

func createTestTracer(t *testing.T) (opentracing.Tracer, io.Closer) {
tracer, closer, err := config.Configuration{
ServiceName: "test",
Sampler: &config.SamplerConfig{
Type: "const",
Param: 1,
}}.NewTracer(config.Reporter(jaeger.NewInMemoryReporter()))
require.NoError(t, err)
return tracer, closer
}
125 changes: 125 additions & 0 deletions test/jaeger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ package gateway
import (
"bytes"
"context"
"fmt"
"net/http"
"testing"
"time"

"github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/ext"
"github.com/stretchr/testify/assert"
"github.com/uber/jaeger-client-go"
"github.com/uber/jaeger-client-go/config"
Expand Down Expand Up @@ -94,6 +96,17 @@ func TestHTTPEndpointToHTTPClient(t *testing.T) {
assert.Equal(t, "bar.Hello(Bar::helloWorld)", clientSpan.GetOperationName())
assert.Equal(t, "bar.helloWorld", endpointSpan.GetOperationName())
assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId())

assertStrTag(t, clientSpan, string(ext.SpanKind), "client")
assertStrTag(t, clientSpan, string(ext.Component), "zanzibar")
assertStrTag(t, clientSpan, string(ext.HTTPMethod), "GET")
assertIntTag(t, clientSpan, string(ext.HTTPStatusCode), http.StatusOK)

assertStrTag(t, endpointSpan, string(ext.SpanKind), "server")
assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar")
assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "GET")
assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/bar/hello")
assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), http.StatusOK)
}

func TestHTTPEndpointToHTTPClientWithUpstreamSpan(t *testing.T) {
Expand Down Expand Up @@ -177,6 +190,84 @@ func TestHTTPEndpointToHTTPClientWithUpstreamSpan(t *testing.T) {
assert.Equal(t, "bar.helloWorld", endpointSpan.GetOperationName())
assert.Equal(t, int64(upstreamSpanID), endpointSpan.GetParentSpanId())
assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId())

assertStrTag(t, clientSpan, string(ext.SpanKind), "client")
assertStrTag(t, clientSpan, string(ext.Component), "zanzibar")
assertStrTag(t, clientSpan, string(ext.HTTPMethod), "GET")
assertIntTag(t, clientSpan, string(ext.HTTPStatusCode), http.StatusOK)

assertStrTag(t, endpointSpan, string(ext.SpanKind), "server")
assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar")
assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "GET")
assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/bar/hello")
assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), http.StatusOK)
}

func TestHTTPEndpointToHTTPClientNon200(t *testing.T) {
gateway, err := testGateway.CreateGateway(t, map[string]interface{}{
"clients.bar.serviceName": "barService",
}, &testGateway.Options{
CountMetrics: true,
KnownHTTPBackends: []string{"bar"},
TestBinary: util.DefaultMainFile("example-gateway"),
ConfigFiles: util.DefaultConfigFiles("example-gateway"),
JaegerFlushMillis: 1,
})
if !assert.NoError(t, err, "got bootstrap err") {
return
}
defer gateway.Close()

cg := gateway.(*testGateway.ChildProcessGateway)
gateway.HTTPBackends()["bar"].HandleFunc(
"GET", "/bar/hello",
func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
if _, err := w.Write([]byte(`"hello"`)); err != nil {
t.Fatal("can't write fake response")
}
},
)

_, err = gateway.MakeRequest(
"GET",
"/bar/hello",
nil, nil,
)
if !assert.NoError(t, err, "got http error") {
return
}

// Wait until all spans are flushed
time.Sleep(time.Millisecond * 100)

batches := cg.JaegerAgent.GetJaegerBatches()

var spans []*jaegerGen.Span
for _, batch := range batches {
for _, span := range batch.Spans {
spans = append(spans, span)
}
}

assert.Equal(t, 2, len(spans))
endpointSpan, clientSpan := spans[1], spans[0]
assert.Equal(t, "bar.Hello(Bar::helloWorld)", clientSpan.GetOperationName())
assert.Equal(t, "bar.helloWorld", endpointSpan.GetOperationName())
assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId())

assertStrTag(t, clientSpan, string(ext.SpanKind), "client")
assertStrTag(t, clientSpan, string(ext.Component), "zanzibar")
assertStrTag(t, clientSpan, string(ext.HTTPMethod), "GET")
assertIntTag(t, clientSpan, string(ext.HTTPStatusCode), 500)
assertBoolTag(t, clientSpan, string(ext.Error), true)

assertStrTag(t, endpointSpan, string(ext.SpanKind), "server")
assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar")
assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "GET")
assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/bar/hello")
assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), 500)
assertBoolTag(t, endpointSpan, string(ext.Error), true)
}

func TestHTTPEndpointToTChannelClient(t *testing.T) {
Expand Down Expand Up @@ -344,6 +435,13 @@ func TestHTTPEndpointToTChannelClientWithUpstreamSpan(t *testing.T) {
assert.Equal(t, "baz.call", endpointSpan.GetOperationName())
assert.Equal(t, int64(upstreamSpanID), endpointSpan.GetParentSpanId())
assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId())

assertStrTag(t, clientSpan, string(ext.SpanKind), "client")
assertStrTag(t, endpointSpan, string(ext.SpanKind), "server")
assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar")
assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "POST")
assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/baz/call")
assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), http.StatusNoContent)
}

func TestTChannelEndpoint(t *testing.T) {
Expand Down Expand Up @@ -427,3 +525,30 @@ func TestTChannelEndpoint(t *testing.T) {
assert.Equal(t, "SimpleService::Call", endpointSpan.GetOperationName())
assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId())
}

func assertStrTag(t *testing.T, span *jaegerGen.Span, key, val string) {
for _, tag := range span.GetTags() {
if tag.GetKey() == key && tag.VType == jaegerGen.TagType_STRING && tag.GetVStr() == val {
return
}
}
assert.Fail(t, fmt.Sprintf("tag {key: %s, value: %s} not found", key, val))
}

func assertIntTag(t *testing.T, span *jaegerGen.Span, key string, val int64) {
for _, tag := range span.GetTags() {
if tag.GetKey() == key && tag.VType == jaegerGen.TagType_LONG && tag.GetVLong() == val {
return
}
}
assert.Fail(t, fmt.Sprintf("tag {key: %s, value: %d} not found", key, val))
}

func assertBoolTag(t *testing.T, span *jaegerGen.Span, key string, val bool) {
for _, tag := range span.GetTags() {
if tag.GetKey() == key && tag.VType == jaegerGen.TagType_BOOL && tag.GetVBool() == val {
return
}
}
assert.Fail(t, fmt.Sprintf("tag {key: %s, value: %t} not found", key, val))
}
Loading