diff --git a/otelx/jaeger.go b/otelx/jaeger.go index 0ad07a2c..84972e65 100644 --- a/otelx/jaeger.go +++ b/otelx/jaeger.go @@ -18,9 +18,15 @@ import ( "go.opentelemetry.io/otel/trace" ) -// Optionally, Config.Providers.Jaeger.LocalAgentAddress can be set. -// NOTE: If Config.Providers.Jaeger.Sampling.ServerURL is not specfied, -// AlwaysSample is used. +// SetupJaeger configures and returns a Jaeger tracer. +// +// The returned tracer will by default attempt to send spans to a local Jaeger agent. +// Optionally, [otelx.JaegerConfig.LocalAgentAddress] can be set to specify a different target. +// +// By default, unless a parent sampler has taken a sampling decision, every span is sampled. +// [otelx.JaegerSampling.TraceIdRatio] may be used to customize the sampling probability, +// optionally alongside [otelx.JaegerSampling.ServerURL] to consult a remote server +// for the sampling strategy to be used. func SetupJaeger(t *Tracer, tracerName string, c *Config) (trace.Tracer, error) { host, port, err := net.SplitHostPort(c.Providers.Jaeger.LocalAgentAddress) if err != nil { @@ -45,30 +51,35 @@ func SetupJaeger(t *Tracer, tracerName string, c *Config) (trace.Tracer, error) } samplingServerURL := c.Providers.Jaeger.Sampling.ServerURL + traceIdRatio := c.Providers.Jaeger.Sampling.TraceIdRatio + + sampler := sdktrace.TraceIDRatioBased(traceIdRatio) if samplingServerURL != "" { - jaegerRemoteSampler := jaegerremote.New( + sampler = jaegerremote.New( "jaegerremote", jaegerremote.WithSamplingServerURL(samplingServerURL), - jaegerremote.WithInitialSampler(sdktrace.TraceIDRatioBased(c.Providers.Jaeger.Sampling.TraceIdRatio)), + jaegerremote.WithInitialSampler(sampler), ) - tpOpts = append(tpOpts, sdktrace.WithSampler(jaegerRemoteSampler)) - } else { - tpOpts = append(tpOpts, sdktrace.WithSampler(sdktrace.AlwaysSample())) } + + // Respect any sampling decision taken by the client. + sampler = sdktrace.ParentBased(sampler) + tpOpts = append(tpOpts, sdktrace.WithSampler(sampler)) + tp := sdktrace.NewTracerProvider(tpOpts...) otel.SetTracerProvider(tp) // At the moment, software across our cloud stack only support Zipkin (B3) - // and Jaeger propagation formats. Proposals for standardized formats for - // context propagation are in the works (ref: https://www.w3.org/TR/trace-context/ + // and Jaeger propagation formats. For interoperability with other setups, + // we also configure propagation using standardized formats for + // context propagation (ref: https://www.w3.org/TR/trace-context/ // and https://www.w3.org/TR/baggage/). - // - // Simply add propagation.TraceContext{} and propagation.Baggage{} - // here to enable those as well. prop := propagation.NewCompositeTextMapPropagator( jaegerPropagator.Jaeger{}, b3.New(b3.WithInjectEncoding(b3.B3MultipleHeader|b3.B3SingleHeader)), + propagation.TraceContext{}, + propagation.Baggage{}, ) otel.SetTextMapPropagator(prop) return tp.Tracer(tracerName), nil diff --git a/otelx/otel_test.go b/otelx/otel_test.go index 09d8e7af..f6b60991 100644 --- a/otelx/otel_test.go +++ b/otelx/otel_test.go @@ -13,12 +13,16 @@ import ( "net/http" "net/http/httptest" "net/url" + "strings" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/trace" "golang.org/x/sync/errgroup" "google.golang.org/protobuf/proto" @@ -65,8 +69,8 @@ type zipkinSpanRequest struct { Tags map[string]string } -func TestJaegerTracer(t *testing.T) { - done := make(chan struct{}) +// runTestJaegerAgent starts a mock server listening on a random port for Jaeger spans sent over UDP. +func runTestJaegerAgent(t *testing.T, errs *errgroup.Group, done chan<- struct{}) net.Conn { addr := "127.0.0.1:0" udpAddr, err := net.ResolveUDPAddr("udp", addr) @@ -75,8 +79,6 @@ func TestJaegerTracer(t *testing.T) { srv, err := net.ListenUDP("udp", udpAddr) require.NoError(t, err) - errs := errgroup.Group{} - errs.Go(func() error { t.Logf("Starting test UDP server for Jaeger spans on %s", srv.LocalAddr().String()) @@ -91,7 +93,7 @@ func TestJaegerTracer(t *testing.T) { continue } if len(buf) != 0 { - t.Log("recieved span!") + t.Log("received span!") done <- struct{}{} } break @@ -99,12 +101,24 @@ func TestJaegerTracer(t *testing.T) { return nil }) + return srv +} + +func TestJaegerTracer(t *testing.T) { + done := make(chan struct{}) + errs := errgroup.Group{} + + srv := runTestJaegerAgent(t, &errs, done) + jt, err := New(testTracingComponent, logrusx.New("ory/x", "1"), &Config{ ServiceName: "Ory X", Provider: "jaeger", Providers: ProvidersConfig{ Jaeger: JaegerConfig{ LocalAgentAddress: srv.LocalAddr().String(), + Sampling: JaegerSampling{ + TraceIdRatio: 1, + }, }, }, }) @@ -123,6 +137,52 @@ func TestJaegerTracer(t *testing.T) { require.NoError(t, errs.Wait()) } +func TestJaegerTracerRespectsParentSamplingDecision(t *testing.T) { + done := make(chan struct{}) + errs := errgroup.Group{} + + srv := runTestJaegerAgent(t, &errs, done) + + jt, err := New(testTracingComponent, logrusx.New("ory/x", "1"), &Config{ + ServiceName: "Ory X", + Provider: "jaeger", + Providers: ProvidersConfig{ + Jaeger: JaegerConfig{ + LocalAgentAddress: srv.LocalAddr().String(), + Sampling: JaegerSampling{ + // Effectively disable local sampling. + TraceIdRatio: 0, + }, + }, + }, + }) + require.NoError(t, err) + + traceId := strings.Repeat("a", 32) + spanId := strings.Repeat("b", 16) + sampledFlag := "1" + traceHeaders := map[string]string{"uber-trace-id": traceId + ":" + spanId + ":0:" + sampledFlag} + + ctx := otel.GetTextMapPropagator().Extract(context.Background(), propagation.MapCarrier(traceHeaders)) + spanContext := trace.SpanContextFromContext(ctx) + + assert.True(t, spanContext.IsValid()) + assert.True(t, spanContext.IsSampled()) + assert.True(t, spanContext.IsRemote()) + + trc := jt.Tracer() + _, span := trc.Start(ctx, "testSpan", trace.WithLinks(trace.Link{SpanContext: spanContext})) + span.SetAttributes(attribute.Bool("testAttribute", true)) + span.End() + + select { + case <-done: + case <-time.After(15 * time.Second): + t.Fatalf("Test server did not receive spans") + } + require.NoError(t, errs.Wait()) +} + func TestZipkinTracer(t *testing.T) { done := make(chan struct{}) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {