Skip to content

Commit

Permalink
feat(backendconnection): use host IP/host port to talk to collector
Browse files Browse the repository at this point in the history
This ensures that traffic is routed to the collector pod on the same
node.
  • Loading branch information
basti1302 committed Aug 23, 2024
1 parent e5b3366 commit f193ae3
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 89 deletions.
18 changes: 9 additions & 9 deletions internal/backendconnection/otelcolresources/desired_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ func (c *oTelColConfig) hasAuthentication() bool {
type exportProtocol string

const (
OtlpGrpcHostPort = 40317
OtlpHttpHostPort = 40318
// ^ We deliberately do not use the default grpc/http ports as host ports. If there is another OTel collector
// daemonset in the cluster (which is not managed by the operator), it will very likely use the 4317/4318 as host
// ports. When the operator creates its daemonset, the pods of one of the two otelcol daemonsets will fail to start
// due to port conflicts.

grpcExportProtocol exportProtocol = "grpc"
httpExportProtocol exportProtocol = "http"
rbacApiVersion = "rbac.authorization.k8s.io/v1"
Expand Down Expand Up @@ -84,13 +91,6 @@ const (
otlpGrpcPort = 4317
otlpHttpPort = 4318

// We deliberately do not use the default grpc/http ports as host ports. If there is another OTel collector
// daemonset in the cluster (which is not managed by the operator), it will very likely use the 4317/4318 as host
// ports. When the operator creates its daemonset, the pods of one of the two otelcol daemonsets will fail to start
// due to port conflicts.
otlpGrpcHostPort = 40317
otlpHttpHostPort = 40318

probesHttpPort = 13133
)

Expand Down Expand Up @@ -519,13 +519,13 @@ func daemonSet(config *oTelColConfig) *appsv1.DaemonSet {
Name: "otlp",
Protocol: corev1.ProtocolTCP,
ContainerPort: otlpGrpcPort,
HostPort: int32(otlpGrpcHostPort),
HostPort: int32(OtlpGrpcHostPort),
},
{
Name: "otlp-http",
Protocol: corev1.ProtocolTCP,
ContainerPort: otlpHttpPort,
HostPort: int32(otlpHttpHostPort),
HostPort: int32(OtlpHttpHostPort),
},
},
Env: env,
Expand Down
18 changes: 11 additions & 7 deletions internal/dash0/webhook/dash0_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,17 +196,19 @@ var _ = Describe("The Dash0 webhook", func() {
{
VolumeMounts: 2,
Dash0VolumeMountIdx: 1,
EnvVars: 3,
EnvVars: 4,
NodeOptionsEnvVarIdx: 1,
Dash0CollectorBaseUrlEnvVarIdx: 2,
NodeIpIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 3,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
{
VolumeMounts: 3,
Dash0VolumeMountIdx: 2,
EnvVars: 4,
EnvVars: 5,
NodeOptionsEnvVarIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 3,
NodeIpIdx: 3,
Dash0CollectorBaseUrlEnvVarIdx: 4,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
},
Expand All @@ -232,18 +234,20 @@ var _ = Describe("The Dash0 webhook", func() {
{
VolumeMounts: 2,
Dash0VolumeMountIdx: 1,
EnvVars: 3,
EnvVars: 4,
NodeOptionsEnvVarIdx: 1,
NodeOptionsUsesValueFrom: true,
Dash0CollectorBaseUrlEnvVarIdx: 2,
NodeIpIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 3,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
{
VolumeMounts: 3,
Dash0VolumeMountIdx: 1,
EnvVars: 3,
EnvVars: 4,
NodeOptionsEnvVarIdx: 1,
NodeOptionsValue: "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry --require something-else --experimental-modules",
NodeIpIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 0,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
Expand Down
38 changes: 27 additions & 11 deletions internal/dash0/workloads/workload_modifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources"
"github.com/dash0hq/dash0-operator/internal/dash0/util"
)

Expand All @@ -32,6 +33,7 @@ const (
envVarNodeOptionsName = "NODE_OPTIONS"
envVarNodeOptionsValue = "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry"
envVarDash0CollectorBaseUrlName = "DASH0_OTEL_COLLECTOR_BASE_URL"
envVarDash0NodeIp = "DASH0_NODE_IP"
)

var (
Expand Down Expand Up @@ -239,10 +241,23 @@ func (m *ResourceModifier) addEnvironmentVariables(container *corev1.Container,
// For now, we directly modify NODE_OPTIONS. Consider migrating to an LD_PRELOAD hook at some point.
m.handleNodeOptionsEnvVar(container, perContainerLogger)

m.addOrReplaceEnvironmentVariable(
m.addOrReplaceEnvironmentVariableWithValue(
container,
envVarDash0CollectorBaseUrlName,
m.instrumentationMetadata.OTelCollectorBaseUrl,
corev1.EnvVar{
Name: envVarDash0NodeIp,
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "status.hostIP",
},
},
},
)
m.addOrReplaceEnvironmentVariableWithValue(
container,
corev1.EnvVar{
Name: envVarDash0CollectorBaseUrlName,
Value: fmt.Sprintf("http://$(%s):%d", envVarDash0NodeIp, otelcolresources.OtlpHttpHostPort),
},
)
}

Expand Down Expand Up @@ -284,22 +299,22 @@ func (m *ResourceModifier) handleNodeOptionsEnvVar(
}
}

func (m *ResourceModifier) addOrReplaceEnvironmentVariable(container *corev1.Container, name string, value string) {
func (m *ResourceModifier) addOrReplaceEnvironmentVariableWithValue(container *corev1.Container, envVar corev1.EnvVar) {
if container.Env == nil {
container.Env = make([]corev1.EnvVar, 0)
}
idx := slices.IndexFunc(container.Env, func(c corev1.EnvVar) bool {
return c.Name == name
return c.Name == envVar.Name
})

if idx < 0 {
container.Env = append(container.Env, corev1.EnvVar{
Name: name,
Value: value,
})
} else {
container.Env = append(container.Env, envVar)
} else if envVar.Value != "" {
container.Env[idx].ValueFrom = nil
container.Env[idx].Value = value
container.Env[idx].Value = envVar.Value
} else {
container.Env[idx].Value = ""
container.Env[idx].ValueFrom = envVar.ValueFrom
}
}

Expand Down Expand Up @@ -394,6 +409,7 @@ func (m *ResourceModifier) removeMount(container *corev1.Container) {

func (m *ResourceModifier) removeEnvironmentVariables(container *corev1.Container) {
m.removeNodeOptions(container)
m.removeEnvironmentVariable(container, envVarDash0NodeIp)
m.removeEnvironmentVariable(container, envVarDash0CollectorBaseUrlName)
}

Expand Down
64 changes: 13 additions & 51 deletions internal/dash0/workloads/workload_modifier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,19 @@ var _ = Describe("Dash0 Workload Modification", func() {
{
VolumeMounts: 2,
Dash0VolumeMountIdx: 1,
EnvVars: 3,
EnvVars: 4,
NodeOptionsEnvVarIdx: 1,
Dash0CollectorBaseUrlEnvVarIdx: 2,
NodeIpIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 3,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
{
VolumeMounts: 3,
Dash0VolumeMountIdx: 2,
EnvVars: 4,
EnvVars: 5,
NodeOptionsEnvVarIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 3,
NodeIpIdx: 3,
Dash0CollectorBaseUrlEnvVarIdx: 4,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
},
Expand All @@ -105,18 +107,20 @@ var _ = Describe("Dash0 Workload Modification", func() {
{
VolumeMounts: 2,
Dash0VolumeMountIdx: 1,
EnvVars: 3,
EnvVars: 4,
NodeOptionsEnvVarIdx: 1,
NodeOptionsUsesValueFrom: true,
Dash0CollectorBaseUrlEnvVarIdx: 2,
NodeIpIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 3,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
{
VolumeMounts: 3,
Dash0VolumeMountIdx: 1,
EnvVars: 3,
EnvVars: 4,
NodeOptionsEnvVarIdx: 1,
NodeOptionsValue: "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry --require something-else --experimental-modules",
NodeIpIdx: 2,
Dash0CollectorBaseUrlEnvVarIdx: 0,
Dash0CollectorBaseUrlEnvVarExpectedValue: OTelCollectorBaseUrlTest,
},
Expand Down Expand Up @@ -252,50 +256,6 @@ var _ = Describe("Dash0 Workload Modification", func() {
})
})

Describe("when updating instrumentation from 0.5.1 to 0.6.0", func() {
It("should remove the old --require from NODE_OPTIONS (when it is the only content of NODE_OPTIONS)", func() {
workload := InstrumentedDeployment(TestNamespaceName, DeploymentNamePrefix)
Expect(workload.Spec.Template.Spec.Containers[0].Env[0].Name).To(Equal("NODE_OPTIONS"))
workload.Spec.Template.Spec.Containers[0].Env[0].Value = "--require /opt/dash0/instrumentation/node.js/node_modules/@dash0hq/opentelemetry"
hasBeenModified := workloadModifier.ModifyDeployment(workload)
Expect(hasBeenModified).To(BeTrue())
VerifyModifiedDeployment(workload, BasicInstrumentedPodSpecExpectations())
})

It("should remove the old --require from NODE_OPTIONS (when it is at the start of NODE_OPTIONS)", func() {
workload := InstrumentedDeployment(TestNamespaceName, DeploymentNamePrefix)
Expect(workload.Spec.Template.Spec.Containers[0].Env[0].Name).To(Equal("NODE_OPTIONS"))
workload.Spec.Template.Spec.Containers[0].Env[0].Value = "--require /opt/dash0/instrumentation/node.js/node_modules/@dash0hq/opentelemetry --require /something/else"
hasBeenModified := workloadModifier.ModifyDeployment(workload)
Expect(hasBeenModified).To(BeTrue())
expectations := BasicInstrumentedPodSpecExpectations()
expectations.Containers[0].NodeOptionsValue = "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry --require /something/else"
VerifyModifiedDeployment(workload, expectations)
})

It("should remove the old --require from NODE_OPTIONS (when it is at the end of NODE_OPTIONS)", func() {
workload := InstrumentedDeployment(TestNamespaceName, DeploymentNamePrefix)
Expect(workload.Spec.Template.Spec.Containers[0].Env[0].Name).To(Equal("NODE_OPTIONS"))
workload.Spec.Template.Spec.Containers[0].Env[0].Value = "--require /something/else --require /opt/dash0/instrumentation/node.js/node_modules/@dash0hq/opentelemetry"
hasBeenModified := workloadModifier.ModifyDeployment(workload)
Expect(hasBeenModified).To(BeTrue())
expectations := BasicInstrumentedPodSpecExpectations()
expectations.Containers[0].NodeOptionsValue = "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry --require /something/else"
VerifyModifiedDeployment(workload, expectations)
})

It("should remove the old --require from NODE_OPTIONS (when it is in the middle of NODE_OPTIONS)", func() {
workload := InstrumentedDeployment(TestNamespaceName, DeploymentNamePrefix)
Expect(workload.Spec.Template.Spec.Containers[0].Env[0].Name).To(Equal("NODE_OPTIONS"))
workload.Spec.Template.Spec.Containers[0].Env[0].Value = "--require /something/else --require /opt/dash0/instrumentation/node.js/node_modules/@dash0hq/opentelemetry --require /another/thing"
hasBeenModified := workloadModifier.ModifyDeployment(workload)
Expect(hasBeenModified).To(BeTrue())
expectations := BasicInstrumentedPodSpecExpectations()
expectations.Containers[0].NodeOptionsValue = "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry --require /something/else --require /another/thing"
VerifyModifiedDeployment(workload, expectations)
})
})

Describe("when reverting workloads", func() {
It("should remove Dash0 from an instrumented cron job", func() {
workload := InstrumentedCronJob(TestNamespaceName, CronJobNamePrefix)
Expand Down Expand Up @@ -337,13 +297,15 @@ var _ = Describe("Dash0 Workload Modification", func() {
Dash0VolumeMountIdx: -1,
EnvVars: 1,
NodeOptionsEnvVarIdx: -1,
NodeIpIdx: -1,
Dash0CollectorBaseUrlEnvVarIdx: -1,
},
{
VolumeMounts: 2,
Dash0VolumeMountIdx: -1,
EnvVars: 2,
NodeOptionsEnvVarIdx: -1,
NodeIpIdx: -1,
Dash0CollectorBaseUrlEnvVarIdx: -1,
},
},
Expand Down
2 changes: 1 addition & 1 deletion test/util/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const (
ConfigurationReloaderImageTest = "some-registry.com:1234/dash0hq/configuration-reloader:10.11.12"
FilelogOffsetSynchImageTest = "some-registry.com:1234/dash0hq/filelog-offset-synch:13.14.15"

OTelCollectorBaseUrlTest = "http://dash0-operator-opentelemetry-collector.dash0-system.svc.cluster.local:4318"
OTelCollectorBaseUrlTest = "http://$(DASH0_NODE_IP):40318"
EndpointTest = "endpoint.dash0.com:4317"
AuthorizationTokenTest = "authorization-token"
SecretRefTest = "secret-ref"
Expand Down
18 changes: 17 additions & 1 deletion test/util/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,10 @@ func DeploymentWithExistingDash0Artifacts(namespace string, name string) *appsv1
Name: "NODE_OPTIONS",
ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.namespace"}},
},
{
Name: "DASH0_NODE_IP",
ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "wrong.field.ref"}},
},
{
// this ValueFrom will be removed and replaced by a simple Value
Name: "DASH0_OTEL_COLLECTOR_BASE_URL",
Expand Down Expand Up @@ -738,7 +742,11 @@ func DeploymentWithExistingDash0Artifacts(namespace string, name string) *appsv1
Value: "--require something-else --experimental-modules",
},
{
Name: "TEST2",
Name: "DASH0_NODE_IP",
Value: "will be replaced by a value from",
},
{
Name: "TEST3",
Value: "value",
},
},
Expand Down Expand Up @@ -817,6 +825,10 @@ func InstrumentedDeploymentWithMoreBellsAndWhistles(namespace string, name strin
Name: "NODE_OPTIONS",
Value: "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry",
},
{
Name: "DASH0_NODE_IP",
ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.hostIP"}},
},
{
Name: "DASH0_OTEL_COLLECTOR_BASE_URL",
Value: OTelCollectorBaseUrlTest,
Expand Down Expand Up @@ -892,6 +904,10 @@ func simulateInstrumentedPodSpec(podSpec *corev1.PodSpec, meta *metav1.ObjectMet
Name: "NODE_OPTIONS",
Value: "--require /__dash0__/instrumentation/node.js/node_modules/@dash0hq/opentelemetry",
},
{
Name: "DASH0_NODE_IP",
ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.hostIP"}},
},
{
Name: "DASH0_OTEL_COLLECTOR_BASE_URL",
Value: OTelCollectorBaseUrlTest,
Expand Down
Loading

0 comments on commit f193ae3

Please sign in to comment.