Skip to content

Commit

Permalink
feat(self-monitoring): add daemonset/deployment uid to collector
Browse files Browse the repository at this point in the history
  • Loading branch information
basti1302 committed Sep 13, 2024
1 parent 37c1bcb commit cddbfc6
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 29 deletions.
9 changes: 9 additions & 0 deletions images/pkg/common/otel.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ func InitOTelSdk(
log.Println("Env var 'K8S_NODE_NAME' is not set")
}

daemonSetUid := os.Getenv("K8S_DAEMONSET_UID")
deploymentUid := os.Getenv("K8S_DEPLOYMENT_UID")

var doMeterShutdown func(ctx context.Context) error

if _, isSet = os.LookupEnv("OTEL_EXPORTER_OTLP_ENDPOINT"); isSet {
Expand Down Expand Up @@ -70,6 +73,12 @@ func InitOTelSdk(
attributes := make([]attribute.KeyValue, 0, len(extraResourceAttributes)+2)
attributes = append(attributes, semconv.K8SPodUID(podUid))
attributes = append(attributes, semconv.K8SNodeName(nodeName))
if daemonSetUid != "" {
attributes = append(attributes, semconv.K8SDaemonSetUID(daemonSetUid))
}
if deploymentUid != "" {
attributes = append(attributes, semconv.K8SDeploymentUID(deploymentUid))
}
for key, value := range extraResourceAttributes {
attributes = append(attributes, attribute.String(key, value))
}
Expand Down
30 changes: 15 additions & 15 deletions internal/backendconnection/otelcolresources/desired_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -934,51 +934,51 @@ func assembleDeploymentCollectorContainer(
}

func daemonsetServiceAccountName(namePrefix string) string {
return name(namePrefix, openTelemetryCollector, "sa")
return renderName(namePrefix, openTelemetryCollector, "sa")
}

func deploymentServiceAccountName(namePrefix string) string {
return name(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "sa")
return renderName(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "sa")
}

func FilelogReceiverOffsetsConfigMapName(namePrefix string) string {
return name(namePrefix, "filelogoffsets", "cm")
return renderName(namePrefix, "filelogoffsets", "cm")
}

func DaemonSetCollectorConfigConfigMapName(namePrefix string) string {
return name(namePrefix, openTelemetryCollectorDaemonSetNameSuffix, "cm")
return renderName(namePrefix, openTelemetryCollectorDaemonSetNameSuffix, "cm")
}

func DeploymentCollectorConfigConfigMapName(namePrefix string) string {
return name(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "cm")
return renderName(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "cm")
}

func DaemonSetClusterRoleName(namePrefix string) string {
return name(namePrefix, openTelemetryCollector, "cr")
return renderName(namePrefix, openTelemetryCollector, "cr")
}

func DeploymentClusterRoleName(namePrefix string) string {
return name(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "cr")
return renderName(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "cr")
}

func DaemonSetClusterRoleBindingName(namePrefix string) string {
return name(namePrefix, openTelemetryCollector, "crb")
return renderName(namePrefix, openTelemetryCollector, "crb")
}

func DeploymentClusterRoleBindingName(namePrefix string) string {
return name(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "crb")
return renderName(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "crb")
}

func roleName(namePrefix string) string {
return name(namePrefix, openTelemetryCollector, "role")
return renderName(namePrefix, openTelemetryCollector, "role")
}

func roleBindingName(namePrefix string) string {
return name(namePrefix, openTelemetryCollector, "rolebinding")
return renderName(namePrefix, openTelemetryCollector, "rolebinding")
}

func ServiceName(namePrefix string) string {
return name(namePrefix, openTelemetryCollector, "service")
return renderName(namePrefix, openTelemetryCollector, "service")
}

func serviceLabels() map[string]string {
Expand All @@ -988,14 +988,14 @@ func serviceLabels() map[string]string {
}

func DaemonSetName(namePrefix string) string {
return name(namePrefix, openTelemetryCollectorDaemonSetNameSuffix, "daemonset")
return renderName(namePrefix, openTelemetryCollectorDaemonSetNameSuffix, "daemonset")
}

func DeploymentName(namePrefix string) string {
return name(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "deployment")
return renderName(namePrefix, openTelemetryCollectorDeploymentNameSuffix, "deployment")
}

func name(prefix string, parts ...string) string {
func renderName(prefix string, parts ...string) string {
return strings.Join(append([]string{prefix}, parts...), "-")
}

Expand Down
46 changes: 40 additions & 6 deletions internal/backendconnection/otelcolresources/otelcol_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import (
"github.com/cisco-open/k8s-objectmatcher/patch"
"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand Down Expand Up @@ -93,11 +95,11 @@ func (m *OTelColResourceManager) createOrUpdateResource(
desiredResource client.Object,
logger *logr.Logger,
) (bool, bool, error) {
existingObject, err := m.createEmptyReceiverFor(desiredResource)
existingResource, err := m.createEmptyReceiverFor(desiredResource)
if err != nil {
return false, false, err
}
err = m.Client.Get(ctx, client.ObjectKeyFromObject(desiredResource), existingObject)
err = m.Client.Get(ctx, client.ObjectKeyFromObject(desiredResource), existingResource)
if err != nil {
if !apierrors.IsNotFound(err) {
return false, false, err
Expand All @@ -108,8 +110,8 @@ func (m *OTelColResourceManager) createOrUpdateResource(
}
return true, false, nil
} else {
// object needs to be updated
hasChanged, err := m.updateResource(ctx, existingObject, desiredResource, logger)
// object might need to be updated
hasChanged, err := m.updateResource(ctx, existingResource, desiredResource, logger)
if err != nil {
return false, false, err
}
Expand Down Expand Up @@ -156,7 +158,7 @@ func (m *OTelColResourceManager) createResource(

func (m *OTelColResourceManager) updateResource(
ctx context.Context,
existingObject client.Object,
existingResource client.Object,
desiredResource client.Object,
logger *logr.Logger,
) (bool, error) {
Expand All @@ -171,9 +173,14 @@ func (m *OTelColResourceManager) updateResource(
if err := m.setOwnerReference(desiredResource, logger); err != nil {
return false, err
}
// This will change the collector daemonset and collector deployment one more time after it has been created
// initially, by setting their own UID as an environment variable in all containers. Obviously, this cannot be done
// when creating the daemonset/deployment. The next reconcile cycle after creating the resources will set the UID
// environment variable, and modifying the containers will automatically restart them.
m.amendDeploymentAndDaemonSetWithSelfReferenceUIDs(existingResource, desiredResource)

patchResult, err := patch.DefaultPatchMaker.Calculate(
existingObject,
existingResource,
desiredResource,
patch.IgnoreField("kind"),
patch.IgnoreField("apiVersion"),
Expand Down Expand Up @@ -238,6 +245,33 @@ func (m *OTelColResourceManager) setOwnerReference(
return nil
}

func (m *OTelColResourceManager) amendDeploymentAndDaemonSetWithSelfReferenceUIDs(existingResource client.Object, desiredResource client.Object) {
name := desiredResource.GetName()
uid := existingResource.GetUID()
if name == DaemonSetName(m.OTelCollectorNamePrefix) {
daemonset := desiredResource.(*appsv1.DaemonSet)
addSelfReferenceUidToAllContainers(&daemonset.Spec.Template.Spec.Containers, "K8S_DAEMONSET_UID", uid)
} else if name == DeploymentName(m.OTelCollectorNamePrefix) {
deployment := desiredResource.(*appsv1.Deployment)
addSelfReferenceUidToAllContainers(&deployment.Spec.Template.Spec.Containers, "K8S_DEPLOYMENT_UID", uid)
}
}

func addSelfReferenceUidToAllContainers(containers *[]corev1.Container, envVarName string, uid types.UID) {
for i, container := range *containers {
selfReferenceUidIsAlreadyPresent := slices.ContainsFunc(container.Env, func(envVar corev1.EnvVar) bool {
return envVar.Name == envVarName
})
if !selfReferenceUidIsAlreadyPresent {
container.Env = append(container.Env, corev1.EnvVar{
Name: envVarName,
Value: string(uid),
})
(*containers)[i] = container
}
}
}

func (m *OTelColResourceManager) DeleteResources(
ctx context.Context,
namespace string,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package otelcolresources
import (
"context"
"fmt"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -65,8 +66,18 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func()
})

AfterEach(func() {
err := k8sClient.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace(Dash0OperatorNamespace))
Expect(err).ToNot(HaveOccurred())
Expect(oTelColResourceManager.DeleteResources(
ctx,
Dash0OperatorNamespace,
TestImages,
dash0MonitoringResource,
selfmonitoring.SelfMonitoringConfiguration{},
&logger,
)).To(Succeed())
Eventually(func(g Gomega) {
VerifyCollectorResourcesDoNotExist(ctx, k8sClient, Dash0OperatorNamespace)
}, 500*time.Millisecond, 20*time.Millisecond).Should(Succeed())
Expect(k8sClient.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace(Dash0OperatorNamespace))).To(Succeed())
})

Describe("when dealing with individual resources", func() {
Expand Down Expand Up @@ -273,7 +284,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func()
deployment := GetOTelColDeployment(ctx, k8sClient, Dash0OperatorNamespace)
Expect(k8sClient.Delete(ctx, deployment)).To(Succeed())

resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err :=
resourcesHaveBeenCreated, _, err :=
oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources(
ctx,
Dash0OperatorNamespace,
Expand All @@ -284,15 +295,14 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func()
)
Expect(err).ToNot(HaveOccurred())
Expect(resourcesHaveBeenCreated).To(BeTrue())
Expect(resourcesHaveBeenUpdated).To(BeFalse())

VerifyCollectorResources(ctx, k8sClient, Dash0OperatorNamespace)
})
})

Describe("when all OpenTelemetry collector resources are up to date", func() {
It("should report that nothing has changed", func() {
// create resources (so we are sure that everything is in the desired state)
// create resources
_, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources(
ctx,
Dash0OperatorNamespace,
Expand All @@ -303,9 +313,24 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func()
)
Expect(err).ToNot(HaveOccurred())

// Now run another create/update, to make sure resourcesHaveBeenCreated/resourcesHaveBeenUpdated come back
// as false.
resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err :=
// The next run will still report that resources have been updated, since it adds K8S_DAEMONSET_UID and
// K8S_DEPLOYMENT_UID to the daemon set and deployment respectively (this cannot be done when creating the
// resources).
resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources(
ctx,
Dash0OperatorNamespace,
TestImages,
dash0MonitoringResource,
selfmonitoring.SelfMonitoringConfiguration{},
&logger,
)
Expect(err).ToNot(HaveOccurred())
Expect(resourcesHaveBeenCreated).To(BeFalse())
Expect(resourcesHaveBeenUpdated).To(BeTrue())

// Now run a final create/update, to make sure resourcesHaveBeenCreated/resourcesHaveBeenUpdated come back
// as false and all resources are in their final desired state.
resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err =
oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources(
ctx,
Dash0OperatorNamespace,
Expand Down
1 change: 1 addition & 0 deletions test/util/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/dash0hq/dash0-operator/internal/dash0/util"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
Expand Down

0 comments on commit cddbfc6

Please sign in to comment.