From 0a3e04ed4336ffb5404c72f7f14d3071da0a36bc Mon Sep 17 00:00:00 2001
From: Michele Mancioppi
Date: Thu, 22 Aug 2024 11:29:17 +0200
Subject: [PATCH] feat(self-monitoring): add support for self-monitoring

This commit lays the foundation for self-monitoring:

1. Add a cluster-scoped Dash0OperatorConfiguration CRD, which contains
   default endpoint and authorization settings that will be used for
   self-monitoring.
2. When self-monitoring is enabled in the Dash0OperatorConfiguration
   resource, the controller deployment and the collector daemonset are
   extended with environment variables (OTEL_EXPORTER_OTLP_ENDPOINT
   and so on) derived from the export settings of the operator
   configuration resource.
3. On removal of the Dash0OperatorConfiguration resource, the same
   environment variables are removed from the controller deployment and
   the collector daemonset.
4. Add metrics and tracing to the filelogoffsetsynch container via the
   Go OTel SDK. Other collector containers and the operator controller
   process will be extended with the Go OTel SDK in a later iteration.
5. Add a new end-to-end test for self-monitoring telemetry.
---
 .sonarcloud.properties | 7 +
 Makefile | 31 +-
 PROJECT | 9 +
 api/dash0monitoring/resource_interface.go | 34 +
 .../v1alpha1/dash0monitoring_types.go | 223 ++---
 .../v1alpha1/operator_configuration_types.go | 213 +++++
 api/dash0monitoring/v1alpha1/types_common.go | 135 +++
 .../v1alpha1/zz_generated.deepcopy.go | 117 +++
 cmd/main.go | 231 +++--
 .../operator.dash0.com_dash0monitorings.yaml | 5 +-
 ...dash0.com_dash0operatorconfigurations.yaml | 263 +++++
 config/rbac/dash0_viewer_role.yaml | 4 +-
 config/rbac/role.yaml | 27 +
 go.mod | 2 +-
 go.sum | 4 +-
 .../templates/operator/cluster-roles.yaml | 32 +
 ...ustom-resource-definition-monitoring.yaml} | 8 +-
 ...rce-definition-operator-configuration.yaml | 254 +++++
 .../operator/deployment-and-webhook.yaml | 6 +-
 .../__snapshot__/cluster-roles_test.yaml.snap | 27 +
 ...urce-definition-monitoring_test.yaml.snap} | 5 +-
 ...tion-operator-configuration_test.yaml.snap | 239 +++++
 .../deployment-and-webhook_test.yaml.snap | 4 +-
 ...-resource-definition-monitoring_test.yaml} | 2 +-
 ...efinition-operator-configuration_test.yaml | 7 +
 .../operator/deployment-and-webhook_test.yaml | 10 +-
 images/configreloader/Dockerfile | 4 +
 images/filelogoffsetsynch/Dockerfile | 4 +
 .../src/filelogoffsetsynch.go | 235 ++++-
 images/filelogoffsetsynch/src/go.mod | 17 +
 images/filelogoffsetsynch/src/go.sum | 53 ++
 .../backendconnection_manager.go | 13 +-
 .../backendconnection_manager_test.go | 53 +-
 .../otelcolresources/collector_config_map.go | 24 +-
 .../collector_config_map_test.go | 65 +-
 .../otelcolresources/desired_state.go | 153 +--
 .../otelcolresources/desired_state_test.go | 139 ++-
 .../otelcolresources/otelcol_resources.go | 25 +-
 .../otelcol_resources_test.go | 7 +
 ...suite_test.go => controller_suite_test.go} | 7 +
 internal/dash0/controller/dash0_controller.go | 98 +-
 .../dash0/controller/dash0_controller_test.go | 31 +-
 .../operator_configuration_controller.go | 218 +++++
 .../operator_configuration_controller_test.go | 900 ++++++++++++++++++
 .../dash0/instrumentation/instrumenter.go | 9 +-
 .../instrumentation/instrumenter_test.go | 2 +-
 internal/dash0/removal/removal_suite_test.go | 13 +-
 .../dash0/selfmonitoring/self_monitoring.go | 649 +++++++++++++
 internal/dash0/util/constants.go | 10 +
 internal/dash0/util/controller.go | 363 ++++---
 internal/dash0/util/types.go | 22 +-
 .../webhook/attach_dangling_events_test.go | 14 +-
 internal/dash0/webhook/dash0_webhook_test.go | 2
+- test-resources/bin/render-templates.sh | 1 + test-resources/bin/test-cleanup.sh | 1 + .../bin/test-roundtrip-01-aum-operator-cr.sh | 7 +- .../bin/test-roundtrip-02-operator-cr-aum.sh | 10 +- .../dash0operatorconfiguration/.gitignore | 1 + ...0operatorconfiguration.token.yaml.template | 12 + test-resources/node.js/express/Dockerfile | 2 +- .../dash0_operator_configuration_resource.go | 88 ++ ...sh0operatorconfiguration.e2e.yaml.template | 10 + test/e2e/e2e_test.go | 29 + test/e2e/spans.go | 107 ++- test/e2e/verify_instrumentation.go | 6 +- test/util/constants.go | 85 +- test/util/dash0_monitoring_resource.go | 6 +- test/util/operator_resource.go | 261 +++++ test/util/verification.go | 22 +- 69 files changed, 4925 insertions(+), 752 deletions(-) create mode 100644 api/dash0monitoring/resource_interface.go create mode 100644 api/dash0monitoring/v1alpha1/operator_configuration_types.go create mode 100644 api/dash0monitoring/v1alpha1/types_common.go create mode 100644 config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml rename helm-chart/dash0-operator/templates/operator/{custom-resource-definition-dash0.yaml => custom-resource-definition-monitoring.yaml} (99%) create mode 100644 helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml rename helm-chart/dash0-operator/tests/operator/__snapshot__/{custom-resource-definition-dash0_test.yaml.snap => custom-resource-definition-monitoring_test.yaml.snap} (99%) create mode 100644 helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap rename helm-chart/dash0-operator/tests/operator/{custom-resource-definition-dash0_test.yaml => custom-resource-definition-monitoring_test.yaml} (73%) create mode 100644 helm-chart/dash0-operator/tests/operator/custom-resource-definition-operator-configuration_test.yaml rename internal/dash0/controller/{dash0_controller_suite_test.go => controller_suite_test.go} (94%) create mode 100644 internal/dash0/controller/operator_configuration_controller.go create mode 100644 internal/dash0/controller/operator_configuration_controller_test.go create mode 100644 internal/dash0/selfmonitoring/self_monitoring.go create mode 100644 internal/dash0/util/constants.go create mode 100644 test-resources/customresources/dash0operatorconfiguration/.gitignore create mode 100644 test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template create mode 100644 test/e2e/dash0_operator_configuration_resource.go create mode 100644 test/e2e/dash0operatorconfiguration.e2e.yaml.template create mode 100644 test/util/operator_resource.go diff --git a/.sonarcloud.properties b/.sonarcloud.properties index ffff1464..8083af3c 100644 --- a/.sonarcloud.properties +++ b/.sonarcloud.properties @@ -7,3 +7,10 @@ sonar.projectKey=dash0hq_dash0-operator sonar.sources=. sonar.tests=test sonar.test.inclusions=**/*_test.go,test/** + +# enables disabling Sonar rules +sonar.issue.ignore.multicriteria=e1 + +# Do not report the keyword TODO as an issue. 
+sonar.issue.ignore.multicriteria.e1.ruleKey=go:S1135 +sonar.issue.ignore.multicriteria.e1.resourceKey=**/*.go diff --git a/Makefile b/Makefile index 9318d0c9..bdcf1153 100644 --- a/Makefile +++ b/Makefile @@ -163,6 +163,7 @@ golangci-lint: .PHONY: lint lint: golangci-lint ## Run golangci-lint linter & yamllint + @echo -------------------------------- $(GOLANGCI_LINT) run helm lint helm-chart/dash0-operator --set operator.disableSecretCheck=true --set operator.disableOtlpEndpointCheck=true @@ -192,38 +193,40 @@ docker-build: \ docker-build-filelog-offset-synch ## Build all container images. define build_container_image -$(eval $@IMAGE_REPOSITORY = $(1)) +$(eval $@_IMAGE_REPOSITORY = $(1)) $(eval $@_IMAGE_TAG = $(2)) -if [[ -n "${$@IMAGE_REPOSITORY}" ]]; then \ - if [[ "${$@IMAGE_REPOSITORY}" = *"/"* ]]; then \ - echo "not rebuilding the image ${$@IMAGE_REPOSITORY}, this looks like a remote image"; \ - else \ - $(CONTAINER_TOOL) build -t ${$@IMAGE_REPOSITORY}:${$@_IMAGE_TAG} .; \ - fi; \ -elif [[ -n "${OPERATOR_HELM_CHART_URL}" ]]; then \ - echo "not rebuilding image, a remote Helm chart is used with the default image from the chart"; \ +$(eval $@_CONTEXT = $(3)) +if [[ -n "$($@_IMAGE_REPOSITORY)" ]]; then \ + if [[ "$($@_IMAGE_REPOSITORY)" = *"/"* ]]; then \ + echo "not rebuilding the image $($@_IMAGE_REPOSITORY), this looks like a remote image"; \ + else \ + echo $(CONTAINER_TOOL) build -t $($@_IMAGE_REPOSITORY):$($@_IMAGE_TAG) $($@_CONTEXT); \ + $(CONTAINER_TOOL) build -t $($@_IMAGE_REPOSITORY):$($@_IMAGE_TAG) $($@_CONTEXT); \ + fi; \ +elif [[ -n "$(OPERATOR_HELM_CHART_URL)" ]]; then \ + echo "not rebuilding image, a remote Helm chart is used with the default image from the chart"; \ fi endef .PHONY: docker-build-controller docker-build-controller: ## Build the manager container image. - @$(call build_container_image,$(CONTROLLER_IMG_REPOSITORY),$(CONTROLLER_IMG_TAG)) + @$(call build_container_image,$(CONTROLLER_IMG_REPOSITORY),$(CONTROLLER_IMG_TAG),".") .PHONY: docker-build-instrumentation docker-build-instrumentation: ## Build the instrumentation image. - @$(call build_container_image,$(INSTRUMENTATION_IMG_REPOSITORY),$(INSTRUMENTATION_IMG_TAG)) + @$(call build_container_image,$(INSTRUMENTATION_IMG_REPOSITORY),$(INSTRUMENTATION_IMG_TAG),images/instrumentation) .PHONY: docker-build-collector docker-build-collector: ## Build the OpenTelemetry collector container image. - @$(call build_container_image,$(COLLECTOR_IMG_REPOSITORY),$(COLLECTOR_IMG_TAG)) + @$(call build_container_image,$(COLLECTOR_IMG_REPOSITORY),$(COLLECTOR_IMG_TAG),images/collector) .PHONY: docker-build-config-reloader docker-build-config-reloader: ## Build the config reloader container image. - @$(call build_container_image,$(CONFIGURATION_RELOADER_IMG_REPOSITORY),$(CONFIGURATION_RELOADER_IMG_TAG)) + @$(call build_container_image,$(CONFIGURATION_RELOADER_IMG_REPOSITORY),$(CONFIGURATION_RELOADER_IMG_TAG),images/configreloader) .PHONY: docker-build-filelog-offset-synch docker-build-filelog-offset-synch: ## Build the filelog offset synch container image. 
- @$(call build_container_image,$(FILELOG_OFFSET_SYNCH_IMG_REPOSITORY),$(FILELOG_OFFSET_SYNCH_IMG_TAG)) + @$(call build_container_image,$(FILELOG_OFFSET_SYNCH_IMG_REPOSITORY),$(FILELOG_OFFSET_SYNCH_IMG_TAG),images/filelogoffsetsynch) ifndef ignore-not-found ignore-not-found = false diff --git a/PROJECT b/PROJECT index 5cfd8b59..7556bb85 100644 --- a/PROJECT +++ b/PROJECT @@ -24,4 +24,13 @@ resources: webhooks: defaulting: true webhookVersion: v1 +- api: + crdVersion: v1 + namespaced: false + controller: true + domain: dash0.com + group: operator + kind: Dash0OperatorConfiguration + path: github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1 + version: v1alpha1 version: "3" diff --git a/api/dash0monitoring/resource_interface.go b/api/dash0monitoring/resource_interface.go new file mode 100644 index 00000000..19d7f60a --- /dev/null +++ b/api/dash0monitoring/resource_interface.go @@ -0,0 +1,34 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package dash0monitoring + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Dash0Resource interface { + GetResourceTypeName() string + GetNaturalLanguageResourceTypeName() string + Get() client.Object + GetName() string + GetUid() types.UID + GetCreationTimestamp() metav1.Time + GetReceiver() client.Object + GetListReceiver() client.ObjectList + IsClusterResource() bool + RequestToName(ctrl.Request) string + + IsAvailable() bool + SetAvailableConditionToUnknown() + EnsureResourceIsMarkedAsAvailable() + EnsureResourceIsMarkedAsDegraded(string, string) + EnsureResourceIsMarkedAsAboutToBeDeleted() + IsMarkedForDeletion() bool + + Items(client.ObjectList) []client.Object + At(client.ObjectList, int) Dash0Resource +} diff --git a/api/dash0monitoring/v1alpha1/dash0monitoring_types.go b/api/dash0monitoring/v1alpha1/dash0monitoring_types.go index e6c54010..2607175b 100644 --- a/api/dash0monitoring/v1alpha1/dash0monitoring_types.go +++ b/api/dash0monitoring/v1alpha1/dash0monitoring_types.go @@ -4,14 +4,20 @@ package v1alpha1 import ( + "fmt" "slices" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + dash0common "github.com/dash0hq/dash0-operator/api/dash0monitoring" ) const ( - FinalizerId = "operator.dash0.com/dash0-monitoring-finalizer" + MonitoringFinalizerId = "operator.dash0.com/dash0-monitoring-finalizer" ) // Dash0MonitoringSpec describes the details of monitoring a single Kubernetes namespace with Dash0 and sending @@ -60,131 +66,6 @@ type Dash0MonitoringSpec struct { InstrumentWorkloads InstrumentWorkloadsMode `json:"instrumentWorkloads,omitempty"` } -// Export describes the observability backend to which telemetry data will be sent. This can either be Dash0 or another -// OTLP-compatible backend. You can also combine up to three exporters (i.e. Dash0 plus gRPC plus HTTP). This allows -// sending the same data to two or three targets simultaneously. At least one exporter has to be defined. -// -// +kubebuilder:validation:MinProperties=1 -// +kubebuilder:validation:MaxProperties=3 -type Export struct { - // The configuration of the Dash0 ingress endpoint to which telemetry data will be sent. 
- // - // +kubebuilder:validation:Optional - Dash0 *Dash0Configuration `json:"dash0,omitempty"` - - // The settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver via HTTP. - // - // +kubebuilder:validation:Optional - Http *HttpConfiguration `json:"http,omitempty"` - - // The settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver via gRPC. - // - // +kubebuilder:validation:Optional - Grpc *GrpcConfiguration `json:"grpc,omitempty"` -} - -// Dash0Configuration describes to which Dash0 ingress endpoint telemetry data will be sent. -type Dash0Configuration struct { - // The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value - // needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - // https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - // `dash0.com:4317`. - // - // +kubebuilder:validation:Required - Endpoint string `json:"endpoint"` - - // The name of the Dash0 dataset to which telemetry data will be sent. This property is optional. If omitted, the - // dataset "default" will be used. - // - // +kubebuilder:default=default - Dataset string `json:"dataset,omitempty"` - - // Mandatory authorization settings for sending data to Dash0. - // - // +kubebuilder:validation:Required - Authorization Authorization `json:"authorization"` -} - -// Authorization contains the authorization settings for Dash0. -// -// +kubebuilder:validation:MinProperties=1 -// +kubebuilder:validation:MaxProperties=1 -type Authorization struct { - // The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has - // to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - // token for your Dash0 organization can be copied from https://app.dash0.com/settings. - // - // +kubebuilder:validation:Optional - Token *string `json:"token"` // either token or secret ref, with token taking precedence - - // A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is - // ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - // https://app.dash0.com/settings. - // - // +kubebuilder:validation:Optional - SecretRef *SecretRef `json:"secretRef"` -} - -type SecretRef struct { - // The name of the secret containing the Dash0 authorization token. Defaults to "dash0-authorization-secret". - // +kubebuilder:default=dash0-authorization-secret - Name string `json:"name"` - - // The key of the value which contains the Dash0 authorization token. Defaults to "token" - // +kubebuilder:default=token - Key string `json:"key"` -} - -// HttpConfiguration describe the settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver -// via HTTP. -type HttpConfiguration struct { - // The URL of the OTLP-compatible receiver to which telemetry data will be sent. This property is mandatory. - // - // +kubebuilder:validation:Required - Endpoint string `json:"endpoint"` - - // Additional headers to be sent with each HTTP request, for example for authorization. This property is optional. - // - // +kubebuilder:validation:Optional - Headers []Header `json:"headers,omitempty"` - - // The encoding of the OTLP data when sent via HTTP. Can be either proto or json, defaults to proto. 
- // - // +kubebuilder:default=proto - Encoding OtlpEncoding `json:"encoding,omitempty"` -} - -// GrpcConfiguration descibe the settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver -// via gRPC. -type GrpcConfiguration struct { - // The URL of the OTLP-compatible receiver to which telemetry data will be sent. This property is mandatory. - // - // +kubebuilder:validation:Required - Endpoint string `json:"endpoint"` - - // Additional headers to be sent with each gRPC request, for example for authorization. This property is optional. - // - // +kubebuilder:validation:Optional - Headers []Header `json:"headers,omitempty"` -} - -// OtlpEncoding describes the encoding of the OTLP data when sent via HTTP. -// -// +kubebuilder:validation:Enum=proto;json -type OtlpEncoding string - -const ( - Proto OtlpEncoding = "proto" - Json OtlpEncoding = "json" -) - -type Header struct { - // +kubebuilder:validation:Required - Name string `json:"name"` - // +kubebuilder:validation:Required - Value string `json:"value"` -} - // InstrumentWorkloadsMode describes when exactly workloads will be instrumented. Only one of the following modes // may be specified. If none of the following policies is specified, the default one is All. See // Dash0MonitoringSpec#InstrumentWorkloads for more details. @@ -205,25 +86,7 @@ const ( var allInstrumentWorkloadsMode = []InstrumentWorkloadsMode{All, CreatedAndUpdated, None} -func ReadBooleanOptOutSetting(setting *bool) bool { - return readOptionalBooleanWithDefault(setting, true) -} - -func readOptionalBooleanWithDefault(setting *bool, defaultValue bool) bool { - if setting == nil { - return defaultValue - } - return *setting -} - -type ConditionType string - -const ( - ConditionTypeAvailable ConditionType = "Available" - ConditionTypeDegraded ConditionType = "Degraded" -) - -// Dash0MonitoringStatus defines the observed state of the Dash0 monitoring resource. +// Dash0MonitoringStatus defines the observed state of the Dash0Monitoring monitoring resource. 
type Dash0MonitoringStatus struct { Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"` @@ -232,10 +95,11 @@ type Dash0MonitoringStatus struct { PreviousInstrumentWorkloads InstrumentWorkloadsMode `json:"previousInstrumentWorkloads,omitempty"` } -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status - -// Dash0Monitoring is the Schema for the Dash0Monitoring API +// Dash0Monitoring is the schema for the Dash0Monitoring API +// +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +groupName=operator.dash0.com type Dash0Monitoring struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` @@ -284,7 +148,7 @@ func (d *Dash0Monitoring) SetAvailableConditionToUnknown() { Type: string(ConditionTypeAvailable), Status: metav1.ConditionUnknown, Reason: "ReconcileStarted", - Message: "Dash0 has started resource reconciliation.", + Message: "Dash0 has started monitoring resource reconciliation for this namespace.", }) meta.SetStatusCondition( &d.Status.Conditions, @@ -292,7 +156,7 @@ func (d *Dash0Monitoring) SetAvailableConditionToUnknown() { Type: string(ConditionTypeDegraded), Status: metav1.ConditionTrue, Reason: "ReconcileStarted", - Message: "Dash0 is still starting.", + Message: "Dash0 monitoring resource reconciliation is in progress.", }) } @@ -306,7 +170,7 @@ func (d *Dash0Monitoring) EnsureResourceIsMarkedAsAvailable() { Type: string(ConditionTypeAvailable), Status: metav1.ConditionTrue, Reason: "ReconcileFinished", - Message: "Dash0 is active in this namespace now.", + Message: "Dash0 monitoring is active in this namespace now.", }) meta.RemoveStatusCondition(&d.Status.Conditions, string(ConditionTypeDegraded)) } @@ -314,7 +178,7 @@ func (d *Dash0Monitoring) EnsureResourceIsMarkedAsAvailable() { func (d *Dash0Monitoring) EnsureResourceIsMarkedAsAboutToBeDeleted() { d.EnsureResourceIsMarkedAsDegraded( "Dash0MonitoringResourceHasBeenRemoved", - "Dash0 is inactive in this namespace now.", + "Dash0 monitoring is inactive in this namespace now.", ) } @@ -343,6 +207,59 @@ func (d *Dash0Monitoring) EnsureResourceIsMarkedAsDegraded( }) } +func (d *Dash0Monitoring) GetResourceTypeName() string { + return "Dash0Monitoring" +} + +func (d *Dash0Monitoring) GetNaturalLanguageResourceTypeName() string { + return "Dash0 monitoring resource" +} + +func (d *Dash0Monitoring) Get() client.Object { + return d +} + +func (d *Dash0Monitoring) GetName() string { + return d.Name +} + +func (d *Dash0Monitoring) GetUid() types.UID { + return d.UID +} + +func (d *Dash0Monitoring) GetCreationTimestamp() metav1.Time { + return d.CreationTimestamp +} + +func (d *Dash0Monitoring) GetReceiver() client.Object { + return &Dash0Monitoring{} +} + +func (d *Dash0Monitoring) GetListReceiver() client.ObjectList { + return &Dash0MonitoringList{} +} + +func (d *Dash0Monitoring) IsClusterResource() bool { + return false +} + +func (d *Dash0Monitoring) RequestToName(ctrl.Request) string { + return fmt.Sprintf("%s/%s", d.Namespace, d.Name) +} + +func (d *Dash0Monitoring) Items(list client.ObjectList) []client.Object { + items := list.(*Dash0MonitoringList).Items + result := make([]client.Object, len(items)) + for i := range items { + result[i] = &items[i] + } + return result +} + +func (d *Dash0Monitoring) At(list client.ObjectList, index int) dash0common.Dash0Resource { + return &list.(*Dash0MonitoringList).Items[index] +} + //+kubebuilder:object:root=true // Dash0MonitoringList 
contains a list of Dash0Monitoring resources.
diff --git a/api/dash0monitoring/v1alpha1/operator_configuration_types.go b/api/dash0monitoring/v1alpha1/operator_configuration_types.go
new file mode 100644
index 00000000..663fad1e
--- /dev/null
+++ b/api/dash0monitoring/v1alpha1/operator_configuration_types.go
@@ -0,0 +1,213 @@
+// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	dash0common "github.com/dash0hq/dash0-operator/api/dash0monitoring"
+)
+
+// Dash0OperatorConfigurationSpec describes cluster-wide configuration settings for the Dash0 Kubernetes operator.
+type Dash0OperatorConfigurationSpec struct {
+	// The configuration of the default observability backend to which telemetry data will be sent by the operator, as
+	// well as the backend that will receive the operator's self-monitoring data. This property is mandatory.
+	// This can either be Dash0 or another OTLP-compatible backend. You can also combine up to three exporters (i.e.
+	// Dash0 plus gRPC plus HTTP). This allows sending the same data to two or three targets simultaneously. At least
+	// one exporter has to be defined.
+	//
+	// Please note that self-monitoring data is only sent to one backend, with Dash0 taking precedence over gRPC and
+	// HTTP, and gRPC taking precedence over HTTP if multiple exports are defined. Furthermore, HTTP export with JSON
+	// encoding is not supported for self-monitoring telemetry.
+	//
+	// +kubebuilder:validation:Required
+	Export *Export `json:"export,omitempty"`
+
+	// Global opt-out of self-monitoring for this operator
+	// +kubebuilder:default={enabled: true}
+	SelfMonitoring SelfMonitoring `json:"selfMonitoring,omitempty"`
+}
+
+// SelfMonitoring describes how the operator will report telemetry about its own operation to the backend.
+type SelfMonitoring struct {
+	// +kubebuilder:default=true
+	Enabled bool `json:"enabled"`
+}
+
+// Dash0OperatorConfigurationStatus defines the observed state of the Dash0 operator configuration resource.
+type Dash0OperatorConfigurationStatus struct { + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"` +} + +// Dash0OperatorConfiguration is the schema for the Dash0OperatorConfiguration API +// +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope=Cluster +// +kubebuilder:subresource:status +// +groupName=operator.dash0.com +type Dash0OperatorConfiguration struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec Dash0OperatorConfigurationSpec `json:"spec,omitempty"` + Status Dash0OperatorConfigurationStatus `json:"status,omitempty"` +} + +func (d *Dash0OperatorConfiguration) IsMarkedForDeletion() bool { + deletionTimestamp := d.GetDeletionTimestamp() + return deletionTimestamp != nil && !deletionTimestamp.IsZero() +} + +func (d *Dash0OperatorConfiguration) IsAvailable() bool { + if condition := d.getCondition(ConditionTypeAvailable); condition != nil { + return condition.Status == metav1.ConditionTrue + } + return false +} + +func (d *Dash0OperatorConfiguration) getCondition(conditionType ConditionType) *metav1.Condition { + for _, c := range d.Status.Conditions { + if c.Type == string(conditionType) { + return &c + + } + } + return nil +} + +func (d *Dash0OperatorConfiguration) SetAvailableConditionToUnknown() { + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(ConditionTypeAvailable), + Status: metav1.ConditionUnknown, + Reason: "ReconcileStarted", + Message: "Dash0 has started resource reconciliation for the cluster-wide operator configuration.", + }) + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(ConditionTypeDegraded), + Status: metav1.ConditionTrue, + Reason: "ReconcileStarted", + Message: "Dash0 operator configuration resource reconciliation is in progress.", + }) +} + +func (d *Dash0OperatorConfiguration) EnsureResourceIsMarkedAsAvailable() { + // If the available status is already true, the status condition is not updated, except for Reason, Message and + // ObservedGeneration timestamp. In particular, LastTransitionTime is not updated. Thus, this operation is + // effectively idempotent. + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(ConditionTypeAvailable), + Status: metav1.ConditionTrue, + Reason: "ReconcileFinished", + Message: "Dash0 operator configuration is available in this cluster now.", + }) + meta.RemoveStatusCondition(&d.Status.Conditions, string(ConditionTypeDegraded)) +} + +func (d *Dash0OperatorConfiguration) EnsureResourceIsMarkedAsAboutToBeDeleted() { + d.EnsureResourceIsMarkedAsDegraded( + "Dash0OperatorConfigurationResourceHasBeenRemoved", + "Dash0 operator configuration is inactive in this cluster now.", + ) +} + +func (d *Dash0OperatorConfiguration) EnsureResourceIsMarkedAsDegraded( + reason string, + message string, +) { + // If the available status is already false, the status condition is not updated, except for Reason, Message and + // ObservedGeneration timestamp. In particular, LastTransitionTime is not updated. Thus, this operation is + // effectively idempotent. 
+ meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(ConditionTypeAvailable), + Status: metav1.ConditionFalse, + Reason: reason, + Message: message, + }) + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(ConditionTypeDegraded), + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + }) +} + +func (d *Dash0OperatorConfiguration) GetResourceTypeName() string { + return "Dash0OperatorConfiguration" +} + +func (d *Dash0OperatorConfiguration) GetNaturalLanguageResourceTypeName() string { + return "Dash0 operator configuration resource" +} + +func (d *Dash0OperatorConfiguration) Get() client.Object { + return d +} + +func (d *Dash0OperatorConfiguration) GetName() string { + return d.Name +} + +func (d *Dash0OperatorConfiguration) GetUid() types.UID { + return d.UID +} + +func (d *Dash0OperatorConfiguration) GetCreationTimestamp() metav1.Time { + return d.CreationTimestamp +} + +func (d *Dash0OperatorConfiguration) GetReceiver() client.Object { + return &Dash0OperatorConfiguration{} +} + +func (d *Dash0OperatorConfiguration) GetListReceiver() client.ObjectList { + return &Dash0OperatorConfigurationList{} +} + +func (d *Dash0OperatorConfiguration) IsClusterResource() bool { + return true +} + +func (d *Dash0OperatorConfiguration) RequestToName(ctrl.Request) string { + return d.Name +} + +func (d *Dash0OperatorConfiguration) Items(list client.ObjectList) []client.Object { + items := list.(*Dash0OperatorConfigurationList).Items + result := make([]client.Object, len(items)) + for i := range items { + result[i] = &items[i] + } + return result +} + +func (d *Dash0OperatorConfiguration) At(list client.ObjectList, index int) dash0common.Dash0Resource { + return &list.(*Dash0OperatorConfigurationList).Items[index] +} + +//+kubebuilder:object:root=true + +// Dash0OperatorConfigurationList contains a list of Dash0OperatorConfiguration resources. +type Dash0OperatorConfigurationList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []Dash0OperatorConfiguration `json:"items"` +} + +func init() { + SchemeBuilder.Register(&Dash0OperatorConfiguration{}, &Dash0OperatorConfigurationList{}) +} diff --git a/api/dash0monitoring/v1alpha1/types_common.go b/api/dash0monitoring/v1alpha1/types_common.go new file mode 100644 index 00000000..1cb8d535 --- /dev/null +++ b/api/dash0monitoring/v1alpha1/types_common.go @@ -0,0 +1,135 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +type ConditionType string + +const ( + ConditionTypeAvailable ConditionType = "Available" + ConditionTypeDegraded ConditionType = "Degraded" +) + +// Export describes the observability backend to which telemetry data will be sent. This can either be Dash0 or another +// OTLP-compatible backend. You can also combine up to three exporters (i.e. Dash0 plus gRPC plus HTTP). This allows +// sending the same data to two or three targets simultaneously. At least one exporter has to be defined. +// +// +kubebuilder:validation:MinProperties=1 +type Export struct { + // The configuration of the Dash0 ingress endpoint to which telemetry data will be sent. + // + // +kubebuilder:validation:Optional + Dash0 *Dash0Configuration `json:"dash0,omitempty"` + + // The settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver via HTTP. 
+	//
+	// +kubebuilder:validation:Optional
+	Http *HttpConfiguration `json:"http,omitempty"`
+
+	// The settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver via gRPC.
+	//
+	// +kubebuilder:validation:Optional
+	Grpc *GrpcConfiguration `json:"grpc,omitempty"`
+}
+
+// Dash0Configuration describes to which Dash0 ingress endpoint telemetry data will be sent.
+type Dash0Configuration struct {
+	// The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value
+	// needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied from
+	// https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in
+	// `dash0.com:4317`.
+	//
+	// +kubebuilder:validation:Required
+	Endpoint string `json:"endpoint"`
+
+	// The name of the Dash0 dataset to which telemetry data will be sent. This property is optional. If omitted, the
+	// dataset "default" will be used.
+	//
+	// +kubebuilder:default=default
+	Dataset string `json:"dataset,omitempty"`
+
+	// Mandatory authorization settings for sending data to Dash0.
+	//
+	// +kubebuilder:validation:Required
+	Authorization Authorization `json:"authorization"`
+}
+
+// Authorization contains the authorization settings for Dash0.
+//
+// +kubebuilder:validation:MinProperties=1
+// +kubebuilder:validation:MaxProperties=1
+type Authorization struct {
+	// The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has
+	// to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization
+	// token for your Dash0 organization can be copied from https://app.dash0.com/settings.
+	//
+	// +kubebuilder:validation:Optional
+	Token *string `json:"token"` // either token or secret ref, with token taking precedence
+
+	// A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is
+	// ignored if the token property is set. The authorization token for your Dash0 organization can be copied from
+	// https://app.dash0.com/settings.
+	//
+	// +kubebuilder:validation:Optional
+	SecretRef *SecretRef `json:"secretRef"`
+}
+
+type SecretRef struct {
+	// The name of the secret containing the Dash0 authorization token. Defaults to "dash0-authorization-secret".
+	// +kubebuilder:default=dash0-authorization-secret
+	Name string `json:"name"`
+
+	// The key of the value which contains the Dash0 authorization token. Defaults to "token"
+	// +kubebuilder:default=token
+	Key string `json:"key"`
+}
+
+// HttpConfiguration describes the settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver
+// via HTTP.
+type HttpConfiguration struct {
+	// The URL of the OTLP-compatible receiver to which telemetry data will be sent. This property is mandatory.
+	//
+	// +kubebuilder:validation:Required
+	Endpoint string `json:"endpoint"`
+
+	// Additional headers to be sent with each HTTP request, for example for authorization. This property is optional.
+	//
+	// +kubebuilder:validation:Optional
+	Headers []Header `json:"headers,omitempty"`
+
+	// The encoding of the OTLP data when sent via HTTP. Can be either proto or json, defaults to proto.
+	//
+	// +kubebuilder:default=proto
+	Encoding OtlpEncoding `json:"encoding,omitempty"`
+}
+
+// GrpcConfiguration describes the settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver
+// via gRPC.
+type GrpcConfiguration struct { + // The URL of the OTLP-compatible receiver to which telemetry data will be sent. This property is mandatory. + // + // +kubebuilder:validation:Required + Endpoint string `json:"endpoint"` + + // Additional headers to be sent with each gRPC request, for example for authorization. This property is optional. + // + // +kubebuilder:validation:Optional + Headers []Header `json:"headers,omitempty"` +} + +// OtlpEncoding describes the encoding of the OTLP data when sent via HTTP. +// +// +kubebuilder:validation:Enum=proto;json +type OtlpEncoding string + +const ( + Proto OtlpEncoding = "proto" + Json OtlpEncoding = "json" +) + +type Header struct { + // +kubebuilder:validation:Required + Name string `json:"name"` + // +kubebuilder:validation:Required + Value string `json:"value"` +} diff --git a/api/dash0monitoring/v1alpha1/zz_generated.deepcopy.go b/api/dash0monitoring/v1alpha1/zz_generated.deepcopy.go index 1b00855b..ab6a42c5 100644 --- a/api/dash0monitoring/v1alpha1/zz_generated.deepcopy.go +++ b/api/dash0monitoring/v1alpha1/zz_generated.deepcopy.go @@ -150,6 +150,108 @@ func (in *Dash0MonitoringStatus) DeepCopy() *Dash0MonitoringStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Dash0OperatorConfiguration) DeepCopyInto(out *Dash0OperatorConfiguration) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Dash0OperatorConfiguration. +func (in *Dash0OperatorConfiguration) DeepCopy() *Dash0OperatorConfiguration { + if in == nil { + return nil + } + out := new(Dash0OperatorConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Dash0OperatorConfiguration) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Dash0OperatorConfigurationList) DeepCopyInto(out *Dash0OperatorConfigurationList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Dash0OperatorConfiguration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Dash0OperatorConfigurationList. +func (in *Dash0OperatorConfigurationList) DeepCopy() *Dash0OperatorConfigurationList { + if in == nil { + return nil + } + out := new(Dash0OperatorConfigurationList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Dash0OperatorConfigurationList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Dash0OperatorConfigurationSpec) DeepCopyInto(out *Dash0OperatorConfigurationSpec) { + *out = *in + if in.Export != nil { + in, out := &in.Export, &out.Export + *out = new(Export) + (*in).DeepCopyInto(*out) + } + out.SelfMonitoring = in.SelfMonitoring +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Dash0OperatorConfigurationSpec. +func (in *Dash0OperatorConfigurationSpec) DeepCopy() *Dash0OperatorConfigurationSpec { + if in == nil { + return nil + } + out := new(Dash0OperatorConfigurationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Dash0OperatorConfigurationStatus) DeepCopyInto(out *Dash0OperatorConfigurationStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Dash0OperatorConfigurationStatus. +func (in *Dash0OperatorConfigurationStatus) DeepCopy() *Dash0OperatorConfigurationStatus { + if in == nil { + return nil + } + out := new(Dash0OperatorConfigurationStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Export) DeepCopyInto(out *Export) { *out = *in @@ -249,3 +351,18 @@ func (in *SecretRef) DeepCopy() *SecretRef { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SelfMonitoring) DeepCopyInto(out *SelfMonitoring) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SelfMonitoring. 
+func (in *SelfMonitoring) DeepCopy() *SelfMonitoring { + if in == nil { + return nil + } + out := new(SelfMonitoring) + in.DeepCopyInto(out) + return out +} diff --git a/cmd/main.go b/cmd/main.go index 41b212c9..762f7c62 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -34,11 +34,12 @@ import ( dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" "github.com/dash0hq/dash0-operator/internal/backendconnection" "github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources" - dash0controller "github.com/dash0hq/dash0-operator/internal/dash0/controller" + "github.com/dash0hq/dash0-operator/internal/dash0/controller" "github.com/dash0hq/dash0-operator/internal/dash0/instrumentation" - dash0removal "github.com/dash0hq/dash0-operator/internal/dash0/removal" - dash0util "github.com/dash0hq/dash0-operator/internal/dash0/util" - dash0webhook "github.com/dash0hq/dash0-operator/internal/dash0/webhook" + "github.com/dash0hq/dash0-operator/internal/dash0/removal" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + "github.com/dash0hq/dash0-operator/internal/dash0/util" + "github.com/dash0hq/dash0-operator/internal/dash0/webhook" //+kubebuilder:scaffold:imports ) @@ -268,6 +269,86 @@ func startOperatorManager( return nil } +func readEnvironmentVariables() (*environmentVariables, error) { + operatorNamespace, isSet := os.LookupEnv(operatorNamespaceEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, operatorNamespaceEnvVarName) + } + + deploymentName, isSet := os.LookupEnv(deploymentNameEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, deploymentNameEnvVarName) + } + + oTelCollectorNamePrefix, isSet := os.LookupEnv(oTelCollectorNamePrefixEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, oTelCollectorNamePrefixEnvVarName) + } + + operatorImage, isSet := os.LookupEnv(operatorImageEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, operatorImageEnvVarName) + } + + initContainerImage, isSet := os.LookupEnv(initContainerImageEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, initContainerImageEnvVarName) + } + initContainerImagePullPolicy := + readOptionalPullPolicyFromEnvironmentVariable(initContainerImagePullPolicyEnvVarName) + + collectorImage, isSet := os.LookupEnv(collectorImageEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, collectorImageEnvVarName) + } + collectorImagePullPolicy := readOptionalPullPolicyFromEnvironmentVariable(collectorImageImagePullPolicyEnvVarName) + + configurationReloaderImage, isSet := os.LookupEnv(configurationReloaderImageEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, configurationReloaderImageEnvVarName) + } + configurationReloaderImagePullPolicy := + readOptionalPullPolicyFromEnvironmentVariable(configurationReloaderImagePullPolicyEnvVarName) + + filelogOffsetSynchImage, isSet := os.LookupEnv(filelogOffsetSynchImageEnvVarName) + if !isSet { + return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, filelogOffsetSynchImageEnvVarName) + } + filelogOffsetSynchImagePullPolicy := + readOptionalPullPolicyFromEnvironmentVariable(filelogOffsetSynchImagePullPolicyEnvVarName) + + return &environmentVariables{ + operatorNamespace: operatorNamespace, + deploymentName: deploymentName, + oTelCollectorNamePrefix: oTelCollectorNamePrefix, + operatorImage: 
operatorImage, + initContainerImage: initContainerImage, + initContainerImagePullPolicy: initContainerImagePullPolicy, + collectorImage: collectorImage, + collectorImagePullPolicy: collectorImagePullPolicy, + configurationReloaderImage: configurationReloaderImage, + configurationReloaderImagePullPolicy: configurationReloaderImagePullPolicy, + filelogOffsetSynchImage: filelogOffsetSynchImage, + filelogOffsetSynchImagePullPolicy: filelogOffsetSynchImagePullPolicy, + }, nil +} + +func readOptionalPullPolicyFromEnvironmentVariable(envVarName string) corev1.PullPolicy { + pullPolicyRaw := os.Getenv(envVarName) + if pullPolicyRaw != "" { + if pullPolicyRaw == string(corev1.PullAlways) || + pullPolicyRaw == string(corev1.PullIfNotPresent) || + pullPolicyRaw == string(corev1.PullNever) { + return corev1.PullPolicy(pullPolicyRaw) + } else { + setupLog.Info( + fmt.Sprintf( + "Ignoring unknown pull policy setting (%s): %s.", envVarName, pullPolicyRaw)) + } + } + return "" +} + func startDash0Controller( ctx context.Context, mgr manager.Manager, @@ -279,7 +360,7 @@ func startDash0Controller( "http://%s-opentelemetry-collector.%s.svc.cluster.local:4318", envVars.oTelCollectorNamePrefix, envVars.operatorNamespace) - images := dash0util.Images{ + images := util.Images{ OperatorImage: envVars.operatorImage, InitContainerImage: envVars.initContainerImage, InitContainerImagePullPolicy: envVars.initContainerImagePullPolicy, @@ -307,11 +388,13 @@ func startDash0Controller( return err } + logCurrentSelfMonitoringSettings(deploymentSelfReference) + k8sClient := mgr.GetClient() instrumenter := &instrumentation.Instrumenter{ Client: k8sClient, Clientset: clientset, - Recorder: mgr.GetEventRecorderFor("dash0-controller"), + Recorder: mgr.GetEventRecorderFor("dash0-monitoring-controller"), Images: images, OTelCollectorBaseUrl: oTelCollectorBaseUrl, } @@ -326,7 +409,19 @@ func startDash0Controller( Clientset: clientset, OTelColResourceManager: oTelColResourceManager, } - dash0Reconciler := &dash0controller.Dash0Reconciler{ + + operatorConfigurationReconciler := &controller.OperatorConfigurationReconciler{ + Client: mgr.GetClient(), + Clientset: clientset, + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("dash0-operator-configuration-controller"), + DeploymentSelfReference: deploymentSelfReference, + } + if err := operatorConfigurationReconciler.SetupWithManager(mgr); err != nil { + return fmt.Errorf("unable to set up the operator configuration reconciler: %w", err) + } + + monitoringReconciler := &controller.Dash0Reconciler{ Client: k8sClient, Clientset: clientset, Instrumenter: instrumenter, @@ -335,23 +430,21 @@ func startDash0Controller( OperatorNamespace: envVars.operatorNamespace, } - if err := dash0Reconciler.SetupWithManager(mgr); err != nil { - return fmt.Errorf("unable to set up the Dash0 reconciler: %w", err) + if err := monitoringReconciler.SetupWithManager(mgr); err != nil { + return fmt.Errorf("unable to set up the monitoring reconciler: %w", err) } - setupLog.Info("Dash0 reconciler has been set up.") if os.Getenv("ENABLE_WEBHOOK") != "false" { - if err := (&dash0webhook.Handler{ + if err := (&webhook.Handler{ Client: k8sClient, Recorder: mgr.GetEventRecorderFor("dash0-webhook"), Images: images, OTelCollectorBaseUrl: oTelCollectorBaseUrl, }).SetupWebhookWithManager(mgr); err != nil { - return fmt.Errorf("unable to create the Dash0 webhook: %w", err) + return fmt.Errorf("unable to create the webhook: %w", err) } - setupLog.Info("Dash0 webhook has been set up.") } else { - 
setupLog.Info("Dash0 webhooks have been disabled via configuration.") + setupLog.Info("Webhook is disabled via configuration.") } return nil @@ -361,7 +454,7 @@ func executeStartupTasks( ctx context.Context, clientset *kubernetes.Clientset, eventRecorder record.EventRecorder, - images dash0util.Images, + images util.Images, oTelCollectorBaseUrl string, operatorNamespace string, deploymentName string, @@ -429,7 +522,7 @@ func instrumentAtStartup( startupTasksK8sClient client.Client, clientset *kubernetes.Clientset, eventRecorder record.EventRecorder, - images dash0util.Images, + images util.Images, oTelCollectorBaseUrl string, ) { startupInstrumenter := &instrumentation.Instrumenter{ @@ -446,95 +539,43 @@ func instrumentAtStartup( startupInstrumenter.InstrumentAtStartup(ctx, startupTasksK8sClient, &setupLog) } -func readEnvironmentVariables() (*environmentVariables, error) { - operatorNamespace, isSet := os.LookupEnv(operatorNamespaceEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, operatorNamespaceEnvVarName) - } - - deploymentName, isSet := os.LookupEnv(deploymentNameEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, deploymentNameEnvVarName) - } - - oTelCollectorNamePrefix, isSet := os.LookupEnv(oTelCollectorNamePrefixEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, oTelCollectorNamePrefixEnvVarName) - } - - operatorImage, isSet := os.LookupEnv(operatorImageEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, operatorImageEnvVarName) - } - - initContainerImage, isSet := os.LookupEnv(initContainerImageEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, initContainerImageEnvVarName) - } - initContainerImagePullPolicy := - readOptionalPullPolicyFromEnvironmentVariable(initContainerImagePullPolicyEnvVarName) - - collectorImage, isSet := os.LookupEnv(collectorImageEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, collectorImageEnvVarName) - } - collectorImagePullPolicy := readOptionalPullPolicyFromEnvironmentVariable(collectorImageImagePullPolicyEnvVarName) - - configurationReloaderImage, isSet := os.LookupEnv(configurationReloaderImageEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, configurationReloaderImageEnvVarName) - } - configurationReloaderImagePullPolicy := - readOptionalPullPolicyFromEnvironmentVariable(configurationReloaderImagePullPolicyEnvVarName) - - filelogOffsetSynchImage, isSet := os.LookupEnv(filelogOffsetSynchImageEnvVarName) - if !isSet { - return nil, fmt.Errorf(mandatoryEnvVarMissingMessageTemplate, filelogOffsetSynchImageEnvVarName) - } - filelogOffsetSynchImagePullPolicy := - readOptionalPullPolicyFromEnvironmentVariable(filelogOffsetSynchImagePullPolicyEnvVarName) - - return &environmentVariables{ - operatorNamespace: operatorNamespace, - deploymentName: deploymentName, - oTelCollectorNamePrefix: oTelCollectorNamePrefix, - operatorImage: operatorImage, - initContainerImage: initContainerImage, - initContainerImagePullPolicy: initContainerImagePullPolicy, - collectorImage: collectorImage, - collectorImagePullPolicy: collectorImagePullPolicy, - configurationReloaderImage: configurationReloaderImage, - configurationReloaderImagePullPolicy: configurationReloaderImagePullPolicy, - filelogOffsetSynchImage: filelogOffsetSynchImage, - filelogOffsetSynchImagePullPolicy: filelogOffsetSynchImagePullPolicy, 
- }, nil -} - -func readOptionalPullPolicyFromEnvironmentVariable(envVarName string) corev1.PullPolicy { - pullPolicyRaw := os.Getenv(envVarName) - if pullPolicyRaw != "" { - if pullPolicyRaw == string(corev1.PullAlways) || - pullPolicyRaw == string(corev1.PullIfNotPresent) || - pullPolicyRaw == string(corev1.PullNever) { - return corev1.PullPolicy(pullPolicyRaw) - } else { - setupLog.Info( - fmt.Sprintf( - "Ignoring unknown pull policy setting (%s): %s.", envVarName, pullPolicyRaw)) - } +func logCurrentSelfMonitoringSettings(deploymentSelfReference *appsv1.Deployment) { + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + deploymentSelfReference, + controller.ManagerContainerName, + ) + if err != nil { + setupLog.Error(err, "cannot determine whether self-monitoring is enabled in the controller deployment") + } + + if selfMonitoringConfiguration.Enabled { + endpointAndHeaders := selfmonitoring.ConvertExportConfigurationToEnvVarSettings(selfMonitoringConfiguration.Export) + setupLog.Info( + "Self-monitoring settings on controller deployment:", + "enabled", + selfMonitoringConfiguration.Enabled, + "endpoint", + endpointAndHeaders.Endpoint, + ) + } else { + setupLog.Info( + "Self-monitoring settings on controller deployment:", + "enabled", + selfMonitoringConfiguration.Enabled, + ) } - return "" } func deleteDash0MonitoringResourcesInAllNamespaces(logger *logr.Logger) error { - handler, err := dash0removal.NewOperatorPreDeleteHandler() + handler, err := removal.NewOperatorPreDeleteHandler() if err != nil { logger.Error(err, "Failed to create the OperatorPreDeleteHandler.") return err } err = handler.DeleteAllDash0MonitoringResources() if err != nil { - logger.Error(err, "Failed to delete all Dash0 monitoring resources.") + logger.Error(err, "Failed to delete all monitoring resources.") return err } return nil diff --git a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml index 9717ca3b..b0e1bd83 100644 --- a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml +++ b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml @@ -17,7 +17,7 @@ spec: - name: v1alpha1 schema: openAPIV3Schema: - description: Dash0Monitoring is the Schema for the Dash0Monitoring API + description: Dash0Monitoring is the schema for the Dash0Monitoring API properties: apiVersion: description: |- @@ -47,7 +47,6 @@ spec: This can either be Dash0 or another OTLP-compatible backend. You can also combine up to three exporters (i.e. Dash0 plus gRPC plus HTTP). This allows sending the same data to two or three targets simultaneously. At least one exporter has to be defined. - maxProperties: 3 minProperties: 1 properties: dash0: @@ -212,7 +211,7 @@ spec: - export type: object status: - description: Dash0MonitoringStatus defines the observed state of the Dash0 + description: Dash0MonitoringStatus defines the observed state of the Dash0Monitoring monitoring resource. 
properties: conditions: diff --git a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml new file mode 100644 index 00000000..fbfddb4f --- /dev/null +++ b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml @@ -0,0 +1,263 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: dash0operatorconfigurations.operator.dash0.com +spec: + group: operator.dash0.com + names: + kind: Dash0OperatorConfiguration + listKind: Dash0OperatorConfigurationList + plural: dash0operatorconfigurations + singular: dash0operatorconfiguration + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Dash0OperatorConfiguration is the schema for the Dash0OperatorConfiguration + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Dash0OperatorConfigurationSpec describes cluster-wide configuration + settings for the Dash0 Kubernetes operator. + properties: + export: + description: |- + The configuration of the default observability backend to which telemetry data will be sent by the operator, as + well as the backend that will receive the operator's self-monitoring data. This property is mandatory. + This can either be Dash0 or another OTLP-compatible backend. You can also combine up to three exporters (i.e. + Dash0 plus gRPC plus HTTP). This allows sending the same data to two or three targets simultaneously. At least + one exporter has to be defined. + + + Please note that self-monitoring data is only sent to one backend, with Dash0 taking precedence over gRPC and + HTTP, and gRPC taking precedence over HTTP if multiple exports are defined. Furthermore, HTTP export with JSON + encoding is not supported for self-monitoring telemetry. + minProperties: 1 + properties: + dash0: + description: The configuration of the Dash0 ingress endpoint to + which telemetry data will be sent. + properties: + authorization: + description: Mandatory authorization settings for sending + data to Dash0. + maxProperties: 1 + minProperties: 1 + properties: + secretRef: + description: |- + A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is + ignored if the token property is set. The authorization token for your Dash0 organization can be copied from + https://app.dash0.com/settings. + properties: + key: + default: token + description: The key of the value which contains the + Dash0 authorization token. Defaults to "token" + type: string + name: + default: dash0-authorization-secret + description: The name of the secret containing the + Dash0 authorization token. Defaults to "dash0-authorization-secret". 
+ type: string + required: + - key + - name + type: object + token: + description: |- + The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has + to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization + token for your Dash0 organization can be copied from https://app.dash0.com/settings. + type: string + type: object + dataset: + default: default + description: |- + The name of the Dash0 dataset to which telemetry data will be sent. This property is optional. If omitted, the + dataset "default" will be used. + type: string + endpoint: + description: |- + The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value + needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom + https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in + `dash0.com:4317`. + type: string + required: + - authorization + - endpoint + type: object + grpc: + description: The settings for an exporter to send telemetry to + an arbitrary OTLP-compatible receiver via gRPC. + properties: + endpoint: + description: The URL of the OTLP-compatible receiver to which + telemetry data will be sent. This property is mandatory. + type: string + headers: + description: Additional headers to be sent with each gRPC + request, for example for authorization. This property is + optional. + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + required: + - endpoint + type: object + http: + description: The settings for an exporter to send telemetry to + an arbitrary OTLP-compatible receiver via HTTP. + properties: + encoding: + default: proto + description: The encoding of the OTLP data when sent via HTTP. + Can be either proto or json, defaults to proto. + enum: + - proto + - json + type: string + endpoint: + description: The URL of the OTLP-compatible receiver to which + telemetry data will be sent. This property is mandatory. + type: string + headers: + description: Additional headers to be sent with each HTTP + request, for example for authorization. This property is + optional. + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + required: + - endpoint + type: object + type: object + selfMonitoring: + default: + enabled: true + description: Global opt-out for self-monitoring for this operator + properties: + enabled: + default: true + type: boolean + required: + - enabled + type: object + type: object + status: + description: Dash0OperatorConfigurationStatus defines the observed state + of the Dash0 operator configuration resource. + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. 
For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/rbac/dash0_viewer_role.yaml b/config/rbac/dash0_viewer_role.yaml index ed0a78f2..fc42490e 100644 --- a/config/rbac/dash0_viewer_role.yaml +++ b/config/rbac/dash0_viewer_role.yaml @@ -1,4 +1,4 @@ -# permissions for end users to view dash0monitorings. +# permissions for end users to view operator.dash0.com CRs. 
apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -15,6 +15,7 @@ rules: - operator.dash0.com resources: - dash0monitorings + - dash0operatorconfigurations verbs: - get - list @@ -23,5 +24,6 @@ rules: - operator.dash0.com resources: - dash0monitorings/status + - dash0operatorconfigurations/status verbs: - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 4e1da2fe..c822fe58 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -78,3 +78,30 @@ rules: - get - patch - update +- apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +- apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations/finalizers + verbs: + - update +- apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations/status + verbs: + - get + - patch + - update diff --git a/go.mod b/go.mod index cad7e567..76549cdf 100644 --- a/go.mod +++ b/go.mod @@ -57,7 +57,7 @@ require ( golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/net v0.28.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sys v0.23.0 // indirect + golang.org/x/sys v0.24.0 // indirect golang.org/x/term v0.23.0 // indirect golang.org/x/text v0.17.0 // indirect golang.org/x/time v0.5.0 // indirect diff --git a/go.sum b/go.sum index fbce450d..aa6540f0 100644 --- a/go.sum +++ b/go.sum @@ -125,8 +125,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= -golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml index f3bde3d4..5e3addb2 100644 --- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml +++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml @@ -100,6 +100,38 @@ rules: - patch - update +# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status. +- apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch + +# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status. +- apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations/finalizers + verbs: + - update + +# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status. 
+- apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations/status + verbs: + - get + - patch + - update # Permissions required to manage OTel collector resources. - apiGroups: diff --git a/helm-chart/dash0-operator/templates/operator/custom-resource-definition-dash0.yaml b/helm-chart/dash0-operator/templates/operator/custom-resource-definition-monitoring.yaml similarity index 99% rename from helm-chart/dash0-operator/templates/operator/custom-resource-definition-dash0.yaml rename to helm-chart/dash0-operator/templates/operator/custom-resource-definition-monitoring.yaml index 8394e3ce..0e4670cc 100644 --- a/helm-chart/dash0-operator/templates/operator/custom-resource-definition-dash0.yaml +++ b/helm-chart/dash0-operator/templates/operator/custom-resource-definition-monitoring.yaml @@ -1,3 +1,4 @@ +--- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: @@ -16,7 +17,7 @@ spec: - name: v1alpha1 schema: openAPIV3Schema: - description: Dash0Monitoring is the Schema for the Dash0Monitoring API + description: Dash0Monitoring is the schema for the Dash0Monitoring API properties: apiVersion: description: |- @@ -46,7 +47,6 @@ spec: This can either be Dash0 or another OTLP-compatible backend. You can also combine up to three exporters (i.e. Dash0 plus gRPC plus HTTP). This allows sending the same data to two or three targets simultaneously. At least one exporter has to be defined. - maxProperties: 3 minProperties: 1 properties: dash0: @@ -211,7 +211,7 @@ spec: - export type: object status: - description: Dash0MonitoringStatus defines the observed state of the Dash0 + description: Dash0MonitoringStatus defines the observed state of the Dash0Monitoring monitoring resource. properties: conditions: @@ -287,4 +287,4 @@ spec: served: true storage: true subresources: - status: {} + status: {} \ No newline at end of file diff --git a/helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml b/helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml new file mode 100644 index 00000000..ea02b997 --- /dev/null +++ b/helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml @@ -0,0 +1,254 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: dash0operatorconfigurations.operator.dash0.com +spec: + group: operator.dash0.com + names: + kind: Dash0OperatorConfiguration + listKind: Dash0OperatorConfigurationList + plural: dash0operatorconfigurations + singular: dash0operatorconfiguration + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Dash0OperatorConfiguration is the schema for the Dash0OperatorConfiguration + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Dash0OperatorConfigurationSpec describes cluster-wide configuration + settings for the Dash0 Kubernetes operator. + properties: + export: + description: |- + The configuration of the default observability backend to which telemetry data will be sent by the operator, as + well as the backend that will receive the operator's self-monitoring data. This property is mandatory. + This can either be Dash0 or another OTLP-compatible backend. You can also combine up to three exporters (i.e. + Dash0 plus gRPC plus HTTP). This allows sending the same data to two or three targets simultaneously. At least + one exporter has to be defined. + + + Please note that self-monitoring data is only sent to one backend, with Dash0 taking precedence over gRPC and + HTTP, and gRPC taking precedence over HTTP if multiple exports are defined. Furthermore, HTTP export with JSON + encoding is not supported for self-monitoring telemetry. + minProperties: 1 + properties: + dash0: + description: The configuration of the Dash0 ingress endpoint to + which telemetry data will be sent. + properties: + authorization: + description: Mandatory authorization settings for sending + data to Dash0. + maxProperties: 1 + minProperties: 1 + properties: + secretRef: + description: |- + A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is + ignored if the token property is set. The authorization token for your Dash0 organization can be copied from + https://app.dash0.com/settings. + properties: + key: + default: token + description: The key of the value which contains the + Dash0 authorization token. Defaults to "token" + type: string + name: + default: dash0-authorization-secret + description: The name of the secret containing the + Dash0 authorization token. Defaults to "dash0-authorization-secret". + type: string + required: + - key + - name + type: object + token: + description: |- + The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has + to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization + token for your Dash0 organization can be copied from https://app.dash0.com/settings. + type: string + type: object + dataset: + default: default + description: |- + The name of the Dash0 dataset to which telemetry data will be sent. This property is optional. If omitted, the + dataset "default" will be used. + type: string + endpoint: + description: |- + The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value + needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom + https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in + `dash0.com:4317`. + type: string + required: + - authorization + - endpoint + type: object + grpc: + description: The settings for an exporter to send telemetry to + an arbitrary OTLP-compatible receiver via gRPC. + properties: + endpoint: + description: The URL of the OTLP-compatible receiver to which + telemetry data will be sent. This property is mandatory. + type: string + headers: + description: Additional headers to be sent with each gRPC + request, for example for authorization. This property is + optional. 
+ items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + required: + - endpoint + type: object + http: + description: The settings for an exporter to send telemetry to + an arbitrary OTLP-compatible receiver via HTTP. + properties: + encoding: + default: proto + description: The encoding of the OTLP data when sent via HTTP. + Can be either proto or json, defaults to proto. + enum: + - proto + - json + type: string + endpoint: + description: The URL of the OTLP-compatible receiver to which + telemetry data will be sent. This property is mandatory. + type: string + headers: + description: Additional headers to be sent with each HTTP + request, for example for authorization. This property is + optional. + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + required: + - endpoint + type: object + type: object + selfMonitoring: + default: + enabled: true + description: Global opt-out for self-monitoring for this operator + properties: + enabled: + default: true + type: boolean + required: + - enabled + type: object + type: object + status: + description: Dash0OperatorConfigurationStatus defines the observed state + of the Dash0 operator configuration resource. + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current state of this resource." + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml b/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml index f2960d7e..c737fa05 100644 --- a/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml +++ b/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml @@ -61,7 +61,7 @@ spec: labels: app.kubernetes.io/name: dash0-operator app.kubernetes.io/component: controller - dash0.cert-digest: {{ $certFingerprint }} + dash0.com/cert-digest: {{ $certFingerprint }} {{- if .Values.operator.podLabels }} {{- include "dash0-operator.podLabels" . | nindent 8 }} {{- end }} @@ -196,7 +196,7 @@ spec: selector: app.kubernetes.io/name: dash0-operator app.kubernetes.io/component: controller - dash0.cert-digest: {{ $certFingerprint }} + dash0.com/cert-digest: {{ $certFingerprint }} --- apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration @@ -285,4 +285,4 @@ spec: selector: app.kubernetes.io/name: dash0-operator app.kubernetes.io/component: controller - dash0.cert-digest: {{ $certFingerprint }} + dash0.com/cert-digest: {{ $certFingerprint }} diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap index 754f1506..6aa9bd9d 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap @@ -87,6 +87,33 @@ cluster roles should match snapshot: - get - patch - update + - apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch + - apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations/finalizers + verbs: + - update + - apiGroups: + - operator.dash0.com + resources: + - dash0operatorconfigurations/status + verbs: + - get + - patch + - update - apiGroups: - "" resources: diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-dash0_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-monitoring_test.yaml.snap similarity index 99% rename from helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-dash0_test.yaml.snap rename to helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-monitoring_test.yaml.snap index 7f466eb3..28819c79 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-dash0_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-monitoring_test.yaml.snap @@ -18,7 +18,7 @@ custom resource definition should match snapshot: - name: v1alpha1 schema: openAPIV3Schema: - description: Dash0Monitoring is the Schema for the Dash0Monitoring API + description: Dash0Monitoring is the schema for the Dash0Monitoring API properties: apiVersion: description: |- @@ -48,7 +48,6 @@ custom 
resource definition should match snapshot: This can either be Dash0 or another OTLP-compatible backend. You can also combine up to three exporters (i.e. Dash0 plus gRPC plus HTTP). This allows sending the same data to two or three targets simultaneously. At least one exporter has to be defined. - maxProperties: 3 minProperties: 1 properties: dash0: @@ -200,7 +199,7 @@ custom resource definition should match snapshot: - export type: object status: - description: Dash0MonitoringStatus defines the observed state of the Dash0 monitoring resource. + description: Dash0MonitoringStatus defines the observed state of the Dash0Monitoring monitoring resource. properties: conditions: items: diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap new file mode 100644 index 00000000..eacd0707 --- /dev/null +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap @@ -0,0 +1,239 @@ +custom resource definition should match snapshot: + 1: | + apiVersion: apiextensions.k8s.io/v1 + kind: CustomResourceDefinition + metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: dash0operatorconfigurations.operator.dash0.com + spec: + group: operator.dash0.com + names: + kind: Dash0OperatorConfiguration + listKind: Dash0OperatorConfigurationList + plural: dash0operatorconfigurations + singular: dash0operatorconfiguration + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Dash0OperatorConfiguration is the schema for the Dash0OperatorConfiguration API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Dash0OperatorConfigurationSpec describes cluster-wide configuration settings for the Dash0 Kubernetes operator. + properties: + export: + description: |- + The configuration of the default observability backend to which telemetry data will be sent by the operator, as + well as the backend that will receive the operator's self-monitoring data. This property is mandatory. + This can either be Dash0 or another OTLP-compatible backend. You can also combine up to three exporters (i.e. + Dash0 plus gRPC plus HTTP). This allows sending the same data to two or three targets simultaneously. At least + one exporter has to be defined. + + + Please note that self-monitoring data is only sent to one backend, with Dash0 taking precedence over gRPC and + HTTP, and gRPC taking precedence over HTTP if multiple exports are defined. Furthermore, HTTP export with JSON + encoding is not supported for self-monitoring telemetry. 
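The precedence rule spelled out in the export description above (Dash0 over gRPC, gRPC over HTTP, no http/json for self-monitoring) can be read as a small selection function. The following sketch is illustrative only; the Export, Dash0Export, GrpcExport, and HttpExport types and their fields are assumptions made for the example, not the operator's actual Go API:

package example

import "errors"

// Assumed stand-ins for the operator's export settings; names are hypothetical.
type Dash0Export struct{ Endpoint string }
type GrpcExport struct{ Endpoint string }
type HttpExport struct {
	Endpoint string
	Encoding string // "proto" or "json"
}

type Export struct {
	Dash0 *Dash0Export
	Grpc  *GrpcExport
	Http  *HttpExport
}

// selectSelfMonitoringEndpoint picks the single backend that receives
// self-monitoring telemetry, following the documented precedence:
// Dash0 wins over gRPC, gRPC wins over HTTP, and HTTP with JSON
// encoding is rejected.
func selectSelfMonitoringEndpoint(export Export) (string, error) {
	switch {
	case export.Dash0 != nil:
		return export.Dash0.Endpoint, nil
	case export.Grpc != nil:
		return export.Grpc.Endpoint, nil
	case export.Http != nil:
		if export.Http.Encoding == "json" {
			return "", errors.New("http/json encoding is not supported for self-monitoring telemetry")
		}
		return export.Http.Endpoint, nil
	default:
		return "", errors.New("no exporter is configured")
	}
}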
+ minProperties: 1 + properties: + dash0: + description: The configuration of the Dash0 ingress endpoint to which telemetry data will be sent. + properties: + authorization: + description: Mandatory authorization settings for sending data to Dash0. + maxProperties: 1 + minProperties: 1 + properties: + secretRef: + description: |- + A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is + ignored if the token property is set. The authorization token for your Dash0 organization can be copied from + https://app.dash0.com/settings. + properties: + key: + default: token + description: The key of the value which contains the Dash0 authorization token. Defaults to "token" + type: string + name: + default: dash0-authorization-secret + description: The name of the secret containing the Dash0 authorization token. Defaults to "dash0-authorization-secret". + type: string + required: + - key + - name + type: object + token: + description: |- + The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has + to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization + token for your Dash0 organization can be copied from https://app.dash0.com/settings. + type: string + type: object + dataset: + default: default + description: |- + The name of the Dash0 dataset to which telemetry data will be sent. This property is optional. If omitted, the + dataset "default" will be used. + type: string + endpoint: + description: |- + The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value + needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom + https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in + `dash0.com:4317`. + type: string + required: + - authorization + - endpoint + type: object + grpc: + description: The settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver via gRPC. + properties: + endpoint: + description: The URL of the OTLP-compatible receiver to which telemetry data will be sent. This property is mandatory. + type: string + headers: + description: Additional headers to be sent with each gRPC request, for example for authorization. This property is optional. + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + required: + - endpoint + type: object + http: + description: The settings for an exporter to send telemetry to an arbitrary OTLP-compatible receiver via HTTP. + properties: + encoding: + default: proto + description: The encoding of the OTLP data when sent via HTTP. Can be either proto or json, defaults to proto. + enum: + - proto + - json + type: string + endpoint: + description: The URL of the OTLP-compatible receiver to which telemetry data will be sent. This property is mandatory. + type: string + headers: + description: Additional headers to be sent with each HTTP request, for example for authorization. This property is optional. 
+ items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + required: + - endpoint + type: object + type: object + selfMonitoring: + default: + enabled: true + description: Global opt-out for self-monitoring for this operator + properties: + enabled: + default: true + type: boolean + required: + - enabled + type: object + type: object + status: + description: Dash0OperatorConfigurationStatus defines the observed state of the Dash0 operator configuration resource. + properties: + conditions: + items: + description: Condition contains details for one aspect of the current state of this resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap index 1dfcd095..9d25a089 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap @@ -27,7 +27,7 @@ deployment should match snapshot (default values): labels: app.kubernetes.io/component: controller app.kubernetes.io/name: dash0-operator - dash0.cert-digest: dJTiBDRVJUSUZJQ + dash0.com/cert-digest: dJTiBDRVJUSUZJQ spec: automountServiceAccountToken: true containers: @@ -154,4 +154,4 @@ injector service should match snapshot (default settings): selector: app.kubernetes.io/component: controller app.kubernetes.io/name: dash0-operator - dash0.cert-digest: dJTiBDRVJUSUZJQ + dash0.com/cert-digest: dJTiBDRVJUSUZJQ diff --git a/helm-chart/dash0-operator/tests/operator/custom-resource-definition-dash0_test.yaml b/helm-chart/dash0-operator/tests/operator/custom-resource-definition-monitoring_test.yaml similarity index 73% rename from helm-chart/dash0-operator/tests/operator/custom-resource-definition-dash0_test.yaml rename to helm-chart/dash0-operator/tests/operator/custom-resource-definition-monitoring_test.yaml index 4c011515..8e7ea60f 100644 --- a/helm-chart/dash0-operator/tests/operator/custom-resource-definition-dash0_test.yaml +++ b/helm-chart/dash0-operator/tests/operator/custom-resource-definition-monitoring_test.yaml @@ -1,6 +1,6 @@ suite: test custom resource definition templates: - - operator/custom-resource-definition-dash0.yaml + - operator/custom-resource-definition-monitoring.yaml tests: - it: custom resource definition should match snapshot asserts: diff --git a/helm-chart/dash0-operator/tests/operator/custom-resource-definition-operator-configuration_test.yaml b/helm-chart/dash0-operator/tests/operator/custom-resource-definition-operator-configuration_test.yaml new file mode 100644 index 00000000..d26f0de4 --- /dev/null +++ b/helm-chart/dash0-operator/tests/operator/custom-resource-definition-operator-configuration_test.yaml @@ -0,0 +1,7 @@ +suite: test custom resource definition +templates: + - operator/custom-resource-definition-operator-configuration.yaml +tests: + - it: custom resource definition should match snapshot + asserts: + - matchSnapshot: {} \ No newline at end of file diff --git a/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml b/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml index ff97cec2..14415dc8 100644 --- a/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml +++ b/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml @@ -242,13 +242,13 @@ tests: path: spec.template.spec.containers[0].resources.requests.memory value: 32Mi - - it: should render the "dash0.cert-digest" label + - it: should render the "dash0.com/cert-digest" label documentSelector: path: metadata.name value: dash0-operator-controller asserts: - 
isNotNullOrEmpty: - path: spec.template.metadata.labels["dash0.cert-digest"] + path: spec.template.metadata.labels["dash0.com/cert-digest"] - it: deployment should support referencing images by digest instead of tag documentSelector: @@ -326,11 +326,11 @@ tests: - isNotNullOrEmpty: path: webhooks[0].clientConfig.caBundle - - it: injector service should render the "dash0.cert-digest" label + - it: injector service should render the "dash0.com/cert-digest" label documentIndex: 4 asserts: - isNotNullOrEmpty: - path: spec.selector["dash0.cert-digest"] + path: spec.selector["dash0.com/cert-digest"] - it: injector service should match snapshot (default settings) documentIndex: 4 @@ -344,7 +344,7 @@ tests: metricsPort: 9554 asserts: - isNotNullOrEmpty: - path: spec.selector["dash0.cert-digest"] + path: spec.selector["dash0.com/cert-digest"] - equal: path: spec.ports[0].port value: 9554 diff --git a/images/configreloader/Dockerfile b/images/configreloader/Dockerfile index bb554a67..1414a1e2 100644 --- a/images/configreloader/Dockerfile +++ b/images/configreloader/Dockerfile @@ -3,7 +3,11 @@ COPY ./src /usr/local/go/src/configreloader WORKDIR /usr/local/go/src/configreloader RUN CGO_ENABLED=0 go build -ldflags '-extldflags "-static"' configreloader +FROM alpine:3.20.2 AS certs +RUN apk --update add ca-certificates && apk cache clean + FROM scratch COPY --from=builder /usr/local/go/src/configreloader/configreloader /app/configreloader +COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt USER 65532:65532 ENTRYPOINT [ "/app/configreloader" ] \ No newline at end of file diff --git a/images/filelogoffsetsynch/Dockerfile b/images/filelogoffsetsynch/Dockerfile index eb6b40ef..fdb2f138 100644 --- a/images/filelogoffsetsynch/Dockerfile +++ b/images/filelogoffsetsynch/Dockerfile @@ -3,7 +3,11 @@ COPY ./src /usr/local/go/src/filelogoffsetsynch WORKDIR /usr/local/go/src/filelogoffsetsynch RUN CGO_ENABLED=0 go build -ldflags '-extldflags "-static"' filelogoffsetsynch +FROM alpine:3.20.2 AS certs +RUN apk --update add ca-certificates && apk cache clean + FROM scratch COPY --from=builder /usr/local/go/src/filelogoffsetsynch/filelogoffsetsynch /app/filelogoffsetsynch +COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt USER 65532:65532 ENTRYPOINT [ "/app/filelogoffsetsynch" ] \ No newline at end of file diff --git a/images/filelogoffsetsynch/src/filelogoffsetsynch.go b/images/filelogoffsetsynch/src/filelogoffsetsynch.go index ebd26aa7..128add17 100644 --- a/images/filelogoffsetsynch/src/filelogoffsetsynch.go +++ b/images/filelogoffsetsynch/src/filelogoffsetsynch.go @@ -21,24 +21,57 @@ import ( "syscall" "time" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + otelmetric "go.opentelemetry.io/otel/metric" + metricnoop "go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.17.0" + oteltrace "go.opentelemetry.io/otel/trace" + tracenoop "go.opentelemetry.io/otel/trace/noop" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" ) 
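A note on the pattern introduced in the hunk below: when OTEL_EXPORTER_OTLP_ENDPOINT is not set, the process falls back to no-op meter and tracer providers instead of leaving the globals nil, so none of the instrumentation call sites have to branch on whether self-monitoring is enabled. A minimal, metrics-only sketch of that fallback, assuming only the public OTel Go SDK packages (the actual hunk additionally wires up traces and a resource):

package example

import (
	"context"
	"log"
	"os"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
	metricnoop "go.opentelemetry.io/otel/metric/noop"
	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)

// setUpMetrics installs either a real OTLP-backed meter provider or a
// no-op one, and returns the matching shutdown function.
func setUpMetrics(ctx context.Context) func(context.Context) error {
	if _, isSet := os.LookupEnv("OTEL_EXPORTER_OTLP_ENDPOINT"); !isSet {
		// No backend configured: meters created from the no-op provider are
		// valid objects whose recordings are simply discarded.
		otel.SetMeterProvider(metricnoop.MeterProvider{})
		return func(context.Context) error { return nil }
	}
	// Endpoint, headers, etc. are picked up from the standard OTEL_* env vars.
	exporter, err := otlpmetrichttp.New(ctx)
	if err != nil {
		log.Fatalf("cannot create the OTLP metrics exporter: %v", err)
	}
	provider := sdkmetric.NewMeterProvider(
		sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)),
	)
	otel.SetMeterProvider(provider)
	return provider.Shutdown
}

Because recording into a meter obtained from the no-op provider is a cheap no-op, the synch code further down can call updateCountMeter.Add and friends unconditionally.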
-type settings struct {
+type Settings struct {
 	Clientset          *kubernetes.Clientset
 	NodeName           string
 	ConfigMapNamespace string
 	ConfigMapName      string
-	FilelogOffsetDirectoryPath string
+	FileLogOffsetDirectoryPath string
 }
 
 type patch struct {
 	BinaryData map[string]string `json:"binaryData,omitempty"`
 }
 
+const (
+	metricNamePrefix = "dash0operator.filelogoffsetsynch"
+)
+
+var (
+	currentValue string
+
+	tracerProvider oteltrace.TracerProvider
+	tracer         = otel.Tracer("dash0.com/operator/filelogoffsetsynch")
+
+	meterProvider              otelmetric.MeterProvider
+	metricNameCompressedSize   = fmt.Sprintf("%s.%s", metricNamePrefix, "update.compressed_size")
+	offsetFileSize             otelmetric.Int64Gauge
+	metricNameUpdateCounter    = fmt.Sprintf("%s.%s", metricNamePrefix, "update.counter")
+	updateCountMeter           otelmetric.Int64Counter
+	metricNameUpdateDuration   = fmt.Sprintf("%s.%s", metricNamePrefix, "update.duration")
+	updateDurationSecondsMeter otelmetric.Float64Histogram
+)
+
 // TODO Add support for sending_queue on separate exporter
 // TODO Set up compaction
 // TODO Set up metrics & logs
@@ -64,20 +97,121 @@ func main() {
 		log.Fatalln("Required env var 'K8S_CONFIGMAP_NAME' is not set")
 	}
 
+	podUid, isSet := os.LookupEnv("K8S_POD_UID")
+	if !isSet {
+		log.Println("Env var 'K8S_POD_UID' is not set")
+	}
+
 	nodeName, isSet := os.LookupEnv("K8S_NODE_NAME")
 	if !isSet {
-		log.Fatalln("Required env var 'K8S_NODE_NAME' is not set")
+		log.Println("Env var 'K8S_NODE_NAME' is not set")
 	}
 
-	filelogOffsetDirectoryPath, isSet := os.LookupEnv("FILELOG_OFFSET_DIRECTORY_PATH")
+	fileLogOffsetDirectoryPath, isSet := os.LookupEnv("FILELOG_OFFSET_DIRECTORY_PATH")
 	if !isSet {
 		log.Fatalln("Required env var 'FILELOG_OFFSET_DIRECTORY_PATH' is not set")
 	}
 
+	ctx := context.Background()
+
 	// creates the in-cluster config
 	config, err := rest.InClusterConfig()
 	if err != nil {
-		log.Fatalf("Cannot create the Kube API client: %v\n", err)
+		log.Fatalf("Cannot create the Kube API client: %v", err)
+	}
+
+	var doMeterShutdown func(ctx context.Context) error
+	var doTracerShutdown func(ctx context.Context) error
+
+	if _, isSet = os.LookupEnv("OTEL_EXPORTER_OTLP_ENDPOINT"); isSet {
+		var metricExporter metric.Exporter
+		var spanExporter trace.SpanExporter
+
+		protocol, isProtocolSet := os.LookupEnv("OTEL_EXPORTER_OTLP_PROTOCOL")
+		if !isProtocolSet {
+			// http/protobuf is the default transport protocol, see spec:
+			// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md
+			protocol = "http/protobuf"
+		}
+
+		switch protocol {
+		case "grpc":
+			if metricExporter, err = otlpmetricgrpc.New(ctx); err != nil {
+				log.Fatalf("Cannot create the OTLP gRPC metrics exporter: %v", err)
+			}
+			if spanExporter, err = otlptracegrpc.New(ctx); err != nil {
+				log.Fatalf("Cannot create the OTLP gRPC span exporter: %v", err)
+			}
+		case "http/protobuf":
+			if metricExporter, err = otlpmetrichttp.New(ctx); err != nil {
+				log.Fatalf("Cannot create the OTLP HTTP metrics exporter: %v", err)
+			}
+			if spanExporter, err = otlptracehttp.New(ctx); err != nil {
+				log.Fatalf("Cannot create the OTLP HTTP span exporter: %v", err)
+			}
+		case "http/json":
+			log.Fatalf("Cannot create the OTLP HTTP exporter: the protocol 'http/json' is currently unsupported")
+		default:
+			log.Fatalf("Unexpected OTLP protocol set as value of the 'OTEL_EXPORTER_OTLP_PROTOCOL' environment variable: %v", protocol)
+		}
+
+		r, err := resource.New(ctx,
+			resource.WithAttributes(
+				semconv.K8SPodUID(podUid),
+				semconv.K8SNodeName(nodeName),
+			),
+		)
+		if err != nil {
+			log.Fatalf("Cannot set up the OTLP resource: %v", err)
+		}
+
+		sdkMeterProvider := metric.NewMeterProvider(
+			metric.WithResource(r),
+			metric.WithReader(metric.NewPeriodicReader(metricExporter, metric.WithTimeout(1*time.Second))),
+		)
+		sdkTracerProvider := trace.NewTracerProvider(
+			trace.WithResource(r),
+			trace.WithBatcher(spanExporter, trace.WithBatchTimeout(time.Second)),
+		)
+
+		meterProvider = sdkMeterProvider
+		doMeterShutdown = sdkMeterProvider.Shutdown
+		tracerProvider = sdkTracerProvider
+		doTracerShutdown = sdkTracerProvider.Shutdown
+	} else {
+		meterProvider = metricnoop.MeterProvider{}
+		doMeterShutdown = func(ctx context.Context) error { return nil }
+		tracerProvider = tracenoop.TracerProvider{}
+		doTracerShutdown = func(ctx context.Context) error { return nil }
+	}
+
+	otel.SetMeterProvider(meterProvider)
+	otel.SetTracerProvider(tracerProvider)
+
+	meter := meterProvider.Meter("dash0.operator.filelog_offset_synch")
+
+	if offsetFileSize, err = meter.Int64Gauge(
+		metricNameCompressedSize,
+		otelmetric.WithUnit("By"),
+		otelmetric.WithDescription("The size of the compressed offset file"),
+	); err != nil {
+		log.Fatalf("Cannot set up the OTLP meter for the offset file size gauge: %v", err)
+	}
+
+	if updateCountMeter, err = meter.Int64Counter(
+		metricNameUpdateCounter,
+		otelmetric.WithUnit("1"),
+		otelmetric.WithDescription("Counter of how many times the synch process for filelog offsets occurs, and how many times it succeeds"),
+	); err != nil {
+		log.Fatalf("Cannot set up the OTLP meter for the synch counter: %v", err)
+	}
+
+	if updateDurationSecondsMeter, err = meter.Float64Histogram(
+		metricNameUpdateDuration,
+		otelmetric.WithUnit("s"),
+		otelmetric.WithDescription("Histogram of how long it takes for the synch process for filelog offsets to complete"),
+	); err != nil {
+		log.Fatalf("Cannot set up the OTLP meter for the synch duration histogram: %v", err)
 	}
 
 	// creates the clientset
@@ -86,31 +220,45 @@ func main() {
 		log.Fatalf("Cannot create the Kube API client: %v\n", err)
 	}
 
-	settings := &settings{
+	settings := &Settings{
 		Clientset:          clientset,
 		NodeName:           nodeName,
 		ConfigMapNamespace: configMapNamespace,
 		ConfigMapName:      configMapName,
-		FilelogOffsetDirectoryPath: filelogOffsetDirectoryPath,
+		FileLogOffsetDirectoryPath: fileLogOffsetDirectoryPath,
 	}
 
 	switch *mode {
 	case "init":
-		if restoredFiles, err := initOffsets(settings); err != nil {
+		if restoredFiles, err := initOffsets(ctx, settings); err != nil {
 			log.Fatalf("No offset files restored: %v\n", err)
 		} else if restoredFiles == 0 {
 			log.Println("No offset files restored")
 		}
 	case "synch":
-		if err := synchOffsets(settings); err != nil {
+		if err := synchOffsets(ctx, settings); err != nil {
 			log.Fatalf("An error occurred while synching file offsets to configmap: %v\n", err)
 		}
 	}
+
+	if meterProvider != nil {
+		timeoutCtx, cancelFun := context.WithTimeout(ctx, time.Second)
+		if err = doMeterShutdown(timeoutCtx); err != nil {
+			log.Printf("Failed to shut down the meter provider, metrics data may have been lost: %v\n", err)
+		}
+		cancelFun()
+	}
+	if tracerProvider != nil {
+		timeoutCtx, cancelFun := context.WithTimeout(ctx, time.Second)
+		if err = doTracerShutdown(timeoutCtx); err != nil {
+			log.Printf("Failed to shut down the tracer provider, trace data may have been lost: %v\n", err)
+		}
+		cancelFun()
+	}
 }
 
-func initOffsets(settings *settings) (int, error) {
-	configMap, err := settings.Clientset.CoreV1().ConfigMaps(settings.ConfigMapNamespace).Get(context.Background(), settings.ConfigMapName, metav1.GetOptions{})
+func 
initOffsets(ctx context.Context, settings *Settings) (int, error) {
+	configMap, err := settings.Clientset.CoreV1().ConfigMaps(settings.ConfigMapNamespace).Get(ctx, settings.ConfigMapName, metav1.GetOptions{})
 	if err != nil {
 		return 0, fmt.Errorf("cannot retrieve %v/%v config map: %w", settings.ConfigMapNamespace, settings.ConfigMapName, err)
 	}
@@ -193,27 +341,26 @@ func restoreFile(tr *tar.Reader) (IsArchiveOver, HasRestoredFileFromArchive, err
 	}
 }
 
-func synchOffsets(settings *settings) error {
+func synchOffsets(ctx context.Context, settings *Settings) error {
 	ticker := time.NewTicker(5 * time.Second)
 	shutdown := make(chan os.Signal, 1)
 	done := make(chan bool, 1)
 	signal.Notify(shutdown, syscall.SIGTERM)
 	go func() {
-		var currentValue string
 		for {
 			select {
 			case <-ticker.C:
-				if newValue, err := doSynchOffsets(settings, currentValue); err != nil {
+				if err := doSynchOffsetsAndMeasure(ctx, settings); err != nil {
 					log.Printf("Cannot update offset files: %v\n", err)
-				} else if len(newValue) > 0 {
-					currentValue = newValue
 				}
 			case <-shutdown:
 				ticker.Stop()
-				if _, err := doSynchOffsets(settings, currentValue); err != nil {
+
+				if err := doSynchOffsetsAndMeasure(ctx, settings); err != nil {
 					log.Printf("Cannot update offset files on shutdown: %v\n", err)
 				}
+
 				done <- true
 			}
 		}
@@ -224,30 +371,65 @@
 	return nil
 }
 
-func doSynchOffsets(settings *settings, currentValue string) (string, error) {
+type OffsetSizeBytes int
+type IsOffsetUpdated bool
+
+func doSynchOffsetsAndMeasure(ctx context.Context, settings *Settings) error {
+	ctx, span := tracer.Start(ctx, "synch-offsets")
+	defer span.End()
+
+	start := time.Now()
+
+	offsetUpdated, offsetUpdateSize, err := doSynchOffsets(settings)
+
+	elapsed := time.Since(start)
+	span.SetAttributes(attribute.Int("elapsed", int(elapsed.Milliseconds())))
+
+	attributes := []attribute.KeyValue{}
+	if err != nil {
+		span.RecordError(err)
+		log.Printf("Cannot update offset files: %v\n", err)
+
+		attributes = append(attributes,
+			attribute.String("error.type", "CannotUpdateOffsetFiles"),
+			attribute.String("error.message", err.Error()),
+		)
+		updateCountMeter.Add(ctx, 1, otelmetric.WithAttributes(attributes...))
+	} else if offsetUpdated {
+		updateCountMeter.Add(ctx, 1, otelmetric.WithAttributes(
+			attributes...,
+		))
+		offsetFileSize.Record(ctx, int64(offsetUpdateSize))
+		updateDurationSecondsMeter.Record(ctx, elapsed.Seconds(), otelmetric.WithAttributes(attributes...))
+	}
+
+	return err
+}
+
+func doSynchOffsets(settings *Settings) (IsOffsetUpdated, OffsetSizeBytes, error) {
 	var buf bytes.Buffer
 
 	// Compress folder to tar, store bytes in configmap
-	tarredFiles, err := tarFolder(settings.FilelogOffsetDirectoryPath, &buf)
+	tarredFiles, err := tarFolder(settings.FileLogOffsetDirectoryPath, &buf)
 	if err != nil {
-		return "", fmt.Errorf("cannot prepare offset files for storing: %w", err)
+		return false, -1, fmt.Errorf("cannot prepare offset files for storing: %w", err)
 	}
 
 	if tarredFiles == 0 {
-		return "", nil
+		return false, -1, nil
 	}
 
 	newValue := base64.StdEncoding.EncodeToString(buf.Bytes())
 	if newValue == currentValue {
-		return currentValue, nil
+		return false, -1, nil
 	}
 
 	if err := patchConfigMap(settings.Clientset, settings.NodeName, settings.ConfigMapNamespace, settings.ConfigMapName, newValue); err != nil {
-		return "", fmt.Errorf("cannot store offset files in configmap %v/%v: %w", settings.ConfigMapNamespace, settings.ConfigMapName, err)
+		return false, -1, fmt.Errorf("cannot store offset files in configmap %v/%v: %w", settings.ConfigMapNamespace, settings.ConfigMapName, err)
 	}
 
-	return newValue, nil
+	currentValue = newValue
+	return true, OffsetSizeBytes(len(buf.Bytes())), nil
 }
 
 func patchConfigMap(clientset *kubernetes.Clientset, nodeName string, configMapNamespace string, configMapName string, newValueBase64 string) error {
@@ -326,11 +508,14 @@ func tarFile(writer *tar.Writer, path string, info os.FileInfo) (HasAddedFileToA
 	if err != nil {
 		return false, err
 	}
-	defer file.Close()
 
 	if _, err := io.Copy(writer, file); err != nil {
 		return false, err
 	}
 
+	if err := file.Close(); err != nil {
+		return true, err
+	}
+
 	return true, nil
 }
diff --git a/images/filelogoffsetsynch/src/go.mod b/images/filelogoffsetsynch/src/go.mod
index 5656edea..fdc0a6a5 100644
--- a/images/filelogoffsetsynch/src/go.mod
+++ b/images/filelogoffsetsynch/src/go.mod
@@ -3,15 +3,23 @@ module github.com/dash0hq/dash0-operator/images/filelogoffsetsynch
 go 1.22.4
 
 require (
+	go.opentelemetry.io/otel v1.29.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0
+	go.opentelemetry.io/otel/metric v1.29.0
+	go.opentelemetry.io/otel/sdk v1.29.0
+	go.opentelemetry.io/otel/sdk/metric v1.29.0
 	k8s.io/apimachinery v0.31.0
 	k8s.io/client-go v0.31.0
 )
 
 require (
+	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/emicklei/go-restful/v3 v3.12.1 // indirect
 	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-openapi/jsonpointer v0.21.0 // indirect
 	github.com/go-openapi/jsonreference v0.21.0 // indirect
 	github.com/go-openapi/swag v0.23.0 // indirect
@@ -21,6 +29,7 @@ require (
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
@@ -28,12 +37,20 @@ require (
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0 // indirect
+	go.opentelemetry.io/otel/trace v1.29.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
 	golang.org/x/net v0.28.0 // indirect
 	golang.org/x/oauth2 v0.22.0 // indirect
 	golang.org/x/sys v0.24.0 // indirect
 	golang.org/x/term v0.23.0 // indirect
 	golang.org/x/text v0.17.0 // indirect
 	golang.org/x/time v0.6.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd // indirect
+	google.golang.org/grpc v1.65.0 // indirect
 	google.golang.org/protobuf v1.34.2 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
diff --git a/images/filelogoffsetsynch/src/go.sum b/images/filelogoffsetsynch/src/go.sum
index 90f836f2..17b7fe90 100644
--- a/images/filelogoffsetsynch/src/go.sum
+++ b/images/filelogoffsetsynch/src/go.sum
@@ -1,3 +1,5 @@
+github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
+github.com/cenkalti/backoff/v4 
v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -6,8 +8,11 @@ github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtz github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= @@ -32,6 +37,10 @@ github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQu github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -70,6 +79,38 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= +go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= +go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0 h1:U2guen0GhqH8o/G2un8f/aG/y++OuW6MyCo6hT9prXk= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0/go.mod h1:yeGZANgEcpdx/WK0IvvRFC+2oLiMS2u4L/0Rj2M2Qr0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0 h1:aLmmtjRke7LPDQ3lvpFz+kNEH43faFhzW7v8BFIEydg= 
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0/go.mod h1:TC1pyCt6G9Sjb4bQpShH+P5R53pO6ZuGnHuuln9xMeE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0 h1:nSiV3s7wiCam610XcLbYOmMfJxB9gO4uK3Xgv5gmTgg= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0/go.mod h1:hKn/e/Nmd19/x1gvIHwtOwVWM+VhuITSWip3JUDghj0= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0 h1:JAv0Jwtl01UFiyWZEMiJZBiTlv5A50zNs8lsthXqIio= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0/go.mod h1:QNKLmUEAq2QUbPQUfvw4fmv0bgbK7UlOSFCnXyfvSNc= +go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= +go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= +go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= +go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= +go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= +go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo= +go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok= +go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnCQArXCKlg08= +go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg= +go.opentelemetry.io/otel/sdk/metric v1.29.0 h1:K2CfmJohnRgvZ9UAj2/FhIf/okdWcNdBwe1m8xFXiSY= +go.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ= +go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= +go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= +go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -111,6 +152,18 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/api v0.0.0-20240701130421-f6361c86f094 h1:0+ozOGcrp+Y8Aq8TLNN2Aliibms5LEzsq99ZZmAGYm0= +google.golang.org/genproto/googleapis/api v0.0.0-20240701130421-f6361c86f094/go.mod h1:fJ/e3If/Q67Mj99hin0hMhiNyCRmt6BQ2aWIJshUSJw= 
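The go.mod and go.sum entries above add the OTel Go SDK plus the OTLP metric and trace exporters to the filelogoffsetsynch module. For orientation, a minimal sketch of the kind of meter provider bootstrap these dependencies enable; the function name and the 30-second export interval are illustrative assumptions, not the module's actual wiring:

    package main

    import (
    	"context"
    	"time"

    	"go.opentelemetry.io/otel"
    	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
    	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
    )

    // setUpSelfMonitoringMetrics creates an OTLP/gRPC metric exporter (configured
    // via the standard OTEL_EXPORTER_OTLP_* environment variables) and installs a
    // periodically exporting meter provider as the global one.
    func setUpSelfMonitoringMetrics(ctx context.Context) (*sdkmetric.MeterProvider, error) {
    	exporter, err := otlpmetricgrpc.New(ctx)
    	if err != nil {
    		return nil, err
    	}
    	meterProvider := sdkmetric.NewMeterProvider(
    		sdkmetric.WithReader(
    			sdkmetric.NewPeriodicReader(exporter, sdkmetric.WithInterval(30*time.Second)),
    		),
    	)
    	otel.SetMeterProvider(meterProvider)
    	return meterProvider, nil
    }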
+google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd h1:BBOTEWLuuEGQy9n1y9MhVJ9Qt0BDu21X8qZs71/uPZo= +google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:fO8wJzT2zbQbAjbIoos1285VfEIYKDDY+Dt+WpTkh6g= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd h1:6TEm2ZxXoQmFWFlt1vNxvVOa1Q0dXFQD1m/rYjXmS0E= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= +google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/backendconnection/backendconnection_manager.go b/internal/backendconnection/backendconnection_manager.go index 11520a8f..0f47b508 100644 --- a/internal/backendconnection/backendconnection_manager.go +++ b/internal/backendconnection/backendconnection_manager.go @@ -13,6 +13,7 @@ import ( dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" "github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" "github.com/dash0hq/dash0-operator/internal/dash0/util" ) @@ -26,11 +27,12 @@ const ( failedToCreateMsg = "failed to create the OpenTelemetry collector instance, no telemetry will be reported to Dash0" ) -func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( +func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx context.Context, images util.Images, operatorNamespace string, - dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring, + monitoringResource *dash0v1alpha1.Dash0Monitoring, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, ) error { logger := log.FromContext(ctx) @@ -39,7 +41,8 @@ func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInDash0 ctx, operatorNamespace, images, - dash0MonitoringResource, + monitoringResource, + selfMonitoringConfiguration, &logger, ) @@ -56,11 +59,12 @@ func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInDash0 return nil } -func (m *BackendConnectionManager) RemoveOpenTelemetryCollectorIfNoDash0MonitoringResourceIsLeft( +func (m *BackendConnectionManager) RemoveOpenTelemetryCollectorIfNoMonitoringResourceIsLeft( ctx context.Context, images util.Images, operatorNamespace string, dash0MonitoringResourceToBeDeleted *dash0v1alpha1.Dash0Monitoring, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, ) error { logger := log.FromContext(ctx) list := &dash0v1alpha1.Dash0MonitoringList{} @@ -108,6 +112,7 @@ func (m *BackendConnectionManager) RemoveOpenTelemetryCollectorIfNoDash0Monitori 
operatorNamespace, images, dash0MonitoringResourceToBeDeleted, + selfMonitoringConfiguration, &logger, ); err != nil { logger.Error(err, "Failed to delete the OpenTelemetry collector resources, requeuing reconcile request.") diff --git a/internal/backendconnection/backendconnection_manager_test.go b/internal/backendconnection/backendconnection_manager_test.go index cf994f73..9ad66c96 100644 --- a/internal/backendconnection/backendconnection_manager_test.go +++ b/internal/backendconnection/backendconnection_manager_test.go @@ -12,11 +12,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" "github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" . "github.com/dash0hq/dash0-operator/test/util" ) @@ -28,7 +29,7 @@ var ( Spec: dash0v1alpha1.Dash0MonitoringSpec{ Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, }, @@ -75,7 +76,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { Describe("when validation checks fail", func() { It("should fail if no endpoint is provided", func() { - err := manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err := manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, @@ -90,13 +91,14 @@ var _ = Describe("The backend connection manager", Ordered, func() { }, }, }, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).To(HaveOccurred()) VerifyCollectorResourcesDoNotExist(ctx, k8sClient, operatorNamespace) }) It("should fail if neither authorization token nor secret ref are provided for Dash0 exporter", func() { - err := manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err := manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, @@ -104,12 +106,14 @@ var _ = Describe("The backend connection manager", Ordered, func() { Spec: dash0v1alpha1.Dash0MonitoringSpec{ Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Authorization: dash0v1alpha1.Authorization{}, }, }, }, - }) + }, + selfmonitoring.SelfMonitoringConfiguration{}, + ) Expect(err).To(HaveOccurred()) VerifyCollectorResourcesDoNotExist(ctx, k8sClient, operatorNamespace) }) @@ -123,17 +127,19 @@ var _ = Describe("The backend connection manager", Ordered, func() { operatorNamespace, TestImages, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, &logger, ) Expect(err).ToNot(HaveOccurred()) }) It("should create all resources", func() { - err := manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err := manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) @@ -159,11 +165,12 @@ var _ = Describe("The backend connection manager", Ordered, func() { }) Expect(err).ToNot(HaveOccurred()) - err = 
manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err = manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) @@ -193,20 +200,22 @@ var _ = Describe("The backend connection manager", Ordered, func() { createdObjects = append(createdObjects, thirdDash0MonitoringResource) // Let the manager create the collector so there is something to delete. - err := manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err := manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, secondDash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) - err = manager.RemoveOpenTelemetryCollectorIfNoDash0MonitoringResourceIsLeft( + err = manager.RemoveOpenTelemetryCollectorIfNoMonitoringResourceIsLeft( ctx, TestImages, operatorNamespace, secondDash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) // since other Dash0 monitoring resources still exist, the collector resources should not be deleted @@ -219,16 +228,17 @@ var _ = Describe("The backend connection manager", Ordered, func() { createdObjects = append(createdObjects, existingDash0MonitoringResource) // Let the manager create the collector so there is something to delete. - err := manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err := manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, existingDash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) - err = manager.RemoveOpenTelemetryCollectorIfNoDash0MonitoringResourceIsLeft( + err = manager.RemoveOpenTelemetryCollectorIfNoMonitoringResourceIsLeft( ctx, TestImages, operatorNamespace, @@ -244,7 +254,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { Spec: dash0v1alpha1.Dash0MonitoringSpec{ Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, }, @@ -252,6 +262,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { }, }, }, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) @@ -264,20 +275,22 @@ var _ = Describe("The backend connection manager", Ordered, func() { createdObjects = append(createdObjects, dash0MonitoringResource) // Let the manager create the collector so there is something to delete. 
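+ // Note: the zero value selfmonitoring.SelfMonitoringConfiguration{} passed below means self-monitoring is disabled; these tests only exercise collector creation and removal.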
- err := manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err := manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) - err = manager.RemoveOpenTelemetryCollectorIfNoDash0MonitoringResourceIsLeft( + err = manager.RemoveOpenTelemetryCollectorIfNoMonitoringResourceIsLeft( ctx, TestImages, operatorNamespace, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) // verify the collector is deleted when the Dash0 monitoring resource provided as a parameter is the only @@ -287,20 +300,22 @@ var _ = Describe("The backend connection manager", Ordered, func() { It("should delete the collector if no Dash0 monitoring resource exists", func() { // Let the manager create the collector so there is something to delete. - err := manager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + err := manager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, TestImages, operatorNamespace, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) - err = manager.RemoveOpenTelemetryCollectorIfNoDash0MonitoringResourceIsLeft( + err = manager.RemoveOpenTelemetryCollectorIfNoMonitoringResourceIsLeft( ctx, TestImages, operatorNamespace, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, ) Expect(err).ToNot(HaveOccurred()) VerifyCollectorResourcesDoNotExist(ctx, k8sClient, operatorNamespace) diff --git a/internal/backendconnection/otelcolresources/collector_config_map.go b/internal/backendconnection/otelcolresources/collector_config_map.go index 2bdd44ad..4d0620dc 100644 --- a/internal/backendconnection/otelcolresources/collector_config_map.go +++ b/internal/backendconnection/otelcolresources/collector_config_map.go @@ -13,9 +13,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/util" ) -type otlpExporter struct { +type OtlpExporter struct { Name string Endpoint string Headers []dash0v1alpha1.Header @@ -27,10 +28,11 @@ var ( collectorConfigurationTemplateSource string collectorConfigurationTemplate = template.Must( template.New("collector-configuration").Parse(collectorConfigurationTemplateSource)) + authHeaderValue = fmt.Sprintf("Bearer ${env:%s}", authTokenEnvVarName) ) -func collectorConfigMap(config *oTelColConfig) (*corev1.ConfigMap, error) { - exporters, err := assembleExporters(config.Export) +func assembleCollectorConfigMap(config *oTelColConfig) (*corev1.ConfigMap, error) { + exporters, err := ConvertExportSettingsToExporterList(config.Export) if err != nil { return nil, fmt.Errorf("cannot assemble the exporters for the configuration: %w", err) } @@ -66,8 +68,8 @@ func collectorConfigMap(config *oTelColConfig) (*corev1.ConfigMap, error) { }, nil } -func assembleExporters(export dash0v1alpha1.Export) ([]otlpExporter, error) { - var exporters []otlpExporter +func ConvertExportSettingsToExporterList(export dash0v1alpha1.Export) ([]OtlpExporter, error) { + var exporters []OtlpExporter if export.Dash0 == nil && export.Grpc == nil && export.Http == nil { return nil, fmt.Errorf("no exporter configuration found") @@ 
-79,16 +81,16 @@ func assembleExporters(export dash0v1alpha1.Export) ([]otlpExporter, error) { return nil, fmt.Errorf("no endpoint provided for the Dash0 exporter, unable to create the OpenTelemetry collector") } headers := []dash0v1alpha1.Header{{ - Name: "Authorization", - Value: "Bearer ${env:AUTH_TOKEN}", + Name: util.AuthorizationHeaderName, + Value: authHeaderValue, }} if d0.Dataset != "" && d0.Dataset != "default" { headers = append(headers, dash0v1alpha1.Header{ - Name: "X-Dash0-Dataset", + Name: util.Dash0DatasetHeaderName, Value: d0.Dataset, }) } - exporters = append(exporters, otlpExporter{ + exporters = append(exporters, OtlpExporter{ Name: "otlp/dash0", Endpoint: export.Dash0.Endpoint, Headers: headers, @@ -100,7 +102,7 @@ func assembleExporters(export dash0v1alpha1.Export) ([]otlpExporter, error) { if grpc.Endpoint == "" { return nil, fmt.Errorf("no endpoint provided for the gRPC exporter, unable to create the OpenTelemetry collector") } - grpcExporter := otlpExporter{ + grpcExporter := OtlpExporter{ Name: "otlp/grpc", Endpoint: grpc.Endpoint, Headers: grpc.Headers, @@ -120,7 +122,7 @@ func assembleExporters(export dash0v1alpha1.Export) ([]otlpExporter, error) { return nil, fmt.Errorf("no encoding provided for the HTTP exporter, unable to create the OpenTelemetry collector") } encoding := string(http.Encoding) - httpExporter := otlpExporter{ + httpExporter := OtlpExporter{ Name: fmt.Sprintf("otlphttp/%s", encoding), Endpoint: http.Endpoint, Encoding: encoding, diff --git a/internal/backendconnection/otelcolresources/collector_config_map_test.go b/internal/backendconnection/otelcolresources/collector_config_map_test.go index 34846772..e8c8e7ac 100644 --- a/internal/backendconnection/otelcolresources/collector_config_map_test.go +++ b/internal/backendconnection/otelcolresources/collector_config_map_test.go @@ -10,6 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/util" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -22,10 +23,14 @@ const ( HttpEndpointTest = "https://example.com:4318" ) +var ( + bearerWithAuthToken = fmt.Sprintf("Bearer ${env:%s}", authTokenEnvVarName) +) + var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { It("should fail if no exporter is configured", func() { - _, err := collectorConfigMap(&oTelColConfig{ + _, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{}, @@ -34,7 +39,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should fail to render the Dash0 exporter when no endpoint is provided", func() { - _, err := collectorConfigMap(&oTelColConfig{ + _, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -53,12 +58,12 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should render the Dash0 exporter", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, }, @@ -79,25 +84,25 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(exporter2).ToNot(BeNil()) dash0OtlpExporter := exporter2.(map[string]interface{}) Expect(dash0OtlpExporter).ToNot(BeNil()) - Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointTest)) + Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointDash0Test)) headersRaw := dash0OtlpExporter["headers"] Expect(headersRaw).ToNot(BeNil()) headers := headersRaw.(map[string]interface{}) Expect(headers).To(HaveLen(1)) - Expect(headers["Authorization"]).To(Equal("Bearer ${env:AUTH_TOKEN}")) - Expect(headers["X-Dash0-Dataset"]).To(BeNil()) + Expect(headers[util.AuthorizationHeaderName]).To(Equal(bearerWithAuthToken)) + Expect(headers[util.Dash0DatasetHeaderName]).To(BeNil()) Expect(dash0OtlpExporter["encoding"]).To(BeNil()) verifyPipelines(collectorConfig, "otlp/dash0") }) It("should render the Dash0 exporter with custom dataset", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Dataset: "custom-dataset", Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, @@ -119,20 +124,20 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(exporter2).ToNot(BeNil()) dash0OtlpExporter := exporter2.(map[string]interface{}) Expect(dash0OtlpExporter).ToNot(BeNil()) - Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointTest)) + Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointDash0Test)) headersRaw := dash0OtlpExporter["headers"] Expect(headersRaw).ToNot(BeNil()) headers := headersRaw.(map[string]interface{}) Expect(headers).To(HaveLen(2)) - Expect(headers["Authorization"]).To(Equal("Bearer ${env:AUTH_TOKEN}")) - Expect(headers["X-Dash0-Dataset"]).To(Equal("custom-dataset")) + Expect(headers[util.AuthorizationHeaderName]).To(Equal(bearerWithAuthToken)) + Expect(headers[util.Dash0DatasetHeaderName]).To(Equal("custom-dataset")) 
Expect(dash0OtlpExporter["encoding"]).To(BeNil()) verifyPipelines(collectorConfig, "otlp/dash0") }) It("should fail to render a gRPC exporter when no endpoint is provided", func() { - _, err := collectorConfigMap(&oTelColConfig{ + _, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -152,7 +157,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should render an arbitrary gRPC exporter", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -198,7 +203,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should fail to render an HTTP exporter when no endpoint is provided", func() { - _, err := collectorConfigMap(&oTelColConfig{ + _, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -218,7 +223,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should fail to render an HTTP exporter when no encoding is provided", func() { - _, err := collectorConfigMap(&oTelColConfig{ + _, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -239,7 +244,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should render an arbitrary HTTP exporter", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -286,12 +291,12 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should render the Dash0 exporter together with a gRPC exporter", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, }, @@ -319,12 +324,12 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(exporter2).ToNot(BeNil()) dash0OtlpExporter := exporter2.(map[string]interface{}) Expect(dash0OtlpExporter).ToNot(BeNil()) - Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointTest)) + Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointDash0Test)) headersRaw := dash0OtlpExporter["headers"] Expect(headersRaw).ToNot(BeNil()) headers := headersRaw.(map[string]interface{}) Expect(headers).To(HaveLen(1)) - Expect(headers["Authorization"]).To(Equal("Bearer ${env:AUTH_TOKEN}")) + Expect(headers[util.AuthorizationHeaderName]).To(Equal(bearerWithAuthToken)) Expect(dash0OtlpExporter["encoding"]).To(BeNil()) exporter3 := exporters["otlp/grpc"] @@ -342,12 +347,12 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should render the Dash0 exporter together with an HTTP exporter", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, 
Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, }, @@ -376,12 +381,12 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(exporter2).ToNot(BeNil()) dash0OtlpExporter := exporter2.(map[string]interface{}) Expect(dash0OtlpExporter).ToNot(BeNil()) - Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointTest)) + Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointDash0Test)) headersRaw := dash0OtlpExporter["headers"] Expect(headersRaw).ToNot(BeNil()) headers := headersRaw.(map[string]interface{}) Expect(headers).To(HaveLen(1)) - Expect(headers["Authorization"]).To(Equal("Bearer ${env:AUTH_TOKEN}")) + Expect(headers[util.AuthorizationHeaderName]).To(Equal(bearerWithAuthToken)) Expect(dash0OtlpExporter["encoding"]).To(BeNil()) exporter3 := exporters["otlphttp/proto"] @@ -399,7 +404,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should render a gRPC exporter together with an HTTP exporter", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -458,12 +463,12 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { }) It("should render a combination of all three exporter types", func() { - configMap, err := collectorConfigMap(&oTelColConfig{ + configMap, err := assembleCollectorConfigMap(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, }, @@ -499,12 +504,12 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(exporter2).ToNot(BeNil()) dash0OtlpExporter := exporter2.(map[string]interface{}) Expect(dash0OtlpExporter).ToNot(BeNil()) - Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointTest)) + Expect(dash0OtlpExporter["endpoint"]).To(Equal(EndpointDash0Test)) headersRaw := dash0OtlpExporter["headers"] Expect(headersRaw).ToNot(BeNil()) headers := headersRaw.(map[string]interface{}) Expect(headers).To(HaveLen(1)) - Expect(headers["Authorization"]).To(Equal("Bearer ${env:AUTH_TOKEN}")) + Expect(headers[util.AuthorizationHeaderName]).To(Equal(bearerWithAuthToken)) Expect(dash0OtlpExporter["encoding"]).To(BeNil()) exporter3 := exporters["otlp/grpc"] diff --git a/internal/backendconnection/otelcolresources/desired_state.go b/internal/backendconnection/otelcolresources/desired_state.go index 1a8eca74..f68b0201 100644 --- a/internal/backendconnection/otelcolresources/desired_state.go +++ b/internal/backendconnection/otelcolresources/desired_state.go @@ -17,14 +17,21 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" "github.com/dash0hq/dash0-operator/internal/dash0/util" ) type oTelColConfig struct { - Namespace string - NamePrefix string - Export dash0v1alpha1.Export - Images util.Images + Namespace string + NamePrefix string + Export dash0v1alpha1.Export + SelfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration + Images util.Images +} + +type collectorConfigurationTemplateValues struct { + Exporters []OtlpExporter + IgnoreLogsFromNamespaces []string } const ( @@ -35,15 +42,7 @@ const ( // ports. 
When the operator creates its daemonset, the pods of one of the two otelcol daemonsets would fail to start // due to port conflicts. - rbacApiVersion = "rbac.authorization.k8s.io/v1" -) - -type collectorConfigurationTemplateValues struct { - Exporters []otlpExporter - IgnoreLogsFromNamespaces []string -} - -const ( + rbacApiVersion = "rbac.authorization.k8s.io/v1" serviceComponent = "agent-collector" openTelemetryCollector = "opentelemetry-collector" @@ -90,22 +89,22 @@ var ( func assembleDesiredState(config *oTelColConfig) ([]client.Object, error) { var desiredState []client.Object desiredState = append(desiredState, serviceAccount(config)) - collectorCM, err := collectorConfigMap(config) + collectorConfigMap, err := assembleCollectorConfigMap(config) if err != nil { return desiredState, err } - desiredState = append(desiredState, collectorCM) - desiredState = append(desiredState, filelogOffsetsConfigMap(config)) - desiredState = append(desiredState, clusterRole(config)) - desiredState = append(desiredState, clusterRoleBinding(config)) - desiredState = append(desiredState, role(config)) - desiredState = append(desiredState, roleBinding(config)) - desiredState = append(desiredState, service(config)) - ds, err := daemonSet(config) + desiredState = append(desiredState, collectorConfigMap) + desiredState = append(desiredState, assembleFilelogOffsetsConfigMap(config)) + desiredState = append(desiredState, assembleClusterRole(config)) + desiredState = append(desiredState, assembleClusterRoleBinding(config)) + desiredState = append(desiredState, assembleRole(config)) + desiredState = append(desiredState, assembleRoleBinding(config)) + desiredState = append(desiredState, assembleService(config)) + collectorDaemonSet, err := assembleDaemonSet(config) if err != nil { return desiredState, err } - desiredState = append(desiredState, ds) + desiredState = append(desiredState, collectorDaemonSet) return desiredState, nil } @@ -123,7 +122,7 @@ func serviceAccount(config *oTelColConfig) *corev1.ServiceAccount { } } -func filelogOffsetsConfigMap(config *oTelColConfig) *corev1.ConfigMap { +func assembleFilelogOffsetsConfigMap(config *oTelColConfig) *corev1.ConfigMap { return &corev1.ConfigMap{ TypeMeta: metav1.TypeMeta{ Kind: "ConfigMap", @@ -137,7 +136,7 @@ func filelogOffsetsConfigMap(config *oTelColConfig) *corev1.ConfigMap { } } -func role(config *oTelColConfig) *rbacv1.Role { +func assembleRole(config *oTelColConfig) *rbacv1.Role { return &rbacv1.Role{ TypeMeta: metav1.TypeMeta{ Kind: "Role", @@ -158,7 +157,7 @@ func role(config *oTelColConfig) *rbacv1.Role { } } -func roleBinding(config *oTelColConfig) *rbacv1.RoleBinding { +func assembleRoleBinding(config *oTelColConfig) *rbacv1.RoleBinding { return &rbacv1.RoleBinding{ TypeMeta: metav1.TypeMeta{ Kind: "RoleBinding", @@ -182,7 +181,7 @@ func roleBinding(config *oTelColConfig) *rbacv1.RoleBinding { } } -func clusterRole(config *oTelColConfig) *rbacv1.ClusterRole { +func assembleClusterRole(config *oTelColConfig) *rbacv1.ClusterRole { return &rbacv1.ClusterRole{ TypeMeta: metav1.TypeMeta{ Kind: "ClusterRole", @@ -213,7 +212,7 @@ func clusterRole(config *oTelColConfig) *rbacv1.ClusterRole { } } -func clusterRoleBinding(config *oTelColConfig) *rbacv1.ClusterRoleBinding { +func assembleClusterRoleBinding(config *oTelColConfig) *rbacv1.ClusterRoleBinding { return &rbacv1.ClusterRoleBinding{ TypeMeta: metav1.TypeMeta{ Kind: "ClusterRoleBinding", @@ -237,7 +236,7 @@ func clusterRoleBinding(config *oTelColConfig) *rbacv1.ClusterRoleBinding { } } -func 
service(config *oTelColConfig) *corev1.Service { +func assembleService(config *oTelColConfig) *corev1.Service { return &corev1.Service{ TypeMeta: metav1.TypeMeta{ Kind: "Service", @@ -275,7 +274,7 @@ func service(config *oTelColConfig) *corev1.Service { } } -func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { +func assembleDaemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { configMapItems := []corev1.KeyToPath{{ Key: collectorConfigurationYaml, Path: collectorConfigurationYaml, @@ -370,11 +369,29 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { filelogReceiverOffsetsVolumeMount, } - nodeNameFieldSpec := &corev1.ObjectFieldSelector{ + nodeNameFieldSpec := corev1.ObjectFieldSelector{ FieldPath: "spec.nodeName", } - env := []corev1.EnvVar{ + podUidFieldSpec := corev1.ObjectFieldSelector{ + FieldPath: "metadata.uid", + } + + k8sNodeNameEnvVar := corev1.EnvVar{ + Name: "K8S_NODE_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &nodeNameFieldSpec, + }, + } + + k8sPodUidEnvVar := corev1.EnvVar{ + Name: "K8S_POD_UID", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &podUidFieldSpec, + }, + } + + collectorEnv := []corev1.EnvVar{ { Name: "MY_POD_IP", ValueFrom: &corev1.EnvVarSource{ @@ -383,12 +400,8 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { }, }, }, - { - Name: "K8S_NODE_NAME", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: nodeNameFieldSpec, - }, - }, + k8sNodeNameEnvVar, + k8sPodUidEnvVar, { Name: "DASH0_COLLECTOR_PID_FILE", Value: collectorPidFilePath, @@ -400,28 +413,14 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { } if config.Export.Dash0 != nil { - token := config.Export.Dash0.Authorization.Token - secretRef := config.Export.Dash0.Authorization.SecretRef - if token != nil && *token != "" { - env = append(env, corev1.EnvVar{ - Name: authTokenEnvVarName, - Value: *token, - }) - } else if secretRef != nil && secretRef.Name != "" && secretRef.Key != "" { - env = append(env, corev1.EnvVar{ - Name: authTokenEnvVarName, - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: secretRef.Name, - }, - Key: secretRef.Key, - }, - }, - }) - } else { - return nil, fmt.Errorf("neither token nor secretRef provided for the Dash0 exporter") + authTokenEnvVar, err := util.CreateEnvVarForAuthorization( + *config.Export.Dash0, + authTokenEnvVarName, + ) + if err != nil { + return nil, err } + collectorEnv = append(collectorEnv, authTokenEnvVar) } probe := corev1.Probe{ @@ -454,7 +453,7 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { HostPort: int32(OtlpHttpHostPort), }, }, - Env: env, + Env: collectorEnv, LivenessProbe: &probe, ReadinessProbe: &probe, Resources: corev1.ResourceRequirements{ @@ -481,6 +480,8 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { Name: "GOMEMLIMIT", Value: "4MiB", }, + k8sNodeNameEnvVar, + k8sPodUidEnvVar, }, Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ @@ -516,12 +517,8 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { Name: "FILELOG_OFFSET_DIRECTORY_PATH", Value: offsetsDirPath, }, - { - Name: "K8S_NODE_NAME", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: nodeNameFieldSpec, - }, - }, + k8sNodeNameEnvVar, + k8sPodUidEnvVar, }, Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ @@ -557,12 +554,8 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { Name: "FILELOG_OFFSET_DIRECTORY_PATH", 
Value: offsetsDirPath, }, - { - Name: "K8S_NODE_NAME", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: nodeNameFieldSpec, - }, - }, + k8sNodeNameEnvVar, + k8sPodUidEnvVar, }, Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ @@ -575,7 +568,7 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { filelogOffsetSynchContainer.ImagePullPolicy = config.Images.FilelogOffsetSynchImagePullPolicy } - ds := &appsv1.DaemonSet{ + collectorDaemonSet := &appsv1.DaemonSet{ TypeMeta: metav1.TypeMeta{ Kind: "DaemonSet", APIVersion: "apps/v1", @@ -614,7 +607,19 @@ func daemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { }, }, } - return ds, nil + + if config.SelfMonitoringConfiguration.Enabled { + err := selfmonitoring.EnableSelfMonitoringInCollectorDaemonSet( + collectorDaemonSet, + config.SelfMonitoringConfiguration, + config.Images.GetOperatorVersion(), + ) + if err != nil { + return nil, err + } + } + + return collectorDaemonSet, nil } func serviceAccountName(namePrefix string) string { diff --git a/internal/backendconnection/otelcolresources/desired_state_test.go b/internal/backendconnection/otelcolresources/desired_state_test.go index 69907b55..55cb42f6 100644 --- a/internal/backendconnection/otelcolresources/desired_state_test.go +++ b/internal/backendconnection/otelcolresources/desired_state_test.go @@ -5,6 +5,7 @@ package otelcolresources import ( "fmt" + "reflect" "strings" appsv1 "k8s.io/api/apps/v1" @@ -12,6 +13,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + "github.com/dash0hq/dash0-operator/internal/dash0/util" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -45,21 +48,14 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f desiredState, err := assembleDesiredState(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, - Export: dash0v1alpha1.Export{ - Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, - Authorization: dash0v1alpha1.Authorization{ - Token: &AuthorizationTokenTest, - }, - }, - }, - Images: TestImages, + Export: Dash0ExportWithEndpointAndToken(), + Images: TestImages, }) Expect(err).ToNot(HaveOccurred()) Expect(desiredState).To(HaveLen(9)) collectorConfigConfigMapContent := getCollectorConfigConfigMapContent(desiredState) - Expect(collectorConfigConfigMapContent).To(ContainSubstring(fmt.Sprintf("endpoint: %s", EndpointTestQuoted))) + Expect(collectorConfigConfigMapContent).To(ContainSubstring(fmt.Sprintf("endpoint: %s", EndpointDash0TestQuoted))) Expect(collectorConfigConfigMapContent).NotTo(ContainSubstring("file/traces")) Expect(collectorConfigConfigMapContent).NotTo(ContainSubstring("file/metrics")) Expect(collectorConfigConfigMapContent).NotTo(ContainSubstring("file/logs")) @@ -126,14 +122,7 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f desiredState, err := assembleDesiredState(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, - Export: dash0v1alpha1.Export{ - Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, - Authorization: dash0v1alpha1.Authorization{ - Token: &AuthorizationTokenTest, - }, - }, - }, + Export: Dash0ExportWithEndpointAndToken(), }) Expect(err).ToNot(HaveOccurred()) @@ -151,14 +140,7 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f desiredState, err := assembleDesiredState(&oTelColConfig{ 
Namespace: namespace, NamePrefix: namePrefix, - Export: dash0v1alpha1.Export{ - Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, - Authorization: dash0v1alpha1.Authorization{ - SecretRef: &SecretRefTest, - }, - }, - }, + Export: Dash0ExportWithEndpointAndSecretRef(), }) Expect(err).ToNot(HaveOccurred()) @@ -178,12 +160,7 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f desiredState, err := assembleDesiredState(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, - Export: dash0v1alpha1.Export{ - Http: &dash0v1alpha1.HttpConfiguration{ - Endpoint: EndpointTest, - Encoding: dash0v1alpha1.Proto, - }, - }, + Export: HttpExportTest(), }) Expect(err).ToNot(HaveOccurred()) @@ -196,6 +173,53 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f authTokenEnvVar := findEnvVarByName(container.Env, "AUTH_TOKEN") Expect(authTokenEnvVar).To(BeNil()) }) + + It("should correctly apply enabled self-monitoring on the daemonset", func() { + desiredState, err := assembleDesiredState(&oTelColConfig{ + Namespace: namespace, + NamePrefix: namePrefix, + Export: Dash0ExportWithEndpointAndToken(), + SelfMonitoringConfiguration: selfmonitoring.SelfMonitoringConfiguration{ + Enabled: true, + Export: Dash0ExportWithEndpointTokenAndInsightsDataset(), + }, + Images: TestImages, + }) + Expect(err).NotTo(HaveOccurred()) + + daemonSet := getDaemonSet(desiredState) + selfMonitoringConfiguration, err := parseBackSelfMonitoringEnvVarsFromCollectorDaemonSet(daemonSet) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeTrue()) + Expect(selfMonitoringConfiguration.Export.Dash0).ToNot(BeNil()) + Expect(selfMonitoringConfiguration.Export.Dash0.Endpoint).To(Equal(EndpointDash0WithProtocolTest)) + Expect(selfMonitoringConfiguration.Export.Dash0.Dataset).To(Equal(util.DatasetInsights)) + Expect(*selfMonitoringConfiguration.Export.Dash0.Authorization.Token).To(Equal(AuthorizationTokenTest)) + Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) + Expect(selfMonitoringConfiguration.Export.Http).To(BeNil()) + }) + + It("should correctly apply disabled self-monitoring on the daemonset", func() { + desiredState, err := assembleDesiredState(&oTelColConfig{ + Namespace: namespace, + NamePrefix: namePrefix, + Export: Dash0ExportWithEndpointAndToken(), + SelfMonitoringConfiguration: selfmonitoring.SelfMonitoringConfiguration{ + Enabled: false, + Export: Dash0ExportWithEndpointTokenAndInsightsDataset(), + }, + Images: TestImages, + }) + Expect(err).NotTo(HaveOccurred()) + + daemonSet := getDaemonSet(desiredState) + selfMonitoringConfiguration, err := parseBackSelfMonitoringEnvVarsFromCollectorDaemonSet(daemonSet) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + Expect(selfMonitoringConfiguration.Export.Dash0).To(BeNil()) + Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) + Expect(selfMonitoringConfiguration.Export.Http).To(BeNil()) + }) }) func getConfigMap(desiredState []client.Object, matcher func(c *corev1.ConfigMap) bool) *corev1.ConfigMap { @@ -265,3 +289,54 @@ func findVolumeMountByName(objects []corev1.VolumeMount, name string) *corev1.Vo } return nil } + +// Note: There is no real need to parse the env vars on the daemonset back into a SelfMonitoringConfiguration, we could +// just read the env vars and check that they have the expected values. We might want to refactor/simplify later. 
+// However, this also tests the functionality used in +// selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment. +func parseBackSelfMonitoringEnvVarsFromCollectorDaemonSet(collectorDaemonSet *appsv1.DaemonSet) ( + selfmonitoring.SelfMonitoringConfiguration, + error, +) { + selfMonitoringConfigurations := make(map[string]selfmonitoring.SelfMonitoringConfiguration) + + // Check that we have the OTel environment variables set on all regular containers (the equivalent check for the init containers is commented out for now). + // for _, container := range collectorDaemonSet.Spec.Template.Spec.InitContainers { + // if selfMonitoringConfiguration, err := selfmonitoring.ParseSelfMonitoringConfigurationFromContainer(&container); err != nil { + // return selfmonitoring.SelfMonitoringConfiguration{}, err + // } else { + // selfMonitoringConfigurations[container.Name] = selfMonitoringConfiguration + // } + // } + + for _, container := range collectorDaemonSet.Spec.Template.Spec.Containers { + if selfMonitoringConfiguration, err := + selfmonitoring.ParseSelfMonitoringConfigurationFromContainer(&container); err != nil { + return selfmonitoring.SelfMonitoringConfiguration{}, err + } else { + selfMonitoringConfigurations[container.Name] = selfMonitoringConfiguration + } + } + + // verify that the configurations on all checked containers are consistent + var referenceMonitoringConfiguration *selfmonitoring.SelfMonitoringConfiguration + for _, selfMonitoringConfiguration := range selfMonitoringConfigurations { + // Note: Using a local var in the loop fixes golangci-lint complaint exportloopref, see + // https://github.com/kyoh86/exportloopref. + loopLocalSelfMonitoringConfiguration := selfMonitoringConfiguration + if referenceMonitoringConfiguration == nil { + referenceMonitoringConfiguration = &loopLocalSelfMonitoringConfiguration + } else { + if !reflect.DeepEqual(*referenceMonitoringConfiguration, loopLocalSelfMonitoringConfiguration) { + return selfmonitoring.SelfMonitoringConfiguration{}, + fmt.Errorf("inconsistent self-monitoring configurations: %v", selfMonitoringConfigurations) + } + } + } + + if referenceMonitoringConfiguration != nil { + return *referenceMonitoringConfiguration, nil + } else { + return selfmonitoring.SelfMonitoringConfiguration{}, nil + } +} diff --git a/internal/backendconnection/otelcolresources/otelcol_resources.go b/internal/backendconnection/otelcolresources/otelcol_resources.go index 32d401c1..6f2456d2 100644 --- a/internal/backendconnection/otelcolresources/otelcol_resources.go +++ b/internal/backendconnection/otelcolresources/otelcol_resources.go @@ -8,7 +8,6 @@ import ( "errors" "reflect" - "github.com/dash0hq/dash0-operator/internal/dash0/util" "github.com/go-logr/logr" "github.com/google/go-cmp/cmp" appsv1 "k8s.io/api/apps/v1" @@ -21,6 +20,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + "github.com/dash0hq/dash0-operator/internal/dash0/util" ) type OTelColResourceManager struct { @@ -34,14 +35,16 @@ func (m *OTelColResourceManager) CreateOrUpdateOpenTelemetryCollectorResources( ctx context.Context, namespace string, images util.Images, - dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring, + monitoringResource *dash0v1alpha1.Dash0Monitoring, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, logger *logr.Logger, ) (bool, bool, error) { config := &oTelColConfig{ - Namespace: namespace, - 
NamePrefix: m.OTelCollectorNamePrefix, - Export: dash0MonitoringResource.Spec.Export, - Images: images, + Namespace: namespace, + NamePrefix: m.OTelCollectorNamePrefix, + Export: monitoringResource.Spec.Export, + SelfMonitoringConfiguration: selfMonitoringConfiguration, + Images: images, } desiredState, err := assembleDesiredState(config) if err != nil { @@ -203,13 +206,15 @@ func (m *OTelColResourceManager) DeleteResources( namespace string, images util.Images, dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, logger *logr.Logger, ) error { config := &oTelColConfig{ - Namespace: namespace, - NamePrefix: m.OTelCollectorNamePrefix, - Export: dash0MonitoringResource.Spec.Export, - Images: images, + Namespace: namespace, + NamePrefix: m.OTelCollectorNamePrefix, + Export: dash0MonitoringResource.Spec.Export, + SelfMonitoringConfiguration: selfMonitoringConfiguration, + Images: images, } allObjects, err := assembleDesiredState(config) if err != nil { diff --git a/internal/backendconnection/otelcolresources/otelcol_resources_test.go b/internal/backendconnection/otelcolresources/otelcol_resources_test.go index 22b0f98b..8746a9cb 100644 --- a/internal/backendconnection/otelcolresources/otelcol_resources_test.go +++ b/internal/backendconnection/otelcolresources/otelcol_resources_test.go @@ -12,6 +12,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -116,6 +117,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Dash0OperatorNamespace, TestImages, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, &logger, ) Expect(err).ToNot(HaveOccurred()) @@ -152,6 +154,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Dash0OperatorNamespace, TestImages, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, &logger, ) Expect(err).ToNot(HaveOccurred()) @@ -170,6 +173,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Dash0OperatorNamespace, TestImages, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, &logger, ) Expect(err).ToNot(HaveOccurred()) @@ -182,6 +186,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Dash0OperatorNamespace, TestImages, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, &logger, ) Expect(err).ToNot(HaveOccurred()) @@ -200,6 +205,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Dash0OperatorNamespace, TestImages, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, &logger, ) Expect(err).ToNot(HaveOccurred()) @@ -211,6 +217,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Dash0OperatorNamespace, TestImages, dash0MonitoringResource, + selfmonitoring.SelfMonitoringConfiguration{}, &logger, ) Expect(err).ToNot(HaveOccurred()) diff --git a/internal/dash0/controller/dash0_controller_suite_test.go b/internal/dash0/controller/controller_suite_test.go similarity index 94% rename from internal/dash0/controller/dash0_controller_suite_test.go rename to internal/dash0/controller/controller_suite_test.go index 6148f3de..b7be7b25 100644 --- a/internal/dash0/controller/dash0_controller_suite_test.go +++ 
b/internal/dash0/controller/controller_suite_test.go @@ -8,6 +8,7 @@ import ( "path/filepath" "runtime" "testing" + "time" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" @@ -26,6 +27,12 @@ import ( "github.com/onsi/gomega/format" ) +const ( + timeout = 10 * time.Second + consistentlyTimeout = 2 * time.Second + pollingInterval = 50 * time.Millisecond +) + var ( cfg *rest.Config k8sClient client.Client diff --git a/internal/dash0/controller/dash0_controller.go b/internal/dash0/controller/dash0_controller.go index 5943553e..062b48dd 100644 --- a/internal/dash0/controller/dash0_controller.go +++ b/internal/dash0/controller/dash0_controller.go @@ -21,14 +21,10 @@ import ( dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" "github.com/dash0hq/dash0-operator/internal/backendconnection" "github.com/dash0hq/dash0-operator/internal/dash0/instrumentation" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" "github.com/dash0hq/dash0-operator/internal/dash0/util" ) -type DanglingEventsTimeouts struct { - InitialTimeout time.Duration - Backoff wait.Backoff -} - type Dash0Reconciler struct { client.Client Clientset *kubernetes.Clientset @@ -36,15 +32,15 @@ type Dash0Reconciler struct { BackendConnectionManager *backendconnection.BackendConnectionManager Images util.Images OperatorNamespace string - DanglingEventsTimeouts *DanglingEventsTimeouts + DanglingEventsTimeouts *util.DanglingEventsTimeouts } const ( - updateStatusFailedMessage = "Failed to update Dash0 monitoring status conditions, requeuing reconcile request." + updateStatusFailedMessageMonitoring = "Failed to update Dash0 monitoring status conditions, requeuing reconcile request." ) var ( - defaultDanglingEventsTimeouts = &DanglingEventsTimeouts{ + defaultDanglingEventsTimeouts = &util.DanglingEventsTimeouts{ InitialTimeout: 30 * time.Second, Backoff: wait.Backoff{ Steps: 3, @@ -101,23 +97,32 @@ func (r *Dash0Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, nil } - dash0MonitoringResource, stopReconcile, err := util.VerifyUniqueDash0MonitoringResourceExists( + checkResourceResult, err := util.VerifyThatUniqueResourceExists( ctx, r.Client, - updateStatusFailedMessage, req, + &dash0v1alpha1.Dash0Monitoring{}, + updateStatusFailedMessageMonitoring, &logger, ) - if err != nil { + if checkResourceResult.ResourceDoesNotExist { + // If the resource is not found, the checkResourceResult contains IsNotFound err, but we do not want to requeue + // the request, hence this condition needs to be checked first, before the err != nil check (which requeues the + // request). + return ctrl.Result{}, nil + } else if err != nil { + // For all other errors, we assume it is a temporary error and requeue the request. 
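+ // (Returning a non-nil error from Reconcile makes controller-runtime requeue the request with exponential backoff.)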
return ctrl.Result{}, err - } else if stopReconcile { + } else if checkResourceResult.StopReconcile { return ctrl.Result{}, nil } + monitoringResource := checkResourceResult.Resource.(*dash0v1alpha1.Dash0Monitoring) isFirstReconcile, err := util.InitStatusConditions( ctx, - r.Status(), - dash0MonitoringResource, + r.Client, + monitoringResource, + monitoringResource.Status.Conditions, &logger, ) if err != nil { @@ -128,15 +133,15 @@ func (r *Dash0Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl isMarkedForDeletion, runCleanupActions, err := util.CheckImminentDeletionAndHandleFinalizers( ctx, r.Client, - dash0MonitoringResource, - dash0v1alpha1.FinalizerId, + monitoringResource, + dash0v1alpha1.MonitoringFinalizerId, &logger, ) if err != nil { // The error has already been logged in checkImminentDeletionAndHandleFinalizers return ctrl.Result{}, err } else if runCleanupActions { - err = r.runCleanupActions(ctx, dash0MonitoringResource, &logger) + err = r.runCleanupActions(ctx, monitoringResource, &logger) if err != nil { // error has already been logged in runCleanupActions return ctrl.Result{}, err @@ -152,41 +157,42 @@ func (r *Dash0Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl // Make sure that an OpenTelemetry collector instance has been created in the namespace of the operator, and that // its configuration is up-to-date. - if err = r.BackendConnectionManager.EnsureOpenTelemetryCollectorIsDeployedInDash0OperatorNamespace( + if err = r.BackendConnectionManager.EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace( ctx, r.Images, r.OperatorNamespace, - dash0MonitoringResource, + monitoringResource, + r.readSelfMonitoringConfigurationFromOperatorConfigurationResource(ctx, &logger), ); err != nil { return ctrl.Result{}, err } var requiredAction util.ModificationMode - dash0MonitoringResource, requiredAction, err = - r.manageInstrumentWorkloadsChanges(ctx, dash0MonitoringResource, isFirstReconcile, &logger) + monitoringResource, requiredAction, err = + r.manageInstrumentWorkloadsChanges(ctx, monitoringResource, isFirstReconcile, &logger) if err != nil { // The error has already been logged in manageInstrumentWorkloadsChanges return ctrl.Result{}, err } if isFirstReconcile || requiredAction == util.ModificationModeInstrumentation { - if err = r.Instrumenter.CheckSettingsAndInstrumentExistingWorkloads(ctx, dash0MonitoringResource, &logger); err != nil { + if err = r.Instrumenter.CheckSettingsAndInstrumentExistingWorkloads(ctx, monitoringResource, &logger); err != nil { // The error has already been logged in checkSettingsAndInstrumentExistingWorkloads logger.Info("Requeuing reconcile request.") return ctrl.Result{}, err } } else if requiredAction == util.ModificationModeUninstrumentation { - if err = r.Instrumenter.UninstrumentWorkloadsIfAvailable(ctx, dash0MonitoringResource, &logger); err != nil { + if err = r.Instrumenter.UninstrumentWorkloadsIfAvailable(ctx, monitoringResource, &logger); err != nil { logger.Error(err, "Failed to uninstrument workloads, requeuing reconcile request.") return ctrl.Result{}, err } } - r.scheduleAttachDanglingEvents(ctx, dash0MonitoringResource, &logger) + r.scheduleAttachDanglingEvents(ctx, monitoringResource, &logger) - dash0MonitoringResource.EnsureResourceIsMarkedAsAvailable() - if err = r.Status().Update(ctx, dash0MonitoringResource); err != nil { - logger.Error(err, updateStatusFailedMessage) + monitoringResource.EnsureResourceIsMarkedAsAvailable() + if err = r.Status().Update(ctx, monitoringResource); err != nil 
{
+		logger.Error(err, updateStatusFailedMessageMonitoring)
 		return ctrl.Result{}, err
 	}
 
@@ -245,11 +251,15 @@ func (r *Dash0Reconciler) runCleanupActions(
 		return err
 	}
 
-	if err := r.BackendConnectionManager.RemoveOpenTelemetryCollectorIfNoDash0MonitoringResourceIsLeft(
+	if err := r.BackendConnectionManager.RemoveOpenTelemetryCollectorIfNoMonitoringResourceIsLeft(
 		ctx,
 		r.Images,
 		r.OperatorNamespace,
 		dash0MonitoringResource,
+		// Optimization: Self-monitoring does not create any additional resources in the set of desired objects, thus
+		// we do not actually need to look up the operator configuration resource here when we only want to delete all
+		// OTel collector resources.
+		selfmonitoring.SelfMonitoringConfiguration{},
 	); err != nil {
 		logger.Error(err, "Failed to check if the OpenTelemetry collector instance needs to be removed or failed "+
 			"removing it.")
@@ -261,11 +271,11 @@ func (r *Dash0Reconciler) runCleanupActions(
 	// for any reason or take a while, the resource is no longer marked as available.
 	dash0MonitoringResource.EnsureResourceIsMarkedAsAboutToBeDeleted()
 	if err := r.Status().Update(ctx, dash0MonitoringResource); err != nil {
-		logger.Error(err, updateStatusFailedMessage)
+		logger.Error(err, updateStatusFailedMessageMonitoring)
 		return err
 	}
 
-	controllerutil.RemoveFinalizer(dash0MonitoringResource, dash0v1alpha1.FinalizerId)
+	controllerutil.RemoveFinalizer(dash0MonitoringResource, dash0v1alpha1.MonitoringFinalizerId)
 	if err := r.Update(ctx, dash0MonitoringResource); err != nil {
 		logger.Error(err, "Failed to remove the finalizer from the Dash0 monitoring resource, requeuing reconcile "+
 			"request.")
@@ -355,3 +365,31 @@ func (r *Dash0Reconciler) attachDanglingEvents(
 		}
 	}
 }
+
+func (r *Dash0Reconciler) readSelfMonitoringConfigurationFromOperatorConfigurationResource(
+	ctx context.Context,
+	logger *logr.Logger,
+) selfmonitoring.SelfMonitoringConfiguration {
+	operatorConfigurationResource, err := util.FindUniqueOrMostRecentResourceInScope(
+		ctx,
+		r.Client,
+		"", /* cluster-scope, thus no namespace */
+		&dash0v1alpha1.Dash0OperatorConfiguration{},
+		logger,
+	)
+	if err != nil || operatorConfigurationResource == nil {
+		return selfmonitoring.SelfMonitoringConfiguration{
+			Enabled: false,
+		}
+	}
+	config, err := selfmonitoring.ConvertOperatorConfigurationResourceToSelfMonitoringConfiguration(
+		*operatorConfigurationResource.(*dash0v1alpha1.Dash0OperatorConfiguration),
+		logger,
+	)
+	if err != nil {
+		return selfmonitoring.SelfMonitoringConfiguration{
+			Enabled: false,
+		}
+	}
+	return config
+}
diff --git a/internal/dash0/controller/dash0_controller_test.go b/internal/dash0/controller/dash0_controller_test.go
index 6a85bb44..5341055c 100644
--- a/internal/dash0/controller/dash0_controller_test.go
+++ b/internal/dash0/controller/dash0_controller_test.go
@@ -19,7 +19,6 @@ import (
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/apimachinery/pkg/util/wait"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
@@ -35,14 +34,9 @@ import (
 )
 
 var (
-	namespace = TestNamespaceName
-
-	timeout         = 10 * time.Second
-	pollingInterval = 50 * time.Millisecond
-
-	extraDash0MonitoringResourceNames = []types.NamespacedName{}
-
-	operatorNamespace = Dash0OperatorNamespace
+	namespace                         = TestNamespaceName
+	extraDash0MonitoringResourceNames []types.NamespacedName
+	operatorNamespace                 = Dash0OperatorNamespace
 )
 
 var _ = Describe("The Dash0 controller", Ordered, func() {
@@ -84,15
+78,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { Images: TestImages, OperatorNamespace: Dash0OperatorNamespace, BackendConnectionManager: backendConnectionManager, - DanglingEventsTimeouts: &DanglingEventsTimeouts{ - InitialTimeout: 0 * time.Second, - Backoff: wait.Backoff{ - Steps: 1, - Duration: 0 * time.Second, - Factor: 1, - Jitter: 0, - }, - }, + DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, } }) @@ -138,8 +124,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { VerifyCollectorResourcesExist(ctx, k8sClient, operatorNamespace) }) - It("should mark only the most recent resource as available and the other ones as degraded when multiple "+ - "resources exist", func() { + It("should mark only the most recent resource as available and the other ones as degraded when multiple resources exist", func() { firstDash0MonitoringResource := &dash0v1alpha1.Dash0Monitoring{} Expect(k8sClient.Get(ctx, Dash0MonitoringResourceQualifiedName, firstDash0MonitoringResource)).To(Succeed()) time.Sleep(10 * time.Millisecond) @@ -193,7 +178,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { resource3Available, metav1.ConditionTrue, "ReconcileFinished", - "Dash0 is active in this namespace now.", + "Dash0 monitoring is active in this namespace now.", ) g.Expect(resource3Degraded).To(BeNil()) @@ -752,7 +737,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { InstrumentWorkloads: "invalid", Export: dash0v1alpha1.Export{ Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: EndpointTest, + Endpoint: EndpointDash0Test, Authorization: dash0v1alpha1.Authorization{ Token: &AuthorizationTokenTest, }, @@ -862,7 +847,7 @@ func triggerReconcileRequestForName( dash0MonitoringResourceName types.NamespacedName, ) { if stepMessage == "" { - stepMessage = "Trigger reconcile request" + stepMessage = "Trigger a monitoring resource reconcile request" } By(stepMessage) _, err := reconciler.Reconcile(ctx, reconcile.Request{ diff --git a/internal/dash0/controller/operator_configuration_controller.go b/internal/dash0/controller/operator_configuration_controller.go new file mode 100644 index 00000000..c826bf5f --- /dev/null +++ b/internal/dash0/controller/operator_configuration_controller.go @@ -0,0 +1,218 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "fmt" + "reflect" + + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + "github.com/dash0hq/dash0-operator/internal/dash0/util" +) + +const ( + ManagerContainerName = "manager" + updateStatusFailedMessageOperatorConfiguration = "Failed to update Dash0 operator configuration status " + + "conditions, requeuing reconcile request." 
+)
+
+type OperatorConfigurationReconciler struct {
+	client.Client
+	Clientset               *kubernetes.Clientset
+	Scheme                  *runtime.Scheme
+	Recorder                record.EventRecorder
+	DeploymentSelfReference *appsv1.Deployment
+	DanglingEventsTimeouts  *util.DanglingEventsTimeouts
+	Images                  util.Images
+}
+
+func (r *OperatorConfigurationReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	if r.DanglingEventsTimeouts == nil {
+		r.DanglingEventsTimeouts = defaultDanglingEventsTimeouts
+	}
+
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&dash0v1alpha1.Dash0OperatorConfiguration{}).
+		Complete(r)
+}
+
+// The following markers are used to generate the RBAC rules in config/rbac via controller-gen
+// when make manifests is executed.
+// To know more about markers see: https://book.kubebuilder.io/reference/markers.html
+//+kubebuilder:rbac:groups=core,resources=events,verbs=create;list;patch;update
+//+kubebuilder:rbac:groups=core,resources=namespaces,verbs=get
+//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;delete
+//+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0operatorconfigurations,verbs=get;list;watch;create;update;patch;delete;deletecollection
+//+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0operatorconfigurations/finalizers,verbs=update
+//+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0operatorconfigurations/status,verbs=get;update;patch
+
+// Reconcile is part of the main Kubernetes reconciliation loop, which aims to
+// move the current state of the cluster closer to the desired state.
+// It is essential for the controller's reconciliation loop to be idempotent. By following the Operator
+// pattern you will create controllers which provide a reconcile function
+// responsible for synchronizing resources until the desired state is reached on the cluster.
+// Breaking this recommendation goes against the design principles of controller-runtime,
+// and may lead to unforeseen consequences such as resources becoming stuck and requiring manual intervention.
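+//
+// For this controller, being idempotent boils down to a compare-then-apply shape; a minimal
+// sketch of the pattern implemented below (the real function additionally handles resource
+// deletion, uniqueness checks, and status updates):
+//
+//	current, _ := selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment(
+//		r.DeploymentSelfReference, ManagerContainerName)
+//	desired, _ := selfmonitoring.ConvertOperatorConfigurationResourceToSelfMonitoringConfiguration(
+//		*resource, &logger)
+//	if !reflect.DeepEqual(current, desired) {
+//		// only mutate the controller deployment when the settings actually changed,
+//		// so that running the reconcile twice in a row is a no-op the second time
+//	}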
+// For further info: +// - About Operator Pattern: https://kubernetes.io/docs/concepts/extend-kubernetes/operator/ +// - About Controllers: https://kubernetes.io/docs/concepts/architecture/controller/ +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile +func (r *OperatorConfigurationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + var resource *dash0v1alpha1.Dash0OperatorConfiguration + resourceDeleted := false + checkResourceResult, err := util.VerifyThatResourceExists( + ctx, + r.Client, + req, + &dash0v1alpha1.Dash0OperatorConfiguration{}, + &logger, + ) + if checkResourceResult.ResourceDoesNotExist { + resourceDeleted = true + } else if err != nil { + return ctrl.Result{}, err + } else if checkResourceResult.StopReconcile { + return ctrl.Result{}, nil + } + + if !resourceDeleted { + resource = checkResourceResult.Resource.(*dash0v1alpha1.Dash0OperatorConfiguration) + stopReconcile, err := + util.VerifyThatResourceIsUniqueInScope( + ctx, + r.Client, + req, + resource, + updateStatusFailedMessageOperatorConfiguration, + &logger, + ) + if err != nil { + // Cannot validate whether this resource is normative, requeuing + return ctrl.Result{}, err + } else if stopReconcile { + return ctrl.Result{}, nil + } + logger.Info("Reconciling the operator configuration resource", "name", req.Name) + } else { + logger.Info("Reconciling the deletion of the operator configuration resource", "name", req.Name) + } + + currentSelfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + r.DeploymentSelfReference, + ManagerContainerName, + ) + if err != nil { + logger.Error(err, "cannot get self-monitoring configuration from controller deployment") + return ctrl.Result{ + Requeue: true, + }, err + } + + if resourceDeleted { + if currentSelfMonitoringConfiguration.Enabled { + if err = r.applySelfMonitoring(ctx, selfmonitoring.SelfMonitoringConfiguration{ + Enabled: false, + }); err != nil { + logger.Error(err, "cannot disable self-monitoring of the controller deployment, requeuing reconcile request.") + return ctrl.Result{ + Requeue: true, + }, nil + } else { + logger.Info("Self-monitoring of the controller deployment has been disabled") + } + } else { + logger.Info("Self-monitoring configuration of the controller deployment is already disabled") + } + return ctrl.Result{}, nil + } + + if _, err = util.InitStatusConditions( + ctx, + r.Client, + resource, + resource.Status.Conditions, + &logger, + ); err != nil { + // The error has already been logged in initStatusConditions + return ctrl.Result{}, err + } + + newSelfMonitoringConfiguration, err := + selfmonitoring.ConvertOperatorConfigurationResourceToSelfMonitoringConfiguration(*resource, &logger) + if err != nil { + logger.Error(err, "cannot generate self-monitoring configuration from operator configuration resource") + return ctrl.Result{ + Requeue: true, + }, err + } + + if reflect.DeepEqual(currentSelfMonitoringConfiguration, newSelfMonitoringConfiguration) { + logger.Info("Self-monitoring configuration of the controller deployment is up-to-date") + } else { + if err = r.applySelfMonitoring(ctx, newSelfMonitoringConfiguration); err != nil { + logger.Error(err, "Cannot apply self-monitoring configurations to the controller deployment") + resource.EnsureResourceIsMarkedAsDegraded("CannotApplySelfMonitoring", "Could not update the controller deployment to reflect the self-monitoring settings") + if 
statusUpdateErr := r.Status().Update(ctx, resource); statusUpdateErr != nil { + logger.Error(statusUpdateErr, "Failed to update Dash0 operator status conditions, requeuing reconcile request.") + return ctrl.Result{}, statusUpdateErr + } + return ctrl.Result{ + Requeue: true, + }, nil + } + + logger.Info("Self-monitoring configurations applied to the controller deployment", "self-monitoring", newSelfMonitoringConfiguration) + } + + resource.EnsureResourceIsMarkedAsAvailable() + if err = r.Status().Update(ctx, resource); err != nil { + logger.Error(err, updateStatusFailedMessageOperatorConfiguration) + return ctrl.Result{}, fmt.Errorf("cannot mark Dash0 operator configuration resource as available: %w", err) + } + + return ctrl.Result{}, nil +} + +func (r *OperatorConfigurationReconciler) applySelfMonitoring( + ctx context.Context, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, +) error { + updatedDeployment := &appsv1.Deployment{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(r.DeploymentSelfReference), updatedDeployment); err != nil { + return fmt.Errorf("cannot fetch the current controller deployment: %w", err) + } + + if selfMonitoringConfiguration.Enabled { + if err := selfmonitoring.EnableSelfMonitoringInControllerDeployment( + updatedDeployment, + ManagerContainerName, + selfMonitoringConfiguration, + r.Images.GetOperatorVersion(), + ); err != nil { + return fmt.Errorf("cannot apply settings to enable self-monitoring to the controller deployment: %w", err) + } + } else { + if err := selfmonitoring.DisableSelfMonitoringInControllerDeployment( + updatedDeployment, + ManagerContainerName, + ); err != nil { + return fmt.Errorf("cannot apply settings to disable self-monitoring to the controller deployment: %w", err) + } + } + + return r.Client.Update(ctx, updatedDeployment) +} diff --git a/internal/dash0/controller/operator_configuration_controller_test.go b/internal/dash0/controller/operator_configuration_controller_test.go new file mode 100644 index 00000000..41de9330 --- /dev/null +++ b/internal/dash0/controller/operator_configuration_controller_test.go @@ -0,0 +1,900 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + "github.com/dash0hq/dash0-operator/internal/dash0/util" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . 
"github.com/dash0hq/dash0-operator/test/util" +) + +type SelfMonitoringTestConfig struct { + createExport func() dash0v1alpha1.Export + verify func(Gomega, selfmonitoring.SelfMonitoringConfiguration) +} + +var ( + reconciler *OperatorConfigurationReconciler +) + +var _ = Describe("The Dash0 controller", Ordered, func() { + ctx := context.Background() + var controllerDeployment *appsv1.Deployment + + BeforeAll(func() { + EnsureTestNamespaceExists(ctx, k8sClient) + EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + }) + + Describe("when creating the Dash0Operator resource", func() { + + BeforeEach(func() { + // When creating the resource, we assume the operator has no + // self-monitoring enabled + controllerDeployment = controllerDeploymentWithoutSelfMonitoring() + EnsureControllerDeploymentExists(ctx, k8sClient, controllerDeployment) + reconciler = createReconciler(controllerDeployment) + }) + + AfterEach(func() { + RemoveOperatorConfigurationResource(ctx, k8sClient) + EnsureControllerDeploymentDoesNotExist(ctx, k8sClient, controllerDeployment) + }) + + Describe("enabling self-monitoring", func() { + + DescribeTable("it enables self-monitoring in the controller deployment", + func(config SelfMonitoringTestConfig) { + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(config.createExport()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: true, + }, + }, + ) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + verifyOperatorConfigurationResourceIsAvailable(ctx) + Eventually(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeTrue()) + config.verify(g, selfMonitoringConfiguration) + }, timeout, pollingInterval).Should(Succeed()) + }, + Entry("with a Dash0 export with a token", SelfMonitoringTestConfig{ + createExport: Dash0ExportWithEndpointAndToken, + verify: verifySelfMonitoringConfigurationDash0Token, + }), + Entry("with a Dash0 export with a secret ref", SelfMonitoringTestConfig{ + createExport: Dash0ExportWithEndpointAndSecretRef, + verify: verifySelfMonitoringConfigurationDash0SecretRef, + }), + Entry("with a Grpc export", SelfMonitoringTestConfig{ + createExport: GrpcExportTest, + verify: verifySelfMonitoringConfigurationGrpc, + }), + Entry("with an HTTP export", SelfMonitoringTestConfig{ + createExport: HttpExportTest, + verify: verifySelfMonitoringConfigurationHttp, + }), + ) + }) + + Describe("disabling self-monitoring", func() { + + It("it does not change the controller deployment", func() { + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(Dash0ExportWithEndpointAndToken()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: false, + }, + }, + ) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + verifyOperatorConfigurationResourceIsAvailable(ctx) + Consistently(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + 
Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + }, consistentlyTimeout, pollingInterval).Should(Succeed()) + }) + }) + }) + + Describe("when updating the Dash0Operator resource", func() { + + Describe("enabling self-monitoring", func() { + + Describe("when self-monitoring is already enabled", func() { + + BeforeEach(func() { + // When creating the resource, we assume the operator has + // self-monitoring enabled + controllerDeployment = controllerDeploymentWithSelfMonitoring() + EnsureControllerDeploymentExists(ctx, k8sClient, controllerDeployment) + reconciler = createReconciler(controllerDeployment) + }) + + AfterEach(func() { + RemoveOperatorConfigurationResource(ctx, k8sClient) + EnsureControllerDeploymentDoesNotExist(ctx, k8sClient, controllerDeployment) + }) + + It("it does not change the controller deployment", func() { + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(Dash0ExportWithEndpointAndToken()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: true, + }, + }, + ) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + verifyOperatorConfigurationResourceIsAvailable(ctx) + + Consistently(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeTrue()) + }, consistentlyTimeout, pollingInterval).Should(Succeed()) + }) + }) + + Describe("when self-monitoring is disabled", func() { + + BeforeEach(func() { + controllerDeployment = controllerDeploymentWithoutSelfMonitoring() + EnsureControllerDeploymentExists(ctx, k8sClient, controllerDeployment) + reconciler = createReconciler(controllerDeployment) + + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(Dash0ExportWithEndpointAndToken()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: false, + }, + }, + ) + }) + + AfterEach(func() { + RemoveOperatorConfigurationResource(ctx, k8sClient) + EnsureControllerDeploymentDoesNotExist(ctx, k8sClient, controllerDeployment) + }) + + It("it enables self-monitoring in the controller deployment", func() { + resource := LoadOperatorConfigurationResourceOrFail(ctx, k8sClient, Default) + Expect(resource.Spec.SelfMonitoring.Enabled).To(BeFalse()) + + resource.Spec.SelfMonitoring.Enabled = true + + Expect(k8sClient.Update(ctx, resource)).To(Succeed()) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + verifyOperatorConfigurationResourceIsAvailable(ctx) + Eventually(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeTrue()) + }, timeout, pollingInterval).Should(Succeed()) + }) + }) + }) + + Describe("disabling self-monitoring", func() { + + Describe("when self-monitoring is enabled", func() { + + BeforeEach(func() { + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + 
dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(Dash0ExportWithEndpointAndToken()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: true, + }, + }, + ) + + controllerDeployment = controllerDeploymentWithSelfMonitoring() + EnsureControllerDeploymentExists(ctx, k8sClient, controllerDeployment) + reconciler = createReconciler(controllerDeployment) + }) + + AfterEach(func() { + RemoveOperatorConfigurationResource(ctx, k8sClient) + EnsureControllerDeploymentDoesNotExist(ctx, k8sClient, controllerDeployment) + }) + + It("it disables self-monitoring in the controller deployment", func() { + resource := LoadOperatorConfigurationResourceOrFail(ctx, k8sClient, Default) + Expect(resource.Spec.SelfMonitoring.Enabled).To(BeTrue()) + + resource.Spec.SelfMonitoring.Enabled = false + + Expect(k8sClient.Update(ctx, resource)).To(Succeed()) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + verifyOperatorConfigurationResourceIsAvailable(ctx) + Eventually(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + }, timeout, pollingInterval).Should(Succeed()) + }) + }) + + Describe("when self-monitoring is already disabled", func() { + + BeforeEach(func() { + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(Dash0ExportWithEndpointAndToken()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: false, + }, + }, + ) + + controllerDeployment = controllerDeploymentWithoutSelfMonitoring() + EnsureControllerDeploymentExists(ctx, k8sClient, controllerDeployment) + reconciler = createReconciler(controllerDeployment) + }) + + AfterEach(func() { + RemoveOperatorConfigurationResource(ctx, k8sClient) + EnsureControllerDeploymentDoesNotExist(ctx, k8sClient, controllerDeployment) + }) + + It("it does not change the controller deployment", func() { + resource := LoadOperatorConfigurationResourceOrFail(ctx, k8sClient, Default) + Expect(resource.Spec.SelfMonitoring.Enabled).To(BeFalse()) + + resource.Spec.SelfMonitoring.Enabled = false + + Expect(k8sClient.Update(ctx, resource)).To(Succeed()) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + verifyOperatorConfigurationResourceIsAvailable(ctx) + Consistently(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + }, consistentlyTimeout, pollingInterval).Should(Succeed()) + }) + }) + }) + }) + + Describe("when deleting the Dash0Operator resource", func() { + + Describe("when self-monitoring is enabled", func() { + + BeforeEach(func() { + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(Dash0ExportWithEndpointAndToken()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: true, + }, + }, + ) + + controllerDeployment = controllerDeploymentWithSelfMonitoring() + EnsureControllerDeploymentExists(ctx, 
k8sClient, controllerDeployment) + reconciler = createReconciler(controllerDeployment) + }) + + AfterEach(func() { + RemoveOperatorConfigurationResource(ctx, k8sClient) + EnsureControllerDeploymentDoesNotExist(ctx, k8sClient, controllerDeployment) + }) + + It("it disables self-monitoring in the controller deployment", func() { + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + controllerDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeTrue()) + + resource := LoadOperatorConfigurationResourceOrFail(ctx, k8sClient, Default) + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + VerifyOperatorConfigurationResourceByNameDoesNotExist(ctx, k8sClient, Default, resource.Name) + + Eventually(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + }, timeout, pollingInterval).Should(Succeed()) + }) + }) + + Describe("when self-monitoring is disabled", func() { + + BeforeEach(func() { + CreateOperatorConfigurationResource( + ctx, + k8sClient, + OperatorConfigurationResourceName, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: false, + }, + }, + ) + + controllerDeployment = controllerDeploymentWithoutSelfMonitoring() + EnsureControllerDeploymentExists(ctx, k8sClient, controllerDeployment) + reconciler = createReconciler(controllerDeployment) + }) + + AfterEach(func() { + RemoveOperatorConfigurationResource(ctx, k8sClient) + EnsureControllerDeploymentDoesNotExist(ctx, k8sClient, controllerDeployment) + }) + + It("it does not change the controller deployment", func() { + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + controllerDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + + resource := LoadOperatorConfigurationResourceOrFail(ctx, k8sClient, Default) + Expect(resource.Spec.SelfMonitoring.Enabled).To(BeFalse()) + + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + VerifyOperatorConfigurationResourceByNameDoesNotExist(ctx, k8sClient, Default, resource.Name) + + Consistently(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + }, consistentlyTimeout, pollingInterval).Should(Succeed()) + }) + }) + }) +}) + +func controllerDeploymentWithoutSelfMonitoring() *appsv1.Deployment { + replicaCount := int32(2) + falsy := false + truthy := true + terminationGracePeriodSeconds := int64(10) + secretMode := corev1.SecretVolumeSourceDefaultMode + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: Dash0OperatorDeploymentName, + Namespace: Dash0OperatorNamespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "dash0monitoring-operator", 
+ "app.kubernetes.io/component": "controller", + "app.kubernetes.io/instance": "deployment", + "dash0.com/enable": "false", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicaCount, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/name": "dash0monitoring-operator", + "app.kubernetes.io/component": "controller", + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "kubectl.kubernetes.io/default-container": "manager", + }, + Labels: map[string]string{ + "app.kubernetes.io/name": "dash0monitoring-operator", + "app.kubernetes.io/component": "controller", + "dash0.cert-digest": "1234567890", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "manager", + Image: "ghcr.io/dash0hq/operator-controller@latest", + Command: []string{"/manager"}, + Args: []string{ + "--health-probe-bind-address=:8081", + "--metrics-bind-address=127.0.0.1:8080", + "--leader-elect", + }, + Env: []corev1.EnvVar{ + { + Name: "DASH0_OPERATOR_NAMESPACE", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }, + }, + }, + { + Name: "DASH0_DEPLOYMENT_NAME", + Value: Dash0OperatorDeploymentName, + }, + { + Name: "OTEL_COLLECTOR_NAME_PREFIX", + Value: "dash0monitoring-system", + }, + { + Name: "DASH0_INIT_CONTAINER_IMAGE", + Value: "ghcr.io/dash0hq/instrumentation", + }, + { + Name: "DASH0_INIT_CONTAINER_IMAGE_PULL_POLICY", + Value: "", + }, + { + Name: "DASH0_COLLECTOR_IMAGE", + Value: "ghcr.io/dash0hq/collector", + }, + { + Name: "DASH0_COLLECTOR_IMAGE_PULL_POLICY", + Value: "", + }, + { + Name: "DASH0_CONFIGURATION_RELOADER_IMAGE", + Value: "ghcr.io/dash0hq/configuration-reloader@latest", + }, + { + Name: "DASH0_CONFIGURATION_RELOADER_IMAGE_PULL_POLICY", + Value: "", + }, + { + Name: "DASH0_FILELOG_OFFSET_SYNCH_IMAGE", + Value: "ghcr.io/dash0hq/filelog-offset-synch", + }, + { + Name: "DASH0_FILELOG_OFFSET_SYNCH_IMAGE_PULL_POLICY", + Value: "", + }, + { + Name: "ENABLE_WEBHOOK", + Value: "false", + }, + { + Name: "DASH0_DEVELOPMENT_MODE", + Value: "false", + }, + }, + Ports: []corev1.ContainerPort{ + { + Name: "webhook-server", + ContainerPort: 9443, + Protocol: "TCP", + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "certificates", + MountPath: "/tmp/k8s-webhook-server/serving-certs", + ReadOnly: true, + }, + }, + }, + { + Name: "kube-rbac-proxy", + Image: "quay.io/brancz/kube-rbac-proxy:v0.18.0", + Args: []string{ + "--secure-listen-address=0.0.0.0:8443", + "--upstream=http://127.0.0.1:8080/", + "--logtostderr=true", + "--v=0", + }, + Ports: []corev1.ContainerPort{ + { + Name: "https", + ContainerPort: 8443, + Protocol: "TCP", + }, + }, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &falsy, + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + }, + }, + }, + }, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &truthy, + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + ServiceAccountName: "dash0-operator-service-account", + AutomountServiceAccountToken: &truthy, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + Volumes: []corev1.Volume{ + { + Name: "certificates", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + DefaultMode: &secretMode, + SecretName: "dash0-operator-certificates", + }, + }, + }, + }, + }, + }, + }, + } +} + +func 
controllerDeploymentWithSelfMonitoring() *appsv1.Deployment {
+	replicaCount := int32(2)
+	falsy := false
+	truthy := true
+	terminationGracePeriodSeconds := int64(10)
+	secretMode := corev1.SecretVolumeSourceDefaultMode
+
+	return &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      Dash0OperatorDeploymentName,
+			Namespace: Dash0OperatorNamespace,
+			Labels: map[string]string{
+				"app.kubernetes.io/name":      "dash0monitoring-operator",
+				"app.kubernetes.io/component": "controller",
+				"app.kubernetes.io/instance":  "deployment",
+				"dash0.com/enable":            "false",
+			},
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: &replicaCount,
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app.kubernetes.io/name":      "dash0monitoring-operator",
+					"app.kubernetes.io/component": "controller",
+				},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						"kubectl.kubernetes.io/default-container": "manager",
+					},
+					Labels: map[string]string{
+						"app.kubernetes.io/name":      "dash0monitoring-operator",
+						"app.kubernetes.io/component": "controller",
+						"dash0.cert-digest":           "1234567890",
+					},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:    "manager",
+							Image:   "ghcr.io/dash0hq/operator-controller@latest",
+							Command: []string{"/manager"},
+							Args: []string{
+								"--health-probe-bind-address=:8081",
+								"--metrics-bind-address=127.0.0.1:8080",
+								"--leader-elect",
+							},
+							Env: []corev1.EnvVar{
+								{
+									Name: "DASH0_OPERATOR_NAMESPACE",
+									ValueFrom: &corev1.EnvVarSource{
+										FieldRef: &corev1.ObjectFieldSelector{
+											FieldPath: "metadata.namespace",
+										},
+									},
+								},
+								{
+									Name:  "DASH0_DEPLOYMENT_NAME",
+									Value: Dash0OperatorDeploymentName,
+								},
+								{
+									Name:  "OTEL_COLLECTOR_NAME_PREFIX",
+									Value: "dash0monitoring-system",
+								},
+								{
+									Name:  "DASH0_INIT_CONTAINER_IMAGE",
+									Value: "ghcr.io/dash0hq/instrumentation",
+								},
+								{
+									Name:  "DASH0_INIT_CONTAINER_IMAGE_PULL_POLICY",
+									Value: "",
+								},
+								{
+									Name:  "DASH0_COLLECTOR_IMAGE",
+									Value: "ghcr.io/dash0hq/collector",
+								},
+								{
+									Name:  "DASH0_COLLECTOR_IMAGE_PULL_POLICY",
+									Value: "",
+								},
+								{
+									Name:  "DASH0_CONFIGURATION_RELOADER_IMAGE",
+									Value: "ghcr.io/dash0hq/configuration-reloader@latest",
+								},
+								{
+									Name:  "DASH0_CONFIGURATION_RELOADER_IMAGE_PULL_POLICY",
+									Value: "",
+								},
+								{
+									Name:  "DASH0_FILELOG_OFFSET_SYNCH_IMAGE",
+									Value: "ghcr.io/dash0hq/filelog-offset-synch",
+								},
+								{
+									Name:  "DASH0_FILELOG_OFFSET_SYNCH_IMAGE_PULL_POLICY",
+									Value: "",
+								},
+								{
+									Name:  "ENABLE_WEBHOOK",
+									Value: "false",
+								},
+								{
+									Name:  "DASH0_DEVELOPMENT_MODE",
+									Value: "false",
+								},
+								{
+									Name:  "OTEL_EXPORTER_OTLP_ENDPOINT",
+									Value: "ingress.eu-west-1.aws.dash0-dev.com:4317",
+								},
+								{
+									Name:  "OTEL_EXPORTER_OTLP_HEADERS",
+									Value: "Authorization=Bearer 1234567890",
+								},
+							},
+							Ports: []corev1.ContainerPort{
+								{
+									Name:          "webhook-server",
+									ContainerPort: 9443,
+									Protocol:      "TCP",
+								},
+							},
+							VolumeMounts: []corev1.VolumeMount{
+								{
+									Name:      "certificates",
+									MountPath: "/tmp/k8s-webhook-server/serving-certs",
+									ReadOnly:  true,
+								},
+							},
+						},
+						{
+							Name:  "kube-rbac-proxy",
+							Image: "quay.io/brancz/kube-rbac-proxy:v0.18.0",
+							Args: []string{
+								"--secure-listen-address=0.0.0.0:8443",
+								"--upstream=http://127.0.0.1:8080/",
+								"--logtostderr=true",
+								"--v=0",
+							},
+							Ports: []corev1.ContainerPort{
+								{
+									Name:          "https",
+									ContainerPort: 8443,
+									Protocol:      "TCP",
+								},
+							},
+							SecurityContext: &corev1.SecurityContext{
+								AllowPrivilegeEscalation: &falsy,
+								Capabilities: &corev1.Capabilities{
+									Drop: []corev1.Capability{"ALL"},
+								},
+							},
+						},
+					},
+					SecurityContext: &corev1.PodSecurityContext{
+						RunAsNonRoot: &truthy,
+						SeccompProfile: &corev1.SeccompProfile{
+							Type: corev1.SeccompProfileTypeRuntimeDefault,
+						},
+					},
+					ServiceAccountName:            "dash0-operator-service-account",
+					AutomountServiceAccountToken:  &truthy,
+					TerminationGracePeriodSeconds: &terminationGracePeriodSeconds,
+					Volumes: []corev1.Volume{
+						{
+							Name: "certificates",
+							VolumeSource: corev1.VolumeSource{
+								Secret: &corev1.SecretVolumeSource{
+									DefaultMode: &secretMode,
+									SecretName:  "dash0-operator-certificates",
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+}
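+
+// The fixture above is identical to controllerDeploymentWithoutSelfMonitoring except for the
+// OTEL_EXPORTER_OTLP_ENDPOINT and OTEL_EXPORTER_OTLP_HEADERS env vars on the manager container.
+// A sketch of how the parsing helper classifies the two fixtures (detection hinges on
+// OTEL_EXPORTER_OTLP_ENDPOINT being present on the manager container):
+//
+//	config, _ := selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment(
+//		controllerDeploymentWithSelfMonitoring(), ManagerContainerName)
+//	// config.Enabled == true
+//
+//	config, _ = selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment(
+//		controllerDeploymentWithoutSelfMonitoring(), ManagerContainerName)
+//	// config.Enabled == false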
[]corev1.Capability{"ALL"}, + }, + }, + }, + }, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &truthy, + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + ServiceAccountName: "dash0monitoring-operator-service-account", + AutomountServiceAccountToken: &truthy, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + Volumes: []corev1.Volume{ + { + Name: "certificates", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + DefaultMode: &secretMode, + SecretName: "dash0monitoring-operator-certificates", + }, + }, + }, + }, + }, + }, + }, + } +} + +func createReconciler(controllerDeployment *appsv1.Deployment) *OperatorConfigurationReconciler { + return &OperatorConfigurationReconciler{ + Client: k8sClient, + Clientset: clientset, + Recorder: recorder, + DeploymentSelfReference: controllerDeployment, + DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, + } +} + +func triggerOperatorConfigurationReconcileRequest(ctx context.Context, reconciler *OperatorConfigurationReconciler) { + triggerOperatorReconcileRequestForName(ctx, reconciler, OperatorConfigurationResourceName) +} + +func triggerOperatorReconcileRequestForName( + ctx context.Context, + reconciler *OperatorConfigurationReconciler, + dash0OperatorResourceName string, +) { + By("Triggering an operator configuration resource reconcile request") + _, err := reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: types.NamespacedName{Name: dash0OperatorResourceName}, + }) + Expect(err).NotTo(HaveOccurred()) +} + +func verifyOperatorConfigurationResourceIsAvailable(ctx context.Context) { + var availableCondition *metav1.Condition + By("Verifying status conditions") + Eventually(func(g Gomega) { + resource := LoadOperatorConfigurationResourceOrFail(ctx, k8sClient, g) + availableCondition = meta.FindStatusCondition(resource.Status.Conditions, string(dash0v1alpha1.ConditionTypeAvailable)) + g.Expect(availableCondition.Status).To(Equal(metav1.ConditionTrue)) + degraded := meta.FindStatusCondition(resource.Status.Conditions, string(dash0v1alpha1.ConditionTypeDegraded)) + g.Expect(degraded).To(BeNil()) + }, timeout, pollingInterval).Should(Succeed()) +} + +func verifySelfMonitoringConfigurationDash0Token( + g Gomega, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, +) { + dash0ExportConfiguration := selfMonitoringConfiguration.Export.Dash0 + g.Expect(dash0ExportConfiguration).NotTo(BeNil()) + g.Expect(dash0ExportConfiguration.Endpoint).To(Equal(EndpointDash0WithProtocolTest)) + g.Expect(dash0ExportConfiguration.Dataset).To(Equal(util.DatasetInsights)) + authorization := dash0ExportConfiguration.Authorization + g.Expect(authorization).ToNot(BeNil()) + g.Expect(*authorization.Token).To(Equal(AuthorizationTokenTest)) + g.Expect(authorization.SecretRef).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Http).To(BeNil()) +} + +func verifySelfMonitoringConfigurationDash0SecretRef( + g Gomega, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, +) { + dash0ExportConfiguration := selfMonitoringConfiguration.Export.Dash0 + g.Expect(dash0ExportConfiguration).NotTo(BeNil()) + g.Expect(dash0ExportConfiguration.Endpoint).To(Equal(EndpointDash0WithProtocolTest)) + g.Expect(dash0ExportConfiguration.Dataset).To(Equal(util.DatasetInsights)) + authorization := dash0ExportConfiguration.Authorization + g.Expect(authorization.Token).To(BeNil()) 
+ g.Expect(authorization.SecretRef).ToNot(BeNil()) + g.Expect(authorization.SecretRef.Name).To(Equal(SecretRefTest.Name)) + g.Expect(authorization.SecretRef.Key).To(Equal(SecretRefTest.Key)) + g.Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Http).To(BeNil()) +} + +func verifySelfMonitoringConfigurationGrpc( + g Gomega, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, +) { + grpcExportConfiguration := selfMonitoringConfiguration.Export.Grpc + g.Expect(grpcExportConfiguration).NotTo(BeNil()) + g.Expect(grpcExportConfiguration.Endpoint).To(Equal("dns://" + EndpointGrpcTest)) + headers := grpcExportConfiguration.Headers + g.Expect(headers).To(HaveLen(2)) + g.Expect(headers[0].Name).To(Equal("Key")) + g.Expect(headers[0].Value).To(Equal("Value")) + g.Expect(headers[1].Name).To(Equal(util.Dash0DatasetHeaderName)) + g.Expect(headers[1].Value).To(Equal(util.DatasetInsights)) + g.Expect(selfMonitoringConfiguration.Export.Dash0).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Http).To(BeNil()) +} + +func verifySelfMonitoringConfigurationHttp( + g Gomega, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, +) { + httpExportConfiguration := selfMonitoringConfiguration.Export.Http + g.Expect(httpExportConfiguration).NotTo(BeNil()) + g.Expect(httpExportConfiguration.Endpoint).To(Equal(EndpointHttpTest)) + g.Expect(httpExportConfiguration.Encoding).To(Equal(dash0v1alpha1.Proto)) + headers := httpExportConfiguration.Headers + g.Expect(headers).To(HaveLen(2)) + g.Expect(headers[0].Name).To(Equal("Key")) + g.Expect(headers[0].Value).To(Equal("Value")) + g.Expect(headers[1].Name).To(Equal(util.Dash0DatasetHeaderName)) + g.Expect(headers[1].Value).To(Equal(util.DatasetInsights)) + g.Expect(selfMonitoringConfiguration.Export.Dash0).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) +} diff --git a/internal/dash0/instrumentation/instrumenter.go b/internal/dash0/instrumentation/instrumenter.go index eb49b488..74020a7e 100644 --- a/internal/dash0/instrumentation/instrumenter.go +++ b/internal/dash0/instrumentation/instrumenter.go @@ -106,15 +106,16 @@ func (i *Instrumenter) InstrumentAtStartup( Name: dash0MonitoringResource.Name, }, } - _, stop, err := util.VerifyUniqueDash0MonitoringResourceExists( + checkResourceResult, err := util.VerifyThatUniqueResourceExists( ctx, k8sClient, - updateStatusFailedMessage, pseudoReconcileRequest, + &dash0v1alpha1.Dash0Monitoring{}, + updateStatusFailedMessage, logger, ) - if err != nil || stop { - // if an error occurred, it has already been logged in verifyUniqueDash0MonitoringResourceExists + if err != nil || checkResourceResult.StopReconcile || checkResourceResult.ResourceDoesNotExist { + // if an error occurred, it has already been logged in VerifyThatUniqueResourceExists continue } diff --git a/internal/dash0/instrumentation/instrumenter_test.go b/internal/dash0/instrumentation/instrumenter_test.go index 3f23c4b2..fa7a6fba 100644 --- a/internal/dash0/instrumentation/instrumenter_test.go +++ b/internal/dash0/instrumentation/instrumenter_test.go @@ -498,7 +498,7 @@ var _ = Describe("The instrumenter", Ordered, func() { VerifyNoEvents(ctx, clientset, namespace) workload = config.GetFn(ctx, k8sClient, namespace, name) config.VerifyFn(workload) - VerifyWebhookIgnoreOnceLabelIsAbesent(workload.GetObjectMeta()) + VerifyWebhookIgnoreOnceLabelIsAbsent(workload.GetObjectMeta()) }, Entry("should not attempt to revert a cron job that has the 
opt-out label", WorkloadTestConfig{ WorkloadNamePrefix: CronJobNamePrefix, diff --git a/internal/dash0/removal/removal_suite_test.go b/internal/dash0/removal/removal_suite_test.go index 57a00d56..af85be12 100644 --- a/internal/dash0/removal/removal_suite_test.go +++ b/internal/dash0/removal/removal_suite_test.go @@ -10,7 +10,6 @@ import ( "testing" "time" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" @@ -99,7 +98,7 @@ var _ = BeforeSuite(func() { instrumenter := &instrumentation.Instrumenter{ Client: k8sClient, Clientset: clientset, - Recorder: mgr.GetEventRecorderFor("dash0-controller"), + Recorder: mgr.GetEventRecorderFor("dash0-monitoring-controller"), Images: TestImages, OTelCollectorBaseUrl: OTelCollectorBaseUrlTest, } @@ -121,15 +120,7 @@ var _ = BeforeSuite(func() { Instrumenter: instrumenter, OperatorNamespace: Dash0OperatorNamespace, BackendConnectionManager: backendConnectionManager, - DanglingEventsTimeouts: &controller.DanglingEventsTimeouts{ - InitialTimeout: 0 * time.Second, - Backoff: wait.Backoff{ - Steps: 1, - Duration: 0 * time.Second, - Factor: 1, - Jitter: 0, - }, - }, + DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, } }) diff --git a/internal/dash0/selfmonitoring/self_monitoring.go b/internal/dash0/selfmonitoring/self_monitoring.go new file mode 100644 index 00000000..40838b83 --- /dev/null +++ b/internal/dash0/selfmonitoring/self_monitoring.go @@ -0,0 +1,649 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package selfmonitoring + +import ( + "fmt" + "regexp" + "slices" + "strings" + + "github.com/go-logr/logr" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + + dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/util" +) + +type OtlpProtocol string + +type SelfMonitoringConfiguration struct { + Enabled bool + Export dash0v1alpha1.Export +} + +type EndpointAndHeaders struct { + Endpoint string + Protocol string + Headers []dash0v1alpha1.Header +} + +const ( + otelExporterOtlpEndpointEnvVarName = "OTEL_EXPORTER_OTLP_ENDPOINT" + otelExporterOtlpHeadersEnvVarName = "OTEL_EXPORTER_OTLP_HEADERS" + otelExporterOtlpProtocolEnvVarName = "OTEL_EXPORTER_OTLP_PROTOCOL" + otelResourceAttribtuesEnvVarName = "OTEL_RESOURCE_ATTRIBUTES" + + selfMonitoringauthTokenEnvVarName = "SELF_MONITORING_AUTH_TOKEN" +) + +var ( + dash0IngressEndpointRegex = regexp.MustCompile(`dash0(?:-dev)?\.com`) + // See https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/ + authHeaderValue = fmt.Sprintf("Bearer $(%s)", selfMonitoringauthTokenEnvVarName) +) + +func ConvertOperatorConfigurationResourceToSelfMonitoringConfiguration( + resource dash0v1alpha1.Dash0OperatorConfiguration, + logger *logr.Logger, +) (SelfMonitoringConfiguration, error) { + if !resource.Spec.SelfMonitoring.Enabled { + return SelfMonitoringConfiguration{ + Enabled: false, + }, nil + } + + export := resource.Spec.Export + if export == nil { + logger.Info("Invalid configuration of Dash0OperatorConfiguration resource: Self-monitoring is enabled but no " + + "export configuration is set. 
+
+func ConvertOperatorConfigurationResourceToSelfMonitoringConfiguration(
+	resource dash0v1alpha1.Dash0OperatorConfiguration,
+	logger *logr.Logger,
+) (SelfMonitoringConfiguration, error) {
+	if !resource.Spec.SelfMonitoring.Enabled {
+		return SelfMonitoringConfiguration{
+			Enabled: false,
+		}, nil
+	}
+
+	export := resource.Spec.Export
+	if export == nil {
+		logger.Info("Invalid configuration of Dash0OperatorConfiguration resource: Self-monitoring is enabled but no " +
+			"export configuration is set. Self-monitoring telemetry will not be sent.")
+		return SelfMonitoringConfiguration{
+			Enabled: false,
+		}, nil
+	}
+
+	if export.Dash0 != nil {
+		return convertResourceToDash0ExportConfiguration(export, logger)
+	}
+	if export.Grpc != nil {
+		return convertResourceToGrpcExportConfiguration(export, logger)
+	}
+	if export.Http != nil {
+		return convertResourceToHttpExportConfiguration(export)
+	}
+	return SelfMonitoringConfiguration{
+		Enabled: false,
+	}, fmt.Errorf("no export configuration for self-monitoring has been provided; no self-monitoring telemetry will be sent")
+}
+
+func convertResourceToDash0ExportConfiguration(
+	export *dash0v1alpha1.Export,
+	logger *logr.Logger,
+) (SelfMonitoringConfiguration, error) {
+	if export.Grpc != nil {
+		logger.Info(
+			fmt.Sprintf(
+				"Ignoring grpc export configuration (%s) for self-monitoring telemetry, will send to the configured Dash0 export.",
+				export.Grpc.Endpoint))
+	}
+	if export.Http != nil {
+		logger.Info(
+			fmt.Sprintf(
+				"Ignoring http export configuration (%s) for self-monitoring telemetry, will send to the configured Dash0 export.",
+				export.Http.Endpoint))
+	}
+
+	dash0Export := export.Dash0
+	return SelfMonitoringConfiguration{
+		Enabled: true,
+		Export: dash0v1alpha1.Export{
+			Dash0: &dash0v1alpha1.Dash0Configuration{
+				Endpoint:      dash0Export.Endpoint,
+				Dataset:       util.DatasetInsights,
+				Authorization: dash0Export.Authorization,
+			},
+		},
+	}, nil
+}
+
+func convertResourceToGrpcExportConfiguration(
+	export *dash0v1alpha1.Export,
+	logger *logr.Logger,
+) (SelfMonitoringConfiguration, error) {
+	if export.Http != nil {
+		logger.Info(
+			fmt.Sprintf(
+				"Ignoring http export configuration (%s) for self-monitoring telemetry, will send to the configured gRPC export.",
+				export.Http.Endpoint))
+	}
+
+	grpcExport := export.Grpc
+	return SelfMonitoringConfiguration{
+		Enabled: true,
+		Export: dash0v1alpha1.Export{
+			Grpc: &dash0v1alpha1.GrpcConfiguration{
+				Endpoint: grpcExport.Endpoint,
+				Headers: append(
+					grpcExport.Headers,
+					dash0v1alpha1.Header{
+						Name:  util.Dash0DatasetHeaderName,
+						Value: util.DatasetInsights,
+					},
+				),
+			},
+		},
+	}, nil
+}
+
+func convertResourceToHttpExportConfiguration(
+	export *dash0v1alpha1.Export,
+) (SelfMonitoringConfiguration, error) {
+	httpExport := export.Http
+	if httpExport.Encoding == dash0v1alpha1.Json {
+		return SelfMonitoringConfiguration{
+			Enabled: false,
+		}, fmt.Errorf("using an HTTP exporter with JSON encoding for self-monitoring is not supported")
+	}
+	return SelfMonitoringConfiguration{
+		Enabled: true,
+		Export: dash0v1alpha1.Export{
+			Http: &dash0v1alpha1.HttpConfiguration{
+				Endpoint: httpExport.Endpoint,
+				Headers: append(
+					httpExport.Headers,
+					dash0v1alpha1.Header{
+						Name:  util.Dash0DatasetHeaderName,
+						Value: util.DatasetInsights,
+					},
+				),
+				Encoding: httpExport.Encoding,
+			},
+		},
+	}, nil
+}
+
+type cannotFindContainerByNameError struct {
+	ContainerName     string
+	WorkloadGVK       schema.GroupVersionKind
+	WorkloadNamespace string
+	WorkloadName      string
+}
+
+func (c *cannotFindContainerByNameError) Error() string {
+	return fmt.Sprintf("cannot find the container named '%v' in the %v %v/%v", c.ContainerName, c.WorkloadGVK.Kind, c.WorkloadNamespace, c.WorkloadName)
+}
+
+func DisableSelfMonitoringInCollectorDaemonSet(collectorDaemonSet *appsv1.DaemonSet) {
+	for i, container := range collectorDaemonSet.Spec.Template.Spec.InitContainers {
+		disableSelfMonitoringInContainer(&container)
+		collectorDaemonSet.Spec.Template.Spec.InitContainers[i] = container
+	}
+
+	for i, container := range collectorDaemonSet.Spec.Template.Spec.Containers {
+		disableSelfMonitoringInContainer(&container)
+		collectorDaemonSet.Spec.Template.Spec.Containers[i] = container
+	}
+}
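+
+// A usage sketch for this pair of functions (the daemonset value is hypothetical; the real call
+// sites live in the collector resource reconciliation):
+//
+//	ds := desiredCollectorDaemonSet() // some *appsv1.DaemonSet holding the collector containers
+//	if err := EnableSelfMonitoringInCollectorDaemonSet(ds, selfMonitoringConfiguration, "0.0.1"); err != nil {
+//		// handle the error
+//	}
+//	// every container in ds.Spec.Template.Spec.Containers now carries the OTEL_EXPORTER_OTLP_*
+//	// env vars derived from the export settings; DisableSelfMonitoringInCollectorDaemonSet(ds)
+//	// strips them again.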
disableSelfMonitoringInContainer(&container) + } +} + +func EnableSelfMonitoringInCollectorDaemonSet( + collectorDaemonSet *appsv1.DaemonSet, + selfMonitoringConfiguration SelfMonitoringConfiguration, + operatorVersion string, +) error { + selfMonitoringExport := selfMonitoringConfiguration.Export + var authTokenEnvVar *corev1.EnvVar + if selfMonitoringExport.Dash0 != nil { + envVar, err := util.CreateEnvVarForAuthorization( + *selfMonitoringExport.Dash0, + selfMonitoringauthTokenEnvVarName, + ) + if err != nil { + return err + } + authTokenEnvVar = &envVar + } + + // For now, we do not instrument init containers. The filelogoffsetsynch init container fails with: + // filelog-offset-init 2024/08/29 21:45:48 + // Failed to shutdown metrics provider, metrics data nay have been lost: failed to upload metrics: + // failed to exit idle mode: dns resolver: missing address + // making the collector pod go into CrashLoopBackoff. + // + // This is probably due to a misconfiguration of the endpoint, but ultimately it won't do if selfmonitoring issues + // prevent the collector from starting. We probably need to remove the log.Fatalln calls entirely there. + // + // for i, container := range collectorDaemonSet.Spec.Template.Spec.InitContainers { + // enableSelfMonitoringInContainer(&container, selfMonitoringExport, authTokenEnvVar) + // collectorDaemonSet.Spec.Template.Spec.InitContainers[i] = container + // } + + for i, container := range collectorDaemonSet.Spec.Template.Spec.Containers { + enableSelfMonitoringInContainer(&container, selfMonitoringExport, authTokenEnvVar, operatorVersion) + collectorDaemonSet.Spec.Template.Spec.Containers[i] = container + } + + return nil +} + +func GetSelfMonitoringConfigurationFromControllerDeployment( + controllerDeployment *appsv1.Deployment, + managerContainerName string, +) (SelfMonitoringConfiguration, error) { + managerContainerIdx := slices.IndexFunc(controllerDeployment.Spec.Template.Spec.Containers, func(c corev1.Container) bool { + return c.Name == managerContainerName + }) + + if managerContainerIdx < 0 { + return SelfMonitoringConfiguration{ + Enabled: false, + }, &cannotFindContainerByNameError{ + ContainerName: managerContainerName, + WorkloadGKV: controllerDeployment.GroupVersionKind(), + WorkloadNamespace: controllerDeployment.Namespace, + WorkloadName: controllerDeployment.Name, + } + } + + return ParseSelfMonitoringConfigurationFromContainer(&controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx]) +} + +func DisableSelfMonitoringInControllerDeployment( + controllerDeployment *appsv1.Deployment, + managerContainerName string, +) error { + managerContainerIdx := slices.IndexFunc(controllerDeployment.Spec.Template.Spec.Containers, func(c corev1.Container) bool { + return c.Name == managerContainerName + }) + + if managerContainerIdx < 0 { + return &cannotFindContainerByNameError{ + ContainerName: managerContainerName, + WorkloadGKV: controllerDeployment.GroupVersionKind(), + WorkloadNamespace: controllerDeployment.Namespace, + WorkloadName: controllerDeployment.Name, + } + } + + managerContainer := controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] + disableSelfMonitoringInContainer(&managerContainer) + controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] = managerContainer + + return nil +} + +func EnableSelfMonitoringInControllerDeployment( + controllerDeployment *appsv1.Deployment, + managerContainerName string, + selfMonitoringConfiguration SelfMonitoringConfiguration, + operatorVersion 
+
+func EnableSelfMonitoringInControllerDeployment(
+	controllerDeployment *appsv1.Deployment,
+	managerContainerName string,
+	selfMonitoringConfiguration SelfMonitoringConfiguration,
+	operatorVersion string,
+) error {
+	managerContainerIdx := slices.IndexFunc(
+		controllerDeployment.Spec.Template.Spec.Containers,
+		func(c corev1.Container) bool {
+			return c.Name == managerContainerName
+		})
+
+	if managerContainerIdx < 0 {
+		return &cannotFindContainerByNameError{
+			ContainerName:     managerContainerName,
+			WorkloadGVK:       controllerDeployment.GroupVersionKind(),
+			WorkloadNamespace: controllerDeployment.Namespace,
+			WorkloadName:      controllerDeployment.Name,
+		}
+	}
+
+	selfMonitoringExport := selfMonitoringConfiguration.Export
+	var authTokenEnvVar *corev1.EnvVar
+	if selfMonitoringExport.Dash0 != nil {
+		envVar, err := util.CreateEnvVarForAuthorization(
+			*selfMonitoringExport.Dash0,
+			selfMonitoringauthTokenEnvVarName,
+		)
+		if err != nil {
+			return err
+		}
+		authTokenEnvVar = &envVar
+	}
+	managerContainer := controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx]
+	enableSelfMonitoringInContainer(&managerContainer, selfMonitoringExport, authTokenEnvVar, operatorVersion)
+	controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] = managerContainer
+
+	return nil
+}
+
+func ParseSelfMonitoringConfigurationFromContainer(container *corev1.Container) (SelfMonitoringConfiguration, error) {
+	endpoint, err := parseEndpoint(container)
+	if err != nil {
+		return SelfMonitoringConfiguration{}, err
+	} else if endpoint == "" {
+		return SelfMonitoringConfiguration{
+			Enabled: false,
+		}, nil
+	}
+
+	protocolFromEnvVar := "grpc"
+	otelExporterOtlpProtocolEnvVarIdx := slices.IndexFunc(container.Env, matchOtelExporterOtlpProtocolEnvVar)
+	if otelExporterOtlpProtocolEnvVarIdx >= 0 {
+		protocolFromEnvVar = container.Env[otelExporterOtlpProtocolEnvVarIdx].Value
+	}
+
+	headers := parseHeadersFromEnvVar(container)
+
+	switch protocolFromEnvVar {
+	case "grpc":
+		return createDash0OrGrpcConfigurationFromContainer(container, endpoint, headers), nil
+	case "http/json":
+		return createHttpJsonConfigurationFromContainer(endpoint, headers), nil
+	case "http/protobuf":
+		return createHttpProtobufConfigurationFromContainer(endpoint, headers), nil
+
+	default:
+		return SelfMonitoringConfiguration{}, fmt.Errorf("unsupported protocol %v", protocolFromEnvVar)
+	}
+}
+
+func isDash0Export(endpoint string, headers []dash0v1alpha1.Header) bool {
+	return dash0IngressEndpointRegex.MatchString(endpoint) &&
+		slices.ContainsFunc(headers, func(h dash0v1alpha1.Header) bool {
+			return h.Name == util.AuthorizationHeaderName
+		})
+}
+
+func createDash0OrGrpcConfigurationFromContainer(container *corev1.Container, endpoint string, headers []dash0v1alpha1.Header) SelfMonitoringConfiguration {
+	if isDash0Export(endpoint, headers) {
+		return createDash0ConfigurationFromContainer(container, endpoint, headers)
+	} else {
+		return createGrpcConfigurationFromContainer(endpoint, headers)
+	}
+}
+
+func createDash0ConfigurationFromContainer(container *corev1.Container, endpoint string, headers []dash0v1alpha1.Header) SelfMonitoringConfiguration {
+	referencesTokenEnvVar := false
+	dataset := ""
+	for _, header := range headers {
+		if header.Name == util.AuthorizationHeaderName && header.Value == authHeaderValue {
+			referencesTokenEnvVar = true
+		} else if header.Name == util.Dash0DatasetHeaderName {
+			dataset = header.Value
+		}
+	}
+
+	dash0Configuration := &dash0v1alpha1.Dash0Configuration{
+		Endpoint: endpoint,
+		Dataset:  dataset,
+	}
+	if referencesTokenEnvVar {
+		authorization := parseDash0AuthorizationFromEnvVars(container)
+		if authorization != nil {
+			dash0Configuration.Authorization = *authorization
+		}
+	}
+	return SelfMonitoringConfiguration{
+		Enabled: true,
+		Export: dash0v1alpha1.Export{
+			Dash0: dash0Configuration,
+		},
+	}
+}
+
+func createGrpcConfigurationFromContainer(endpoint string, headers []dash0v1alpha1.Header) SelfMonitoringConfiguration {
+	return SelfMonitoringConfiguration{
+		Enabled: true,
+		Export: dash0v1alpha1.Export{
+			Grpc: &dash0v1alpha1.GrpcConfiguration{
+				Endpoint: endpoint,
+				Headers:  headers,
+			},
+		},
+	}
+}
+
+func createHttpProtobufConfigurationFromContainer(endpoint string, headers []dash0v1alpha1.Header) SelfMonitoringConfiguration {
+	return SelfMonitoringConfiguration{
+		Enabled: true,
+		Export: dash0v1alpha1.Export{
+			Http: &dash0v1alpha1.HttpConfiguration{
+				Endpoint: endpoint,
+				Headers:  headers,
+				Encoding: dash0v1alpha1.Proto,
+			},
+		},
+	}
+}
+
+func createHttpJsonConfigurationFromContainer(endpoint string, headers []dash0v1alpha1.Header) SelfMonitoringConfiguration {
+	return SelfMonitoringConfiguration{
+		Enabled: true,
+		Export: dash0v1alpha1.Export{
+			Http: &dash0v1alpha1.HttpConfiguration{
+				Endpoint: endpoint,
+				Headers:  headers,
+				Encoding: dash0v1alpha1.Json,
+			},
+		},
+	}
+}
+
+func parseEndpoint(container *corev1.Container) (string, error) {
+	otelExporterOtlpEndpointEnvVarIdx := slices.IndexFunc(container.Env, matchOtelExporterOtlpEndpointEnvVar)
+	if otelExporterOtlpEndpointEnvVarIdx < 0 {
+		return "", nil
+	}
+	otelExporterOtlpEndpointEnvVar := container.Env[otelExporterOtlpEndpointEnvVarIdx]
+	if otelExporterOtlpEndpointEnvVar.Value == "" && otelExporterOtlpEndpointEnvVar.ValueFrom != nil {
+		return "", fmt.Errorf("retrieving the endpoint from OTEL_EXPORTER_OTLP_ENDPOINT with a ValueFrom source is not supported")
+	} else if otelExporterOtlpEndpointEnvVar.Value == "" {
+		return "", fmt.Errorf("OTEL_EXPORTER_OTLP_ENDPOINT is set but empty")
+	}
+	return otelExporterOtlpEndpointEnvVar.Value, nil
+}
+
+func parseHeadersFromEnvVar(container *corev1.Container) []dash0v1alpha1.Header {
+	otelExporterOtlpHeadersEnvVarValue := ""
+	var headers []dash0v1alpha1.Header
+	if otelExporterOtlpHeadersEnvVarIdx :=
+		slices.IndexFunc(container.Env, matchOtelExporterOtlpHeadersEnvVar); otelExporterOtlpHeadersEnvVarIdx >= 0 {
+		otelExporterOtlpHeadersEnvVarValue = container.Env[otelExporterOtlpHeadersEnvVarIdx].Value
+		keyValuePairs := strings.Split(otelExporterOtlpHeadersEnvVarValue, ",")
+		for _, keyValuePair := range keyValuePairs {
+			// use SplitN, header values (for example authorization tokens) may themselves contain "=" characters
+			parts := strings.SplitN(keyValuePair, "=", 2)
+			if len(parts) == 2 {
+				headers = append(headers, dash0v1alpha1.Header{
+					Name:  parts[0],
+					Value: parts[1],
+				})
+			}
+		}
+	}
+
+	return headers
+}
+
+func parseDash0AuthorizationFromEnvVars(container *corev1.Container) *dash0v1alpha1.Authorization {
+	if idx := slices.IndexFunc(container.Env, matchSelfMonitoringAuthTokenEnvVar); idx >= 0 {
+		authTokenEnvVar := container.Env[idx]
+		if authTokenEnvVar.Value != "" {
+			return &dash0v1alpha1.Authorization{
+				Token: &authTokenEnvVar.Value,
+			}
+		} else if authTokenEnvVar.ValueFrom != nil &&
+			authTokenEnvVar.ValueFrom.SecretKeyRef != nil &&
+			authTokenEnvVar.ValueFrom.SecretKeyRef.LocalObjectReference.Name != "" &&
+			authTokenEnvVar.ValueFrom.SecretKeyRef.Key != "" {
+			return &dash0v1alpha1.Authorization{
+				SecretRef: &dash0v1alpha1.SecretRef{
+					Name: authTokenEnvVar.ValueFrom.SecretKeyRef.LocalObjectReference.Name,
+					Key:  authTokenEnvVar.ValueFrom.SecretKeyRef.Key,
+				},
+			}
+		}
+	}
+	return nil
+}
+
+func enableSelfMonitoringInContainer(
+	container *corev1.Container,
+	selfMonitoringExport dash0v1alpha1.Export,
+	authTokenEnvVar *corev1.EnvVar,
+	operatorVersion string,
+) {
+	if authTokenEnvVar != nil {
+		authTokenEnvVarIdx := slices.IndexFunc(container.Env, matchSelfMonitoringAuthTokenEnvVar)
+		if authTokenEnvVarIdx == 0 {
+			// the env var is already the first entry, update the existing value in place
+			container.Env[authTokenEnvVarIdx] = *authTokenEnvVar
+		} else if authTokenEnvVarIdx > 0 {
+			// Since we reference this env var in the OTEL_EXPORTER_OTLP_HEADERS env var, we want to have this as the
+			// very first env var, to make sure it is defined before OTEL_EXPORTER_OTLP_HEADERS. (This is a requirement
+			// for using
+			// https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/.)
+			container.Env = slices.Delete(container.Env, authTokenEnvVarIdx, authTokenEnvVarIdx+1)
+			container.Env = slices.Insert(container.Env, 0, *authTokenEnvVar)
+		} else {
+			// the env var is not present yet, add it to the start of the list
+			container.Env = slices.Insert(container.Env, 0, *authTokenEnvVar)
+		}
+	}
+
+	exportSettings := ConvertExportConfigurationToEnvVarSettings(selfMonitoringExport)
+	updateOrAppendEnvVar(container, otelExporterOtlpEndpointEnvVarName, exportSettings.Endpoint)
+	updateOrAppendEnvVar(container, otelExporterOtlpProtocolEnvVarName, exportSettings.Protocol)
+	updateOrAppendEnvVar(container, otelResourceAttribtuesEnvVarName,
+		fmt.Sprintf(
+			"service.namespace=dash0.operator,service.name=filelog_offset_synch,service.version=%s",
+			operatorVersion,
+		))
+
+	headers := exportSettings.Headers
+	headersEnvVarIdx := slices.IndexFunc(container.Env, matchOtelExporterOtlpHeadersEnvVar)
+	if len(headers) == 0 {
+		// no headers are required; remove the OTEL_EXPORTER_OTLP_HEADERS env var if it is currently set
+		if headersEnvVarIdx >= 0 {
+			container.Env =
+				slices.Delete(container.Env, headersEnvVarIdx, headersEnvVarIdx+1)
+		}
+	} else {
+		newOtelExporterOtlpHeadersEnvVar := corev1.EnvVar{
+			Name:  otelExporterOtlpHeadersEnvVarName,
+			Value: convertHeadersToEnvVarValue(headers),
+		}
+		if headersEnvVarIdx >= 0 {
+			// update the existing environment variable
+			container.Env[headersEnvVarIdx] = newOtelExporterOtlpHeadersEnvVar
+		} else {
+			// insert the new environment variable right after OTEL_EXPORTER_OTLP_ENDPOINT
+			headersEnvVarIdx = slices.IndexFunc(container.Env, matchOtelExporterOtlpEndpointEnvVar)
+			container.Env = slices.Insert(container.Env, headersEnvVarIdx+1, newOtelExporterOtlpHeadersEnvVar)
+		}
+	}
+}
+
+func ConvertExportConfigurationToEnvVarSettings(selfMonitoringExport dash0v1alpha1.Export) EndpointAndHeaders {
+	if selfMonitoringExport.Dash0 != nil {
+		dash0Export := selfMonitoringExport.Dash0
+		headers := []dash0v1alpha1.Header{{
+			Name:  util.AuthorizationHeaderName,
+			Value: authHeaderValue,
+		}}
+		if dash0Export.Dataset != "" && dash0Export.Dataset != "default" {
+			headers = append(headers, dash0v1alpha1.Header{
+				Name:  util.Dash0DatasetHeaderName,
+				Value: dash0Export.Dataset,
+			})
+		}
+		return EndpointAndHeaders{
+			Endpoint: prependProtocol(dash0Export.Endpoint, "https://"),
+			Protocol: "grpc",
+			Headers:  headers,
+		}
+	}
+
+	if selfMonitoringExport.Grpc != nil {
+		return EndpointAndHeaders{
+			Endpoint: prependProtocol(selfMonitoringExport.Grpc.Endpoint, "dns://"),
+			Protocol: "grpc",
+			Headers:  selfMonitoringExport.Grpc.Headers,
+		}
+	}
+
+	if selfMonitoringExport.Http != nil {
+		protocol := "http/protobuf"
+		// The Go SDK does not support http/json, so we ignore this setting for now.
+ // if selfMonitoringExport.Http.Encoding == dash0v1alpha1.Json { + // protocol = "http/json" + // } + return EndpointAndHeaders{ + Endpoint: selfMonitoringExport.Http.Endpoint, + Protocol: protocol, + Headers: selfMonitoringExport.Http.Headers, + } + } + return EndpointAndHeaders{} +} + +func prependProtocol(endpoint string, defaultProtocol string) string { + // Most gRPC implementations are fine without a protocol, but the Go SDK with gRPC requires the endpoint with a + // protocol, see https://github.com/open-telemetry/opentelemetry-go/pull/5632. + if !regexp.MustCompile(`^\w+://`).MatchString(endpoint) { + // See https://grpc.github.io/grpc/core/md_doc_naming.html + return defaultProtocol + endpoint + } + return endpoint +} + +func convertHeadersToEnvVarValue(headers []dash0v1alpha1.Header) string { + keyValuePairs := make([]string, 0, len(headers)) + for _, header := range headers { + keyValuePairs = append(keyValuePairs, fmt.Sprintf("%v=%v", header.Name, header.Value)) + } + return strings.Join(keyValuePairs, ",") +} + +func disableSelfMonitoringInContainer(container *corev1.Container) { + removeEnvVar(container, otelExporterOtlpEndpointEnvVarName) + removeEnvVar(container, otelExporterOtlpProtocolEnvVarName) + removeEnvVar(container, otelExporterOtlpHeadersEnvVarName) + removeEnvVar(container, selfMonitoringauthTokenEnvVarName) +} + +func updateOrAppendEnvVar(container *corev1.Container, name string, value string) { + newEnvVar := corev1.EnvVar{ + Name: name, + Value: value, + } + idx := slices.IndexFunc(container.Env, func(e corev1.EnvVar) bool { + return e.Name == name + }) + if idx >= 0 { + // We need to update the existing value + container.Env[idx] = newEnvVar + } else { + container.Env = append(container.Env, newEnvVar) + } +} + +func removeEnvVar(container *corev1.Container, name string) { + idx := slices.IndexFunc(container.Env, func(e corev1.EnvVar) bool { + return e.Name == name + }) + if idx >= 0 { + container.Env = slices.Delete(container.Env, idx, idx+1) + } +} + +func matchOtelExporterOtlpEndpointEnvVar(e corev1.EnvVar) bool { + return e.Name == otelExporterOtlpEndpointEnvVarName +} + +func matchOtelExporterOtlpHeadersEnvVar(e corev1.EnvVar) bool { + return e.Name == otelExporterOtlpHeadersEnvVarName +} + +func matchOtelExporterOtlpProtocolEnvVar(e corev1.EnvVar) bool { + return e.Name == otelExporterOtlpProtocolEnvVarName +} + +func matchSelfMonitoringAuthTokenEnvVar(e corev1.EnvVar) bool { + return e.Name == selfMonitoringauthTokenEnvVarName +} diff --git a/internal/dash0/util/constants.go b/internal/dash0/util/constants.go new file mode 100644 index 00000000..a7105d2b --- /dev/null +++ b/internal/dash0/util/constants.go @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package util + +const ( + AuthorizationHeaderName = "Authorization" + Dash0DatasetHeaderName = "Dash0-Dataset" + DatasetInsights = "dash0-internal" +) diff --git a/internal/dash0/util/controller.go b/internal/dash0/util/controller.go index 62a7e630..dc970896 100644 --- a/internal/dash0/util/controller.go +++ b/internal/dash0/util/controller.go @@ -7,19 +7,35 @@ import ( "context" "fmt" "sort" + "time" "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + dash0common "github.com/dash0hq/dash0-operator/api/dash0monitoring" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" ) +type CheckResourceResult struct { + Resource dash0common.Dash0Resource + StopReconcile bool + ResourceDoesNotExist bool +} + +type DanglingEventsTimeouts struct { + InitialTimeout time.Duration + Backoff wait.Backoff +} + // CheckIfNamespaceExists checks if the given namespace (which is supposed to be the namespace from a reconcile request) // exists in the cluster. If the namespace does not exist, it returns false, and this is supposed to stop the reconcile func CheckIfNamespaceExists( @@ -40,11 +56,12 @@ func CheckIfNamespaceExists( return true, nil } -// VerifyUniqueDash0MonitoringResourceExists loads the resource that the current reconcile request applies to, if it -// exists. It also checks whether there is only one such resource (or, if there are multiple, if the currently -// reconciled one is the most recently created one). The bool returned has the meaning "stop the reconcile request", -// that is, if the function returns true, it expects the caller to stop the reconcile request immediately and not -// requeue it. If an error ocurrs during any of the checks (for example while talking to the Kubernetes API server), the +// VerifyThatUniqueResourceExists loads the resource that the current reconcile request applies to, if it exists. It +// also checks whether there is only one such resource (or, if there are multiple, if the currently reconciled one is +// the most recently created one). The bool returned has the meaning "stop the reconcile request", that is, if the +// function returns true, it expects the caller to stop the reconcile request immediately and not requeue it. +// +// If an error occurs during any of the checks (for example while talking to the Kubernetes API server), the // function will return that error, the caller should then ignore the bool result and requeue the reconcile request. // // - If the resource does not exist, the function logs a message and returns (nil, true, nil) and expects the caller @@ -56,134 +73,227 @@ func CheckIfNamespaceExists( // stopReconcile and the caller is expected to stop the reconcile and not requeue it. // - If any error is encountered when searching for resources etc., that error will be returned, the caller is // expected to ignore the bool result and requeue the reconcile request. 
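+//
+// Usage sketch (illustrative only; assumes the reconciled resource type implements dash0common.Dash0Resource):
+//
+//	checkResourceResult, err := VerifyThatUniqueResourceExists(
+//	    ctx, k8sClient, req, &dash0v1alpha1.Dash0OperatorConfiguration{}, updateStatusFailedMessage, &logger)
+//	if err != nil {
+//	    return ctrl.Result{}, err
+//	}
+//	if checkResourceResult.StopReconcile {
+//	    return ctrl.Result{}, nil
+//	}
+//	resource := checkResourceResult.Resource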
-func VerifyUniqueDash0MonitoringResourceExists(
+func VerifyThatUniqueResourceExists(
 	ctx context.Context,
 	k8sClient client.Client,
-	updateStatusFailedMessage string,
 	req ctrl.Request,
+	resourcePrototype dash0common.Dash0Resource,
+	updateStatusFailedMessage string,
 	logger *logr.Logger,
-) (*dash0v1alpha1.Dash0Monitoring, bool, error) {
-	dash0MonitoringResource, stopReconcile, err := verifyThatCustomResourceExists(
+) (CheckResourceResult, error) {
+	checkResourceResult, err := VerifyThatResourceExists(
 		ctx,
 		k8sClient,
 		req,
+		resourcePrototype,
 		logger,
 	)
-	if err != nil || stopReconcile {
-		return nil, stopReconcile, err
+	if err != nil || checkResourceResult.StopReconcile || checkResourceResult.ResourceDoesNotExist {
+		return checkResourceResult, err
 	}
-	stopReconcile, err =
-		verifyThatCustomResourceIsUniqe(
+	checkResourceResult.StopReconcile, err =
+		VerifyThatResourceIsUniqueInScope(
 			ctx,
 			k8sClient,
 			req,
-			dash0MonitoringResource,
+			checkResourceResult.Resource,
 			updateStatusFailedMessage,
 			logger,
 		)
-	return dash0MonitoringResource, stopReconcile, err
+	return checkResourceResult, err
 }
 
-// verifyThatCustomResourceExists loads the resource that the current reconcile request applies to. If that
-// resource does not exist, the function logs a message and returns (nil, true, nil) and expects the caller to stop the
-// reconciliation (without requeing it). If any other error occurs while trying to fetch the resource, the function logs
-// the error and returns (nil, true, err) and expects the caller to requeue the reconciliation.
-func verifyThatCustomResourceExists(
+// VerifyThatResourceExists loads the resource that the current reconcile request applies to. If that resource does
+// not exist, the function logs a message and returns a CheckResourceResult with StopReconcile and
+// ResourceDoesNotExist set to true together with a nil error, and expects the caller to stop the reconciliation
+// (without requeueing it). If any other error occurs while trying to fetch the resource, the function logs the error
+// and returns a CheckResourceResult with StopReconcile set to true together with that error; the caller is then
+// expected to requeue the reconciliation.
+func VerifyThatResourceExists(
 	ctx context.Context,
 	k8sClient client.Client,
 	req ctrl.Request,
+	resourcePrototype dash0common.Dash0Resource,
 	logger *logr.Logger,
-) (*dash0v1alpha1.Dash0Monitoring, bool, error) {
-	resource := &dash0v1alpha1.Dash0Monitoring{}
-	err := k8sClient.Get(ctx, req.NamespacedName, resource)
-	if err != nil {
+) (CheckResourceResult, error) {
+	resource := resourcePrototype.GetReceiver()
+	if err := k8sClient.Get(ctx, req.NamespacedName, resource); err != nil {
 		if apierrors.IsNotFound(err) {
 			logger.Info(
-				"The Dash0 monitoring resource has not been found, either it hasn't been installed or it has " +
-					"been deleted. Ignoring the reconcile request.")
-			// stop the reconciliation, and do not requeue it (that is, return (ctrl.Result{}, nil))
-			return nil, true, nil
+				fmt.Sprintf(
+					"The %s %s has not been found; either it hasn't been installed or it has been deleted.",
+					resourcePrototype.GetNaturalLanguageResourceTypeName(),
+					resourcePrototype.RequestToName(req),
+				))
+			// stop the reconciliation, and do not requeue it.
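+			// (the positional fields are Resource=nil, StopReconcile=true, ResourceDoesNotExist=true)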
+			return CheckResourceResult{nil, true, true}, nil
 		}
-		logger.Error(err, "Failed to get the Dash0 monitoring resource, requeuing reconcile request.")
-		// requeue the reconciliation (that is, return (ctrl.Result{}, err))
-		return nil, true, err
+		logger.Error(err,
+			fmt.Sprintf(
+				"Failed to get the %s %s, requeuing reconcile request.",
+				resourcePrototype.GetNaturalLanguageResourceTypeName(),
+				resourcePrototype.RequestToName(req),
+			))
+		// requeue the reconciliation
+		return CheckResourceResult{nil, true, false}, err
 	}
-	return resource, false, nil
+
+	// We have found a resource and return it.
+	return CheckResourceResult{resource.(dash0common.Dash0Resource), false, false}, nil
 }
 
-// verifyThatCustomResourceIsUniqe checks whether there are any additional resources of the same type in the namespace,
-// besides the one that the current reconcile request applies to. The bool the function returns has the semantic
-// stopReconcile, that is, if the function returns true, it expects the caller to stop the reconcile. If there are no
-// errors and the resource is unique, the function will return (false, nil). If there are multiple resources in the
-// namespace, but the given resource is the most recent one, the function will return (false, nil) as well, since the
-// newest resource should be reconciled. If there are multiple resources and the given one is not the most recent one,
-// the function will return (true, nil), and the caller is expected to stop the reconcile and not requeue it.
+// VerifyThatResourceIsUniqueInScope checks whether there are any additional resources of the same type in the same
+// scope (that is, in the same namespace for namespaced resources, or in the same cluster for cluster-scoped
+// resources), besides the one that the current reconcile request applies to. The bool the function returns has the
+// semantic stopReconcile, that is, if the function returns true, it expects the caller to stop the reconcile. If
+// there are no errors and the resource is unique, the function will return (false, nil). If there are multiple
+// resources in the scope, but the given resource is the most recent one, the function will return (false, nil) as
+// well, since the newest resource should be reconciled. If there are multiple resources and the given one is not the
+// most recent one, the function will return (true, nil), and the caller is expected to stop the reconcile and not
+// requeue it.
 // If any error is encountered when searching for other resource etc., that error will be returned, the caller is
 // expected to ignore the bool result and requeue the reconcile request.
-func verifyThatCustomResourceIsUniqe(
+func VerifyThatResourceIsUniqueInScope(
 	ctx context.Context,
 	k8sClient client.Client,
 	req ctrl.Request,
-	dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring,
+	resource dash0common.Dash0Resource,
 	updateStatusFailedMessage string,
 	logger *logr.Logger,
 ) (bool, error) {
-	allCustomResourcesInNamespace := &dash0v1alpha1.Dash0MonitoringList{}
+	scope, allResourcesInScope, err :=
+		findAllResourceInstancesInScope(ctx, k8sClient, req, resource, logger)
+	if err != nil {
+		return true, err
+	}
+
+	items := resource.Items(allResourcesInScope)
+	if len(items) <= 1 {
+		// The given resource is unique.
+		return false, nil
+	}
+
+	// There are multiple instances of the resource in scope (that is, in the same namespace for namespaced
+	// resources, or in the same cluster for cluster-scoped resources). If the resource that is currently being
+	// reconciled is the one that has been most recently created, we assume that this is the source of truth in terms
+	// of configuration settings etc., and we ignore the other instances in this reconcile request (they will be
+	// handled when they are being reconciled). If the currently reconciled resource is not the most recent one, we
+	// set its status to degraded.
+	sort.Sort(SortByCreationTimestamp(items))
+	mostRecentResource := resource.At(allResourcesInScope, len(items)-1)
+	if mostRecentResource.GetUid() == resource.GetUid() {
+		logger.Info(fmt.Sprintf(
+			"At least one other %[1]s exists in this %[2]s. This %[1]s (%[3]s) is the most recent one."+
+				" The state of the other resource(s) will be set to degraded.",
+			resource.GetNaturalLanguageResourceTypeName(),
+			scope,
+			resource.RequestToName(req),
+		))
+		// continue with the reconcile request for this resource, let the reconcile requests for the other offending
+		// resources handle the situation for those resources
+		return false, nil
+	} else {
+		logger.Info(
+			fmt.Sprintf(
+				"At least one other %[1]s exists in this %[2]s, and at least one other %[1]s has been created "+
+					"more recently than this one. Setting the state of this resource to degraded.",
+				resource.GetNaturalLanguageResourceTypeName(),
+				scope,
+			),
+			fmt.Sprintf("most recently created %s", resource.GetNaturalLanguageResourceTypeName()),
+			fmt.Sprintf("%s (%s)", mostRecentResource.GetName(), mostRecentResource.GetUid()),
+		)
+		resource.EnsureResourceIsMarkedAsDegraded(
+			"NewerResourceIsPresent",
+			fmt.Sprintf("There is a more recently created %s in this %s, please remove all but one resource "+
+				"instance.",
+				resource.GetNaturalLanguageResourceTypeName(),
+				scope,
+			))
+		if err := k8sClient.Status().Update(ctx, resource.Get()); err != nil {
+			logger.Error(err, updateStatusFailedMessage)
+			return true, err
+		}
+		// stop the reconciliation, and do not requeue it
+		return true, nil
+	}
+}
+
+func findAllResourceInstancesInScope(
+	ctx context.Context,
+	k8sClient client.Client,
+	req ctrl.Request,
+	resource dash0common.Dash0Resource,
+	logger *logr.Logger,
+) (string, client.ObjectList, error) {
+	scope := "namespace"
+	listOptions := client.ListOptions{
+		Namespace: req.Namespace,
+	}
+
+	if resource.IsClusterResource() {
+		scope = "cluster"
+		listOptions = client.ListOptions{}
+	}
+
+	allResourcesInScope := resource.GetListReceiver()
 	if err := k8sClient.List(
 		ctx,
-		allCustomResourcesInNamespace,
-		&client.ListOptions{
-			Namespace: req.Namespace,
-		},
+		allResourcesInScope,
+		&listOptions,
 	); err != nil {
 		logger.Error(
 			err,
-			"Failed to list all Dash0 monitoring resources, requeuing reconcile request.",
-		)
-		return true, err
+			fmt.Sprintf(
+				"Failed to list all %ss, requeuing reconcile request.",
+				resource.GetNaturalLanguageResourceTypeName(),
+			))
+		return scope, nil, err
 	}
 
-	items := allCustomResourcesInNamespace.Items
-	if len(items) > 1 {
-		// There are multiple instances of the Dash0 monitoring resource in this namespace. If the resource that is
-		// currently being reconciled is the one that has been most recently created, we assume that this is the source
-		// of truth in terms of configuration settings etc., and we ignore the other instances in this reconcile request
-		// (they will be handled when they are being reconciled). If the currently reconciled resource is not the most
-		// recent one, we set its status to degraded.
-		sort.Sort(SortByCreationTimestamp(items))
-		mostRecentResource := items[len(items)-1]
-		if mostRecentResource.UID == dash0MonitoringResource.UID {
-			logger.Info(
-				"At least one other Dash0 monitoring resource exists in this namespace. This Dash0 monitoring " +
-					"resource is the most recent one. The state of the other resource(s) will be set to degraded.",
-			)
-			// continue with the reconcile request for this resource, let the reconcile requests for the other offending
-			// resources handle the situation for those resources
-			return false, nil
-		} else {
-			logger.Info(
-				"At least one other Dash0 monitoring resource exists in this namespace, and at least one other "+
-					"Dash0 monitoring resource has been created more recently than this one. Setting the state of "+
-					"this resource to degraded.",
-				"most recently created Dash0 monitoring resource",
-				fmt.Sprintf("%s (%s)", mostRecentResource.Name, mostRecentResource.UID),
-			)
-			dash0MonitoringResource.EnsureResourceIsMarkedAsDegraded(
-				"NewerResourceIsPresent",
-				"There is a more recently created Dash0 monitoring resource in this namespace, please remove all but one resource instance.",
-			)
-			if err := k8sClient.Status().Update(ctx, dash0MonitoringResource); err != nil {
-				logger.Error(err, updateStatusFailedMessage)
-				return true, err
-			}
-			// stop the reconciliation, and do not requeue it
-			return true, nil
-		}
+	return scope, allResourcesInScope, nil
+}
+
+// FindUniqueOrMostRecentResourceInScope tries to fetch the unique resource of a given type in a scope (cluster or
+// namespace). If multiple resources exist, it returns the most recent one. If no resources exist, it returns nil.
+func FindUniqueOrMostRecentResourceInScope(
+	ctx context.Context,
+	k8sClient client.Client,
+	namespace string,
+	resourcePrototype dash0common.Dash0Resource,
+	logger *logr.Logger,
+) (dash0common.Dash0Resource, error) {
+	_, allResourcesInScope, err := findAllResourceInstancesInScope(
+		ctx,
+		k8sClient,
+		ctrl.Request{
+			NamespacedName: types.NamespacedName{
+				Namespace: namespace,
+			},
+		},
+		resourcePrototype,
+		logger,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	return findMostRecentResource(resourcePrototype, allResourcesInScope), nil
+}
+
+func findMostRecentResource(
+	resourcePrototype dash0common.Dash0Resource,
+	allResourcesInScope client.ObjectList,
+) dash0common.Dash0Resource {
+	items := resourcePrototype.Items(allResourcesInScope)
+	if len(items) == 0 {
+		return nil
+	}
+	if len(items) == 1 {
+		return resourcePrototype.At(allResourcesInScope, 0)
 	}
-	return false, nil
+	sort.Sort(SortByCreationTimestamp(items))
+	return resourcePrototype.At(allResourcesInScope, len(items)-1)
 }
 
-type SortByCreationTimestamp []dash0v1alpha1.Dash0Monitoring
+type SortByCreationTimestamp []client.Object
 
 func (s SortByCreationTimestamp) Len() int {
 	return len(s)
@@ -192,34 +302,34 @@ func (s SortByCreationTimestamp) Swap(i, j int) {
 	s[i], s[j] = s[j], s[i]
 }
 func (s SortByCreationTimestamp) Less(i, j int) bool {
-	tsi := s[i].CreationTimestamp
-	tsj := s[j].CreationTimestamp
+	tsi := s[i].GetCreationTimestamp()
+	tsj := s[j].GetCreationTimestamp()
 	return tsi.Before(&tsj)
 }
 
 func InitStatusConditions(
 	ctx context.Context,
-	statusWriter client.SubResourceWriter,
-	dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring,
+	k8sClient client.Client,
+	resource dash0common.Dash0Resource,
+	conditions []metav1.Condition,
 	logger *logr.Logger,
 ) (bool, error) {
-	status := dash0MonitoringResource.Status
 	firstReconcile := false
 	needsRefresh := false
-	if
len(status.Conditions) == 0 { - dash0MonitoringResource.SetAvailableConditionToUnknown() + if len(conditions) == 0 { + resource.SetAvailableConditionToUnknown() firstReconcile = true needsRefresh = true } else if availableCondition := meta.FindStatusCondition( - status.Conditions, + conditions, string(dash0v1alpha1.ConditionTypeAvailable), ); availableCondition == nil { - dash0MonitoringResource.SetAvailableConditionToUnknown() + resource.SetAvailableConditionToUnknown() needsRefresh = true } if needsRefresh { - err := updateResourceStatus(ctx, statusWriter, dash0MonitoringResource, logger) + err := updateResourceStatus(ctx, k8sClient, resource, logger) if err != nil { // The error has already been logged in refreshStatus return firstReconcile, err @@ -230,12 +340,16 @@ func InitStatusConditions( func updateResourceStatus( ctx context.Context, - statusWriter client.SubResourceWriter, - dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring, + k8sClient client.Client, + resource dash0common.Dash0Resource, logger *logr.Logger, ) error { - if err := statusWriter.Update(ctx, dash0MonitoringResource); err != nil { - logger.Error(err, "Cannot update the status of the Dash0 monitoring resource.") + if err := k8sClient.Status().Update(ctx, resource.Get()); err != nil { + logger.Error(err, + fmt.Sprintf( + "Cannot update the status of the %s.", + resource.GetNaturalLanguageResourceTypeName(), + )) return err } return nil @@ -244,28 +358,31 @@ func updateResourceStatus( func CheckImminentDeletionAndHandleFinalizers( ctx context.Context, k8sClient client.Client, - dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring, + resource dash0common.Dash0Resource, finalizerId string, logger *logr.Logger, ) (bool, bool, error) { - isMarkedForDeletion := dash0MonitoringResource.IsMarkedForDeletion() + isMarkedForDeletion := resource.IsMarkedForDeletion() if !isMarkedForDeletion { err := addFinalizerIfNecessary( ctx, k8sClient, - dash0MonitoringResource, + resource, finalizerId, ) if err != nil { logger.Error( err, - "Failed to add finalizer to Dash0 monitoring resource, requeuing reconcile request.", + fmt.Sprintf( + "Failed to add finalizer to %s, requeuing reconcile request.", + resource.GetNaturalLanguageResourceTypeName(), + ), ) return isMarkedForDeletion, false, err } return isMarkedForDeletion, false, nil } else { - if controllerutil.ContainsFinalizer(dash0MonitoringResource, finalizerId) { + if controllerutil.ContainsFinalizer(resource.Get(), finalizerId) { return isMarkedForDeletion, true, nil } return isMarkedForDeletion, false, nil @@ -275,13 +392,41 @@ func CheckImminentDeletionAndHandleFinalizers( func addFinalizerIfNecessary( ctx context.Context, k8sClient client.Client, - dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring, + resource dash0common.Dash0Resource, finalizerId string, ) error { - finalizerHasBeenAdded := controllerutil.AddFinalizer(dash0MonitoringResource, finalizerId) + finalizerHasBeenAdded := controllerutil.AddFinalizer(resource.Get(), finalizerId) if finalizerHasBeenAdded { - return k8sClient.Update(ctx, dash0MonitoringResource) + return k8sClient.Update(ctx, resource.Get()) } // The resource already had the finalizer, no update necessary. 
return nil } + +func CreateEnvVarForAuthorization( + dash0ExportConfiguration dash0v1alpha1.Dash0Configuration, + envVarName string, +) (corev1.EnvVar, error) { + token := dash0ExportConfiguration.Authorization.Token + secretRef := dash0ExportConfiguration.Authorization.SecretRef + if token != nil && *token != "" { + return corev1.EnvVar{ + Name: envVarName, + Value: *token, + }, nil + } else if secretRef != nil && secretRef.Name != "" && secretRef.Key != "" { + return corev1.EnvVar{ + Name: envVarName, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: secretRef.Name, + }, + Key: secretRef.Key, + }, + }, + }, nil + } else { + return corev1.EnvVar{}, fmt.Errorf("neither token nor secretRef provided for the Dash0 exporter") + } +} diff --git a/internal/dash0/util/types.go b/internal/dash0/util/types.go index d074fa90..1db925d2 100644 --- a/internal/dash0/util/types.go +++ b/internal/dash0/util/types.go @@ -3,7 +3,11 @@ package util -import corev1 "k8s.io/api/core/v1" +import ( + "strings" + + corev1 "k8s.io/api/core/v1" +) type Reason string @@ -37,6 +41,22 @@ type Images struct { FilelogOffsetSynchImagePullPolicy corev1.PullPolicy } +func (i Images) GetOperatorVersion() string { + return getImageVersion(i.OperatorImage) +} + +func getImageVersion(image string) string { + idx := strings.LastIndex(image, "@") + if idx >= 0 { + return image[idx+1:] + } + idx = strings.LastIndex(image, ":") + if idx >= 0 { + return image[idx+1:] + } + return "" +} + type InstrumentationMetadata struct { Images OTelCollectorBaseUrl string diff --git a/internal/dash0/webhook/attach_dangling_events_test.go b/internal/dash0/webhook/attach_dangling_events_test.go index 4614f16e..0ba38b46 100644 --- a/internal/dash0/webhook/attach_dangling_events_test.go +++ b/internal/dash0/webhook/attach_dangling_events_test.go @@ -12,7 +12,6 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -36,7 +35,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { BeforeAll(func() { EnsureDash0OperatorNamespaceExists(ctx, k8sClient) - recorder := manager.GetEventRecorderFor("dash0-controller") + recorder := manager.GetEventRecorderFor("dash0-monitoring-controller") instrumenter := &instrumentation.Instrumenter{ Client: k8sClient, Clientset: clientset, @@ -55,6 +54,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { Clientset: clientset, OTelColResourceManager: oTelColResourceManager, } + reconciler = &controller.Dash0Reconciler{ Client: k8sClient, Clientset: clientset, @@ -62,15 +62,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { Images: TestImages, OperatorNamespace: Dash0OperatorNamespace, BackendConnectionManager: backendConnectionManager, - DanglingEventsTimeouts: &controller.DanglingEventsTimeouts{ - InitialTimeout: 0 * time.Second, - Backoff: wait.Backoff{ - Steps: 1, - Duration: 0 * time.Second, - Factor: 1, - Jitter: 0, - }, - }, + DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, } dash0MonitoringResource = EnsureDash0MonitoringResourceExistsAndIsAvailable(ctx, k8sClient) diff --git a/internal/dash0/webhook/dash0_webhook_test.go b/internal/dash0/webhook/dash0_webhook_test.go index 8cb57a38..ce787b12 100644 --- 
a/internal/dash0/webhook/dash0_webhook_test.go +++ b/internal/dash0/webhook/dash0_webhook_test.go @@ -425,7 +425,7 @@ var _ = Describe("The Dash0 webhook", func() { workload = config.GetFn(ctx, k8sClient, TestNamespaceName, name) config.VerifyFn(workload) - VerifyWebhookIgnoreOnceLabelIsAbesent(workload.GetObjectMeta()) + VerifyWebhookIgnoreOnceLabelIsAbsent(workload.GetObjectMeta()) VerifyNoEvents(ctx, clientset, TestNamespaceName) }, Entry("should not instrument a cron job that has the label, but remove the label", WorkloadTestConfig{ WorkloadNamePrefix: CronJobNamePrefix, diff --git a/test-resources/bin/render-templates.sh b/test-resources/bin/render-templates.sh index 944b54bd..0ec353f7 100755 --- a/test-resources/bin/render-templates.sh +++ b/test-resources/bin/render-templates.sh @@ -11,3 +11,4 @@ source test-resources/bin/util load_env_file cat test-resources/customresources/dash0monitoring/dash0monitoring.token.yaml.template | DASH0_AUTHORIZATION_TOKEN="$DASH0_AUTHORIZATION_TOKEN" envsubst > test-resources/customresources/dash0monitoring/dash0monitoring.token.yaml +cat test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template | DASH0_AUTHORIZATION_TOKEN="$DASH0_AUTHORIZATION_TOKEN" envsubst > test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml diff --git a/test-resources/bin/test-cleanup.sh b/test-resources/bin/test-cleanup.sh index e22346f2..f05d6d91 100755 --- a/test-resources/bin/test-cleanup.sh +++ b/test-resources/bin/test-cleanup.sh @@ -15,6 +15,7 @@ load_env_file verify_kubectx kubectl delete -n ${target_namespace} -f test-resources/customresources/dash0monitoring/dash0monitoring.secret.yaml || true +kubectl delete -f test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml || true make undeploy-via-helm || true diff --git a/test-resources/bin/test-roundtrip-01-aum-operator-cr.sh b/test-resources/bin/test-roundtrip-01-aum-operator-cr.sh index cc481123..997f3cdb 100755 --- a/test-resources/bin/test-roundtrip-01-aum-operator-cr.sh +++ b/test-resources/bin/test-roundtrip-01-aum-operator-cr.sh @@ -52,6 +52,11 @@ echo sleep 5 -echo "STEP 7: deploy the Dash0 monitoring resource to namespace ${target_namespace}" +echo "STEP 7: deploy the Dash0 operator configuration resource to cluster" +kubectl apply -f test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml +echo +echo + +echo "STEP 8: deploy the Dash0 monitoring resource to namespace ${target_namespace}" kubectl apply -n ${target_namespace} -f test-resources/customresources/dash0monitoring/dash0monitoring.secret.yaml diff --git a/test-resources/bin/test-roundtrip-02-operator-cr-aum.sh b/test-resources/bin/test-roundtrip-02-operator-cr-aum.sh index ce583eb5..abd66843 100755 --- a/test-resources/bin/test-roundtrip-02-operator-cr-aum.sh +++ b/test-resources/bin/test-roundtrip-02-operator-cr-aum.sh @@ -47,13 +47,19 @@ echo sleep 5 -echo "STEP 6: deploy the Dash0 monitoring resource to namespace ${target_namespace}" + +echo "STEP 6: deploy the Dash0 operator configuration resource to cluster" +kubectl apply -f test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml +echo +echo + +echo "STEP 7: deploy the Dash0 monitoring resource to namespace ${target_namespace}" kubectl apply -n ${target_namespace} -f test-resources/customresources/dash0monitoring/dash0monitoring.secret.yaml echo echo sleep 5 -echo "STEP 7: deploy application 
under monitoring" +echo "STEP 8: deploy application under monitoring" test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} diff --git a/test-resources/customresources/dash0operatorconfiguration/.gitignore b/test-resources/customresources/dash0operatorconfiguration/.gitignore new file mode 100644 index 00000000..87329e54 --- /dev/null +++ b/test-resources/customresources/dash0operatorconfiguration/.gitignore @@ -0,0 +1 @@ +dash0operatorconfiguration.token.yaml \ No newline at end of file diff --git a/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template b/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template new file mode 100644 index 00000000..062c5280 --- /dev/null +++ b/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template @@ -0,0 +1,12 @@ +apiVersion: operator.dash0.com/v1alpha1 +kind: Dash0OperatorConfiguration +metadata: + name: dash0-operator-configuration-resource +spec: + selfMonitoring: + enabled: true + export: + dash0: + endpoint: ingress.eu-west-1.aws.dash0-dev.com:4317 + authorization: + token: "$DASH0_AUTHORIZATION_TOKEN" diff --git a/test-resources/node.js/express/Dockerfile b/test-resources/node.js/express/Dockerfile index 930febaf..3cd476b6 100644 --- a/test-resources/node.js/express/Dockerfile +++ b/test-resources/node.js/express/Dockerfile @@ -5,7 +5,7 @@ ARG NODE_VERSION=20.13.1 ARG ALPINE_VERSION=3.19 FROM node:${NODE_VERSION}-alpine${ALPINE_VERSION} -ENV NODE_ENV production +ENV NODE_ENV=production WORKDIR /usr/src/app COPY --chown=node:node package.json . COPY --chown=node:node package-lock.json . diff --git a/test/e2e/dash0_operator_configuration_resource.go b/test/e2e/dash0_operator_configuration_resource.go new file mode 100644 index 00000000..83cffef1 --- /dev/null +++ b/test/e2e/dash0_operator_configuration_resource.go @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package e2e + +import ( + "bytes" + _ "embed" + "fmt" + "os" + "os/exec" + "text/template" + + . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . 
"github.com/onsi/gomega" +) + +type dash0OperatorConfigurationValues struct { + Endpoint string +} + +const ( + dash0OperatorConfigurationResourceName = "dash0-operator-configuration-resource-e2e" +) + +var ( + //go:embed dash0operatorconfiguration.e2e.yaml.template + dash0OperatorConfigurationResourceSource string + dash0OperatorConfigurationResourceTemplate *template.Template + + defaultDash0OperatorConfigurationValues = dash0OperatorConfigurationValues{ + Endpoint: defaultEndpoint, + } +) + +func renderDash0OperatorConfigurationResourceTemplate( + dash0OperatorConfigurationValues dash0OperatorConfigurationValues, +) string { + By("render Dash0OperatorConfiguration resource template") + if dash0OperatorConfigurationResourceTemplate == nil { + dash0OperatorConfigurationResourceTemplate = + template.Must(template.New("dash0operatorconfiguration").Parse(dash0OperatorConfigurationResourceSource)) + } + + var dash0OperatorConfigurationResource bytes.Buffer + Expect( + dash0OperatorConfigurationResourceTemplate.Execute( + &dash0OperatorConfigurationResource, + dash0OperatorConfigurationValues, + )).To(Succeed()) + + renderedResourceFile, err := os.CreateTemp(os.TempDir(), "dash0operatorconfiguration-*.yaml") + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(renderedResourceFile.Name(), dash0OperatorConfigurationResource.Bytes(), 0644)).To(Succeed()) + + return renderedResourceFile.Name() +} + +func deployDash0OperatorConfigurationResource( + dash0OperatorConfigurationValues dash0OperatorConfigurationValues, +) { + renderedResourceFileName := renderDash0OperatorConfigurationResourceTemplate(dash0OperatorConfigurationValues) + defer func() { + Expect(os.Remove(renderedResourceFileName)).To(Succeed()) + }() + + By(fmt.Sprintf( + "Deploying the Dash0 operator configuration resource with values %v", dash0OperatorConfigurationValues)) + Expect( + runAndIgnoreOutput(exec.Command( + "kubectl", + "apply", + "-f", + renderedResourceFileName, + ))).To(Succeed()) +} + +func undeployDash0OperatorConfigurationResource() { + By("Removing the Dash0 operator configuration resource") + Expect( + runAndIgnoreOutput(exec.Command( + "kubectl", + "delete", + "dash0operatorconfiguration", + dash0OperatorConfigurationResourceName, + "--ignore-not-found", + ))).To(Succeed()) +} diff --git a/test/e2e/dash0operatorconfiguration.e2e.yaml.template b/test/e2e/dash0operatorconfiguration.e2e.yaml.template new file mode 100644 index 00000000..fd8d3d1a --- /dev/null +++ b/test/e2e/dash0operatorconfiguration.e2e.yaml.template @@ -0,0 +1,10 @@ +apiVersion: operator.dash0.com/v1alpha1 +kind: Dash0OperatorConfiguration +metadata: + name: dash0-operator-configuration-resource-e2e +spec: + selfMonitoring: + enabled: true + export: + http: + endpoint: {{ .Endpoint }} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ea820cfb..5a875604 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -596,7 +596,36 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { return verifyExactlyOneLogRecordIsReported(g, testId, &now) }, 30*time.Second, pollingInterval).Should(Succeed()) }) + }) + + Describe("self-monitoring", func() { + BeforeAll(func() { + By("deploy the Dash0 operator") + deployOperator(operatorNamespace, operatorHelmChart, operatorHelmChartUrl, images, true) + }) + AfterAll(func() { + undeployOperator(operatorNamespace) + }) + + BeforeEach(func() { + deployDash0OperatorConfigurationResource(defaultDash0OperatorConfigurationValues) + deployDash0MonitoringResource( + 
applicationUnderTestNamespace,
+				defaultDash0MonitoringValues,
+				operatorNamespace,
+				operatorHelmChart,
+			)
+		})
+
+		AfterEach(func() {
+			undeployDash0MonitoringResource(applicationUnderTestNamespace)
+			undeployDash0OperatorConfigurationResource()
+		})
+
+		It("should produce self-monitoring telemetry", func() {
+			verifySelfMonitoringSpans()
+		})
 	})
 
 	Describe("operator removal", func() {
diff --git a/test/e2e/spans.go b/test/e2e/spans.go
index eb2bf212..92e3be80 100644
--- a/test/e2e/spans.go
+++ b/test/e2e/spans.go
@@ -12,7 +12,6 @@ import (
 	"strings"
 	"time"
 
-	"go.opentelemetry.io/collector/pdata/pcommon"
 	"go.opentelemetry.io/collector/pdata/ptrace"
 
 	. "github.com/onsi/ginkgo/v2" //nolint:golint,revive
@@ -57,7 +56,16 @@ func sendRequestAndFindMatchingSpans(
 	if !isBatch {
 		sendRequest(g, port, httpPathWithQuery)
 	}
-	return fileHasMatchingSpan(g, workloadType, httpPathWithQuery, timestampLowerBound)
+	var resourceMatchFn func(span ptrace.ResourceSpans) bool
+	if workloadType != "" {
+		resourceMatchFn = resourceSpansHaveExpectedResourceAttributes(workloadType)
+	}
+	return fileHasMatchingSpan(
+		g,
+		resourceMatchFn,
+		matchHttpServerSpanWithHttpTarget(httpPathWithQuery),
+		timestampLowerBound,
+	)
 }
 
 func sendRequest(g Gomega, port int, httpPathWithQuery string) {
@@ -79,7 +87,12 @@ func sendRequest(g Gomega, port int, httpPathWithQuery string) {
 }
 
 //nolint:all
-func fileHasMatchingSpan(g Gomega, workloadType string, httpPathWithQuery string, timestampLowerBound *time.Time) bool {
+func fileHasMatchingSpan(
+	g Gomega,
+	resourceMatchFn func(span ptrace.ResourceSpans) bool,
+	spanMatchFn func(span ptrace.Span) bool,
+	timestampLowerBound *time.Time,
+) bool {
 	fileHandle, err := os.Open("test-resources/e2e-test-volumes/otlp-sink/traces.jsonl")
 	g.Expect(err).NotTo(HaveOccurred())
 	defer func() {
@@ -88,11 +101,6 @@ func fileHasMatchingSpan(g Gomega, workloadType string, httpPathWithQuery string
 	scanner := bufio.NewScanner(fileHandle)
 	scanner.Buffer(make([]byte, tracesJsonMaxLineLength), tracesJsonMaxLineLength)
 
-	var resourceMatchFn func(span ptrace.ResourceSpans) bool
-	if workloadType != "" {
-		resourceMatchFn = resourceSpansHaveExpectedResourceAttributes(workloadType)
-	}
-
 	// read file line by line
 	spansFound := false
 	for scanner.Scan() {
@@ -102,15 +110,11 @@ func fileHasMatchingSpan(g Gomega, workloadType string, httpPathWithQuery string
 			// ignore lines that cannot be parsed
 			continue
 		}
-		// Missing cronjob HTTP server spans in the "should instrument and uninstrument all workload types" have given
-		// us a hard time lately. Therefore, log more details about finding the matching spans for those.
- detailedMatchingLogs := workloadType == "cronjob" if spansFound = hasMatchingSpans( traces, resourceMatchFn, - isHttpServerSpanWithHttpTarget(httpPathWithQuery, detailedMatchingLogs), + spanMatchFn, timestampLowerBound, - detailedMatchingLogs, ); spansFound { break } @@ -127,7 +131,6 @@ func hasMatchingSpans( resourceMatchFn func(span ptrace.ResourceSpans) bool, spanMatchFn func(span ptrace.Span) bool, timestampLowerBound *time.Time, - detailedMatchingLogs bool, ) bool { for i := 0; i < traces.ResourceSpans().Len(); i++ { resourceSpan := traces.ResourceSpans().At(i) @@ -137,9 +140,6 @@ func hasMatchingSpans( } } - if detailedMatchingLogs { - fmt.Fprint(GinkgoWriter, "> checking resource span\n") - } for j := 0; j < resourceSpan.ScopeSpans().Len(); j++ { scopeSpan := resourceSpan.ScopeSpans().At(j) for k := 0; k < scopeSpan.Spans().Len(); k++ { @@ -149,19 +149,6 @@ func hasMatchingSpans( if timestampMatch { spanMatches = spanMatchFn(span) } - if detailedMatchingLogs { - fmt.Fprintf( - GinkgoWriter, - "> %s: ResourceSpans(%d)/ScopeSpans(%d)/(%d), timestamp: %t, matches: %t\n", - span.Name(), - i, - j, - k, - timestampMatch, - spanMatches, - ) - } - if timestampMatch && spanMatches { return true } @@ -175,9 +162,6 @@ func hasMatchingSpans( func resourceSpansHaveExpectedResourceAttributes(workloadType string) func(span ptrace.ResourceSpans) bool { return func(resourceSpans ptrace.ResourceSpans) bool { attributes := resourceSpans.Resource().Attributes() - attributes.Range(func(k string, v pcommon.Value) bool { - return true - }) workloadAttributeFound := false if workloadType == "replicaset" { @@ -215,28 +199,59 @@ func resourceSpansHaveExpectedResourceAttributes(workloadType string) func(span } } -func isHttpServerSpanWithHttpTarget(expectedTarget string, detailedMatchingLogs bool) func(span ptrace.Span) bool { +func matchHttpServerSpanWithHttpTarget(expectedTarget string) func(span ptrace.Span) bool { return func(span ptrace.Span) bool { if span.Kind() == ptrace.SpanKindServer { target, hasTarget := span.Attributes().Get("http.target") if hasTarget { if target.Str() == expectedTarget { - if detailedMatchingLogs { - fmt.Fprintf(GinkgoWriter, "> span has matching http.target: %s\n", target.Str()) - } return true - } else if detailedMatchingLogs { - fmt.Fprintf(GinkgoWriter, - "> span has http.target attribute, but it does not match: expected: %s, actual: %s\n", - expectedTarget, - target.Str()) - } - } else { - if detailedMatchingLogs { - fmt.Fprintf(GinkgoWriter, "> span does not have the http.target attribute\n") } } } return false } } + +func verifySelfMonitoringSpans() { + Eventually(func(g Gomega) { + resourceMatchFn := func(resourceSpans ptrace.ResourceSpans) bool { + attributes := resourceSpans.Resource().Attributes() + serviceNamespace, isSet := attributes.Get("service.namespace") + if !isSet { + return false + } + if serviceNamespace.Str() != "dash0.operator" { + return false + } + _, isSet = attributes.Get("service.name") + if !isSet { + return false + } + _, isSet = attributes.Get("service.version") + if !isSet { + return false + } + _, isSet = attributes.Get("k8s.node.name") + if !isSet { + return false + } + _, isSet = attributes.Get("k8s.pod.uid") + + return isSet + } + spanMatchFn := func(span ptrace.Span) bool { + return span.Kind() == ptrace.SpanKindInternal + } + selfMonitoringSpansFound := fileHasMatchingSpan( + g, + resourceMatchFn, + spanMatchFn, + nil, + ) + g.Expect(selfMonitoringSpansFound).To( + BeTrue(), + "expected to find at least one matching self-monitoring 
span", + ) + }, verifyTelemetryTimeout, pollingInterval).Should(Succeed()) +} diff --git a/test/e2e/verify_instrumentation.go b/test/e2e/verify_instrumentation.go index 4436b6be..6663bbe0 100644 --- a/test/e2e/verify_instrumentation.go +++ b/test/e2e/verify_instrumentation.go @@ -16,6 +16,8 @@ const ( labelChangeTimeout = 25 * time.Second verifyTelemetryTimeout = 40 * time.Second pollingInterval = 500 * time.Millisecond + + cronjob = "cronjob" ) func verifyThatWorkloadHasBeenInstrumented( @@ -51,7 +53,7 @@ func verifyThatWorkloadHasBeenInstrumented( // For batch workloads (job, cronjob), which are not reachable via a service, the application will call itself via // HTTP instead, which will create spans as well. spanTimeout := verifyTelemetryTimeout - if workloadType == "cronjob" { + if workloadType == cronjob { // Cronjob pods are only scheduled once a minute, so we might need to wait a while for a job to be started // and for spans to become available, hence increasing the timeout for "Eventually" block that waits for spans. spanTimeout = 90 * time.Second @@ -126,7 +128,7 @@ func verifyThatInstrumentationIsRevertedEventually( time.Sleep(10 * time.Second) secondsToCheckForSpans := 20 - if workloadType == "cronjob" { + if workloadType == cronjob { // Pod for cron jobs only get scheduled once a minute, since the cronjob schedule format does not allow for jobs // starting every second. Thus, to make the test valid, we need to monitor for spans a little bit longer than // for other workload types. diff --git a/test/util/constants.go b/test/util/constants.go index 1ab7e6d8..81d61781 100644 --- a/test/util/constants.go +++ b/test/util/constants.go @@ -4,9 +4,12 @@ package util import ( + "time" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" "github.com/dash0hq/dash0-operator/internal/dash0/util" @@ -29,9 +32,12 @@ const ( ConfigurationReloaderImageTest = "some-registry.com:1234/dash0hq/configuration-reloader:10.11.12" FilelogOffsetSynchImageTest = "some-registry.com:1234/dash0hq/filelog-offset-synch:13.14.15" - OTelCollectorBaseUrlTest = "http://$(DASH0_NODE_IP):40318" - EndpointTest = "endpoint.dash0.com:4317" - EndpointTestQuoted = "\"endpoint.dash0.com:4317\"" + OTelCollectorBaseUrlTest = "http://$(DASH0_NODE_IP):40318" + EndpointDash0Test = "endpoint.dash0.com:4317" + EndpointDash0TestQuoted = "\"endpoint.dash0.com:4317\"" + EndpointDash0WithProtocolTest = "https://endpoint.dash0.com:4317" + EndpointGrpcTest = "endpoint.backend.com:4317" + EndpointHttpTest = "https://endpoint.backend.com:4318" ) var ( @@ -62,4 +68,77 @@ var ( UID: "2f009c75-d69f-4b02-9d9d-fa17e76f5c1d", }, } + + DanglingEventsTimeoutsTest = util.DanglingEventsTimeouts{ + InitialTimeout: 0 * time.Second, + Backoff: wait.Backoff{ + Steps: 1, + Duration: 0 * time.Second, + Factor: 1, + Jitter: 0, + }, + } ) + +func Dash0ExportWithEndpointAndToken() dash0v1alpha1.Export { + return dash0v1alpha1.Export{ + Dash0: &dash0v1alpha1.Dash0Configuration{ + Endpoint: EndpointDash0Test, + Authorization: dash0v1alpha1.Authorization{ + Token: &AuthorizationTokenTest, + }, + }, + } +} + +func Dash0ExportWithEndpointTokenAndInsightsDataset() dash0v1alpha1.Export { + return dash0v1alpha1.Export{ + Dash0: &dash0v1alpha1.Dash0Configuration{ + Endpoint: EndpointDash0Test, + Dataset: util.DatasetInsights, + Authorization: dash0v1alpha1.Authorization{ + Token: 
&AuthorizationTokenTest,
+			},
+		},
+	}
+}
+
+func Dash0ExportWithEndpointAndSecretRef() dash0v1alpha1.Export {
+	return dash0v1alpha1.Export{
+		Dash0: &dash0v1alpha1.Dash0Configuration{
+			Endpoint: EndpointDash0Test,
+			Authorization: dash0v1alpha1.Authorization{
+				SecretRef: &SecretRefTest,
+			},
+		},
+	}
+}
+
+func ExportToPrt(export dash0v1alpha1.Export) *dash0v1alpha1.Export {
+	return &export
+}
+
+func GrpcExportTest() dash0v1alpha1.Export {
+	return dash0v1alpha1.Export{
+		Grpc: &dash0v1alpha1.GrpcConfiguration{
+			Endpoint: EndpointGrpcTest,
+			Headers: []dash0v1alpha1.Header{{
+				Name:  "Key",
+				Value: "Value",
+			}},
+		},
+	}
+}
+
+func HttpExportTest() dash0v1alpha1.Export {
+	return dash0v1alpha1.Export{
+		Http: &dash0v1alpha1.HttpConfiguration{
+			Endpoint: EndpointHttpTest,
+			Headers: []dash0v1alpha1.Header{{
+				Name:  "Key",
+				Value: "Value",
+			}},
+			Encoding: dash0v1alpha1.Proto,
+		},
+	}
+}
diff --git a/test/util/dash0_monitoring_resource.go b/test/util/dash0_monitoring_resource.go
index 258610ca..b22402fe 100644
--- a/test/util/dash0_monitoring_resource.go
+++ b/test/util/dash0_monitoring_resource.go
@@ -64,7 +64,7 @@ func EnsureDash0MonitoringResourceExistsWithNamespacedName(
 	spec := dash0v1alpha1.Dash0MonitoringSpec{
 		Export: dash0v1alpha1.Export{
 			Dash0: &dash0v1alpha1.Dash0Configuration{
-				Endpoint: EndpointTest,
+				Endpoint: EndpointDash0Test,
 				Authorization: dash0v1alpha1.Authorization{
 					Token: &AuthorizationTokenTest,
 				},
@@ -104,7 +104,7 @@ func CreateDash0MonitoringResource(
 		Spec: dash0v1alpha1.Dash0MonitoringSpec{
 			Export: dash0v1alpha1.Export{
 				Dash0: &dash0v1alpha1.Dash0Configuration{
-					Endpoint: EndpointTest,
+					Endpoint: EndpointDash0Test,
 					Authorization: dash0v1alpha1.Authorization{
 						Token: &AuthorizationTokenTest,
 					},
@@ -257,7 +257,7 @@ func removeFinalizerFromDash0MonitoringResource(
 	k8sClient client.Client,
 	dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring,
 ) {
-	finalizerHasBeenRemoved := controllerutil.RemoveFinalizer(dash0MonitoringResource, dash0v1alpha1.FinalizerId)
+	finalizerHasBeenRemoved := controllerutil.RemoveFinalizer(dash0MonitoringResource, dash0v1alpha1.MonitoringFinalizerId)
 	if finalizerHasBeenRemoved {
 		Expect(k8sClient.Update(ctx, dash0MonitoringResource)).To(Succeed())
 	}
diff --git a/test/util/operator_resource.go b/test/util/operator_resource.go
new file mode 100644
index 00000000..e4897df3
--- /dev/null
+++ b/test/util/operator_resource.go
@@ -0,0 +1,261 @@
+// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package util
+
+import (
+	"context"
+	"slices"
+
+	appsv1 "k8s.io/api/apps/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+const (
+	Dash0OperatorDeploymentName       = "controller-deployment"
+	OperatorConfigurationResourceName = "dash0-operator-test-resource"
+)
+
+func EnsureControllerDeploymentExists(
+	ctx context.Context,
+	k8sClient client.Client,
+	controllerDeployment *appsv1.Deployment,
+) *appsv1.Deployment {
+	deployment := EnsureKubernetesObjectExists(
+		ctx,
+		k8sClient,
+		types.NamespacedName{Namespace: controllerDeployment.Namespace, Name: controllerDeployment.Name},
+		&appsv1.Deployment{},
+		controllerDeployment,
+	)
+	return deployment.(*appsv1.Deployment)
+}
+
+func EnsureOperatorConfigurationResourceExists(
+	ctx context.Context,
+	k8sClient client.Client,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	return EnsureOperatorConfigurationResourceExistsWithName(
+		ctx,
+		k8sClient,
+		OperatorConfigurationResourceName,
+	)
+}
+
+func EnsureControllerDeploymentDoesNotExist(
+	ctx context.Context,
+	k8sClient client.Client,
+	controllerDeployment *appsv1.Deployment,
+) {
+	Expect(k8sClient.Delete(ctx, controllerDeployment)).To(Succeed())
+}
+
+func EnsureOperatorConfigurationResourceExistsWithName(
+	ctx context.Context,
+	k8sClient client.Client,
+	name string,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	By("creating the Dash0 operator configuration resource")
+
+	list := dash0v1alpha1.Dash0OperatorConfigurationList{}
+	if err := k8sClient.List(ctx, &list, &client.ListOptions{}); err != nil && !apierrors.IsNotFound(err) {
+		Expect(err).ToNot(HaveOccurred())
+	}
+
+	object := dash0v1alpha1.Dash0OperatorConfiguration{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+		},
+	}
+
+	found := false
+	if len(list.Items) > 0 {
+		resourceIdx := slices.IndexFunc(list.Items, func(r dash0v1alpha1.Dash0OperatorConfiguration) bool {
+			return r.Name == name
+		})
+
+		found = resourceIdx > -1
+	}
+
+	if !found {
+		Expect(k8sClient.Create(ctx, &object)).To(Succeed())
+	} else {
+		Expect(k8sClient.Update(ctx, &object)).To(Succeed())
+	}
+
+	return &object
+}
+
+func CreateOperatorConfigurationResource(
+	ctx context.Context,
+	k8sClient client.Client,
+	name string,
+	spec dash0v1alpha1.Dash0OperatorConfigurationSpec,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	resource := &dash0v1alpha1.Dash0OperatorConfiguration{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+		},
+		Spec: spec,
+	}
+	Expect(k8sClient.Create(ctx, resource)).To(Succeed())
+	return resource
+}
+
+func EnsureOperatorConfigurationResourceExistsAndIsAvailable(
+	ctx context.Context,
+	k8sClient client.Client,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	resource := EnsureOperatorConfigurationResourceExistsWithName(ctx, k8sClient, OperatorConfigurationResourceName)
+	resource.EnsureResourceIsMarkedAsAvailable()
+	Expect(k8sClient.Status().Update(ctx, resource)).To(Succeed())
+	return resource
+}
+
+func EnsureOperatorConfigurationResourceExistsAndIsDegraded(
+	ctx context.Context,
+	k8sClient client.Client,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	resource := EnsureOperatorConfigurationResourceExistsWithName(ctx, k8sClient, OperatorConfigurationResourceName)
+	resource.EnsureResourceIsMarkedAsDegraded(
+		"TestReasonForDegradation",
+		"This resource is degraded.",
+	)
+	Expect(k8sClient.Status().Update(ctx, resource)).To(Succeed())
+	return resource
+}
+
+func LoadOperatorConfigurationResourceByNameIfItExists(
+	ctx context.Context,
+	k8sClient client.Client,
+	g Gomega,
+	name string,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	return LoadOperatorConfigurationResourceByName(ctx, k8sClient, g, name, false)
+}
+
+// LoadOperatorDeploymentOrFail fetches the operator controller deployment and fails the test if it cannot be
+// retrieved.
+func LoadOperatorDeploymentOrFail(
+	ctx context.Context,
+	k8sClient client.Client,
+	g Gomega,
+) *appsv1.Deployment {
+	deployment := &appsv1.Deployment{}
+	if err := k8sClient.Get(
+		ctx,
+		types.NamespacedName{Namespace: Dash0OperatorNamespace, Name: Dash0OperatorDeploymentName},
+		deployment,
+	); err != nil {
+		g.Expect(err).NotTo(HaveOccurred())
+		return nil
+	}
+
+	return deployment
+}
+
+func LoadOperatorConfigurationResourceOrFail(
+	ctx context.Context,
+	k8sClient client.Client,
+	g Gomega,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	return LoadOperatorConfigurationResourceByNameOrFail(ctx, k8sClient, g, OperatorConfigurationResourceName)
+}
+
+func LoadOperatorConfigurationResourceByNameOrFail(
+	ctx context.Context,
+	k8sClient client.Client,
+	g Gomega,
+	name string,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	return LoadOperatorConfigurationResourceByName(ctx, k8sClient, g, name, true)
+}
+
+func LoadOperatorConfigurationResourceByName(
+	ctx context.Context,
+	k8sClient client.Client,
+	g Gomega,
+	name string,
+	failTestsOnNonExists bool,
+) *dash0v1alpha1.Dash0OperatorConfiguration {
+	list := dash0v1alpha1.Dash0OperatorConfigurationList{}
+	if err := k8sClient.List(ctx, &list, &client.ListOptions{}); err != nil {
+		// Fail the test on any unexpected error; an IsNotFound error only fails the test if the caller
+		// requested failTestsOnNonExists.
+		if !apierrors.IsNotFound(err) || failTestsOnNonExists {
+			g.Expect(err).NotTo(HaveOccurred())
+		}
+		return nil
+	}
+
+	var resource *dash0v1alpha1.Dash0OperatorConfiguration
+	if len(list.Items) > 0 {
+		resourceIdx := slices.IndexFunc(list.Items, func(r dash0v1alpha1.Dash0OperatorConfiguration) bool {
+			return r.Name == name
+		})
+
+		if resourceIdx > -1 {
+			resource = &list.Items[resourceIdx]
+		}
+	}
+
+	if failTestsOnNonExists {
+		g.Expect(resource).NotTo(BeNil())
+	}
+
+	return resource
+}
+
+func VerifyOperatorConfigurationResourceByNameDoesNotExist(
+	ctx context.Context,
+	k8sClient client.Client,
+	g Gomega,
+	name string,
+) {
+	g.Expect(LoadOperatorConfigurationResourceByNameIfItExists(
+		ctx,
+		k8sClient,
+		g,
+		name,
+	)).To(BeNil())
+}
+
+func RemoveOperatorConfigurationResource(ctx context.Context, k8sClient client.Client) {
+	RemoveOperatorConfigurationResourceByName(ctx, k8sClient, OperatorConfigurationResourceName, true)
+}
+
+func RemoveOperatorConfigurationResourceByName(
+	ctx context.Context,
+	k8sClient client.Client,
+	name string,
+	failOnErr bool,
+) {
+	By("Removing the Dash0 operator configuration resource instance")
+	if resource := LoadOperatorConfigurationResourceByNameIfItExists(
+		ctx,
+		k8sClient,
+		Default,
+		name,
+	); resource != nil {
+		err := k8sClient.Delete(ctx, resource)
+		if failOnErr {
+			// Callers pass failOnErr=false when they have already triggered the deletion of the operator
+			// configuration resource and it was only blocked by the finalizer: removing the finalizer may
+			// delete the resource immediately, so in that case it is okay to ignore the error from
+			// k8sClient.Delete(ctx, resource).
+ Expect(err).NotTo(HaveOccurred()) + } + } +} diff --git a/test/util/verification.go b/test/util/verification.go index 59bf574f..4720bb28 100644 --- a/test/util/verification.go +++ b/test/util/verification.go @@ -213,16 +213,6 @@ func VerifyStatefulSetWithOptOutLabel(resource *appsv1.StatefulSet) { } func verifyPodSpec(podSpec corev1.PodSpec, expectations PodSpecExpectations) { - Expect(podSpec.Volumes).To(HaveLen(expectations.Volumes)) - for i, volume := range podSpec.Volumes { - if i == expectations.Dash0VolumeIdx { - Expect(volume.Name).To(Equal("dash0-instrumentation")) - Expect(volume.EmptyDir).NotTo(BeNil()) - } else { - Expect(volume.Name).To(Equal(fmt.Sprintf("test-volume-%d", i))) - } - } - Expect(podSpec.InitContainers).To(HaveLen(expectations.InitContainers)) for i, initContainer := range podSpec.InitContainers { if i == expectations.Dash0InitContainerIdx { @@ -240,6 +230,16 @@ func verifyPodSpec(podSpec corev1.PodSpec, expectations PodSpecExpectations) { } } + Expect(podSpec.Volumes).To(HaveLen(expectations.Volumes)) + for i, volume := range podSpec.Volumes { + if i == expectations.Dash0VolumeIdx { + Expect(volume.Name).To(Equal("dash0-instrumentation")) + Expect(volume.EmptyDir).NotTo(BeNil()) + } else { + Expect(volume.Name).To(Equal(fmt.Sprintf("test-volume-%d", i))) + } + } + Expect(podSpec.Containers).To(HaveLen(len(expectations.Containers))) for i, container := range podSpec.Containers { Expect(container.Name).To(Equal(fmt.Sprintf("test-container-%d", i))) @@ -346,7 +346,7 @@ func VerifyWebhookIgnoreOnceLabelIsPresentEventually(g Gomega, objectMeta *metav g.Expect(objectMeta.Labels["dash0.com/webhook-ignore-once"]).To(Equal("true")) } -func VerifyWebhookIgnoreOnceLabelIsAbesent(objectMeta *metav1.ObjectMeta) { +func VerifyWebhookIgnoreOnceLabelIsAbsent(objectMeta *metav1.ObjectMeta) { Expect(objectMeta.Labels["dash0.com/webhook-ignore-once"]).To(Equal("")) }