Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(metrics): collect cluster metrics #129

Merged
merged 3 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,6 @@ install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~
.PHONY: uninstall
uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) --wait=false -f -
sleep 1
$(KUSTOMIZE) build config/crd | $(KUBECTL) patch CustomResourceDefinition dash0monitorings.operator.dash0.com -p '{"metadata":{"finalizers":null}}' --type=merge

.PHONY: deploy-via-helm
deploy-via-helm: ## Deploy the controller via helm to the K8s cluster specified in ~/.kube/config.
Expand Down
75 changes: 68 additions & 7 deletions helm-chart/dash0-operator/templates/operator/cluster-roles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,12 @@ rules:
- apiGroups:
- apps
resources:
# Note: apps.daemonsets are also listed further up together with the other workload types in the apps API group, with
# fewer permissions. The declaration here extends that list of permissions to make sure we can also create and delete
# the daemonsets for the OTel collector instances that the Dash0 operator manages.
# Note: apps.daemonsets and apps.deployments are also listed further up together with the other workload types in the
# apps API group, with fewer permissions. The declaration here extends that list of permissions to make sure we can
# also create and delete the daemonset and deployment for the OTel collector instance that the Dash0 operator
# manages.
- daemonsets
- deployments
verbs:
- create
- delete
Expand All @@ -179,9 +181,11 @@ rules:
- update
- watch

# Permissions required due to the fact that the operator needs to create a dedicated service account/cluster role/
# cluster role binding for the OTel collector and give it a set of permissions; which it can only do if holds these
# permissions itself.
# Permissions required due to the fact that the operator needs to create dedicated service accounts/cluster roles/
# cluster role bindings for the OTel collector daemonset/deployment and give them a set of permissions, which it can
# only do if it holds these permissions itself.
#
# First, the permissions for the OpenTelemetry collector DaemonSet:
- apiGroups:
- ""
resources:
Expand All @@ -190,24 +194,81 @@ rules:
- nodes/stats
verbs:
- get
- watch
- list
- watch
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- replicasets
verbs:
- get
- list
- watch
# Next, the permissions for the OpenTelemetry collector Deployment:
- apiGroups:
- ""
resources:
- events
- namespaces
- namespaces/status
- nodes
- nodes/spec
- pods
- pods/status
- replicationcontrollers
- replicationcontrollers/status
- resourcequotas
- services
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- daemonsets
- deployments
- replicasets
- statefulsets
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- replicasets
verbs:
- get
- list
- watch
- apiGroups:
- batch
resources:
- jobs
- cronjobs
verbs:
- get
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ cluster roles should match snapshot:
- apps
resources:
- daemonsets
- deployments
verbs:
- create
- delete
Expand All @@ -163,24 +164,80 @@ cluster roles should match snapshot:
- nodes/stats
verbs:
- get
- watch
- list
- watch
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- replicasets
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- events
- namespaces
- namespaces/status
- nodes
- nodes/spec
- pods
- pods/status
- replicationcontrollers
- replicationcontrollers/status
- resourcequotas
- services
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- daemonsets
- deployments
- replicasets
- statefulsets
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- replicasets
verbs:
- get
- list
- watch
- apiGroups:
- batch
resources:
- jobs
- cronjobs
verbs:
- get
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- get
- list
- watch
2: |
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
2 changes: 1 addition & 1 deletion images/collector/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.21-alpine AS builder
FROM golang:1.22-alpine AS builder

RUN apk add --update make git && apk cache clean

Expand Down
34 changes: 18 additions & 16 deletions images/collector/src/builder/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,34 @@ dist:
module: github.com/dash0hq/dash0-operator
name: dash0-operator-collector
description: OpenTelemetry collector managed by the Dash0 Kubernetes operator
otelcol_version: "0.106.1"
otelcol_version: "0.108.0"
version: "dash0"
output_path: dist

connectors:
- gomod: "go.opentelemetry.io/collector/connector/forwardconnector v0.106.1"
- gomod: "go.opentelemetry.io/collector/connector/forwardconnector v0.108.0"

extensions:
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckextension v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckextension v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage v0.107.0"

exporters:
- gomod: "go.opentelemetry.io/collector/exporter/debugexporter v0.106.1"
- gomod: "go.opentelemetry.io/collector/exporter/otlpexporter v0.106.1"
- gomod: "go.opentelemetry.io/collector/exporter/otlphttpexporter v0.106.1"
- gomod: "go.opentelemetry.io/collector/exporter/debugexporter v0.108.0"
- gomod: "go.opentelemetry.io/collector/exporter/otlpexporter v0.108.0"
- gomod: "go.opentelemetry.io/collector/exporter/otlphttpexporter v0.108.0"

receivers:
- gomod: "go.opentelemetry.io/collector/receiver/otlpreceiver v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kubeletstatsreceiver v0.106.1"
- gomod: "go.opentelemetry.io/collector/receiver/otlpreceiver v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kubeletstatsreceiver v0.108.0"

processors:
- gomod: "go.opentelemetry.io/collector/processor/batchprocessor v0.106.1"
- gomod: "go.opentelemetry.io/collector/processor/memorylimiterprocessor v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/attributesprocessor v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sattributesprocessor v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourceprocessor v0.106.1"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.106.1"
- gomod: "go.opentelemetry.io/collector/processor/batchprocessor v0.108.0"
- gomod: "go.opentelemetry.io/collector/processor/memorylimiterprocessor v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/attributesprocessor v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sattributesprocessor v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourceprocessor v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.108.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.108.0"
2 changes: 1 addition & 1 deletion images/collector/src/builder/otelcol-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.106.1
0.108.0
23 changes: 15 additions & 8 deletions internal/backendconnection/backendconnection_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ type BackendConnectionManager struct {
*otelcolresources.OTelColResourceManager
}

const (
failedToCreateMsg = "failed to create the OpenTelemetry collector instance, no telemetry will be reported to Dash0"
)

func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace(
ctx context.Context,
images util.Images,
Expand All @@ -47,7 +43,11 @@ func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInOpera
)

if err != nil {
logger.Error(err, failedToCreateMsg)
logger.Error(
err,
"failed to create the one or more of the OpenTelemetry collector DaemonSet/Deployment resources, some or "+
"all telemetry will be missing",
)
return err
}

Expand Down Expand Up @@ -105,17 +105,24 @@ func (m *BackendConnectionManager) RemoveOpenTelemetryCollectorIfNoMonitoringRes

// Either there is no Dash0 monitoring resource left, or only one and that one is about to be deleted. Delete the
// backend connection.
logger.Info(fmt.Sprintf("Deleting the OpenTelemetry collector resources in the Dash0 operator namespace %s.", operatorNamespace))
logger.Info(
fmt.Sprintf(
"Deleting the OpenTelemetry collector Kuberenetes resources in the Dash0 operator namespace %s.",
operatorNamespace,
))

if err := m.OTelColResourceManager.DeleteResources(
if err = m.OTelColResourceManager.DeleteResources(
ctx,
operatorNamespace,
images,
dash0MonitoringResourceToBeDeleted,
selfMonitoringConfiguration,
&logger,
); err != nil {
logger.Error(err, "Failed to delete the OpenTelemetry collector resources, requeuing reconcile request.")
logger.Error(
err,
"Failed to delete the OpenTelemetry collector Kuberenetes resources, requeuing reconcile request.",
)
return err
}
return nil
Expand Down
Loading