diff --git a/Dockerfile b/Dockerfile index ff7d2846..bc864b75 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,11 @@ WORKDIR /workspace # Copy the Go Modules manifests COPY go.mod go.mod COPY go.sum go.sum + +# This particular COPY needs to be executed before go mod download since it is referenced by a replace directive +# in go.mod. +COPY images/pkg/common/ images/pkg/common/ + # cache deps before building and copying source so that we don't need to re-download as much # and so that source changes don't invalidate our downloaded layer RUN go mod download diff --git a/cmd/main.go b/cmd/main.go index 16de20c0..87ef7dad 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -16,6 +16,7 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth" "github.com/go-logr/logr" + otelmetric "go.opentelemetry.io/otel/metric" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" @@ -32,6 +33,7 @@ import ( k8swebhook "sigs.k8s.io/controller-runtime/pkg/webhook" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/images/pkg/common" "github.com/dash0hq/dash0-operator/internal/backendconnection" "github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources" "github.com/dash0hq/dash0-operator/internal/dash0/controller" @@ -76,11 +78,18 @@ const ( //nolint mandatoryEnvVarMissingMessageTemplate = "cannot start the Dash0 operator, the mandatory environment variable \"%s\" is missing" + + meterName = "dash0.operator.manager" ) var ( scheme = runtime.NewScheme() setupLog = ctrl.Log.WithName("setup") + + metricNamePrefix = fmt.Sprintf("%s.", meterName) + + meter otelmetric.Meter + otelShutdownFunctions []func(ctx context.Context) error ) func init() { @@ -158,6 +167,8 @@ func main() { TLSOpts: tlsOpts, }) + meter, otelShutdownFunctions = common.InitOTelSdk(ctx, meterName) + if err := startOperatorManager( ctx, metricsAddr, @@ -252,7 +263,7 @@ func startOperatorManager( developmentMode, ) - err = startDash0Controller(ctx, mgr, clientset, envVars, developmentMode) + err = startDash0Controllers(ctx, mgr, clientset, envVars, developmentMode) if err != nil { return err } @@ -270,6 +281,9 @@ func startOperatorManager( if err = mgr.Start(ctrl.SetupSignalHandler()); err != nil { return fmt.Errorf("unable to set up the signal handler: %w", err) } + // ^mgr.Start(...) blocks. It only returns when the manager is terminating. + + common.ShutDownOTelSdk(ctx, otelShutdownFunctions) return nil } @@ -354,7 +368,7 @@ func readOptionalPullPolicyFromEnvironmentVariable(envVarName string) corev1.Pul return "" } -func startDash0Controller( +func startDash0Controllers( ctx context.Context, mgr manager.Manager, clientset *kubernetes.Clientset, @@ -427,6 +441,11 @@ func startDash0Controller( if err := operatorConfigurationReconciler.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to set up the operator configuration reconciler: %w", err) } + operatorConfigurationReconciler.InitializeSelfMonitoringMetrics( + meter, + metricNamePrefix, + &setupLog, + ) monitoringReconciler := &controller.Dash0Reconciler{ Client: k8sClient, @@ -436,10 +455,14 @@ func startDash0Controller( Images: images, OperatorNamespace: envVars.operatorNamespace, } - if err := monitoringReconciler.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to set up the monitoring reconciler: %w", err) } + monitoringReconciler.InitializeSelfMonitoringMetrics( + meter, + metricNamePrefix, + &setupLog, + ) if os.Getenv("ENABLE_WEBHOOK") != "false" { if err := (&webhook.Handler{ diff --git a/go.mod b/go.mod index f5636884..d6b3a686 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,11 @@ module github.com/dash0hq/dash0-operator -go 1.22.0 +go 1.22.4 -toolchain go1.22.4 +toolchain go1.22.6 require ( + github.com/dash0hq/dash0-operator/images/pkg/common v0.0.0-00010101000000-000000000000 github.com/go-logr/logr v1.4.2 github.com/google/go-cmp v0.6.0 github.com/google/uuid v1.6.0 @@ -21,12 +22,14 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.1 // indirect github.com/evanphx/json-patch/v5 v5.9.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect @@ -38,6 +41,7 @@ require ( github.com/google/gnostic-models v0.6.8 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -52,18 +56,27 @@ require ( github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/otel v1.29.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0 // indirect + go.opentelemetry.io/otel/metric v1.29.0 // indirect + go.opentelemetry.io/otel/sdk v1.29.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.29.0 // indirect + go.opentelemetry.io/otel/trace v1.29.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/net v0.28.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect + golang.org/x/oauth2 v0.22.0 // indirect golang.org/x/sys v0.24.0 // indirect golang.org/x/term v0.23.0 // indirect golang.org/x/text v0.17.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.24.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd // indirect google.golang.org/grpc v1.65.0 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/inf.v0 v0.9.1 // indirect @@ -75,3 +88,5 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect sigs.k8s.io/yaml v1.4.0 // indirect ) + +replace github.com/dash0hq/dash0-operator/images/pkg/common => ./images/pkg/common diff --git a/go.sum b/go.sum index 0c69df83..e105dbe6 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -16,8 +18,11 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= @@ -46,6 +51,8 @@ github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 h1:5iH8iuqE5apketRbSF github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -98,6 +105,22 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opentelemetry.io/collector/pdata v1.14.1 h1:wXZjtQA7Vy5HFqco+yA95ENyMQU5heBB1IxMHQf6mUk= go.opentelemetry.io/collector/pdata v1.14.1/go.mod h1:z1dTjwwtcoXxZx2/nkHysjxMeaxe9pEmYTEr4SMNIx8= +go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= +go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0 h1:k6fQVDQexDE+3jG2SfCQjnHS7OamcP73YMoxEVq5B6k= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0/go.mod h1:t4BrYLHU450Zo9fnydWlIuswB1bm7rM8havDpWOJeDo= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0 h1:xvhQxJ/C9+RTnAj5DpTg7LSM1vbbMTiXt7e9hsfqHNw= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0/go.mod h1:Fcvs2Bz1jkDM+Wf5/ozBGmi3tQ/c9zPKLnsipnfhGAo= +go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= +go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= +go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo= +go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok= +go.opentelemetry.io/otel/sdk/metric v1.29.0 h1:K2CfmJohnRgvZ9UAj2/FhIf/okdWcNdBwe1m8xFXiSY= +go.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ= +go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= +go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -117,8 +140,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -147,8 +170,10 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd h1:BBOTEWLuuEGQy9n1y9MhVJ9Qt0BDu21X8qZs71/uPZo= +google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:fO8wJzT2zbQbAjbIoos1285VfEIYKDDY+Dt+WpTkh6g= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd h1:6TEm2ZxXoQmFWFlt1vNxvVOa1Q0dXFQD1m/rYjXmS0E= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= diff --git a/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml b/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml index c737fa05..37e4a9e0 100644 --- a/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml +++ b/helm-chart/dash0-operator/templates/operator/deployment-and-webhook.yaml @@ -121,6 +121,14 @@ spec: - name: DASH0_DEVELOPMENT_MODE value: {{ .Values.operator.developmentMode | toString | quote }} {{- end }} + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid ports: - containerPort: 9443 name: webhook-server diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap index 9d25a089..831246ef 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment-and-webhook_test.yaml.snap @@ -56,6 +56,14 @@ deployment should match snapshot (default values): value: ghcr.io/dash0hq/configuration-reloader:0.0.0 - name: DASH0_FILELOG_OFFSET_SYNCH_IMAGE value: ghcr.io/dash0hq/filelog-offset-synch:0.0.0 + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid image: ghcr.io/dash0hq/operator-controller:0.0.0 livenessProbe: httpGet: diff --git a/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml b/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml index 14415dc8..a33ba4f4 100644 --- a/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml +++ b/helm-chart/dash0-operator/tests/operator/deployment-and-webhook_test.yaml @@ -302,8 +302,14 @@ tests: - equal: path: spec.template.spec.containers[0].env[7].value value: ghcr.io/dash0hq/filelog-offset-synch@sha256:4e8c25853217c7393dbd95e17fe2117bb31b39478bbea4479cc5e7c1257dda04 - - notExists: + - equal: path: spec.template.spec.containers[0].env[8].name + value: K8S_NODE_NAME + - equal: + path: spec.template.spec.containers[0].env[9].name + value: K8S_POD_UID + - notExists: + path: spec.template.spec.containers[0].env[10].name - it: webhook should have caBundle set documentSelector: diff --git a/images/configreloader/src/configreloader.go b/images/configreloader/src/configreloader.go index 627ac96e..b905f656 100644 --- a/images/configreloader/src/configreloader.go +++ b/images/configreloader/src/configreloader.go @@ -72,7 +72,7 @@ func main() { signal.Notify(shutdown, syscall.SIGTERM) meter, selfMonitoringShutdownFunctions := common.InitOTelSdk(ctx, meterName) - setUpSelfMonitoringMetrics(meter) + initializeSelfMonitoringMetrics(meter) go func() { for { @@ -234,7 +234,7 @@ func triggerConfigurationReload(collectorPid OTelColPid) error { return nil } -func setUpSelfMonitoringMetrics(meter otelmetric.Meter) { +func initializeSelfMonitoringMetrics(meter otelmetric.Meter) { var err error if configFilesChangesMetric, err = meter.Int64Counter( diff --git a/images/filelogoffsetsynch/src/filelogoffsetsynch.go b/images/filelogoffsetsynch/src/filelogoffsetsynch.go index 204fceb3..4aba1954 100644 --- a/images/filelogoffsetsynch/src/filelogoffsetsynch.go +++ b/images/filelogoffsetsynch/src/filelogoffsetsynch.go @@ -109,7 +109,7 @@ func main() { } meter, selfMonitoringShutdownFunctions := common.InitOTelSdk(ctx, meterName) - setUpSelfMonitoringMetrics(meter) + initializeSelfMonitoringMetrics(meter) // creates the clientset clientset, err := kubernetes.NewForConfig(config) @@ -393,7 +393,7 @@ func tarFile(writer *tar.Writer, path string, info os.FileInfo) (HasAddedFileToA return true, nil } -func setUpSelfMonitoringMetrics(meter otelmetric.Meter) { +func initializeSelfMonitoringMetrics(meter otelmetric.Meter) { var err error if updateSizeMetric, err = meter.Int64Gauge( diff --git a/images/pkg/common/otel.go b/images/pkg/common/otel.go index f5f4d28c..0979f1d2 100644 --- a/images/pkg/common/otel.go +++ b/images/pkg/common/otel.go @@ -80,6 +80,8 @@ func InitOTelSdk( // Actually, the trace SDK does that correctly, but the metric SDK does not. // - https://github.com/open-telemetry/opentelemetry-go/blob/932a4d8a5f2536645618d7aee8e5da6b8e3b6751/sdk/trace/provider.go#L353 // - https://github.com/open-telemetry/opentelemetry-go/blob/932a4d8a5f2536645618d7aee8e5da6b8e3b6751/sdk/metric/config.go#L106 + // + // Reported here: https://github.com/open-telemetry/opentelemetry-go/issues/5764 finalResource, err := resource.Merge(resource.Environment(), resourceWithPodAndNode) if err != nil { log.Fatalf("Cannot merge the OpenTelemetry resource: %v", err) diff --git a/internal/dash0/controller/dash0_controller.go b/internal/dash0/controller/dash0_controller.go index 062b48dd..aa1296ad 100644 --- a/internal/dash0/controller/dash0_controller.go +++ b/internal/dash0/controller/dash0_controller.go @@ -10,6 +10,7 @@ import ( "time" "github.com/go-logr/logr" + otelmetric "go.opentelemetry.io/otel/metric" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" @@ -49,6 +50,8 @@ var ( Jitter: 0.3, }, } + + monitoringReconcileRequestMetric otelmetric.Int64Counter ) func (r *Dash0Reconciler) SetupWithManager(mgr ctrl.Manager) error { @@ -60,6 +63,22 @@ func (r *Dash0Reconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } +func (r *Dash0Reconciler) InitializeSelfMonitoringMetrics( + meter otelmetric.Meter, + metricNamePrefix string, + logger *logr.Logger, +) { + reconcileRequestMetricName := fmt.Sprintf("%s%s", metricNamePrefix, "monitoring.reconcile_requests") + var err error + if monitoringReconcileRequestMetric, err = meter.Int64Counter( + reconcileRequestMetricName, + otelmetric.WithUnit("1"), + otelmetric.WithDescription("Counter for monitoring resource reconcile requests"), + ); err != nil { + logger.Error(err, "Cannot initialize the metric %s.") + } +} + // The following markers are used to generate the rules permissions (RBAC) on config/rbac using controller-gen // when the command is executed. // To know more about markers see: https://book.kubebuilder.io/reference/markers.html @@ -84,6 +103,10 @@ func (r *Dash0Reconciler) SetupWithManager(mgr ctrl.Manager) error { // - About Controllers: https://kubernetes.io/docs/concepts/architecture/controller/ // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile func (r *Dash0Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + if monitoringReconcileRequestMetric != nil { + monitoringReconcileRequestMetric.Add(ctx, 1) + } + logger := log.FromContext(ctx) logger.Info("processing reconcile request for Dash0 monitoring resource") diff --git a/internal/dash0/controller/operator_configuration_controller.go b/internal/dash0/controller/operator_configuration_controller.go index d24ca419..0cf0cece 100644 --- a/internal/dash0/controller/operator_configuration_controller.go +++ b/internal/dash0/controller/operator_configuration_controller.go @@ -8,6 +8,8 @@ import ( "fmt" "reflect" + "github.com/go-logr/logr" + otelmetric "go.opentelemetry.io/otel/metric" appsv1 "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes" @@ -21,12 +23,6 @@ import ( "github.com/dash0hq/dash0-operator/internal/dash0/util" ) -const ( - ManagerContainerName = "manager" - updateStatusFailedMessageOperatorConfiguration = "Failed to update Dash0 operator configuration status " + - "conditions, requeuing reconcile request." -) - type OperatorConfigurationReconciler struct { client.Client Clientset *kubernetes.Clientset @@ -38,6 +34,16 @@ type OperatorConfigurationReconciler struct { DevelopmentMode bool } +const ( + ManagerContainerName = "manager" + updateStatusFailedMessageOperatorConfiguration = "Failed to update Dash0 operator configuration status " + + "conditions, requeuing reconcile request." +) + +var ( + operatorReconcileRequestMetric otelmetric.Int64Counter +) + func (r *OperatorConfigurationReconciler) SetupWithManager(mgr ctrl.Manager) error { if r.DanglingEventsTimeouts == nil { r.DanglingEventsTimeouts = defaultDanglingEventsTimeouts @@ -48,6 +54,22 @@ func (r *OperatorConfigurationReconciler) SetupWithManager(mgr ctrl.Manager) err Complete(r) } +func (r *OperatorConfigurationReconciler) InitializeSelfMonitoringMetrics( + meter otelmetric.Meter, + metricNamePrefix string, + logger *logr.Logger, +) { + reconcileRequestMetricName := fmt.Sprintf("%s%s", metricNamePrefix, "operatorconfiguration.reconcile_requests") + var err error + if operatorReconcileRequestMetric, err = meter.Int64Counter( + reconcileRequestMetricName, + otelmetric.WithUnit("1"), + otelmetric.WithDescription("Counter for operator configuration resource reconcile requests"), + ); err != nil { + logger.Error(err, "Cannot initialize the metric %s.") + } +} + // The following markers are used to generate the rules permissions (RBAC) on config/rbac using controller-gen // when the command is executed. // To know more about markers see: https://book.kubebuilder.io/reference/markers.html @@ -70,6 +92,10 @@ func (r *OperatorConfigurationReconciler) SetupWithManager(mgr ctrl.Manager) err // - About Controllers: https://kubernetes.io/docs/concepts/architecture/controller/ // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile func (r *OperatorConfigurationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + if operatorReconcileRequestMetric != nil { + operatorReconcileRequestMetric.Add(ctx, 1) + } + logger := log.FromContext(ctx) var resource *dash0v1alpha1.Dash0OperatorConfiguration