Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(prometheus): watch Prometheus ScrapeConfig CRD & CRs #153

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

"github.com/go-logr/logr"
persesv1alpha1 "github.com/perses/perses-operator/api/v1alpha1"
prometheusoperator "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1"
semconv "go.opentelemetry.io/collector/semconv/v1.27.0"
otelmetric "go.opentelemetry.io/otel/metric"
appsv1 "k8s.io/api/apps/v1"
Expand All @@ -26,6 +27,7 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -93,6 +95,8 @@ var (
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")

kubernetesApiServerRestConfig *rest.Config

startupTasksK8sClient client.Client
deploymentSelfReference *appsv1.Deployment
envVars environmentVariables
Expand All @@ -108,6 +112,7 @@ func init() {
// for perses dashboard controller, prometheus scrape config controller etc.
utilruntime.Must(apiextensionsv1.AddToScheme(scheme))
utilruntime.Must(persesv1alpha1.AddToScheme(scheme))
utilruntime.Must(prometheusoperator.AddToScheme(scheme))
}

func main() {
Expand Down Expand Up @@ -196,6 +201,7 @@ func main() {
TLSOpts: tlsOpts,
})

kubernetesApiServerRestConfig = ctrl.GetConfigOrDie()
var err error
if err = readEnvironmentVariables(); err != nil {
os.Exit(1)
Expand Down Expand Up @@ -263,7 +269,7 @@ func startOperatorManager(
operatorConfiguration *startup.OperatorConfigurationValues,
developmentMode bool,
) error {
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
mgr, err := ctrl.NewManager(kubernetesApiServerRestConfig, ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
BindAddress: metricsAddr,
Expand Down Expand Up @@ -502,11 +508,11 @@ func startDash0Controllers(
OTelColResourceSpecs: oTelColResourceSpecs,
DevelopmentMode: developmentMode,
}
backendConnectionManager := &backendconnection.BackendConnectionManager{
Client: k8sClient,
Clientset: clientset,
OTelColResourceManager: oTelColResourceManager,
}
backendConnectionManager := backendconnection.NewBackendConnectionManager(
k8sClient,
clientset,
oTelColResourceManager,
)
backendConnectionReconciler := &backendconnection.BackendConnectionReconciler{
Client: k8sClient,
BackendConnectionManager: backendConnectionManager,
Expand Down Expand Up @@ -566,6 +572,17 @@ func startDash0Controllers(
&setupLog,
)

prometheusScrapeConfigReconciler := &controller.PrometheusScrapeConfigCrdReconciler{
Client: k8sClient,
Clientset: clientset,
BackendConnectionReconciler: backendConnectionReconciler,
Scheme: mgr.GetScheme(),
}
if err := prometheusScrapeConfigReconciler.
SetupWithManager(ctx, mgr, startupTasksK8sClient, &setupLog); err != nil {
return fmt.Errorf("unable to set up the prometheus scrapeconfig reconciler: %w", err)
}

if err := (&webhooks.InstrumentationWebhookHandler{
Client: k8sClient,
Recorder: mgr.GetEventRecorderFor("dash0-instrumentation-webhook"),
Expand All @@ -590,9 +607,8 @@ func startDash0Controllers(
}

func initStartupTasksK8sClient(logger *logr.Logger) error {
cfg := ctrl.GetConfigOrDie()
var err error
if startupTasksK8sClient, err = client.New(cfg, client.Options{
if startupTasksK8sClient, err = client.New(kubernetesApiServerRestConfig, client.Options{
Scheme: scheme,
}); err != nil {
logger.Error(err, "failed to create Kubernetes API client for startup tasks")
Expand Down
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ rules:
- delete
- get
- list
- apiGroups:
- monitoring.coreos.com
resources:
- scrapeconfigs
verbs:
- get
- list
- watch
- apiGroups:
- operator.dash0.com
resources:
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ require (
github.com/nexucis/lamenv v0.5.2 // indirect
github.com/perses/common v0.23.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.2 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.2 h1:BpGDC87A2SaxbKgONsFLEX3kRcRJee2aLQbjXsuz0hA=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.2/go.mod h1:Rd8YnCqz+2FYsiGmE2DMlaLjQRB4v2jFNnzCt9YY4IM=
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
Expand Down
10 changes: 10 additions & 0 deletions helm-chart/dash0-operator/templates/operator/cluster-roles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,16 @@ rules:
- get
- list

# Permissions required to watch for the Prometheus ScrapeConfigs.
- apiGroups:
- monitoring.coreos.com
resources:
- scrapeconfigs
verbs:
- get
- list
- watch

# Permissions required to watch for the Perses dashboard resources.
- apiGroups:
- perses.dev
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ cluster roles should match snapshot:
- delete
- get
- list
- apiGroups:
- monitoring.coreos.com
resources:
- scrapeconfigs
verbs:
- get
- list
- watch
- apiGroups:
- perses.dev
resources:
Expand Down
1 change: 1 addition & 0 deletions images/collector/src/builder/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ receivers:
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kubeletstatsreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.111.0"

processors:
- gomod: "go.opentelemetry.io/collector/processor/batchprocessor v0.111.0"
Expand Down
29 changes: 21 additions & 8 deletions internal/backendconnection/backend_connection_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"slices"

"github.com/go-logr/logr"
prometheusoperator "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
Expand Down Expand Up @@ -35,7 +36,7 @@ type BackendConnectionReconciler struct {

func (r *BackendConnectionReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
Named("dash0backendconnectioncontroller").
Named("dash0_backend_connection_controller").
Watches(
&corev1.ConfigMap{},
&handler.EnqueueRequestForObject{},
Expand Down Expand Up @@ -81,10 +82,10 @@ func (r *BackendConnectionReconciler) SetupWithManager(mgr ctrl.Manager) error {
}

func (r *BackendConnectionReconciler) withNamePredicate(resourceNames []string) builder.Predicates {
return builder.WithPredicates(r.createFilterPredicate(resourceNames))
return builder.WithPredicates(r.makeFilterPredicate(resourceNames))
}

func (r *BackendConnectionReconciler) createFilterPredicate(resourceNames []string) predicate.Funcs {
func (r *BackendConnectionReconciler) makeFilterPredicate(resourceNames []string) predicate.Funcs {
resourceNamespace := r.OperatorNamespace
return predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
Expand Down Expand Up @@ -135,11 +136,7 @@ func (r *BackendConnectionReconciler) Reconcile(
return reconcile.Result{}, err
}

logger.Info(
"successfully reconciled backend connection resources",
"request",
request,
)
logger.Info("successfully reconciled backend connection resources")

return reconcile.Result{}, nil
}
Expand Down Expand Up @@ -168,3 +165,19 @@ func (r *BackendConnectionReconciler) findArbitraryMonitoringResource(
// monitored namespaces.
return &allDash0MonitoringResouresInCluster.Items[0], nil
}

// UpdatePrometheusScrapeConfigs hands the given ScrapeConfig to the backend connection manager and then runs a
// reconcile pass so the change is picked up right away.
//
// Reconcile is invoked directly with an empty request. Both its result and its error are deliberately discarded,
// so this method never reports a failure to its caller.
func (r *BackendConnectionReconciler) UpdatePrometheusScrapeConfigs(
	ctx context.Context,
	scrapeConfig *prometheusoperator.ScrapeConfig,
) {
	manager := r.BackendConnectionManager
	manager.UpdatePrometheusScrapeConfigs(scrapeConfig)

	// The zero-value request carries no namespace/name.
	// NOTE(review): presumably Reconcile does not depend on the request contents -- confirm.
	var emptyRequest reconcile.Request
	_, _ = r.Reconcile(ctx, emptyRequest)
}

// DeletePrometheusScrapeConfigs asks the backend connection manager to forget the given ScrapeConfig and then runs
// a reconcile pass so the removal is picked up right away.
//
// Reconcile is invoked directly with an empty request. Both its result and its error are deliberately discarded,
// so this method never reports a failure to its caller.
func (r *BackendConnectionReconciler) DeletePrometheusScrapeConfigs(
	ctx context.Context,
	scrapeConfig *prometheusoperator.ScrapeConfig,
) {
	manager := r.BackendConnectionManager
	manager.DeletePrometheusScrapeConfigs(scrapeConfig)

	// The zero-value request carries no namespace/name.
	// NOTE(review): presumably Reconcile does not depend on the request contents -- confirm.
	var emptyRequest reconcile.Request
	_, _ = r.Reconcile(ctx, emptyRequest)
}
38 changes: 38 additions & 0 deletions internal/backendconnection/backendconnection_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ package backendconnection
import (
"context"
"fmt"
"sync"
"sync/atomic"

prometheusoperator "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
Expand All @@ -23,6 +25,8 @@ type BackendConnectionManager struct {
*otelcolresources.OTelColResourceManager
updateInProgress atomic.Bool
resourcesHaveBeenDeletedByOperator atomic.Bool
prometheusScrapeConfigsMutex sync.Mutex
prometheusScrapeConfigs map[string]*prometheusoperator.ScrapeConfig
}

type BackendConnectionReconcileTrigger string
Expand All @@ -32,6 +36,19 @@ const (
TriggeredByDash0Resource BackendConnectionReconcileTrigger = "resource"
)

// NewBackendConnectionManager builds a BackendConnectionManager from the given Kubernetes client, clientset and
// OpenTelemetry collector resource manager.
//
// Unlike a plain struct literal, the returned manager has its Prometheus scrape config map allocated (empty), so
// scrape configs can be stored in it immediately.
func NewBackendConnectionManager(
	k8sClient client.Client,
	clientset *kubernetes.Clientset,
	oTelColResourceManager *otelcolresources.OTelColResourceManager,
) *BackendConnectionManager {
	manager := new(BackendConnectionManager)
	manager.Client = k8sClient
	manager.Clientset = clientset
	manager.OTelColResourceManager = oTelColResourceManager
	// Start with an empty, non-nil map; writing to a nil map would panic.
	manager.prometheusScrapeConfigs = map[string]*prometheusoperator.ScrapeConfig{}
	return manager
}

func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInOperatorNamespace(
ctx context.Context,
images util.Images,
Expand All @@ -40,6 +57,10 @@ func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInOpera
trigger BackendConnectionReconcileTrigger,
) error {
logger := log.FromContext(ctx)

m.prometheusScrapeConfigsMutex.Lock()
defer m.prometheusScrapeConfigsMutex.Unlock()

if m.resourcesHaveBeenDeletedByOperator.Load() {
if trigger == TriggeredByWatchEvent {
if m.DevelopmentMode {
Expand Down Expand Up @@ -72,6 +93,7 @@ func (m *BackendConnectionManager) EnsureOpenTelemetryCollectorIsDeployedInOpera
operatorNamespace,
images,
monitoringResource,
m.prometheusScrapeConfigs,
&logger,
)

Expand Down Expand Up @@ -161,3 +183,19 @@ func (m *BackendConnectionManager) RemoveOpenTelemetryCollectorIfNoMonitoringRes
}
return nil
}

// UpdatePrometheusScrapeConfigs stores the given ScrapeConfig in the manager's in-memory map, replacing any entry
// previously stored under the same key (see scrapeConfigKey). The pointer is stored as-is; no copy is made.
//
// The map is created on first use while holding the mutex, so a BackendConnectionManager that was built as a plain
// struct literal instead of via NewBackendConnectionManager does not panic with "assignment to entry in nil map".
func (m *BackendConnectionManager) UpdatePrometheusScrapeConfigs(scrapeConfig *prometheusoperator.ScrapeConfig) {
	m.prometheusScrapeConfigsMutex.Lock()
	defer m.prometheusScrapeConfigsMutex.Unlock()

	if m.prometheusScrapeConfigs == nil {
		m.prometheusScrapeConfigs = make(map[string]*prometheusoperator.ScrapeConfig)
	}
	m.prometheusScrapeConfigs[scrapeConfigKey(scrapeConfig)] = scrapeConfig
}

// DeletePrometheusScrapeConfigs removes the entry for the given ScrapeConfig from the manager's in-memory map.
// The entry is looked up by the key derived from the scrape config (see scrapeConfigKey); removing a scrape config
// that was never stored is a no-op.
func (m *BackendConnectionManager) DeletePrometheusScrapeConfigs(scrapeConfig *prometheusoperator.ScrapeConfig) {
	key := scrapeConfigKey(scrapeConfig)

	// Guard the map against concurrent access for the duration of the removal.
	m.prometheusScrapeConfigsMutex.Lock()
	defer m.prometheusScrapeConfigsMutex.Unlock()
	delete(m.prometheusScrapeConfigs, key)
}

// scrapeConfigKey derives the map key for a ScrapeConfig: its namespace and its name, joined by an underscore.
func scrapeConfigKey(scrapeConfig *prometheusoperator.ScrapeConfig) string {
	namespace, name := scrapeConfig.Namespace, scrapeConfig.Name
	return namespace + "_" + name
}
11 changes: 6 additions & 5 deletions internal/backendconnection/backendconnection_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,12 @@ var _ = Describe("The backend connection manager", Ordered, func() {
OTelCollectorNamePrefix: OTelCollectorNamePrefixTest,
OTelColResourceSpecs: &otelcolresources.DefaultOTelColResourceSpecs,
}
manager = &BackendConnectionManager{
Client: k8sClient,
Clientset: clientset,
OTelColResourceManager: oTelColResourceManager,
}
manager = NewBackendConnectionManager(
k8sClient,
clientset,
oTelColResourceManager,
)

})

AfterEach(func() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func assembleCollectorConfigMap(
// logs will compound in case of log parsing errors
config.Namespace,
},
ScrapeConfigs: config.ScrapeConfigs,
DevelopmentMode: config.DevelopmentMode,
})
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,26 @@ processors:

receivers:
k8s_cluster: {}
{{- if .ScrapeConfigs }}
prometheus:
config:
scrape_configs:
- job_name: otel-collector
scrape_interval: 5s
static_configs:
- targets: ['0.0.0.0:8888']
- job_name: k8s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
regex: "true"
action: keep
metric_relabel_configs:
- source_labels: [__name__]
regex: "(request_duration_seconds.*|response_duration_seconds.*)"
action: keep
{{- end }}

service:
extensions:
Expand All @@ -56,6 +76,9 @@ service:
metrics/downstream:
receivers:
- k8s_cluster
{{- if .ScrapeConfigs }}
- prometheus
{{- end }}
processors:
- memory_limiter
- resourcedetection
Expand Down
2 changes: 2 additions & 0 deletions internal/backendconnection/otelcolresources/desired_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ type oTelColConfig struct {
Export dash0v1alpha1.Export
SelfMonitoringAndApiAccessConfiguration selfmonitoringapiaccess.SelfMonitoringAndApiAccessConfiguration
Images util.Images
ScrapeConfigs bool
DevelopmentMode bool
}

// collectorConfigurationTemplateValues carries the values that are made available to the collector configuration
// template when it is rendered.
type collectorConfigurationTemplateValues struct {
	// Exporters lists the OTLP exporters.
	// NOTE(review): presumably these are rendered into the exporters section of the collector config -- confirm.
	Exporters []OtlpExporter
	// IgnoreLogsFromNamespaces names namespaces whose logs are to be ignored.
	// NOTE(review): exact use in the template is not visible here -- confirm.
	IgnoreLogsFromNamespaces []string
	// ScrapeConfigs is referenced by the template as .ScrapeConfigs; when true, the template adds the prometheus
	// receiver and includes it in the metrics/downstream pipeline.
	ScrapeConfigs bool
	// DevelopmentMode is passed through from the collector config.
	// NOTE(review): its effect on the rendered template is not visible here -- confirm.
	DevelopmentMode bool
}

Expand Down
Loading