Skip to content

Commit

Permalink
Update Telemeter metrics allow-list
Browse files Browse the repository at this point in the history
Signed-off-by: Simon Pasquier <[email protected]>
  • Loading branch information
simonpasquier committed Oct 16, 2024
1 parent 62555bd commit e942112
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 7 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ resources/observability/grafana/observatorium-logs: format observability/grafana
$(JSONNET) -J "$(JSONNET_VENDOR_DIR)" -m resources/observability/grafana/observatorium-logs observability/grafana-obs-logs.jsonnet | $(XARGS) -I{} sh -c 'cat {} | $(GOJSONTOYAML) > {}.yaml' -- {}

.PHONY: whitelisted_metrics
whitelisted_metrics: $(GOJSONTOYAML) $(GOJQ)
whitelisted_metrics: $(GOJSONTOYAML) $(GOJQ) configuration/telemeter/metrics.json resources/services/telemeter-template.yaml

configuration/telemeter/metrics.json:
@echo ">>>>> Running whitelisted_metrics"
# Download the latest metrics file to extract the newly added metrics.
# NOTE: Because old clusters could still send metrics, the whitelisting is append-only.
Expand Down
15 changes: 13 additions & 2 deletions configuration/telemeter/metrics.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[
"{__name__=\":apiserver_v1_image_imports:sum\"}",
"{__name__=\"ALERTS\",alertstate=\"firing\",severity=~\"critical|warning|info|none\"}",
"{__name__=\"ALERTS\",alertstate=\"firing\"}",
"{__name__=\"acm_console_page_count:sum\", page=~\"overview-classic|overview-fleet|search|search-details|clusters|application|governance\"}",
"{__name__=\"acm_managed_cluster_info\"}",
Expand Down Expand Up @@ -73,6 +74,8 @@
"{__name__=\"cluster_installer\"}",
"{__name__=\"cluster_legacy_scheduler_policy\"}",
"{__name__=\"cluster_master_schedulable\"}",
"{__name__=\"cluster_name:hypershift_nodepools_available_replicas:sum\"}",
"{__name__=\"cluster_name:hypershift_nodepools_size:sum\"}",
"{__name__=\"cluster_operator_conditions\"}",
"{__name__=\"cluster_operator_up\"}",
"{__name__=\"cluster_version\"}",
Expand All @@ -88,6 +91,7 @@
"{__name__=\"count:up1\"}",
"{__name__=\"csv_abnormal\"}",
"{__name__=\"csv_succeeded\"}",
"{__name__=\"enabled:tempo_operator_tempostack_jaeger_ui:sum\",enabled=~\"true|false\"}",
"{__name__=\"eo_es_cluster_management_state_info\"}",
"{__name__=\"eo_es_defined_delete_namespaces_total\"}",
"{__name__=\"eo_es_misconfigured_memory_resources_info\"}",
Expand Down Expand Up @@ -164,8 +168,6 @@
"{__name__=\"os_image_url_override:sum\"}",
"{__name__=\"platform:hypershift_hostedclusters:max\"}",
"{__name__=\"platform:hypershift_nodepools:max\"}",
"{__name__=\"cluster_name:hypershift_nodepools_size:sum\"}",
"{__name__=\"cluster_name:hypershift_nodepools_available_replicas:sum\"}",
"{__name__=\"pod:eo_es_shards_total:max\"}",
"{__name__=\"profile:cluster_monitoring_operator_collection_profile:max\"}",
"{__name__=\"rhacs:telemetry:rox_central_info\"}",
Expand All @@ -180,8 +182,17 @@
"{__name__=\"rhods_total_users\"}",
"{__name__=\"state:rhoam_critical_alerts:max\"}",
"{__name__=\"state:rhoam_warning_alerts:max\"}",
"{__name__=\"state:tempo_operator_tempostack_managed:sum\",state=~\"Managed|Unmanaged\"}",
"{__name__=\"status:upgrading:version:rhoam_state:max\"}",
"{__name__=\"subscription_sync_total\"}",
"{__name__=\"type:opentelemetry_collector_connectors:sum\",type=~\"spanmetricsconnector|forwardconnector\"}",
"{__name__=\"type:opentelemetry_collector_exporters:sum\",type=~\"debugexporter|loggingexporter|otlpexporter|otlphttpexporter|prometheusexporter|lokiexporter|kafkaexporter|awscloudwatchlogsexporter|loadbalancingexporter\"}",
"{__name__=\"type:opentelemetry_collector_extensions:sum\",type=~\"zpagesextension|ballastextension|memorylimiterextension|jaegerremotesampling|healthcheckextension|pprofextension|oauth2clientauthextension|oidcauthextension|bearertokenauthextension|filestorage\"}",
"{__name__=\"type:opentelemetry_collector_info:sum\",type=~\"deployment|daemonset|sidecar|statefulset\"}",
"{__name__=\"type:opentelemetry_collector_processors:sum\",type=~\"batchprocessor|memorylimiterprocessor|attributesprocessor|resourceprocessor|spanprocessor|k8sattributesprocessor|resourcedetectionprocessor|filterprocessor|routingprocessor|cumulativetodeltaprocessor|groupbyattrsprocessor\"}",
"{__name__=\"type:opentelemetry_collector_receivers:sum\",type=~\"jaegerreceiver|hostmetricsreceiver|opencensusreceiver|prometheusreceiver|zipkinreceiver|kafkareceiver|filelogreceiver|journaldreceiver|k8seventsreceiver|kubeletstatsreceiver|k8sclusterreceiver|k8sobjectsreceiver\"}",
"{__name__=\"type:tempo_operator_tempostack_multi_tenancy:sum\",type=~\"enabled|disabled\"}",
"{__name__=\"type:tempo_operator_tempostack_storage_backend:sum\",type=~\"azure|gcs|s3\"}",
"{__name__=\"up\"}",
"{__name__=\"visual_web_terminal_sessions_total\"}",
"{__name__=\"workload:cpu_usage_cores:sum\"}",
Expand Down
30 changes: 26 additions & 4 deletions resources/services/telemeter-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ objects:
- --memcached=memcached-1.memcached.${NAMESPACE}.svc.cluster.local:11211
- --memcached=memcached-2.memcached.${NAMESPACE}.svc.cluster.local:11211
- --whitelist={__name__=":apiserver_v1_image_imports:sum"}
- --whitelist={__name__="alerts",alertstate="firing",severity=~"critical|warning|info|none"}
- --whitelist={__name__="alerts",alertstate="firing"}
- --whitelist={__name__="acm_console_page_count:sum", page=~"overview-classic|overview-fleet|search|search-details|clusters|application|governance"}
- --whitelist={__name__="acm_managed_cluster_info"}
Expand Down Expand Up @@ -167,6 +168,8 @@ objects:
- --whitelist={__name__="cluster_installer"}
- --whitelist={__name__="cluster_legacy_scheduler_policy"}
- --whitelist={__name__="cluster_master_schedulable"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_available_replicas:sum"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_size:sum"}
- --whitelist={__name__="cluster_operator_conditions"}
- --whitelist={__name__="cluster_operator_up"}
- --whitelist={__name__="cluster_version"}
Expand All @@ -182,6 +185,7 @@ objects:
- --whitelist={__name__="count:up1"}
- --whitelist={__name__="csv_abnormal"}
- --whitelist={__name__="csv_succeeded"}
- --whitelist={__name__="enabled:tempo_operator_tempostack_jaeger_ui:sum",enabled=~"true|false"}
- --whitelist={__name__="eo_es_cluster_management_state_info"}
- --whitelist={__name__="eo_es_defined_delete_namespaces_total"}
- --whitelist={__name__="eo_es_misconfigured_memory_resources_info"}
Expand Down Expand Up @@ -258,8 +262,6 @@ objects:
- --whitelist={__name__="os_image_url_override:sum"}
- --whitelist={__name__="platform:hypershift_hostedclusters:max"}
- --whitelist={__name__="platform:hypershift_nodepools:max"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_size:sum"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_available_replicas:sum"}
- --whitelist={__name__="pod:eo_es_shards_total:max"}
- --whitelist={__name__="profile:cluster_monitoring_operator_collection_profile:max"}
- --whitelist={__name__="rhacs:telemetry:rox_central_info"}
Expand All @@ -274,8 +276,17 @@ objects:
- --whitelist={__name__="rhods_total_users"}
- --whitelist={__name__="state:rhoam_critical_alerts:max"}
- --whitelist={__name__="state:rhoam_warning_alerts:max"}
- --whitelist={__name__="state:tempo_operator_tempostack_managed:sum",state=~"Managed|Unmanaged"}
- --whitelist={__name__="status:upgrading:version:rhoam_state:max"}
- --whitelist={__name__="subscription_sync_total"}
- --whitelist={__name__="type:opentelemetry_collector_connectors:sum",type=~"spanmetricsconnector|forwardconnector"}
- --whitelist={__name__="type:opentelemetry_collector_exporters:sum",type=~"debugexporter|loggingexporter|otlpexporter|otlphttpexporter|prometheusexporter|lokiexporter|kafkaexporter|awscloudwatchlogsexporter|loadbalancingexporter"}
- --whitelist={__name__="type:opentelemetry_collector_extensions:sum",type=~"zpagesextension|ballastextension|memorylimiterextension|jaegerremotesampling|healthcheckextension|pprofextension|oauth2clientauthextension|oidcauthextension|bearertokenauthextension|filestorage"}
- --whitelist={__name__="type:opentelemetry_collector_info:sum",type=~"deployment|daemonset|sidecar|statefulset"}
- --whitelist={__name__="type:opentelemetry_collector_processors:sum",type=~"batchprocessor|memorylimiterprocessor|attributesprocessor|resourceprocessor|spanprocessor|k8sattributesprocessor|resourcedetectionprocessor|filterprocessor|routingprocessor|cumulativetodeltaprocessor|groupbyattrsprocessor"}
- --whitelist={__name__="type:opentelemetry_collector_receivers:sum",type=~"jaegerreceiver|hostmetricsreceiver|opencensusreceiver|prometheusreceiver|zipkinreceiver|kafkareceiver|filelogreceiver|journaldreceiver|k8seventsreceiver|kubeletstatsreceiver|k8sclusterreceiver|k8sobjectsreceiver"}
- --whitelist={__name__="type:tempo_operator_tempostack_multi_tenancy:sum",type=~"enabled|disabled"}
- --whitelist={__name__="type:tempo_operator_tempostack_storage_backend:sum",type=~"azure|gcs|s3"}
- --whitelist={__name__="up"}
- --whitelist={__name__="visual_web_terminal_sessions_total"}
- --whitelist={__name__="workload:cpu_usage_cores:sum"}
Expand Down Expand Up @@ -379,6 +390,7 @@ objects:
- --memcached=memcached-1.memcached.${NAMESPACE}.svc.cluster.local:11211
- --memcached=memcached-2.memcached.${NAMESPACE}.svc.cluster.local:11211
- --whitelist={__name__=":apiserver_v1_image_imports:sum"}
- --whitelist={__name__="alerts",alertstate="firing",severity=~"critical|warning|info|none"}
- --whitelist={__name__="alerts",alertstate="firing"}
- --whitelist={__name__="acm_console_page_count:sum", page=~"overview-classic|overview-fleet|search|search-details|clusters|application|governance"}
- --whitelist={__name__="acm_managed_cluster_info"}
Expand Down Expand Up @@ -452,6 +464,8 @@ objects:
- --whitelist={__name__="cluster_installer"}
- --whitelist={__name__="cluster_legacy_scheduler_policy"}
- --whitelist={__name__="cluster_master_schedulable"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_available_replicas:sum"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_size:sum"}
- --whitelist={__name__="cluster_operator_conditions"}
- --whitelist={__name__="cluster_operator_up"}
- --whitelist={__name__="cluster_version"}
Expand All @@ -467,6 +481,7 @@ objects:
- --whitelist={__name__="count:up1"}
- --whitelist={__name__="csv_abnormal"}
- --whitelist={__name__="csv_succeeded"}
- --whitelist={__name__="enabled:tempo_operator_tempostack_jaeger_ui:sum",enabled=~"true|false"}
- --whitelist={__name__="eo_es_cluster_management_state_info"}
- --whitelist={__name__="eo_es_defined_delete_namespaces_total"}
- --whitelist={__name__="eo_es_misconfigured_memory_resources_info"}
Expand Down Expand Up @@ -543,8 +558,6 @@ objects:
- --whitelist={__name__="os_image_url_override:sum"}
- --whitelist={__name__="platform:hypershift_hostedclusters:max"}
- --whitelist={__name__="platform:hypershift_nodepools:max"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_size:sum"}
- --whitelist={__name__="cluster_name:hypershift_nodepools_available_replicas:sum"}
- --whitelist={__name__="pod:eo_es_shards_total:max"}
- --whitelist={__name__="profile:cluster_monitoring_operator_collection_profile:max"}
- --whitelist={__name__="rhacs:telemetry:rox_central_info"}
Expand All @@ -559,8 +572,17 @@ objects:
- --whitelist={__name__="rhods_total_users"}
- --whitelist={__name__="state:rhoam_critical_alerts:max"}
- --whitelist={__name__="state:rhoam_warning_alerts:max"}
- --whitelist={__name__="state:tempo_operator_tempostack_managed:sum",state=~"Managed|Unmanaged"}
- --whitelist={__name__="status:upgrading:version:rhoam_state:max"}
- --whitelist={__name__="subscription_sync_total"}
- --whitelist={__name__="type:opentelemetry_collector_connectors:sum",type=~"spanmetricsconnector|forwardconnector"}
- --whitelist={__name__="type:opentelemetry_collector_exporters:sum",type=~"debugexporter|loggingexporter|otlpexporter|otlphttpexporter|prometheusexporter|lokiexporter|kafkaexporter|awscloudwatchlogsexporter|loadbalancingexporter"}
- --whitelist={__name__="type:opentelemetry_collector_extensions:sum",type=~"zpagesextension|ballastextension|memorylimiterextension|jaegerremotesampling|healthcheckextension|pprofextension|oauth2clientauthextension|oidcauthextension|bearertokenauthextension|filestorage"}
- --whitelist={__name__="type:opentelemetry_collector_info:sum",type=~"deployment|daemonset|sidecar|statefulset"}
- --whitelist={__name__="type:opentelemetry_collector_processors:sum",type=~"batchprocessor|memorylimiterprocessor|attributesprocessor|resourceprocessor|spanprocessor|k8sattributesprocessor|resourcedetectionprocessor|filterprocessor|routingprocessor|cumulativetodeltaprocessor|groupbyattrsprocessor"}
- --whitelist={__name__="type:opentelemetry_collector_receivers:sum",type=~"jaegerreceiver|hostmetricsreceiver|opencensusreceiver|prometheusreceiver|zipkinreceiver|kafkareceiver|filelogreceiver|journaldreceiver|k8seventsreceiver|kubeletstatsreceiver|k8sclusterreceiver|k8sobjectsreceiver"}
- --whitelist={__name__="type:tempo_operator_tempostack_multi_tenancy:sum",type=~"enabled|disabled"}
- --whitelist={__name__="type:tempo_operator_tempostack_storage_backend:sum",type=~"azure|gcs|s3"}
- --whitelist={__name__="up"}
- --whitelist={__name__="visual_web_terminal_sessions_total"}
- --whitelist={__name__="workload:cpu_usage_cores:sum"}
Expand Down

0 comments on commit e942112

Please sign in to comment.