[grafana-sampling] upgrade to grafana alloy (#3273)

* feat: upgrade to grafana alloy --------- Signed-off-by: Robbie Lankford <[email protected]>
grafana · Aug 16, 2024 · d243174 · d243174
1 parent 6a386f9
commit d243174
Show file tree

Hide file tree

Showing 22 changed files with 176 additions and 111 deletions.
diff --git a/charts/grafana-sampling/Chart.lock b/charts/grafana-sampling/Chart.lock
@@ -1,9 +1,9 @@
 dependencies:
-- name: grafana-agent
+- name: alloy
   repository: https://grafana.github.io/helm-charts
-  version: 0.36.0
-- name: grafana-agent
+  version: 0.6.0
+- name: alloy
   repository: https://grafana.github.io/helm-charts
-  version: 0.36.0
-digest: sha256:6d04a55dce2c09c4c250c6453e0d58f7280750bf04fce51027b4e235062413e5
-generated: "2024-03-11T15:41:30.921516-07:00"
+  version: 0.6.0
+digest: sha256:e9dbff0d3707c403c1fb645eb33920a2219cc3156358134537e89caf39c588a5
+generated: "2024-08-14T10:41:47.606272-07:00"
diff --git a/charts/grafana-sampling/Chart.yaml b/charts/grafana-sampling/Chart.yaml
@@ -2,17 +2,17 @@ apiVersion: v2
 name: grafana-sampling
 description: A Helm chart for a layered OTLP tail sampling and metrics generation pipeline.
 type: application
-version: 0.1.1
-appVersion: "v0.40.2"
+version: 1.0.0
+appVersion: "v1.3.0"
 sources:
-  - https://github.com/grafana/agent
+  - https://github.com/grafana/alloy
   - https://grafana.com/docs/grafana-cloud/monitor-applications/application-observability/setup/sampling/tail/
 dependencies:
-  - name: grafana-agent
-    version: 0.36.0
+  - name: alloy
+    version: 0.6.0
     repository: https://grafana.github.io/helm-charts
-    alias: grafana-agent-deployment
-  - name: grafana-agent
-    version: 0.36.0
+    alias: alloy-deployment
+  - name: alloy
+    version: 0.6.0
     repository: https://grafana.github.io/helm-charts
-    alias: grafana-agent-statefulset
+    alias: alloy-statefulset
diff --git a/charts/grafana-sampling/README.md b/charts/grafana-sampling/README.md
@@ -1,10 +1,37 @@
 # grafana-sampling
 
-![Version: 0.1.1](https://img.shields.io/badge/Version-0.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.40.2](https://img.shields.io/badge/AppVersion-v0.40.2-informational?style=flat-square)
+![Version: 1.0.0](https://img.shields.io/badge/Version-1.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.3.0](https://img.shields.io/badge/AppVersion-v1.3.0-informational?style=flat-square)
 
 A Helm chart for a layered OTLP tail sampling and metrics generation pipeline.
 
-This chart deploys the following architecture to your environment:
+## Breaking change announcements
+
+### **v1.0.0**
+
+Grafana Agent has been replaced with [Grafana Alloy](https://grafana.com/oss/alloy-opentelemetry-collector/)!
+
+These sections in your values file will need to be renamed:
+
+| Old                         | New                 | Purpose                                        |
+|-----------------------------|---------------------|------------------------------------------------|
+| `grafana-agent-deployment`  | `alloy-deployment`  | Settings for the Alloy load balancing instance |
+| `grafana-agent-statefulset` | `alloy-statefulset` | Settings for the Alloy tail sampling instance  |
+
+For example, if you have something like this:
+
+```yaml
+grafana-agent-statefulset:
+  agent:
+```
+
+you will need to change it to this (note that the nested `agent` key is also renamed to `alloy`):
+
+```yaml
+alloy-statefulset:
+  alloy:
+```
+
+This chart deploys the following architecture to your environment (note the agents have been replaced with Alloy):
 ![Photo of sampling architecture](./sampling-architecture.png)
 
 Note: by default, only OTLP traces are accepted at the load balancing layer.
@@ -22,13 +49,13 @@ Use the following command to install the chart with the release name `my-release
 
 ```console
 helm install my-release grafana/grafana-sampling --values - <<EOF | less
-grafana-agent-statefulset:
-  agent:
+alloy-statefulset:
+  alloy:
     extraEnv:
       - name: GRAFANA_CLOUD_API_KEY
         value: <REQUIRED>
       - name: GRAFANA_CLOUD_PROMETHEUS_URL
-        value: <REQUIRED>
+        value: <REQUIRED> # The URL must include the /api/prom/push path
       - name: GRAFANA_CLOUD_PROMETHEUS_USERNAME
         value: <REQUIRED>
       - name: GRAFANA_CLOUD_TEMPO_ENDPOINT
@@ -62,61 +89,62 @@ A major chart version change indicates that there is an incompatible breaking ch
 
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| grafana-agent-deployment.agent.configMap.create | bool | `false` |  |
-| grafana-agent-deployment.agent.extraPorts[0].name | string | `"otlp-grpc"` |  |
-| grafana-agent-deployment.agent.extraPorts[0].port | int | `4317` |  |
-| grafana-agent-deployment.agent.extraPorts[0].protocol | string | `"TCP"` |  |
-| grafana-agent-deployment.agent.extraPorts[0].targetPort | int | `4317` |  |
-| grafana-agent-deployment.agent.extraPorts[1].name | string | `"otlp-http"` |  |
-| grafana-agent-deployment.agent.extraPorts[1].port | int | `4318` |  |
-| grafana-agent-deployment.agent.extraPorts[1].protocol | string | `"TCP"` |  |
-| grafana-agent-deployment.agent.extraPorts[1].targetPort | int | `4318` |  |
-| grafana-agent-deployment.agent.resources.requests.cpu | string | `"1"` |  |
-| grafana-agent-deployment.agent.resources.requests.memory | string | `"2G"` |  |
-| grafana-agent-deployment.controller.autoscaling.enabled | bool | `false` | Creates a HorizontalPodAutoscaler for controller type deployment. |
-| grafana-agent-deployment.controller.autoscaling.maxReplicas | int | `5` | The upper limit for the number of replicas to which the autoscaler can scale up. |
-| grafana-agent-deployment.controller.autoscaling.minReplicas | int | `2` | The lower limit for the number of replicas to which the autoscaler can scale down. |
-| grafana-agent-deployment.controller.autoscaling.targetCPUUtilizationPercentage | int | `0` | Average CPU utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetCPUUtilizationPercentage` to 0 will disable CPU scaling. |
-| grafana-agent-deployment.controller.autoscaling.targetMemoryUtilizationPercentage | int | `80` | Average Memory utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetMemoryUtilizationPercentage` to 0 will disable Memory scaling. |
-| grafana-agent-deployment.controller.replicas | int | `1` |  |
-| grafana-agent-deployment.controller.type | string | `"deployment"` |  |
-| grafana-agent-deployment.nameOverride | string | `"deployment"` | Do not change this. |
-| grafana-agent-statefulset.agent.configMap.create | bool | `false` |  |
-| grafana-agent-statefulset.agent.extraEnv[0].name | string | `"GRAFANA_CLOUD_API_KEY"` |  |
-| grafana-agent-statefulset.agent.extraEnv[0].value | string | `"<REQUIRED>"` |  |
-| grafana-agent-statefulset.agent.extraEnv[1].name | string | `"GRAFANA_CLOUD_PROMETHEUS_URL"` |  |
-| grafana-agent-statefulset.agent.extraEnv[1].value | string | `"<REQUIRED>"` |  |
-| grafana-agent-statefulset.agent.extraEnv[2].name | string | `"GRAFANA_CLOUD_PROMETHEUS_USERNAME"` |  |
-| grafana-agent-statefulset.agent.extraEnv[2].value | string | `"<REQUIRED>"` |  |
-| grafana-agent-statefulset.agent.extraEnv[3].name | string | `"GRAFANA_CLOUD_TEMPO_ENDPOINT"` |  |
-| grafana-agent-statefulset.agent.extraEnv[3].value | string | `"<REQUIRED>"` |  |
-| grafana-agent-statefulset.agent.extraEnv[4].name | string | `"GRAFANA_CLOUD_TEMPO_USERNAME"` |  |
-| grafana-agent-statefulset.agent.extraEnv[4].value | string | `"<REQUIRED>"` |  |
-| grafana-agent-statefulset.agent.extraEnv[5].name | string | `"POD_UID"` |  |
-| grafana-agent-statefulset.agent.extraEnv[5].valueFrom.fieldRef.apiVersion | string | `"v1"` |  |
-| grafana-agent-statefulset.agent.extraEnv[5].valueFrom.fieldRef.fieldPath | string | `"metadata.uid"` |  |
-| grafana-agent-statefulset.agent.extraPorts[0].name | string | `"otlp-grpc"` |  |
-| grafana-agent-statefulset.agent.extraPorts[0].port | int | `4317` |  |
-| grafana-agent-statefulset.agent.extraPorts[0].protocol | string | `"TCP"` |  |
-| grafana-agent-statefulset.agent.extraPorts[0].targetPort | int | `4317` |  |
-| grafana-agent-statefulset.agent.resources.requests.cpu | string | `"1"` |  |
-| grafana-agent-statefulset.agent.resources.requests.memory | string | `"2G"` |  |
-| grafana-agent-statefulset.controller.autoscaling.enabled | bool | `false` | Creates a HorizontalPodAutoscaler for controller type deployment. |
-| grafana-agent-statefulset.controller.autoscaling.maxReplicas | int | `5` | The upper limit for the number of replicas to which the autoscaler can scale up. |
-| grafana-agent-statefulset.controller.autoscaling.minReplicas | int | `2` | The lower limit for the number of replicas to which the autoscaler can scale down. |
-| grafana-agent-statefulset.controller.autoscaling.targetCPUUtilizationPercentage | int | `0` | Average CPU utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetCPUUtilizationPercentage` to 0 will disable CPU scaling. |
-| grafana-agent-statefulset.controller.autoscaling.targetMemoryUtilizationPercentage | int | `80` | Average Memory utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetMemoryUtilizationPercentage` to 0 will disable Memory scaling. |
-| grafana-agent-statefulset.controller.replicas | int | `1` |  |
-| grafana-agent-statefulset.controller.type | string | `"statefulset"` |  |
-| grafana-agent-statefulset.nameOverride | string | `"statefulset"` | Do not change this. |
-| grafana-agent-statefulset.rbac.create | bool | `false` |  |
-| grafana-agent-statefulset.service.clusterIP | string | `"None"` |  |
-| grafana-agent-statefulset.serviceAccount.create | bool | `false` |  |
+| alloy-deployment.alloy.configMap.create | bool | `false` |  |
+| alloy-deployment.alloy.extraPorts[0].name | string | `"otlp-grpc"` |  |
+| alloy-deployment.alloy.extraPorts[0].port | int | `4317` |  |
+| alloy-deployment.alloy.extraPorts[0].protocol | string | `"TCP"` |  |
+| alloy-deployment.alloy.extraPorts[0].targetPort | int | `4317` |  |
+| alloy-deployment.alloy.extraPorts[1].name | string | `"otlp-http"` |  |
+| alloy-deployment.alloy.extraPorts[1].port | int | `4318` |  |
+| alloy-deployment.alloy.extraPorts[1].protocol | string | `"TCP"` |  |
+| alloy-deployment.alloy.extraPorts[1].targetPort | int | `4318` |  |
+| alloy-deployment.alloy.resources.requests.cpu | string | `"1"` |  |
+| alloy-deployment.alloy.resources.requests.memory | string | `"2G"` |  |
+| alloy-deployment.controller.autoscaling.enabled | bool | `false` | Creates a HorizontalPodAutoscaler for controller type deployment. |
+| alloy-deployment.controller.autoscaling.maxReplicas | int | `5` | The upper limit for the number of replicas to which the autoscaler can scale up. |
+| alloy-deployment.controller.autoscaling.minReplicas | int | `2` | The lower limit for the number of replicas to which the autoscaler can scale down. |
+| alloy-deployment.controller.autoscaling.targetCPUUtilizationPercentage | int | `0` | Average CPU utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetCPUUtilizationPercentage` to 0 will disable CPU scaling. |
+| alloy-deployment.controller.autoscaling.targetMemoryUtilizationPercentage | int | `80` | Average Memory utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetMemoryUtilizationPercentage` to 0 will disable Memory scaling. |
+| alloy-deployment.controller.replicas | int | `1` |  |
+| alloy-deployment.controller.type | string | `"deployment"` |  |
+| alloy-deployment.nameOverride | string | `"deployment"` | Do not change this. |
+| alloy-statefulset.alloy.configMap.create | bool | `false` |  |
+| alloy-statefulset.alloy.extraEnv[0].name | string | `"GRAFANA_CLOUD_API_KEY"` |  |
+| alloy-statefulset.alloy.extraEnv[0].value | string | `"<REQUIRED>"` |  |
+| alloy-statefulset.alloy.extraEnv[1].name | string | `"GRAFANA_CLOUD_PROMETHEUS_URL"` |  |
+| alloy-statefulset.alloy.extraEnv[1].value | string | `"<REQUIRED>"` |  |
+| alloy-statefulset.alloy.extraEnv[2].name | string | `"GRAFANA_CLOUD_PROMETHEUS_USERNAME"` |  |
+| alloy-statefulset.alloy.extraEnv[2].value | string | `"<REQUIRED>"` |  |
+| alloy-statefulset.alloy.extraEnv[3].name | string | `"GRAFANA_CLOUD_TEMPO_ENDPOINT"` |  |
+| alloy-statefulset.alloy.extraEnv[3].value | string | `"<REQUIRED>"` |  |
+| alloy-statefulset.alloy.extraEnv[4].name | string | `"GRAFANA_CLOUD_TEMPO_USERNAME"` |  |
+| alloy-statefulset.alloy.extraEnv[4].value | string | `"<REQUIRED>"` |  |
+| alloy-statefulset.alloy.extraEnv[5].name | string | `"POD_UID"` |  |
+| alloy-statefulset.alloy.extraEnv[5].valueFrom.fieldRef.apiVersion | string | `"v1"` |  |
+| alloy-statefulset.alloy.extraEnv[5].valueFrom.fieldRef.fieldPath | string | `"metadata.uid"` |  |
+| alloy-statefulset.alloy.extraPorts[0].name | string | `"otlp-grpc"` |  |
+| alloy-statefulset.alloy.extraPorts[0].port | int | `4317` |  |
+| alloy-statefulset.alloy.extraPorts[0].protocol | string | `"TCP"` |  |
+| alloy-statefulset.alloy.extraPorts[0].targetPort | int | `4317` |  |
+| alloy-statefulset.alloy.resources.requests.cpu | string | `"1"` |  |
+| alloy-statefulset.alloy.resources.requests.memory | string | `"2G"` |  |
+| alloy-statefulset.controller.autoscaling.enabled | bool | `false` | Creates a HorizontalPodAutoscaler for controller type deployment. |
+| alloy-statefulset.controller.autoscaling.maxReplicas | int | `5` | The upper limit for the number of replicas to which the autoscaler can scale up. |
+| alloy-statefulset.controller.autoscaling.minReplicas | int | `2` | The lower limit for the number of replicas to which the autoscaler can scale down. |
+| alloy-statefulset.controller.autoscaling.targetCPUUtilizationPercentage | int | `0` | Average CPU utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetCPUUtilizationPercentage` to 0 will disable CPU scaling. |
+| alloy-statefulset.controller.autoscaling.targetMemoryUtilizationPercentage | int | `80` | Average Memory utilization across all relevant pods, a percentage of the requested value of the resource for the pods. Setting `targetMemoryUtilizationPercentage` to 0 will disable Memory scaling. |
+| alloy-statefulset.controller.replicas | int | `1` |  |
+| alloy-statefulset.controller.type | string | `"statefulset"` |  |
+| alloy-statefulset.nameOverride | string | `"statefulset"` | Do not change this. |
+| alloy-statefulset.rbac.create | bool | `false` |  |
+| alloy-statefulset.service.clusterIP | string | `"None"` |  |
+| alloy-statefulset.serviceAccount.create | bool | `false` |  |
 | metricsGeneration.dimensions | list | `["service.namespace","service.version","deployment.environment","k8s.cluster.name"]` | Additional dimensions to add to generated metrics. |
 | metricsGeneration.enabled | bool | `true` | Toggle generation of spanmetrics and servicegraph metrics. |
+| metricsGeneration.legacy | bool | `true` | Use legacy metric names that match those used by the Tempo metrics generator. |
 | sampling.decisionWait | string | `"15s"` | Wait time since the first span of a trace before making a sampling decision. |
 | sampling.enabled | bool | `true` | Toggle tail sampling. |
-| sampling.extraPolicies | string | A policy to sample long requests is added by default. | User-defined policies in river format. |
+| sampling.extraPolicies | string | A policy to sample long requests is added by default. | User-defined policies in Alloy configuration syntax. |
 | sampling.failedRequests.percentage | int | `50` | Percentage of failed requests to sample. |
 | sampling.failedRequests.sample | bool | `false` | Toggle sampling failed requests. |
 | sampling.successfulRequests.percentage | int | `10` | Percentage of successful requests to sample. |

diff --git a/charts/grafana-sampling/README.md.gotmpl b/charts/grafana-sampling/README.md.gotmpl
@@ -4,12 +4,38 @@
 
 {{ template "chart.description" . }}
 
-This chart deploys the following architecture to your environment:
+## Breaking change announcements
+
+### **v1.0.0**
+
+Grafana Agent has been replaced with [Grafana Alloy](https://grafana.com/oss/alloy-opentelemetry-collector/)!
+
+These sections in your values file will need to be renamed:
+
+| Old                         | New                 | Purpose                                        |
+|-----------------------------|---------------------|------------------------------------------------|
+| `grafana-agent-deployment`  | `alloy-deployment`  | Settings for the Alloy load balancing instance |
+| `grafana-agent-statefulset` | `alloy-statefulset` | Settings for the Alloy tail sampling instance  |
+
+For example, if you have something like this:
+
+```yaml
+grafana-agent-statefulset:
+  agent:
+```
+
+you will need to change it to this (note that the nested `agent` key is also renamed to `alloy`):
+
+```yaml
+alloy-statefulset:
+  alloy:
+```
+
+This chart deploys the following architecture to your environment (note the agents have been replaced with Alloy):
 ![Photo of sampling architecture](./sampling-architecture.png)
 
 Note: by default, only OTLP traces are accepted at the load balancing layer.
 
-
 ## Chart Repo
 
 Add the following repo to use the chart:
@@ -23,13 +49,13 @@ Use the following command to install the chart with the release name `my-release
 
 ```console
 helm install my-release grafana/grafana-sampling --values - <<EOF | less
-grafana-agent-statefulset:
-  agent:
+alloy-statefulset:
+  alloy:
     extraEnv:
       - name: GRAFANA_CLOUD_API_KEY
         value: <REQUIRED>
       - name: GRAFANA_CLOUD_PROMETHEUS_URL
-        value: <REQUIRED>
+        value: <REQUIRED> # The URL must include the /api/prom/push path
       - name: GRAFANA_CLOUD_PROMETHEUS_USERNAME
         value: <REQUIRED>
       - name: GRAFANA_CLOUD_TEMPO_ENDPOINT

diff --git a/...plates/_agent_config_deployment.river.txt → ...plates/_alloy_config_deployment.alloy.txt b/...plates/_agent_config_deployment.river.txt → ...plates/_alloy_config_deployment.alloy.txt
@@ -1,4 +1,4 @@
-{{- define "agent.config.deployment" -}}
+{{- define "alloy.config.deployment" -}}
   {{- include "deployment.receiver.otlp" . }}
   {{- include "deployment.processor.batch" . }}
   {{- include "deployment.exporter.loadbalancing" . }}

diff --git a/...lates/_agent_config_statefulset.river.txt → ...lates/_alloy_config_statefulset.alloy.txt b/...lates/_agent_config_statefulset.river.txt → ...lates/_alloy_config_statefulset.alloy.txt
@@ -1,9 +1,11 @@
-{{- define "agent.config.statefulset" -}}
+{{- define "alloy.config.statefulset" -}}
   {{- include "statefulset.receiver.otlp" . }}
   {{- if .Values.metricsGeneration.enabled -}}
     {{- include "statefulset.connector.spanmetrics" . }}
     {{- include "statefulset.processor.transform.drop_unneeded_resource_attributes" . }}
-    {{- include "statefulset.processor.transform.use_grafana_metric_names" . }}
+    {{- if .Values.metricsGeneration.legacy -}}
+        {{- include "statefulset.processor.transform.use_grafana_metric_names" . }}
+    {{- end -}}
     {{- include "statefulset.processor.filter" . }}
     {{- include "statefulset.connector.servicegraph" . }}
     {{- include "statefulset.exporter.prometheus" . }}

diff --git a/charts/grafana-sampling/templates/_helpers.tpl b/charts/grafana-sampling/templates/_helpers.tpl
@@ -1,9 +1,9 @@
-{{/* use the release name as the serviceAccount name for deployment and statefulset agents */}}
-{{- define "grafana-agent.serviceAccountName" -}}
+{{/* use the release name as the serviceAccount name for deployment and statefulset collectors */}}
+{{- define "alloy.serviceAccountName" -}}
 {{- default .Release.Name }}
 {{- end }}
 
-{{/* Calculate name of image ID to use for "grafana-agent". */}}
-{{- define "grafana-agent.imageId" -}}
+{{/* Calculate name of image ID to use for "alloy". */}}
+{{- define "alloy.imageId" -}}
 {{- printf ":%s" .Chart.AppVersion }}
 {{- end }}
diff --git a/...g/templates/_otelcol_auth_basic.river.txt → ...g/templates/_otelcol_auth_basic.alloy.txt b/...g/templates/_otelcol_auth_basic.river.txt → ...g/templates/_otelcol_auth_basic.alloy.txt
@@ -1,6 +1,6 @@
 {{- define "auth.basic" -}}
 otelcol.auth.basic "grafana_cloud_tempo" {
-  // https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.auth.basic/
+  // https://grafana.com/docs/alloy/latest/reference/components/otelcol.auth.basic/
   username = env("GRAFANA_CLOUD_TEMPO_USERNAME")
   password = env("GRAFANA_CLOUD_API_KEY")
 }

diff --git a/..._otelcol_connector_servicegraph.river.txt → ..._otelcol_connector_servicegraph.alloy.txt b/..._otelcol_connector_servicegraph.river.txt → ..._otelcol_connector_servicegraph.alloy.txt
@@ -1,6 +1,6 @@
 {{- define "statefulset.connector.servicegraph" -}}
 otelcol.connector.servicegraph "default" {
-  // https://grafana.com/docs/agent/latest/flow/reference/components/otelcol.connector.servicegraph/
+  // https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.servicegraph/
   dimensions = [
     {{- range $.Values.metricsGeneration.dimensions }}
     {{ . | quote }},