From 0caac3b0dc9e9ec58448c92afb567af79ac2fd7d Mon Sep 17 00:00:00 2001 From: Philip Gough Date: Wed, 2 Aug 2023 15:17:04 +0100 Subject: [PATCH] Operations: Add tooling/docs/monitoring for thanos bucket inspect/replicate (#572) * operations: Add template to configure Thanos S3 secret from params * operations: Add template to run bucket inspect tool as a Job * operations: Add template to run bucket inspect tool as a CronJob * operations: Add docs for bucket inspect tool * operations: Add template to run bucket replicate tool as a Job * operations: Add PodMonitor template for replicate Job * operations: Add docs for bucket replicate tool --- resources/operations/bucket-inspect/README.md | 31 ++++++ .../bucket-inspect/cron-job-template.yaml | 62 ++++++++++++ .../bucket-inspect/job-template.yaml | 57 +++++++++++ .../bucket-inspect/s3-secret-template.yaml | 43 ++++++++ .../operations/bucket-replicate/README.md | 51 ++++++++++ .../bucket-replicate/job-template.yaml | 97 +++++++++++++++++++ .../bucket-replicate/monitoring-template.yaml | 33 +++++++ .../bucket-replicate/s3-secret-template.yaml | 43 ++++++++ 8 files changed, 417 insertions(+) create mode 100644 resources/operations/bucket-inspect/README.md create mode 100644 resources/operations/bucket-inspect/cron-job-template.yaml create mode 100644 resources/operations/bucket-inspect/job-template.yaml create mode 100644 resources/operations/bucket-inspect/s3-secret-template.yaml create mode 100644 resources/operations/bucket-replicate/README.md create mode 100644 resources/operations/bucket-replicate/job-template.yaml create mode 100644 resources/operations/bucket-replicate/monitoring-template.yaml create mode 100644 resources/operations/bucket-replicate/s3-secret-template.yaml diff --git a/resources/operations/bucket-inspect/README.md b/resources/operations/bucket-inspect/README.md new file mode 100644 index 0000000000..e582b71c4e --- /dev/null +++ b/resources/operations/bucket-inspect/README.md @@ -0,0 +1,31 @@ +# What 
+ +This template deploys [Thanos Bucket Inspect](https://thanos.io/tip/components/tools.md/#bucket-inspect) +as a Kubernetes Job or CronJob. + +# SOP + +Create a Kubernetes Secret that contains the credentials for the target object storage provider, or use the +template provided in this directory for S3 compatible object storage providers. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: thanos-bucket-inspect-config +type: Opaque +stringData: + config.yaml: | + # see https://thanos.io/tip/thanos/storage.md/ +``` + +Process the template and run the Job + +```bash +oc process -f job-template.yaml | oc apply -f - +``` + +Alternatively, you can run it as a CronJob +```bash +oc process -f cron-job-template.yaml | oc apply -f - +``` diff --git a/resources/operations/bucket-inspect/cron-job-template.yaml b/resources/operations/bucket-inspect/cron-job-template.yaml new file mode 100644 index 0000000000..f070c2e7f1 --- /dev/null +++ b/resources/operations/bucket-inspect/cron-job-template.yaml @@ -0,0 +1,62 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + name: rhobs-thanos-bucket-inspect-cron + labels: + app.kubernetes.io/name: thanos-bucket-inspect + app.kubernetes.io/part-of: observatorium +description: | + Inspect data in an object storage provider bucket on a schedule +parameters: + - name: NAME + description: The name of the CronJob. + value: 'thanos-bucket-inspect' + - name: NAMESPACE + description: The namespace where the Job should run. + value: 'observatorium-operations' + - name: OBJ_STORE_CONFIG_SECRET_NAME + value: 'thanos-bucket-inspect-config' + - name: SCHEDULE + description: The schedule for the Job to run. Defaults to every 12 hours. 
+ value: '0 */12 * * *' + - name: TENANT_ID + value: 'rhobs' + - name: IMAGE_TAG + value: 'v0.31.0' + - name: LOG_LEVEL + value: 'info' +objects: + - apiVersion: batch/v1 + kind: CronJob + metadata: + name: ${NAME} + namespace: ${NAMESPACE} + labels: + app.kubernetes.io/name: thanos-bucket-inspect + app.kubernetes.io/part-of: observatorium + spec: + schedule: ${SCHEDULE} + jobTemplate: + spec: + template: + spec: + containers: + - name: thanos-bucket-inspect + image: quay.io/thanos/thanos:${IMAGE_TAG} + volumeMounts: + - name: obj-store-config + readOnly: true + mountPath: "/var/lib/thanos/bucket-inspect-config" + args: + - 'tools' + - 'bucket' + - 'inspect' + - '--log.level=${LOG_LEVEL}' + - '--objstore.config-file=/var/lib/thanos/bucket-inspect-config/config.yaml' + - '--selector=tenant_id="${TENANT_ID}"' + restartPolicy: Never + volumes: + - name: obj-store-config + secret: + secretName: ${OBJ_STORE_CONFIG_SECRET_NAME} + diff --git a/resources/operations/bucket-inspect/job-template.yaml b/resources/operations/bucket-inspect/job-template.yaml new file mode 100644 index 0000000000..363cd13112 --- /dev/null +++ b/resources/operations/bucket-inspect/job-template.yaml @@ -0,0 +1,57 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + name: rhobs-thanos-bucket-inspect + labels: + app.kubernetes.io/name: thanos-bucket-inspect + app.kubernetes.io/part-of: observatorium +description: | + Inspect data in an object storage provider bucket +parameters: + - name: NAME + description: The name of the Job. + value: 'thanos-bucket-inspect' + - name: NAMESPACE + description: The namespace where the Job should run. 
+ value: 'observatorium-operations' + - name: OBJ_STORE_CONFIG_SECRET_NAME + value: 'thanos-bucket-inspect-config' + - name: TENANT_ID + value: 'rhobs' + - name: IMAGE_TAG + value: 'v0.31.0' + - name: LOG_LEVEL + value: 'info' +objects: + - apiVersion: batch/v1 + kind: Job + metadata: + name: ${NAME} + namespace: ${NAMESPACE} + labels: + app.kubernetes.io/name: thanos-bucket-inspect + app.kubernetes.io/part-of: observatorium + spec: + backoffLimit: 4 + template: + spec: + containers: + - name: thanos-bucket-inspect + image: quay.io/thanos/thanos:${IMAGE_TAG} + volumeMounts: + - name: obj-store-config + readOnly: true + mountPath: "/var/lib/thanos/bucket-inspect-config" + args: + - 'tools' + - 'bucket' + - 'inspect' + - '--log.level=${LOG_LEVEL}' + - '--objstore.config-file=/var/lib/thanos/bucket-inspect-config/config.yaml' + - '--selector=tenant_id="${TENANT_ID}"' + restartPolicy: Never + volumes: + - name: obj-store-config + secret: + secretName: ${OBJ_STORE_CONFIG_SECRET_NAME} + diff --git a/resources/operations/bucket-inspect/s3-secret-template.yaml b/resources/operations/bucket-inspect/s3-secret-template.yaml new file mode 100644 index 0000000000..74d7ec5205 --- /dev/null +++ b/resources/operations/bucket-inspect/s3-secret-template.yaml @@ -0,0 +1,43 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + name: Thanos Bucket Inspect + labels: + app.kubernetes.io/name: thanos-bucket-inspect + app.kubernetes.io/part-of: observatorium +description: | + This template creates a Secret that supports Thanos Object Storage inspection for S3. +parameters: + - name: NAMESPACE + description: The namespace where the Secret will be created. 
+ value: 'observatorium-operations' + - name: OBJ_STORE_CONFIG_SECRET_NAME + value: 'thanos-bucket-inspect-config' + - name: ACCESS_KEY_ID + - name: SECRET_ACCESS_KEY + - name: S3_BUCKET_NAME + - name: S3_BUCKET_ENDPOINT + value: s3.us-east-1.amazonaws.com + - name: S3_BUCKET_REGION + value: us-east-1 +objects: + - apiVersion: v1 + kind: Secret + metadata: + name: ${OBJ_STORE_CONFIG_SECRET_NAME} + namespace: ${NAMESPACE} + labels: + app.kubernetes.io/name: thanos-bucket-inspect + app.kubernetes.io/part-of: observatorium + type: Opaque + stringData: + config.yaml: | + type: S3 + config: + bucket: ${S3_BUCKET_NAME} + region: ${S3_BUCKET_REGION} + access_key: ${ACCESS_KEY_ID} + secret_key: ${SECRET_ACCESS_KEY} + endpoint: ${S3_BUCKET_ENDPOINT} + + diff --git a/resources/operations/bucket-replicate/README.md b/resources/operations/bucket-replicate/README.md new file mode 100644 index 0000000000..081acd4fbc --- /dev/null +++ b/resources/operations/bucket-replicate/README.md @@ -0,0 +1,51 @@ +# What + +This template deploys [Thanos Bucket Replicate](https://thanos.io/tip/components/tools.md/#bucket-replicate) +as a Kubernetes Job. + +# SOP + +> **_NOTE:_** Before running this Job, if you wish to track progress via logs, +you can run the [Thanos Bucket Inspect](../bucket-inspect/README.md#sop) +Job against the source and the CronJob against the destination to make sure that the source and destination +are in sync. +Logs are extra useful if you don't have access to the Prometheus metrics or the Job will complete before a scrape. + +Create Kubernetes Secrets that contain the credentials for both the source and destination object storage +providers, or use the template provided in this directory for S3 compatible object storage providers. 
+ + +```yaml +--- +apiVersion: v1 +kind: Secret +metadata: + name: thanos-bucket-replicate-config-source +type: Opaque +stringData: + config.yaml: | + # see https://thanos.io/tip/thanos/storage.md/ +--- +apiVersion: v1 +kind: Secret +metadata: + name: thanos-bucket-replicate-config-destination +type: Opaque +stringData: + config.yaml: | + # see https://thanos.io/tip/thanos/storage.md/ +``` + +Optionally create the PodMonitor to scrape Prometheus metrics from the Job + +```bash +oc process -f monitoring-template.yaml | oc apply -f - +``` + +Process the template and run the Job + +```bash +oc process -f job-template.yaml | oc apply -f - +``` + + diff --git a/resources/operations/bucket-replicate/job-template.yaml b/resources/operations/bucket-replicate/job-template.yaml new file mode 100644 index 0000000000..245f2b0d4d --- /dev/null +++ b/resources/operations/bucket-replicate/job-template.yaml @@ -0,0 +1,97 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + name: rhobs-thanos-bucket-replicate + labels: + app.kubernetes.io/name: thanos-bucket-replicate + app.kubernetes.io/part-of: observatorium +description: | + Replicate data between object storage provider buckets +parameters: + - name: NAME + description: The name of the Job. + value: 'thanos-bucket-replicate' + - name: NAMESPACE + description: The namespace where the Job should run. 
+ value: 'observatorium-operations' + - name: SOURCE_OBJ_STORE_CONFIG_SECRET_NAME + value: 'thanos-bucket-replicate-config-source' + - name: DESTINATION_OBJ_STORE_CONFIG_SECRET_NAME + value: 'thanos-bucket-replicate-config-destination' + - name: COMPACTION_MIN + value: '0' + - name: COMPACTION_MAX + value: '100' + - name: MIN_TIME + value: '0000-01-01T00:00:00Z' + - name: MAX_TIME + value: '9999-12-31T23:59:59Z' + - name: TENANT_ID + value: 'rhobs' + - name: IMAGE_TAG + value: 'main-2023-08-01-e1a3ec1' + - name: LOG_LEVEL + value: 'info' + - name: CPU_REQUEST + value: '1' + - name: CPU_LIMIT + value: '2' + - name: MEMORY_REQUEST + value: '500Mi' + - name: MEMORY_LIMIT + value: '1Gi' +objects: + - apiVersion: batch/v1 + kind: Job + metadata: + name: ${NAME} + namespace: ${NAMESPACE} + labels: + app.kubernetes.io/name: thanos-bucket-replicate + app.kubernetes.io/part-of: observatorium + spec: + parallelism: 1 + backoffLimit: 1 + template: + spec: + containers: + - name: thanos-bucket-replicate + image: quay.io/thanos/thanos:${IMAGE_TAG} + resources: + requests: + memory: ${MEMORY_REQUEST} + cpu: ${CPU_REQUEST} + limits: + memory: ${MEMORY_LIMIT} + cpu: ${CPU_LIMIT} + ports: + - containerPort: 10902 + name: metrics + volumeMounts: + - name: obj-store-from-config + readOnly: true + mountPath: "/var/lib/thanos/bucket-replicate-config/from" + - name: obj-store-to-config + readOnly: true + mountPath: "/var/lib/thanos/bucket-replicate-config/to" + args: + - 'tools' + - 'bucket' + - 'replicate' + - '--log.level=${LOG_LEVEL}' + - '--objstore.config-file=/var/lib/thanos/bucket-replicate-config/from/config.yaml' + - '--objstore-to.config-file=/var/lib/thanos/bucket-replicate-config/to/config.yaml' + - '--single-run' + - '--matcher=tenant_id="${TENANT_ID}"' + - '--min-time=${MIN_TIME}' + - '--max-time=${MAX_TIME}' + - '--compaction-min=${COMPACTION_MIN}' + - '--compaction-max=${COMPACTION_MAX}' + restartPolicy: Never + volumes: + - name: obj-store-from-config + secret: + 
secretName: ${SOURCE_OBJ_STORE_CONFIG_SECRET_NAME} + - name: obj-store-to-config + secret: + secretName: ${DESTINATION_OBJ_STORE_CONFIG_SECRET_NAME} diff --git a/resources/operations/bucket-replicate/monitoring-template.yaml b/resources/operations/bucket-replicate/monitoring-template.yaml new file mode 100644 index 0000000000..516f8c1203 --- /dev/null +++ b/resources/operations/bucket-replicate/monitoring-template.yaml @@ -0,0 +1,33 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + name: rhobs-thanos-bucket-replicate-pod-monitor + labels: + app.kubernetes.io/name: thanos-bucket-replicate + app.kubernetes.io/part-of: observatorium +parameters: + - name: NAMESPACE + description: The namespace where the running Job will reside. + value: 'observatorium-operations' + - name: NAME + description: The name of the Job. + value: 'thanos-bucket-replicate' +objects: + - apiVersion: monitoring.coreos.com/v1 + kind: PodMonitor + metadata: + name: observatorium-operations-thanos-bucket-replicate + labels: + prometheus: app-sre + spec: + namespaceSelector: + matchNames: + - ${NAMESPACE} + selector: + matchLabels: + job-name: ${NAME} + podMetricsEndpoints: + - port: metrics + interval: 30s + path: /metrics + diff --git a/resources/operations/bucket-replicate/s3-secret-template.yaml b/resources/operations/bucket-replicate/s3-secret-template.yaml new file mode 100644 index 0000000000..36313f8726 --- /dev/null +++ b/resources/operations/bucket-replicate/s3-secret-template.yaml @@ -0,0 +1,43 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + name: Thanos Bucket Inspect + labels: + app.kubernetes.io/name: thanos-bucket-replicate-secret + app.kubernetes.io/part-of: observatorium +description: | + This template creates a Secret that supports Thanos Object Storage for S3. +parameters: + - name: NAMESPACE + description: The namespace where the Secret will be created. 
+ value: 'observatorium-operations' + - name: OBJ_STORE_CONFIG_SECRET_NAME + value: 'thanos-bucket-config' + - name: ACCESS_KEY_ID + - name: SECRET_ACCESS_KEY + - name: S3_BUCKET_NAME + - name: S3_BUCKET_ENDPOINT + value: s3.us-east-1.amazonaws.com + - name: S3_BUCKET_REGION + value: us-east-1 +objects: + - apiVersion: v1 + kind: Secret + metadata: + name: ${OBJ_STORE_CONFIG_SECRET_NAME} + namespace: ${NAMESPACE} + labels: + app.kubernetes.io/name: thanos-bucket-replicate-secret + app.kubernetes.io/part-of: observatorium + type: Opaque + stringData: + config.yaml: | + type: S3 + config: + bucket: ${S3_BUCKET_NAME} + region: ${S3_BUCKET_REGION} + access_key: ${ACCESS_KEY_ID} + secret_key: ${SECRET_ACCESS_KEY} + endpoint: ${S3_BUCKET_ENDPOINT} + +