Skip to content

Commit

Permalink
add ruler up probing deployment
Browse files Browse the repository at this point in the history
Signed-off-by: Thibault Mange <[email protected]>
  • Loading branch information
thibaultmg committed Jul 17, 2023
1 parent ae32bae commit 65562fa
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 1 deletion.
16 changes: 16 additions & 0 deletions configuration/observatorium/queries-ruler.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Queries handed to the `up` prober for the rule-evaluation query path.
// All three average the same avalanche series for one tenant; only the range
// window (and the sample-count tag in the name) differs. The `name` is what the
// SLO expressions select through the `query` label.
local tenantId = '0fc2b00e-201b-4c17-b9f2-19d91adc4fd2';

// Builds a single query entry for a sample-count tag and a range window.
local sliQuery(samples, window) = {
  name: 'rule-query-path-sli-%s-samples' % [samples],
  query: 'avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id="%s"}[%s])' % [tenantId, window],
};

{
  queries: [
    sliQuery('1M', '1h'),
    sliQuery('10M', '10h'),
    sliQuery('100M', '100h'),
  ],
}
2 changes: 1 addition & 1 deletion configuration/observatorium/slo.go
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ func ObservatoriumSLOs(envName rhobsInstanceEnv, signal signal) []pyrrav1alpha1.
description: "API /query endpoint for rules evaluation is burning too much error budget for 100M samples, to guarantee latency SLOs.",
successOrErrorsExpr: "up_custom_query_duration_seconds_bucket{query=\"rule-query-path-sli-100M-samples\", namespace=\"" + upNS[envName] + "\", http_code=~\"^2..$\", le=\"120\"}",
totalExpr: "up_custom_query_duration_seconds_count{query=\"rule-query-path-sli-100M-samples\", namespace=\"" + upNS[envName] + "\", http_code=~\"^2..$\"}",
alertName: "APIMetricsRuleReadLatency100MErrorBudgetBurning",
alertName: "APIMetricsRuleReadLatency100MErrorBudgetBurning",
sloType: sloTypeLatency,
},
{
Expand Down
112 changes: 112 additions & 0 deletions resources/services/observatorium-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,118 @@ objects:
- configMap:
name: observatorium-observatorium-up
name: query-config
# ConfigMap carrying queries.yaml for the ruler-path `up` prober. The Deployment
# named observatorium-observatorium-up-ruler mounts it under /etc/up/.
- apiVersion: v1
  kind: ConfigMap
  metadata:
    name: observatorium-observatorium-up-ruler
    annotations:
      qontract.recycle: "true"
    labels:
      app.kubernetes.io/component: blackbox-prober
      app.kubernetes.io/instance: observatorium
      app.kubernetes.io/name: observatorium-up
      app.kubernetes.io/part-of: observatorium
      app.kubernetes.io/version: master-2022-03-24-098c31a
  data:
    # Three avg_over_time queries over 1h / 10h / 100h windows of the same series.
    queries.yaml: |-
      "queries":
      - "name": "rule-query-path-sli-1M-samples"
        "query": "avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id=\"0fc2b00e-201b-4c17-b9f2-19d91adc4fd2\"}[1h])"
      - "name": "rule-query-path-sli-10M-samples"
        "query": "avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id=\"0fc2b00e-201b-4c17-b9f2-19d91adc4fd2\"}[10h])"
      - "name": "rule-query-path-sli-100M-samples"
        "query": "avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id=\"0fc2b00e-201b-4c17-b9f2-19d91adc4fd2\"}[100h])"
# Deployment of the `up` prober pointed at the ruler query service. It reads its
# queries from /etc/up/queries.yaml, supplied by the
# observatorium-observatorium-up-ruler ConfigMap volume.
- apiVersion: apps/v1
  kind: Deployment
  metadata:
    name: observatorium-observatorium-up-ruler
    labels:
      app.kubernetes.io/component: blackbox-prober
      app.kubernetes.io/instance: observatorium
      app.kubernetes.io/name: observatorium-up
      app.kubernetes.io/part-of: observatorium
      app.kubernetes.io/version: master-2022-03-24-098c31a
  spec:
    # Template parameter in ${{...}} form, so the substituted value is not quoted.
    replicas: ${{UP_REPLICAS}}
    # NOTE(review): none of these labels is specific to the ruler prober. If
    # another workload in the namespace carries the same four labels, this
    # selector overlaps with it. Confirm whether a distinguishing label is needed.
    selector:
      matchLabels:
        app.kubernetes.io/component: blackbox-prober
        app.kubernetes.io/instance: observatorium
        app.kubernetes.io/name: observatorium-up
        app.kubernetes.io/part-of: observatorium
    template:
      metadata:
        labels:
          app.kubernetes.io/component: blackbox-prober
          app.kubernetes.io/instance: observatorium
          app.kubernetes.io/name: observatorium-up
          app.kubernetes.io/part-of: observatorium
          app.kubernetes.io/version: master-2022-03-24-098c31a
      spec:
        volumes:
        - name: query-config
          configMap:
            name: observatorium-observatorium-up-ruler
        containers:
        - name: observatorium-up
          image: quay.io/observatorium/up:master-2022-03-24-098c31a
          args:
          - --duration=0
          - --log.level=debug
          - --endpoint-type=metrics
          - --queries-file=/etc/up/queries.yaml
          - --endpoint-read=http://observatorium-ruler-query.${OBSERVATORIUM_METRICS_NAMESPACE}.svc:9090
          ports:
          - name: http
            containerPort: 8080
          # CPU and memory sizing comes from the UP_* template parameters.
          resources:
            requests:
              cpu: ${UP_CPU_REQUEST}
              memory: ${UP_MEMORY_REQUEST}
            limits:
              cpu: ${UP_CPU_LIMIT}
              memory: ${UP_MEMORY_LIMIT}
          volumeMounts:
          - name: query-config
            mountPath: /etc/up/
            readOnly: false
# Service exposing the ruler-path `up` prober's http port (8080).
- apiVersion: v1
  kind: Service
  metadata:
    name: observatorium-observatorium-up-ruler
    labels:
      app.kubernetes.io/component: blackbox-prober
      app.kubernetes.io/instance: observatorium
      app.kubernetes.io/name: observatorium-up
      app.kubernetes.io/part-of: observatorium
      app.kubernetes.io/version: master-2022-03-24-098c31a
  spec:
    # NOTE(review): the selector has no ruler-specific label, so any pod in the
    # namespace with these four labels becomes an endpoint. Confirm this is
    # intended.
    selector:
      app.kubernetes.io/component: blackbox-prober
      app.kubernetes.io/instance: observatorium
      app.kubernetes.io/name: observatorium-up
      app.kubernetes.io/part-of: observatorium
    ports:
    - name: http
      port: 8080
      targetPort: 8080
# ServiceMonitor that scrapes the `http` port of Services matching the labels
# below, in the namespaces given by the NAMESPACES template parameter.
- apiVersion: monitoring.coreos.com/v1
  kind: ServiceMonitor
  metadata:
    name: observatorium-up-ruler
    labels:
      prometheus: app-sre
  spec:
    # NOTE(review): these match labels are not specific to the ruler prober's
    # Service. Verify that no other Service in the selected namespaces carries
    # the same labels, otherwise it is scraped by this monitor as well.
    selector:
      matchLabels:
        app.kubernetes.io/component: blackbox-prober
        app.kubernetes.io/instance: observatorium
        app.kubernetes.io/name: observatorium-up
        app.kubernetes.io/part-of: observatorium
    namespaceSelector:
      matchNames: ${{NAMESPACES}}
    endpoints:
    - port: http
- apiVersion: v1
kind: Service
metadata:
Expand Down
57 changes: 57 additions & 0 deletions services/observatorium.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,59 @@ local obsctlReloader = (import 'github.com/rhobs/obsctl-reloader/jsonnet/lib/obs
},
},

// `up` prober instance that sends the ruler SLI queries (queries-ruler.libsonnet)
// to the Thanos rulerQuery service. Replica count, resource sizing and the
// scraped namespaces are left as template parameters.
upRuler:: up({
  local cfg = self,
  local upName = cfg.commonLabels['app.kubernetes.io/name'],
  local rulerQueryService = obs.thanos.rulerQuery.service.metadata.name,

  name: '%s-%s-ruler' % [obs.config.name, upName],
  namespace: obs.config.namespaces.default,
  commonLabels+:: obs.config.commonLabels,

  version: 'master-2022-03-24-098c31a',
  image: 'quay.io/observatorium/up:' + cfg.version,
  replicas: 1,

  endpointType: 'metrics',
  readEndpoint: 'http://' + rulerQueryService + '.' + obs.config.namespaces.metrics + '.svc:9090',
  queryConfig: (import '../configuration/observatorium/queries-ruler.libsonnet'),
  serviceMonitor: true,

  resources: {
    limits: {
      cpu: '${UP_CPU_LIMIT}',
      memory: '${UP_MEMORY_LIMIT}',
    },
    requests: {
      cpu: '${UP_CPU_REQUEST}',
      memory: '${UP_MEMORY_REQUEST}',
    },
  },
}) {
  // Ask for a recycle when the queries ConfigMap changes.
  configmap+: {
    metadata+: { annotations+: { 'qontract.recycle': 'true' } },
  },

  // Replace the literal replica count with a template parameter.
  deployment+: {
    spec+: { replicas: '${{UP_REPLICAS}}' },
  },

  serviceMonitor+: {
    metadata+: {
      name: 'observatorium-up-ruler',
      labels+: {
        // Declared hidden (`::`) so the version label is not manifested on the
        // ServiceMonitor.
        'app.kubernetes.io/version':: 'hidden',
        prometheus: 'app-sre',
      },
    },
    spec+: {
      namespaceSelector: {
        // NOTICE on the ${{PARAMETER_NAME}} syntax:
        //  - exactly one parameter reference is allowed, with no leading or
        //    trailing characters around it;
        //  - after substitution the value is emitted unquoted when it is a
        //    valid JSON value;
        //  - otherwise it is quoted and treated as an ordinary string.
        matchNames: '${{NAMESPACES}}',
      },
    },
  },
},


avalanche:: {

Expand Down Expand Up @@ -717,6 +770,10 @@ local obsctlReloader = (import 'github.com/rhobs/obsctl-reloader/jsonnet/lib/obs
['observatorium-up-' + name]: obs.up[name]
for name in std.objectFields(obs.up)
if obs.up[name] != null
} + {
['observatorium-up-ruler-' + name]: obs.upRuler[name]
for name in std.objectFields(obs.upRuler)
if obs.upRuler[name] != null
} + {
['observatorium-cache-' + name]: obs.memcached[name]
for name in std.objectFields(obs.memcached)
Expand Down

0 comments on commit 65562fa

Please sign in to comment.