Skip to content

Commit

Permalink
Add a dashboard for the rules objstore (#562)
Browse files Browse the repository at this point in the history
* Add a dashboard for the rules objstore

Signed-off-by: Douglas Camata <[email protected]>

* Sync loki rules to fix build

Signed-off-by: Douglas Camata <[email protected]>

* Sync obs logs crd template

Signed-off-by: Douglas Camata <[email protected]>

---------

Signed-off-by: Douglas Camata <[email protected]>
  • Loading branch information
douglascamata authored Jul 26, 2023
1 parent c3be781 commit d88b86a
Show file tree
Hide file tree
Showing 5 changed files with 662 additions and 1 deletion.
7 changes: 7 additions & 0 deletions crds/loki.grafana.com_recordingrules.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@
description: 'The LogQL expression to evaluate. Every evaluation cycle this is evaluated at the current time, and all resultant time series become pending/firing alerts.',
type: 'string',
},
labels: {
additionalProperties: {
type: 'string',
},
description: 'Labels to add to each recording rule.',
type: 'object',
},
record: {
description: 'The name of the time series to output to. Must be a valid metric name.',
type: 'string',
Expand Down
110 changes: 110 additions & 0 deletions observability/dashboards/rules-objstore.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
local g = import 'github.com/thanos-io/thanos/mixin/lib/thanos-grafana-builder/builder.libsonnet';
local template = import 'grafonnet/template.libsonnet';
local config = (import '../config.libsonnet').thanos;

// Builds a Kubernetes ConfigMap manifest that carries the
// "Rules Objstore Dashboard" Grafana dashboard, serialized as JSON under the
// data key 'rhobs-observatorium-rules-objstore.json'. Takes no arguments.
function() {
  // Dashboard variable that supplies the $interval window used by the
  // rate() queries below.
  local intervalTemplate =
    template.interval(
      'interval',
      '5m,10m,30m,1h,6h,12h,auto',
      label='interval',
      current='5m',
    ),

  // Dashboard variable listing the namespaces in which a job matching
  // rules-objstore.* reports an `up` series.
  local namespaceTemplate =
    template.new(
      name='namespace',
      datasource='$datasource',
      query='label_values(up{job=~"rules-objstore.*"}, namespace)',
      label='namespace',
      allValues='.+',
      current='',
      hide='',
      refresh=2,
      includeAll=false,
      sort=1
    ),

  // Dashboard variable listing the rules-objstore jobs inside the selected
  // namespace; unlike the namespace variable it offers an "All" option.
  local jobTemplate =
    template.new(
      name='job',
      datasource='$datasource',
      query='label_values(up{namespace="$namespace", job=~"rules-objstore.*"}, job)',
      label='job',
      allValues='.+',
      current='',
      hide='',
      refresh=2,
      includeAll=true,
      sort=1
    ),

  // Builds one panel showing a single query whose series are labelled by
  // tenant. All four panels on this dashboard share this exact shape, so it
  // is defined once here instead of being repeated.
  //   title:       panel title
  //   description: panel description
  //   query:       PromQL expression to plot
  // The hidden `span` field is set to 0 on every panel, as before.
  local perTenantPanel(title, description, query) =
    g.panel(title, description) +
    g.queryPanel([query], ['{{tenant}}']) { span:: 0 },

  local dashboard =
    g.dashboard('Rules Objstore Dashboard')
    .addRow(
      g.row('Validations')
      // NOTE(review): this panel is titled "Successful validations" but it
      // plots rules_objstore_validations_total; confirm whether that metric
      // counts only successes or every validation attempt.
      .addPanel(
        perTenantPanel(
          'Successful validations',
          'Amount of success rule validations per tenant',
          'sum by (tenant) (rate(rules_objstore_validations_total{namespace="$namespace", job=~"$job"}[$interval]))',
        )
      )
      .addPanel(
        perTenantPanel(
          'Failed validations',
          'Amount of failed rule validations per tenant',
          'sum by (tenant) (rate(rules_objstore_validations_failed_total{namespace="$namespace", job=~"$job"}[$interval]))',
        )
      )
    )
    .addRow(
      g.row('Rules and rule groups')
      .addPanel(
        perTenantPanel(
          'Rule groups configured',
          'Amount of rule groups configured per tenant',
          'sum by (tenant) (rules_objstore_rule_groups_configured{namespace="$namespace", job=~"$job"})',
        )
      )
      .addPanel(
        perTenantPanel(
          'Rules configured',
          'Amount of rules configured per tenant',
          'sum by (tenant) (rules_objstore_rules_configured{namespace="$namespace", job=~"$job"})',
        )
      )
    )
    + {
      templating+: {
        // Replace the inherited variable list with a rewritten copy (the
        // datasource variable gains the instance-name regex filter) followed
        // by this dashboard's own variables.
        //
        // This must be `list:` and not `list+:`. `list+: X` is sugar for
        // `list: super.list + X`, which would keep the original entries AND
        // append their rewritten copies, emitting every inherited variable
        // (including `datasource`) twice.
        list: [
          if variable.name == 'datasource'
          then variable { regex: config.dashboard.instance_name_filter }
          else variable
          for variable in super.list
        ] + [namespaceTemplate, jobTemplate, intervalTemplate],
      },
    },

  apiVersion: 'v1',
  kind: 'ConfigMap',
  metadata: {
    name: 'grafana-dashboard-rules-objstore',
  },
  data: {
    // The dashboard object is rendered to a JSON string; the second argument
    // is the indentation string passed to std.manifestJsonEx.
    'rhobs-observatorium-rules-objstore.json': std.manifestJsonEx(dashboard, ' '),
  },
}
3 changes: 2 additions & 1 deletion observability/grafana.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ local dashboards =
{ 'grafana-dashboard-slo-mst-stage.configmap': (import 'dashboards/slo.libsonnet')('mst', 'stage', 'MST Stage SLOs') } +
{ 'grafana-dashboard-tracing-otel.configmap': (import 'dashboards/opentelemetry.libsonnet')(obsDatasource, obsTraces) } +
{ 'grafana-dashboard-tracing-jaeger.configmap': (import 'dashboards/tracing.libsonnet')(obsDatasource, obsTraces) } +
{ 'grafana-dashboard-rhobs-instance-utilization-overview.configmap': (import 'dashboards/rhobs-instance-utilization-overview.libsonnet')() };
{ 'grafana-dashboard-rhobs-instance-utilization-overview.configmap': (import 'dashboards/rhobs-instance-utilization-overview.libsonnet')() } +
{ 'grafana-dashboard-rules-objstore.configmap': (import 'dashboards/rules-objstore.libsonnet')() };
{
[name]: dashboards[name] {
metadata+: {
Expand Down
5 changes: 5 additions & 0 deletions resources/crds/observatorium-logs-crds-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,11 @@ objects:
expr:
description: The LogQL expression to evaluate. Every evaluation cycle this is evaluated at the current time, and all resultant time series become pending/firing alerts.
type: string
labels:
additionalProperties:
type: string
description: Labels to add to each recording rule.
type: object
record:
description: The name of the time series to output to. Must be a valid metric name.
type: string
Expand Down
Loading

0 comments on commit d88b86a

Please sign in to comment.