Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(services): add database metrics interface #1739

Merged
merged 6 commits into from
Dec 25, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"strings": true
},
"editor.codeActionsOnSave": {
"source.fixAll.eslint": true
"source.fixAll.eslint": "explicit"
},
// eslint
"eslint.alwaysShowStatus": true,
Expand Down
4 changes: 4 additions & 0 deletions build/charts/laf-server/templates/rumtime-exporter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ spec:
path: "/runtime/metrics/{{ .Values.default_region.runtime_exporter_secret}}"
scrapeTimeout: 10s
honorLabels: true
- interval: 30s
path: "/database/metrics/{{ .Values.default_region.runtime_exporter_secret}}"
scrapeTimeout: 10s
honorLabels: true
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
Expand Down
3 changes: 3 additions & 0 deletions services/runtime-exporter/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ export default class Config {
static get NAMESPACE(): string {
return process.env.NAMESPACE || ''
}
/**
 * Value of the `DB_NAMESPACE` environment variable, or an empty string when
 * the variable is unset or empty.
 */
static get DB_NAMESPACE(): string {
  const { DB_NAMESPACE: dbNamespace } = process.env
  return dbNamespace ? dbNamespace : ''
}

static get API_SECRET(): string {
if (!process.env.API_SECRET) {
Expand Down
58 changes: 58 additions & 0 deletions services/runtime-exporter/src/handler/get-db-metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { RequestHandler } from 'express'
import { ClusterService, Metric } from '../helper/cluster.service'
import Config from '../config'
import * as prom from 'prom-client'

// Registry that holds only the gauges declared in this module; the handler
// below serializes this registry for its response.
const register = new prom.Registry()

/**
 * Builds a gauge attached to this module's registry, labelled by
 * container, pod and appid.
 */
function createDatabaseGauge(name: string, help: string) {
  return new prom.Gauge({
    name,
    help,
    labelNames: ['container', 'pod', 'appid'],
    registers: [register],
  })
}

// CPU usage reported for each database container.
const DATABASE_CPU = createDatabaseGauge(
  'laf_mongo_cpu',
  'the cpu of the mongo',
)

// Memory usage reported for each database container.
const DATABASE_MEMORY = createDatabaseGauge(
  'laf_mongo_memory',
  'the memory of the mongo',
)

/**
 * Writes one sample into the CPU and memory gauges, using the sample's
 * container name, pod name and appid (in that order) as label values.
 */
function updateMetrics(metric: Metric) {
  const { containerName, podName, appid, cpu, memory } = metric

  const cpuSeries = DATABASE_CPU.labels(containerName, podName, appid)
  cpuSeries.set(cpu)

  const memorySeries = DATABASE_MEMORY.labels(containerName, podName, appid)
  memorySeries.set(memory)
}

/**
 * Express handler registered for `GET /database/metrics/:token`.
 *
 * Responds with 403 `forbidden` unless the `token` path parameter equals
 * `Config.API_SECRET`. Otherwise it fetches the current database pod metrics,
 * rebuilds the two database gauges from them and sends the serialized
 * contents of this module's registry.
 */
const getDatabaseMetrics: RequestHandler = async (req, res) => {
  const token = req.params.token

  if (!token || Config.API_SECRET !== token) {
    return res.status(403).send('forbidden')
  }

  // Fetch before touching the gauges: the reset and refill below then run
  // back-to-back with no await in between, so an overlapping scrape cannot
  // interleave with a half-rebuilt gauge set, and a failed fetch does not
  // leave the gauges wiped.
  const databaseMetrics = await ClusterService.getPodMetrics(
    ClusterService.DB_NAMESPACE,
    ClusterService.LABEL_DATABASE,
    'DATABASE',
  )

  // Clear the metrics data generated by the last request
  DATABASE_CPU.reset()
  DATABASE_MEMORY.reset()

  for (const metric of databaseMetrics) {
    updateMetrics(metric)
  }

  // Let the registry declare its own content type instead of hard-coding
  // bare text/plain for the serialized metrics.
  res.set('Content-Type', register.contentType)
  res.send(await register.metrics())
}

export default getDatabaseMetrics
7 changes: 5 additions & 2 deletions services/runtime-exporter/src/handler/get-runtime-metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,11 @@ const getRuntimeMetrics: RequestHandler = async (req, res) => {
RUNTIME_MEMORY.reset()
RUNTIME_MEMORY_LIMIT.reset()

const runtimeMetrics =
await ClusterService.getRuntimePodMetricsForAllNamespaces()
const runtimeMetrics = await ClusterService.getPodMetrics(
ClusterService.NAMESPACE,
ClusterService.LABEL_KEY_APP_ID,
'RUNTIME',
)

for (const metric of runtimeMetrics) {
updateMetrics(metric)
Expand Down
71 changes: 51 additions & 20 deletions services/runtime-exporter/src/helper/cluster.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ export class ClusterService {
* - if kubeconfig is not empty, load from string
*/
static LABEL_KEY_APP_ID = 'laf.dev/appid'
static LABEL_DATABASE = 'app.kubernetes.io/managed-by=kubeblocks'
static NAMESPACE = Config.NAMESPACE
static DB_NAMESPACE = Config.DB_NAMESPACE

static loadKubeConfig() {
const conf = Config.KUBECONF
Expand Down Expand Up @@ -55,42 +57,71 @@ export class ClusterService {
return new k8s.Metrics(kc)
}

static async getRuntimePodMetricsForAllNamespaces(): Promise<Metric[]> {
static async getPodMetrics(
namespace: string,
label: string,
app: string,
): Promise<Metric[]> {
const metricsClient = this.getMetricsClient()
let res: any
if (ClusterService.NAMESPACE) {
if (namespace) {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
res = await metricsClient.metricsApiRequest(
`/apis/metrics.k8s.io/v1beta1/namespaces/${ClusterService.NAMESPACE}/pods?labelSelector=laf.dev/appid`,
`/apis/metrics.k8s.io/v1beta1/namespaces/${namespace}/pods?labelSelector=${label}`,
)
} else {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
res = await metricsClient.metricsApiRequest(
'/apis/metrics.k8s.io/v1beta1/pods?labelSelector=laf.dev/appid',
`/apis/metrics.k8s.io/v1beta1/pods?labelSelector=${label}`,
)
}

const metricsList: Metric[] = []
for (const item of res.items) {
const appid = item.metadata.labels[ClusterService.LABEL_KEY_APP_ID]
const podName = item.metadata.name
for (const container of item.containers) {
const containerName = container.name
// millicores
const cpu = Number(quantityToScalar(container.usage.cpu || 0))
// bytes
const memory = Number(quantityToScalar(container.usage.memory || 0))

const metric: Metric = {
cpu: cpu,
memory: memory,
appid: appid,
containerName: containerName,
podName: podName,
if (app === 'RUNTIME') {
for (const item of res.items) {
const appid: string =
item.metadata.labels[ClusterService.LABEL_KEY_APP_ID]
const podName = item.metadata.name
for (const container of item.containers) {
// millicores
const cpu = Number(quantityToScalar(container.usage.cpu || 0))
// bytes
const memory = Number(quantityToScalar(container.usage.memory || 0))

const metric: Metric = {
cpu: cpu,
memory: memory,
appid: appid,
containerName: container.name,
podName: podName,
}
metricsList.push(metric)
}
}
} else {
for (const item of res.items) {
const appid: string = item.metadata.labels['app.kubernetes.io/instance']
const podName = item.metadata.name
for (const container of item.containers) {
if (container.name === 'mongodb') {
// millicores
const cpu = Number(quantityToScalar(container.usage.cpu || 0))
// bytes
const memory = Number(quantityToScalar(container.usage.memory || 0))

const metric: Metric = {
cpu: cpu,
memory: memory,
appid: appid,
containerName: container.name,
podName: podName,
}
metricsList.push(metric)
}
}
metricsList.push(metric)
}
}

Expand Down
2 changes: 2 additions & 0 deletions services/runtime-exporter/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import express, { Request, Response } from 'express'
import Config from './config'
import { logger } from './logger'
import getRuntimeMetrics from './handler/get-runtime-metrics'
import getDatabaseMetrics from './handler/get-db-metrics'

require('express-async-errors')
const app = express()
Expand All @@ -14,6 +15,7 @@ process.on('uncaughtException', (err: Error) => {
})

app.get('/runtime/metrics/:token', getRuntimeMetrics)
app.get('/database/metrics/:token', getDatabaseMetrics)
app.get('/healthz', (_, res: Response) => res.send('ok'))

// express error capture middleware
Expand Down