From f5cf5251026f64800db270e1d233b9127e2ce3ff Mon Sep 17 00:00:00 2001 From: Fabian Meyer <3982806+meyfa@users.noreply.github.com> Date: Sun, 14 Jul 2024 17:41:08 +0200 Subject: [PATCH] fix: Serialize access to pod logs Pod logs are a major performance bottleneck, both due to enormous data transfers from the Kubernetes API and due to formatting being a expensive. Before this patch, if multiple clients requested the same log simultaneously, these expensive operations were done multiple times in parallel. With this patch, mutexes are added around log retrieval and log formatting such that only one such operation can execute at any time. In the multi-client scenario, the client arriving first will lock the mutex and retrieve the logs. When this client unlocks the mutex and another client is allowed through, the logs will already be cached and the request returns immediately. Performance tests using the `ab` CLI tool confirm this: One of the requests still takes the same time as before, but all other requests are much faster. Memory requirements are reduced from O(client count) to O(1). In case multiple clients request different logs each, in the worst case, the response time may be slightly worse, but probably not by much on average: Requesting logs in parallel from Kubernetes is likely not faster than requesting in series due to storage or network bottlenecks, and since Foreman is single-threaded, formatting in parallel is also not faster. --- backend/src/api/pod-logs.ts | 23 ++++++++++++++------ backend/src/controllers/logs.ts | 38 +++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/backend/src/api/pod-logs.ts b/backend/src/api/pod-logs.ts index 852d05e..27f4ee4 100644 --- a/backend/src/api/pod-logs.ts +++ b/backend/src/api/pod-logs.ts @@ -5,6 +5,7 @@ import { authenticateSession } from '../auth/common.js' import { prettifyLogs } from '../renovate/prettify-logs.js' import { enums, optional, type } from 'superstruct' import { WeakCache } from '../util/cache.js' +import { Mutex } from '../util/mutex.js' export interface PodLogsRoute { Reply: string @@ -17,8 +18,10 @@ const logsQuerystringSchema = type({ export const podLogsRoute = ({ logsController }: Controllers): FastifyPluginAsync => async (app) => { app.addHook('preValidation', authenticateSession()) - // Prettifying is an expensive operation. Cache the results. + // Prettifying is an expensive operation. Cache the results, and only allow one prettification at a time. + // The latter helps if multiple clients request the same logs at the same time since no const prettyLogsCache = new WeakCache<{ data: string }>(5000) + const prettyLogsMutex = new Mutex() app.get { - return { - data: await prettifyLogs(logs) - } - }) - return result?.data + let unlock + try { + unlock = await prettyLogsMutex.lock() + const result = await prettyLogsCache.lazyCompute([request.params.namespace, request.params.name], async () => { + return { + data: await prettifyLogs(logs) + } + }) + return result?.data + } finally { + unlock?.() + } } return logs }) diff --git a/backend/src/controllers/logs.ts b/backend/src/controllers/logs.ts index 6662e3e..20cc922 100644 --- a/backend/src/controllers/logs.ts +++ b/backend/src/controllers/logs.ts @@ -1,12 +1,16 @@ import { KubernetesApi } from '../kubernetes/api.js' import { PodController } from './pod.js' import { WeakCache } from '../util/cache.js' +import { Mutex } from '../util/mutex.js' export class LogsController { // The log strings may be large, so we use a weak cache to allow them to be garbage collected. // However, it's impossible to create a WeakRef to a string, so we wrap the string in an object. private readonly cache = new WeakCache<{ data: string }>(5000) + // Only allow fetching one log at a time. + private readonly mutex = new Mutex() + constructor ( private readonly k8s: KubernetesApi, private readonly podController: PodController @@ -17,20 +21,26 @@ export class LogsController { namespace: string name: string }): Promise { - // Input must relate to an existing pod - const podItem = await this.podController.findPod(pod) - if (podItem == null) { - return undefined - } - // Requesting logs for a pod that is starting is invalid and will result in a 400 error. - if (podItem.status?.phase === 'Pending' || podItem.status?.phase === 'Unknown') { - return undefined - } - const result = await this.cache.lazyCompute([pod.namespace, pod.name], async () => { - return { - data: await this.k8s.getPodLogs(pod) + let unlock + try { + unlock = await this.mutex.lock() + // Input must relate to an existing pod + const podItem = await this.podController.findPod(pod) + if (podItem == null) { + return undefined } - }) - return result?.data + // Requesting logs for a pod that is starting is invalid and will result in a 400 error. + if (podItem.status?.phase === 'Pending' || podItem.status?.phase === 'Unknown') { + return undefined + } + const result = await this.cache.lazyCompute([pod.namespace, pod.name], async () => { + return { + data: await this.k8s.getPodLogs(pod) + } + }) + return result?.data + } finally { + unlock?.() + } } }