From 50b20695959f843b06313086e731a83378dfb123 Mon Sep 17 00:00:00 2001
From: David Donchez
Date: Fri, 17 Nov 2023 11:35:40 +0100
Subject: [PATCH] chore(helm): disable delete-untagged parameter in Distribution GC job

---
 README.md                               | 5 +++++
 helm/kube-image-keeper/README.md.gotmpl | 5 +++++
 helm/kube-image-keeper/values.yaml      | 4 ++--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d6090dc5..6cd12570 100644
--- a/README.md
+++ b/README.md
@@ -253,3 +253,8 @@ Howevever, when using kuik, once an image has been pulled and stored in kuik's r
 ### Cluster autoscaling delays
 
 With kuik, all image pulls (except in the namespaces excluded from kuik) go through kuik's registry proxy, which runs on each node thanks to a DaemonSet. When a node gets added to a Kubernetes cluster (for instance, by the cluster autoscaler), a kuik registry proxy Pod gets scheduled on that node, but it will take a brief moment to start. During that time, all other image pulls will fail. Thanks to Kubernetes automatic retry mechanisms, they will eventually succeed, but on new nodes, you may see Pods in `ErrImagePull` or `ImagePullBackOff` status for a minute before everything works correctly. If you are using cluster autoscaling and try to achieve very fast scale-up times, this is something that you might want to keep in mind.
+
+
+### Garbage collection issue
+
+We use Docker Distribution in kuik, along with its integrated garbage collection tool. The garbage collector has a bug that can make it crash when untagged images have been pushed to the registry, potentially leaving the registry in read-only mode and unusable. Until a permanent fix is available, we advise keeping the value `registry.garbageCollection.deleteUntagged` set to `false`.
diff --git a/helm/kube-image-keeper/README.md.gotmpl b/helm/kube-image-keeper/README.md.gotmpl
index b1f3a1ae..a4ee585e 100644
--- a/helm/kube-image-keeper/README.md.gotmpl
+++ b/helm/kube-image-keeper/README.md.gotmpl
@@ -243,6 +243,11 @@ Howevever, when using kuik, once an image has been pulled and stored in kuik's r
 
 With kuik, all image pulls (except in the namespaces excluded from kuik) go through kuik's registry proxy, which runs on each node thanks to a DaemonSet. When a node gets added to a Kubernetes cluster (for instance, by the cluster autoscaler), a kuik registry proxy Pod gets scheduled on that node, but it will take a brief moment to start. During that time, all other image pulls will fail. Thanks to Kubernetes automatic retry mechanisms, they will eventually succeed, but on new nodes, you may see Pods in `ErrImagePull` or `ImagePullBackOff` status for a minute before everything works correctly. If you are using cluster autoscaling and try to achieve very fast scale-up times, this is something that you might want to keep in mind.
 
+### Garbage collection issue
+
+We use Docker Distribution in kuik, along with its integrated garbage collection tool. The garbage collector has a bug that can make it crash when untagged images have been pushed to the registry, potentially leaving the registry in read-only mode and unusable. Until a permanent fix is available, we advise keeping the value `registry.garbageCollection.deleteUntagged` set to `false`.
+
+
 ## License
 
 MIT License
diff --git a/helm/kube-image-keeper/values.yaml b/helm/kube-image-keeper/values.yaml
index 39c0bd53..8aba5257 100644
--- a/helm/kube-image-keeper/values.yaml
+++ b/helm/kube-image-keeper/values.yaml
@@ -193,8 +193,8 @@ registry:
   garbageCollection:
     # -- Garbage collector cron schedule. Use standard crontab format.
     schedule: "0 0 * * 0"
-    # -- If true, delete untagged manifests
-    deleteUntagged: true
+    # -- If true, delete untagged manifests. Defaults to false because of a known bug in the **docker distribution** garbage collection job.
+    deleteUntagged: false
   service:
     # -- Registry service type
     type: ClusterIP
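
For illustration, a minimal sketch of a Helm values override matching this patch could look like the following; the file name is arbitrary, and only the `registry.garbageCollection` keys shown in the chart's values.yaml above are used:

```yaml
# gc-values.yaml -- example override for the kube-image-keeper chart
registry:
  garbageCollection:
    # Weekly garbage collection, unchanged from the chart default
    schedule: "0 0 * * 0"
    # Keep untagged manifests until the Distribution GC bug is fixed upstream
    deleteUntagged: false
```

Such a file would be passed to `helm install` or `helm upgrade` with the `--values` (or `-f`) flag.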