Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

selkies image streaming check #62

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ substitutions:
_ENABLE_COS_GPU: "true"
_ENABLE_UBUNTU_GPU: "false"
_WAIT_FOR_IAP: "true"
_TIER3: "true"
tags:
- selkies-deploy
steps:
Expand Down Expand Up @@ -243,3 +244,35 @@ steps:
timeout 1200 bash ./setup/scripts/wait_for_iap.sh broker@${PROJECT_ID}.iam.gserviceaccount.com $${CLIENT_ID} $${ENDPOINT}
waitFor:
- fix-autoneg

###
# Submit the nested Cloud Build in setup/infra/image-streaming, forwarding
# this build's name, action, region and tier3 substitutions to it.
###
- name: gcr.io/cloud-builders/gcloud
  id: deploy-img-streaming
  dir: setup/infra/image-streaming
  args:
    [
      "builds",
      "submit",
      "--substitutions=_NAME=${_NAME},_ACTION=${_ACTION},_REGION=${_REGION},_TIER3=${_TIER3}",
    ]
  waitFor: ["wait-for-iap-2"]

###
# Turn on the Image streaming feature for the existing cluster.
# Note: changing the Image streaming setting on a cluster causes GKE to
# recreate nodes in node pools that inherit the cluster-level setting.
# GKE respects maintenance windows when recreating nodes, but workloads
# might still experience disruptions.
###
- name: gcr.io/cloud-builders/kubectl
  id: "enable-image-streaming"
  entrypoint: bash
  args:
    - "-exec"
    # The doubled dollar signs stop Cloud Build from substituting, so bash
    # reads these values from the step's env at run time.
    - |
      gcloud container clusters update "$${CLOUDSDK_CONTAINER_CLUSTER}" --enable-image-streaming --project="$${PROJECT_ID}" --region="$${CLOUDSDK_COMPUTE_REGION}"
  env:
    - PROJECT_ID=${PROJECT_ID}
    - CLOUDSDK_CORE_PROJECT=${PROJECT_ID}
    - CLOUDSDK_COMPUTE_REGION=${_REGION}
    - CLOUDSDK_CONTAINER_CLUSTER=${_NAME}-${_REGION}
  waitFor:
    - "deploy-img-streaming"
96 changes: 66 additions & 30 deletions images/cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ substitutions:
_USE_CACHE: "false"
_IMAGE_PREFIX: kube-pod-broker
_IMAGE_TAG: latest

_TARGET_REPO: us-docker.pkg.dev/${PROJECT_ID}/selkies-images
#
options:
machineType: N1_HIGHCPU_8
dynamic_substitutions: true

tags:
- selkies-images
Expand All @@ -31,15 +33,23 @@ images: [
# installer
'gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG}',
'gcr.io/${PROJECT_ID}/broker-installer:latest',
'${_TARGET_REPO}/broker-installer:${_IMAGE_TAG}',
'${_TARGET_REPO}/broker-installer:latest',
# controller
'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG}',
'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:latest',
'${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG}',
'${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:latest',
# broker gce proxy
'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG}',
'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:latest',
'${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG}',
'${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:latest',
# autoneg controller
'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG}',
'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:latest',
'${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG}',
'${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:latest'
]

steps:
Expand All @@ -53,14 +63,22 @@ steps:
waitFor: ["-"]
# Build the installer image, add the Artifact Registry tags that the
# top-level `images` list pushes, then run the advisory Image streaming check.
- name: 'gcr.io/cloud-builders/docker'
  id: installer
  entrypoint: 'bash'
  args:
    - '-c'
    - |
      # Stop at the first failing build/tag command. Without this the step's
      # exit status came only from the final "|| true" command, so a failed
      # docker build was reported as success.
      set -e
      docker build -t gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/broker-installer:latest installer
      docker tag gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} ${_TARGET_REPO}/broker-installer:${_IMAGE_TAG}
      docker tag gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} ${_TARGET_REPO}/broker-installer:latest

      # Advisory Image streaming eligibility check. It needs jq, so refresh
      # the package index before installing. Nothing below fails the build.
      export DOCKER_CLI_EXPERIMENTAL=enabled
      (apt-get update && apt-get install -y jq) || echo "[ WARN ] jq could not be installed"
      # Running the script through bash makes the chmod +x unnecessary.
      bash setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} || true
  waitFor:
    - installer-pull


- name: 'gcr.io/cloud-builders/docker'
id: installer-tags
args: [
Expand All @@ -81,14 +99,21 @@ steps:
waitFor: ["-"]
# Build the controller image, add the Artifact Registry tags that the
# top-level `images` list pushes, then run the advisory Image streaming check.
- name: 'gcr.io/cloud-builders/docker'
  id: controller
  entrypoint: 'bash'
  args:
    - '-c'
    - |
      # Stop at the first failing build/tag command. Without this the step's
      # exit status came only from the final "|| true" command, so a failed
      # docker build was reported as success.
      set -e
      docker build -t gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:latest controller
      docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG}
      docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:latest

      # Advisory Image streaming eligibility check. It needs jq, so refresh
      # the package index before installing. Nothing below fails the build.
      export DOCKER_CLI_EXPERIMENTAL=enabled
      (apt-get update && apt-get install -y jq) || echo "[ WARN ] jq could not be installed"
      # Running the script through bash makes the chmod +x unnecessary.
      bash setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} || true
  waitFor:
    - controller-pull

- name: 'gcr.io/cloud-builders/docker'
id: controller-tags
args: [
Expand All @@ -106,17 +131,23 @@ steps:
id: gce-proxy-pull
entrypoint: 'bash'
args: ["-c", "if [[ '${_USE_CACHE}' == 'true' ]]; then (docker pull gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:latest || exit 0); fi"]
waitFor: ["-"]
waitFor: ["-"]
# Build the gce-proxy image, add the Artifact Registry tags that the
# top-level `images` list pushes, then run the advisory Image streaming check.
- name: 'gcr.io/cloud-builders/docker'
  id: gce-proxy
  entrypoint: 'bash'
  args:
    - '-c'
    - |
      # Stop at the first failing build/tag command. Without this the step's
      # exit status came only from the final "|| true" command, so a failed
      # docker build was reported as success.
      set -e
      docker build -t gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:latest gce-proxy
      docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG}
      # The "latest" tag must point at the gce-proxy image built above. It was
      # previously created from the broker-installer image and spelled "laest".
      docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:latest

      # Advisory Image streaming eligibility check. It needs jq, so refresh
      # the package index before installing. Nothing below fails the build.
      export DOCKER_CLI_EXPERIMENTAL=enabled
      (apt-get update && apt-get install -y jq) || echo "[ WARN ] jq could not be installed"
      # Running the script through bash makes the chmod +x unnecessary.
      bash setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} || true
  waitFor:
    - gce-proxy-pull

- name: 'gcr.io/cloud-builders/docker'
id: gce-proxy-tags
args: [
Expand Down Expand Up @@ -160,14 +191,19 @@ steps:
# Build the autoneg-controller image from the gke-autoneg-controller
# directory, add the Artifact Registry tags that the top-level `images` list
# pushes, then run the advisory Image streaming check.
- name: 'gcr.io/cloud-builders/docker'
  id: autoneg-controller-docker
  dir: gke-autoneg-controller
  entrypoint: 'bash'
  args:
    - '-c'
    - |
      # Stop at the first failing build/tag command. Without this the step's
      # exit status came only from the final "|| true" command, so a failed
      # docker build was reported as success.
      set -e
      docker build -t gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:latest .
      docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG}
      # Tag must be spelled "latest" to match the entry in the `images` list.
      docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:latest

      # Advisory Image streaming eligibility check. It needs jq, so refresh
      # the package index before installing. Nothing below fails the build.
      export DOCKER_CLI_EXPERIMENTAL=enabled
      (apt-get update && apt-get install -y jq) || echo "[ WARN ] jq could not be installed"
      # This step runs inside gke-autoneg-controller, so the script is
      # addressed from the workspace root rather than by a relative path.
      bash /workspace/setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} || true
  waitFor:
    - autoneg-controller-build
- name: 'gcr.io/cloud-builders/docker'
id: autoneg-controller-tags
args: [
Expand Down
60 changes: 60 additions & 0 deletions images/setup/scripts/check_docker_image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
# Checks a container image against the GKE Image streaming eligibility rules
# that can be verified from the build host: manifest schema version,
# duplicate layers and empty layers.
#
# Usage:    check_docker_image.sh -i <IMAGE_NAME>
# Requires: docker (with "docker manifest" enabled) and jq.
# Exit:     0 when every check passes, 1 on bad usage or a failed check.
#
# Limitations of GKE image streaming
# 1. You can't use a Secret to pull container images on GKE versions prior to
#    1.23.5-gke.1900.
# 2. Container images that use the V2 Image Manifest, schema version 1 are not
#    eligible.
# 3. Container images encrypted with customer-managed encryption keys (CMEK)
#    are not eligible for Image streaming. GKE downloads these images without
#    streaming the data. You can still use CMEK to protect attached persistent
#    disks and custom boot disks in clusters that use Image streaming.
# 4. Container images with empty layers or duplicate layers are not eligible
#    for Image streaming. GKE downloads these images without streaming the
#    data. Check your container image for empty layers or duplicate layers.
# 5. The Artifact Registry repository must be in the same region as your GKE
#    nodes, or in a multi-region that corresponds with the region where your
#    nodes are running. For example:
#    - If your nodes are in us-east1, Image streaming is available for
#      repositories in the us-east1 region or the us multi-region since both
#      GKE and Artifact Registry are running in data center locations within
#      the United States.
#    - If your nodes are in the northamerica-northeast1 region, the nodes are
#      running in Canada. In this situation, Image streaming is only available
#      for repositories in the same region.
# 6. If your workloads read many files in an image during initialization, you
#    might notice increased initialization times because of the latency added
#    by the remote file reads.
# 7. You might not notice the benefits of Image streaming during the first
#    pull of an eligible image. However, after Image streaming caches the
#    image, future image pulls on any cluster benefit from Image streaming.
# 8. GKE uses the cluster-level configuration to determine whether to enable
#    Image streaming on new node pools created using node auto-provisioning.
#    However, you cannot use workload separation to create node pools with
#    Image streaming enabled when Image streaming is disabled at the cluster
#    level.
# 9. Linux file capabilities such as CAP_NET_RAW are supported with Image
#    streaming in GKE version 1.22.6-gke.300 and later. For previous GKE
#    versions, these capabilities are not available when the image file is
#    streamed, or when the image is saved to the local disk. To avoid
#    potential disruptions, do not use Image streaming for containers with
#    these capabilities in GKE versions prior to 1.22.6-gke.300. If your
#    container relies on Linux file capabilities, it might fail to start with
#    permission denied errors when running with Image streaming enabled.
set -ex

# Print the command-line help.
display_usage() {
  echo -e "\nUsage: $0 -i <IMAGE_NAME>\n"
  echo -e "Argument: \n"
  echo -e "\t -i: IMAGE_NAME"
}

# "-i <IMAGE_NAME>" is two words, so fewer than two arguments is always wrong.
if [ $# -le 1 ]; then
  display_usage
  exit 1
fi

while getopts i:h flag
do
  case "${flag}" in
    i) IMAGE=${OPTARG} ;;
    *) display_usage
       exit 1 ;;
  esac
done

# Two arguments were given but none of them was "-i <IMAGE_NAME>".
if [ -z "${IMAGE:-}" ]; then
  display_usage
  exit 1
fi

# Digest this script treats as the marker of an empty layer.
# NOTE(review): RootFS.Layers holds uncompressed diff IDs; confirm this digest
# is the value that appears there for an empty layer.
EMPTY_LAYER_DIGEST="sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4"

# Query docker once and reuse the output. A failed manifest lookup is handled
# by the schema check below instead of aborting here under "set -e".
MANIFEST=$(docker manifest inspect --verbose "${IMAGE}" || true)

# One digest per line. Raw output avoids the JSON brackets and trailing commas
# that made a duplicate of the last array element compare as different.
LAYER_DIGESTS=$(docker inspect "${IMAGE}" | jq -r '.[].RootFS.Layers[]')

if [[ -z "${LAYER_DIGESTS}" ]]; then
  echo "[ ERROR ] Image ${IMAGE} could not be inspected: no layers found. Is the image present locally?"
  exit 1
fi

if ! grep -q '"schemaVersion": 2,' <<<"${MANIFEST}"; then
  echo "[ ERROR ] Image ${IMAGE} failed to match image streaming criteria. Reason: Docker schema version mismatch, requires schemaVersion: 2"
  echo "[ ERROR ] schemaVersion : $(grep '"schemaVersion"' <<<"${MANIFEST}" || true)"
  exit 1
fi

DUPLICATE_LAYERS=$(printf '%s\n' "${LAYER_DIGESTS}" | sort | uniq -d)
if [[ -n "${DUPLICATE_LAYERS}" ]]; then
  echo "[ ERROR ] Image ${IMAGE} failed to match image streaming criteria. Reason: Duplicate docker layers."
  echo "[ ERROR ] Duplicate layers: ${DUPLICATE_LAYERS}"
  exit 1
fi

if printf '%s\n' "${LAYER_DIGESTS}" | grep -qi "${EMPTY_LAYER_DIGEST}"; then
  echo "[ ERROR ] Image ${IMAGE} failed to match image streaming criteria. Reason: Empty docker layers."
  echo "[ ERROR ] Image contains empty layers with ${EMPTY_LAYER_DIGEST}"
  exit 1
fi

echo "[ INFO ] Success!!! Image ${IMAGE} matching criteria for image streaming."
20 changes: 20 additions & 0 deletions setup/infra/image-streaming/artifact-registry.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2022 The Selkies Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Artifact Registry repository for the Selkies container images.
resource "google_artifact_registry_repository" "selkies-repo" {
# Declared against the google-beta provider.
provider = google-beta
# NOTE(review): images/cloudbuild.yaml sets _TARGET_REPO to
# us-docker.pkg.dev/<project>/selkies-images. Confirm that a repository
# created in var.region is reachable under that host name, or align the two.
location = var.region
# Must match the last path segment of _TARGET_REPO in images/cloudbuild.yaml.
repository_id = "selkies-images"
description = "selkies image artifact registry"
# Stores Docker container images.
format = "DOCKER"
}
50 changes: 50 additions & 0 deletions setup/infra/image-streaming/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2022 The Selkies Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build-wide settings: overall timeout, default substitution values (each can
# be overridden by the caller via --substitutions) and the build tags.
timeout: "10800s"
substitutions:
  _ACTION: "apply"
  _NAME: "broker"
  _REGION: "us-west1"
  _TIER3: "true"
tags: ["selkies-node-pool-image-streaming", "selkies-image-streaming"]
steps:
###
# Make the GCS bucket that stores the terraform state.
# Best effort: a failing "gsutil mb" is ignored by the trailing "|| true".
###
- name: gcr.io/cloud-builders/gsutil
  id: create-tf-state-bucket
  entrypoint: /bin/bash
  args:
    - '-xe'
    - '-c'
    - |
      gsutil mb gs://${PROJECT_ID}-${_NAME}-tf-state || true
  waitFor:
    - "-"

###
# Run the installer image's deploy.sh against the image-streaming terraform.
# The TF_VAR_* entries pass the build substitutions through as terraform
# variables; ACTION carries _ACTION (default "apply").
###
- name: "gcr.io/${PROJECT_ID}/${_NAME}-installer"
  id: "terraform-apply"
  entrypoint: "/workspace/deploy.sh"
  env:
    - "TF_VAR_project_id=${PROJECT_ID}"
    - "TF_VAR_name=${_NAME}"
    - "TF_VAR_region=${_REGION}"
    - "TF_VAR_tier3_pool_enabled=${_TIER3}"
    # Workspace name was misspelled "image-straming".
    # NOTE(review): if this build has already run with the old spelling, the
    # existing state sits in that workspace and has to be migrated.
    - "TERRAFORM_WORKSPACE_NAME=image-streaming-${_REGION}"
    - "ACTION=${_ACTION}"
Loading