Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[strudel] Add a new cluster for strudel #4908

Merged
merged 3 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions config/clusters/strudel/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Deployer configuration for the "strudel" cluster.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm against the deployer's cluster schema (in particular that `billing`
# belongs under `aws`).
name: strudel
provider: aws # https://2i2c.awsapps.com/start#/
aws:
  # SOPS-encrypted deployer credentials file for this cluster
  key: enc-deployer-credentials.secret.json
  clusterType: eks
  clusterName: strudel
  region: us-west-2
  billing:
    paid_by_us: true
support:
  # Helm values files for the cluster's support deployment
  helm_chart_values_files:
    - support.values.yaml
    - enc-support.secret.values.yaml
# No hubs are defined yet; `[]` is kept on its own line so it can be replaced
# by the hub entries that are commented out below.
hubs:
  []
# Uncomment the lines below once the support infrastructure has been deployed
# and you are ready to add the first hub

# - name: <hub_name>
# # Tip: consider changing this to something more human friendly
# display_name: "strudel - <hub_name>"
# domain: <hub_name>.strudel.2i2c.cloud
# helm_chart: basehub
# helm_chart_values_files:
# - common.values.yaml
# - <hub_name>.values.yaml
# - enc-<hub_name>.secret.values.yaml
25 changes: 25 additions & 0 deletions config/clusters/strudel/enc-deployer-credentials.secret.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"AccessKey": {
"AccessKeyId": "ENC[AES256_GCM,data:kNdAoKD/oxwtn4YAju7YN/j2/Kk=,iv:VCjHfPzCjbBxSrCh23iNCcn69/Ri68Br9bbcqhkXf4Q=,tag:B6IAiXhJspYpHx97E3zK2Q==,type:str]",
"SecretAccessKey": "ENC[AES256_GCM,data:WU8cfEuGyItKQwML2zegDE+1pmTapu2SmgfRoa6xlfeFy6PA3PpTgA==,iv:s5mlEIDHmkxn/vfjupPKofkvSRpz0wHYkHJrVPzYYNA=,tag:CC8nCy/58Dc5neAgpVr2jQ==,type:str]",
"UserName": "ENC[AES256_GCM,data:TOQWF4T+l36X+LIOa7mk5ZAcUrBWHSk=,iv:Sa/dIB5JO7Xy+TO/iimvgSB3fFW8TTqKUGxjATFa1dE=,tag:X46VYj5gESWF1tU7e5fHVQ==,type:str]"
},
"sops": {
"kms": null,
"gcp_kms": [
{
"resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs",
"created_at": "2024-10-01T12:40:28Z",
"enc": "CiUA4OM7eB2kdN8iHQE9iXrcGgIVn6xR9gICPqUxX3b13IugoLojEkkA5dG1Q6v/STpOaG+GzdyspkF8iBIun/SFzYVxuRYTTdqhHkNd/opNYMTbYSNvle+Uey9hT+bHRUGK6/8U83ga/XU+0cpJhCFh"
}
],
"azure_kv": null,
"hc_vault": null,
"age": null,
"lastmodified": "2024-10-01T12:40:28Z",
"mac": "ENC[AES256_GCM,data:oWvVIi5dQv+lfbfc1ff3PGoenVADkrGpQG4Bc3/CqRVRuw2cmSfO9TD1c8awmL3Hwtlc4fMtelT2eCATnzqyrsdoBypGscDRVCGreLYPbspsnd/sL8Brn/bQ1OfmobRGWp0VoKHcTldUDvW/g1TEyYOf4ahwxcE+G5HEXl93Geg=,iv:jEKD1u3/Ml6SoqIRbIAYa25DvQYBmyhBhXa/rTLpd/A=,tag:5lMtrIzbmiJ9SeAS1I+vAA==,type:str]",
"pgp": null,
"unencrypted_suffix": "_unencrypted",
"version": "3.8.1"
}
}
15 changes: 15 additions & 0 deletions config/clusters/strudel/enc-grafana-token.secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
grafana_token: ENC[AES256_GCM,data:irWWJLHCQeeaW37qi0BHDiLfFdJ9vctBjEI0owv+2xSY1MerlE3MY+Mmi20lTg==,iv:5KPENEOcETgtEkVti1prJ0qsmm5+lSepUTZ/5O3TIvA=,tag:OgeLAIgFpek4zyG5rhtY7w==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2024-10-01T12:59:53Z"
enc: CiUA4OM7eNrdiaLiPocfJTCLBwuM9YbI9qK1iBBp8+xYLziPl3ncEkkA5dG1Q93A5dIvMNu3945Iw/yUFQLgEofvzwhJYfegRLvKywnyzPmRHo2IfphssdM5Pe0JXpxywt0jv2nXEGyX+z2z+iYuGDD0
azure_kv: []
hc_vault: []
age: []
lastmodified: "2024-10-01T12:59:53Z"
mac: ENC[AES256_GCM,data:oNJ6wS4gCnMYyGOcWnDcP3X5HWT7bHPNjA/CTrljKEftzp0HtZJab66rJe94jgNnti007HfV34BD+tkikYPXo/8MAEcKROJ++Hj/JbUDOR0vYpcYQuOJTlYKGH52g41WlsENgRSxulO+wb7OQykokQNR98ihOCrrq0mmLXj3fG4=,iv:/xfa4gtfSakXKTz+8To4PeARl1U84p0EjWaXTbGtN8M=,tag:kq71ofPuXPpGnop38HDw4Q==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.8.1
17 changes: 17 additions & 0 deletions config/clusters/strudel/enc-support.secret.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
prometheusIngressAuthSecret:
username: ENC[AES256_GCM,data:RagJNkNWnDQQhDKNBa/C3OWom8PaXyBCNIG8e/blM1MzO2k8tL/CVKU+BMMHTyldOm/T1wuLlnwKhyVcqK2puQ==,iv:5hT2IJPZM1vrywr1E5YeoSnLya5FE6nic1GjURTP/jQ=,tag:nZXWjP9ewxgc67MROlldUQ==,type:str]
password: ENC[AES256_GCM,data:jmosU2aQZy+JVYfEaY+TANdQjM3zdfKW68+7adKbQUpXk9C+31VuDtTxMsnuuj+90E8GlCuzJSikEUmEt95v8w==,iv:TaRGGIQLgaGbdK6qY1eZ0A4Bp+WavwJTPdEhismeNhU=,tag:/BnvmxLD+yreNvbz0CieUw==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2024-10-01T12:10:40Z"
enc: CiUA4OM7eGXBxjEDNyx0Oz9+3To+ZtgJesA4jNahoZs8jXtr0MixEkgA5dG1Q77AFs6f/oCbIoHxf24xIf3P5kbKRxO2YcS3w93MoMHSdSETfOTBBi2qNJSE5WSdqzhFvUYQhBCYLbZDDEa3NeSnPyI=
azure_kv: []
hc_vault: []
age: []
lastmodified: "2024-10-01T12:10:41Z"
mac: ENC[AES256_GCM,data:xthk4pWv0PawZjq4HXMo7JXgrDu7mpTBJKEO8ZoNq0OBLn4vvZMnCze7PCoFEjFm7Pw4C3BeyyUd1f4UXoXk8jp4T/snJxZmYnSAaSqAPv4wDsFB4/yAAxbwWgqrjpIVvUTOeTEBQfgaVhUs+fiIotk76wi3oqYWsQFUZxKiiBs=,iv:Uu8uwR9YpHE05VrZ6kjUHF/EzlLGKAfGELYJvP/tuKE=,tag:mlRfOuo0M2rmEsMdiqyPfA==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.8.1
34 changes: 34 additions & 0 deletions config/clusters/strudel/support.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Support chart values for the "strudel" cluster.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm against the support chart's values schema.

# Enables the basic-auth secret for the prometheus ingress; the username and
# password themselves live in enc-support.secret.values.yaml.
prometheusIngressAuthSecret:
  enabled: true

prometheus:
  server:
    ingress:
      enabled: true
      hosts:
        - prometheus.strudel.2i2c.cloud
      tls:
        - secretName: prometheus-tls
          hosts:
            - prometheus.strudel.2i2c.cloud

grafana:
  grafana.ini:
    server:
      root_url: https://grafana.strudel.2i2c.cloud/
    # GitHub login, restricted to members of the listed organization
    auth.github:
      enabled: true
      allowed_organizations: 2i2c-org
  ingress:
    hosts:
      - grafana.strudel.2i2c.cloud
    tls:
      - secretName: grafana-tls
        hosts:
          - grafana.strudel.2i2c.cloud

cluster-autoscaler:
  enabled: true
  autoDiscovery:
    # Must match the EKS cluster name
    clusterName: strudel
  awsRegion: us-west-2
21 changes: 21 additions & 0 deletions eksctl/ssh-keys/secret/strudel.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"data": "ENC[AES256_GCM,data:yvTr6FJ/eM95o/f7OWeltvM4R946+kFHY+AH174HWlbgyIFBdM7Yh9LKFR8WCHw5DJCOhGL/YTCZu+hrPOfOBIYPrC4pObvwaHfAHAOOBd4ssAiGQ6+GmusCVyQW9gWYwi/7vUmCMJA+yh3TQp894uP57xHA6UmSAh+FDLFfji/SctQzd/dU4j0iyRW6O+jx5SZVy6kDm0EXN2grH9KnEg++Rxr8quI7EPKovCWgE2JHKA9Afgi7WT4BDHJMAa1INZ+G+IsZG+1tQXt4q9GzQR228d+k+KZDJRnRNltcoAbrj2F/sO/o6BJYbNnsGiCDAteOuxxVHwDJ15NY3/rkP9nrj7kJYs3Gmtrr5Sij4hW/IaV4TnbuRJ6dA+xx2f/lkxVLtrx1klp7+vOnCl6HXL0ofK2BHY+psPXCsGAVqpi0TS8LKE6ppPlOjvBVGZ9MnCHMNR2MJHBNNPjEJcyt423YH865qpXQQUn68cVUb0VcCrgFzUH4KJEnQM9I5O8tab6MAECeso0OT2voLbofvQkRVx8OO6muD/PMH7fNn+xl354=,iv:oLWAktaCUvREppww1QQE7kZ93jn0BjmICNEmOPbAwsg=,tag:TLgrqKicbZr/rr7kNN4PjQ==,type:str]",
"sops": {
"kms": null,
"gcp_kms": [
{
"resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs",
"created_at": "2024-10-01T12:10:40Z",
"enc": "CiUA4OM7eJVYe/0dfyY4i2wf9zIxDj9meumOxnttGOBtMejjtGv4EkkA5dG1Q6xxPAzY9z/qiU94p2j2sxTH0ULmYyFEPIpZUGva6kH97RSikZQhpGfJJA7BCTH+Yh9bAg+TR+EP2zwkO9tcIfap07df"
}
],
"azure_kv": null,
"hc_vault": null,
"age": null,
"lastmodified": "2024-10-01T12:10:40Z",
"mac": "ENC[AES256_GCM,data:vT3CMaPPgm9psq3Ipa3YGGRgJSIoGRub/2xE4UcgDOcHyGxTbwGsifQ/NbJh353LIt7lpQ1xe2anNEqZtnjfqmv4TGzOcKslTV2xxPlBOqj6/Y6JckK/e/lNKY62hpb39cpP/AM3QJCxN/WA8kD8wmeTeswU9ryPz/c4oZEy0GE=,iv:FvsAiVtT2GySTbLUAj3GjNqdNz0FkFB4f3bjgpO3OBM=,tag:9yyehA7BvmWnCsS4fCLjXg==,type:str]",
"pgp": null,
"unencrypted_suffix": "_unencrypted",
"version": "3.8.1"
}
}
1 change: 1 addition & 0 deletions eksctl/ssh-keys/strudel.key.pub
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINISV8h2v7zSIUlVNgslOKIEsRN9EO5h22wwPm3T4QvW [email protected]
155 changes: 155 additions & 0 deletions eksctl/strudel.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
This file is a jsonnet template of an eksctl cluster configuration file,
that is used with the eksctl CLI to both update and initialize an AWS EKS
based cluster.

This file has in turn been generated from eksctl/template.jsonnet which is
relevant to compare with for changes over time.

To use jsonnet to generate an eksctl configuration file from this, do:

jsonnet strudel.jsonnet > strudel.eksctl.yaml

References:
- https://eksctl.io/usage/schema/
*/
local ng = import "./libsonnet/nodegroup.jsonnet";

// Region and availability zones for the cluster.
// clusterRegion is used as the cluster's metadata.region, masterAzs as the
// cluster-level availabilityZones, and nodeAz as the single availability
// zone every node group below is placed in.
local clusterRegion = "us-west-2";
local masterAzs = ["us-west-2a", "us-west-2b", "us-west-2c"];
local nodeAz = "us-west-2a";

// Node definitions for notebook nodes. Each entry here is merged
// into our notebook node group definition, producing one node group
// per entry.
// A `node.kubernetes.io/instance-type` label is added, so pods
// can request a particular kind of node with a nodeSelector
local notebookNodes = [
{ instanceType: "r5.xlarge" },
{ instanceType: "r5.4xlarge" },
{ instanceType: "r5.16xlarge" },
];
// Node definitions for dask worker nodes, merged into the dask node group
// definition the same way. Empty here, so no dask node groups are generated.
local daskNodes = [];


// eksctl ClusterConfig for the "strudel" cluster: cluster metadata, EKS
// addons, and the core / notebook / dask node groups.
{
  apiVersion: 'eksctl.io/v1alpha5',
  kind: 'ClusterConfig',
  metadata+: {
    name: "strudel",
    region: clusterRegion,
    version: "1.30",
    tags+: {
      "ManagedBy": "2i2c",
      "2i2c.org/cluster-name": $.metadata.name,
    },
  },
  availabilityZones: masterAzs,
  iam: {
    withOIDC: true,
  },
  // If you add an addon to this config, run the create addon command.
  //
  //    eksctl create addon --config-file=strudel.eksctl.yaml
  //
  // Every addon below gets `version: "latest"` and the cluster's tags, with
  // the addon's own fields merged on top.
  addons: [
    { version: "latest", tags: $.metadata.tags } + addon
    for addon in
    [
      {
        name: "vpc-cni",
        # FIXME: network policy enforcement doesn't work, what's wrong
        #        isn't clear.
        # configurationValues ref: https://github.com/aws/amazon-vpc-cni-k8s/blob/HEAD/charts/aws-vpc-cni/values.yaml
        //
        // NOTE: the body of a `|||` text block must be indented; the
        // indentation of its first line is stripped from the result.
        configurationValues: |||
          enableNetworkPolicy: "true"
        |||,
        attachPolicyARNs: ["arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"],
      },
      { name: "coredns" },
      { name: "kube-proxy" },
      {
        // aws-ebs-csi-driver ensures that our PVCs are bound to PVs that
        // couple to AWS EBS based storage, without it expect to see pods
        // mounting a PVC failing to schedule and PVC resources that are
        // unbound.
        //
        // Related docs: https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html
        //
        name: "aws-ebs-csi-driver",
        wellKnownPolicies: {
          ebsCSIController: true,
        },
        # configurationValues ref: https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/HEAD/charts/aws-ebs-csi-driver/values.yaml
        //
        // The text block is YAML, so `enabled` must stay nested under
        // `defaultStorageClass`.
        configurationValues: |||
          defaultStorageClass:
            enabled: true
        |||,
      },
    ]
  ],
  // Every node group is built on top of `ng` and has the cluster name
  // added to it.
  nodeGroups: [
    n + {clusterName: $.metadata.name} for n in
    [
      // Core node group: always keeps at least one node running.
      ng + {
        namePrefix: 'core',
        nameSuffix: 'a',
        nameIncludeInstanceType: false,
        availabilityZones: [nodeAz],
        ssh: {
          publicKeyPath: 'ssh-keys/strudel.key.pub'
        },
        instanceType: "r5.xlarge",
        minSize: 1,
        maxSize: 6,
        labels+: {
          "hub.jupyter.org/node-purpose": "core",
          "k8s.dask.org/node-purpose": "core",
        },
      },
    ] + [
      // One notebook node group per entry in notebookNodes; these can
      // scale down to zero nodes.
      ng + {
        namePrefix: 'nb',
        availabilityZones: [nodeAz],
        minSize: 0,
        maxSize: 500,
        instanceType: n.instanceType,
        ssh: {
          publicKeyPath: 'ssh-keys/strudel.key.pub'
        },
        labels+: {
          "hub.jupyter.org/node-purpose": "user",
          "k8s.dask.org/node-purpose": "scheduler"
        },
        taints+: {
          "hub.jupyter.org_dedicated": "user:NoSchedule",
          "hub.jupyter.org/dedicated": "user:NoSchedule",
        },
      } + n for n in notebookNodes
    ] + ( if daskNodes != null then
      [
        // One dask worker node group per entry in daskNodes. Both
        // on-demand settings are 0, so no on-demand capacity is requested.
        ng + {
          namePrefix: 'dask',
          availabilityZones: [nodeAz],
          minSize: 0,
          maxSize: 500,
          ssh: {
            publicKeyPath: 'ssh-keys/strudel.key.pub'
          },
          labels+: {
            "k8s.dask.org/node-purpose": "worker"
          },
          taints+: {
            "k8s.dask.org_dedicated" : "worker:NoSchedule",
            "k8s.dask.org/dedicated" : "worker:NoSchedule",
          },
          instancesDistribution+: {
            onDemandBaseCapacity: 0,
            onDemandPercentageAboveBaseCapacity: 0,
            spotAllocationStrategy: "capacity-optimized",
          },
        } + n for n in daskNodes
      ] else []
    )
  ]
}
30 changes: 30 additions & 0 deletions terraform/aws/projects/strudel.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Some of the assumptions this jinja2 template makes about the cluster:
- location of the nodes of the kubernetes cluster will be <region>a
- no default scratch buckets support
*/
# Region and name of the cluster these variables describe.
region       = "us-west-2"
cluster_name = "strudel"

# Location of the cluster's nodes; per the assumption in the header comment
# this is "<region>a".
cluster_nodes_location = "us-west-2a"

# Tip: uncomment and fill the missing info in the lines below if you want
# to setup scratch buckets for the hubs on this cluster.
#
#user_buckets = {
# "scratch-staging" : {
# "delete_after" : 7,
# },
# # Tip: add more scratch buckets below, if this cluster will be multi-tenant
#}

# Tip: uncomment and fill the missing info in the lines below if you want
# to setup specific cloud permissions for the buckets in this cluster.
#
#hub_cloud_permissions = {
# "staging" : {
# "user-sa" : {
# bucket_admin_access : ["scratch-staging"],
# },
# },
# # Tip: add more namespaces below, if this cluster will be multi-tenant
#}