From 26535cedf2510d031c8e81a7d2f6f82dd4a8c40e Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 8 Aug 2024 13:28:03 +0300 Subject: [PATCH 01/14] Add a new nodepool for binder --- eksctl/nasa-veda.jsonnet | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index 70b29cf05..7dc318f2e 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -28,6 +28,15 @@ local notebookNodes = [ { instanceType: "r5.xlarge" }, { instanceType: "r5.4xlarge" }, { instanceType: "r5.16xlarge" }, + { + instanceType: "r5.xlarge", + namePrefix: "nb-binder", + labels+: { "2i2c/hub-name": "binder" }, + tags+: { "2i2c:hub-name": "binder" }, + taints+: { + "2i2c/hub-name": "binder" + }, + }, ]; local daskNodes = [ From 8b94c75cbdbb2e601774d1e38507193fc8d681e3 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 8 Aug 2024 13:31:50 +0300 Subject: [PATCH 02/14] Schedule nodes to run on the dedicated binder nodepool --- config/clusters/nasa-veda/binder.values.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/config/clusters/nasa-veda/binder.values.yaml b/config/clusters/nasa-veda/binder.values.yaml index d39d68f53..dd34f1102 100644 --- a/config/clusters/nasa-veda/binder.values.yaml +++ b/config/clusters/nasa-veda/binder.values.yaml @@ -28,9 +28,12 @@ jupyterhub: extraVolumeMounts: [] singleuser: nodeSelector: - # Schedule users on the smallest instance - # https://github.com/2i2c-org/infrastructure/issues/4241 - node.kubernetes.io/instance-type: r5.xlarge + 2i2c/hub-name: "binder" + extraTolerations: + - key: 2i2c.org/hub-name + operator: Equal + value: binder + effect: NoSchedule memory: guarantee: 1G limit: 2G @@ -92,9 +95,7 @@ binderhub-service: enabled: true dockerApi: nodeSelector: - # Schedule dockerApi pods to run on the smallest user nodes only - # https://github.com/2i2c-org/infrastructure/issues/4241 - node.kubernetes.io/instance-type: r5.xlarge + 2i2c/hub-name: 
"binder" ingress: enabled: true hosts: [binder.openveda.cloud] From e24a5d33cc8946e2320e0fc3b1f8c22c2b9e04f9 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Thu, 8 Aug 2024 13:32:45 +0300 Subject: [PATCH 03/14] Add a scratch bucket and set the extra_iam_policy as the staging hub --- terraform/aws/projects/nasa-veda.tfvars | 69 +++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/terraform/aws/projects/nasa-veda.tfvars b/terraform/aws/projects/nasa-veda.tfvars index 3d64aba58..de2decdd2 100644 --- a/terraform/aws/projects/nasa-veda.tfvars +++ b/terraform/aws/projects/nasa-veda.tfvars @@ -18,6 +18,9 @@ user_buckets = { "scratch" : { "delete_after" : 7 }, + "scratch-binder" : { + "delete_after" : 1 + }, } @@ -150,4 +153,70 @@ hub_cloud_permissions = { EOT }, }, + "binder" : { + "user-sa" : { + bucket_admin_access : ["scratch-binder"], + extra_iam_policy : <<-EOT + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:ListBucketMultipartUploads", + "s3:AbortMultipartUpload", + "s3:ListBucketVersions", + "s3:CreateBucket", + "s3:ListBucket", + "s3:DeleteObject", + "s3:GetBucketLocation", + "s3:ListMultipartUploadParts" + ], + "Resource": [ + "arn:aws:s3:::veda-data-store", + "arn:aws:s3:::veda-data-store/*", + "arn:aws:s3:::veda-data-store-staging", + "arn:aws:s3:::veda-data-store-staging/*", + "arn:aws:s3:::veda-nex-gddp-cmip6-public", + "arn:aws:s3:::veda-nex-gddp-cmip6-public/*", + "arn:aws:s3:::cmip6-staging", + "arn:aws:s3:::cmip6-staging/*", + "arn:aws:s3:::lp-prod-protected", + "arn:aws:s3:::lp-prod-protected/*", + "arn:aws:s3:::gesdisc-cumulus-prod-protected", + "arn:aws:s3:::gesdisc-cumulus-prod-protected/*", + "arn:aws:s3:::nsidc-cumulus-prod-protected", + "arn:aws:s3:::nsidc-cumulus-prod-protected/*", + "arn:aws:s3:::ornl-cumulus-prod-protected", + "arn:aws:s3:::ornl-cumulus-prod-protected/*", + "arn:aws:s3:::pangeo-forge-veda-output", + 
"arn:aws:s3:::pangeo-forge-veda-output/*", + "arn:aws:s3:::podaac-ops-cumulus-public", + "arn:aws:s3:::podaac-ops-cumulus-public/*", + "arn:aws:s3:::podaac-ops-cumulus-protected", + "arn:aws:s3:::podaac-ops-cumulus-protected/*", + "arn:aws:s3:::maap-ops-workspace", + "arn:aws:s3:::maap-ops-workspace/*", + "arn:aws:s3:::nasa-maap-data-store", + "arn:aws:s3:::nasa-maap-data-store/*", + "arn:aws:s3:::sdap-dev-zarr", + "arn:aws:s3:::sdap-dev-zarr/*", + "arn:aws:s3:::usgs-landsat", + "arn:aws:s3:::usgs-landsat/*", + "arn:aws:s3:::sentinel-cogs", + "arn:aws:s3:::sentinel-cogs/*" + ] + }, + { + "Effect": "Allow", + "Action": "s3:ListAllMyBuckets", + "Resource": "*" + } + ] + } + EOT + }, + }, } From fdb5f2c73e33dc1d998c0ade9d09260e7b4cd92e Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 8 Aug 2024 15:53:08 +0100 Subject: [PATCH 04/14] Add valid taint effect --- eksctl/nasa-veda.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index 7dc318f2e..36b01cc19 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -34,7 +34,7 @@ local notebookNodes = [ labels+: { "2i2c/hub-name": "binder" }, tags+: { "2i2c:hub-name": "binder" }, taints+: { - "2i2c/hub-name": "binder" + "2i2c/hub-name": "binder:NoSchedule" }, }, ]; From a35fc17a8d49a6aa6abd61af2a60be04256229ed Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 8 Aug 2024 16:01:13 +0100 Subject: [PATCH 05/14] Add annotation for with extra_iam_policy for bucket access --- config/clusters/nasa-veda/binder.values.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/clusters/nasa-veda/binder.values.yaml b/config/clusters/nasa-veda/binder.values.yaml index dd34f1102..415310fbc 100644 --- a/config/clusters/nasa-veda/binder.values.yaml +++ b/config/clusters/nasa-veda/binder.values.yaml @@ -1,3 +1,6 @@ +userServiceAccount: + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::444055461661:role/nasa-veda-binder 
jupyterhub: ingress: hosts: From c0baff3244550c3877a7464d2ee33187c99e281e Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Thu, 8 Aug 2024 16:51:11 +0100 Subject: [PATCH 06/14] Fix toleration key --- config/clusters/nasa-veda/binder.values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/clusters/nasa-veda/binder.values.yaml b/config/clusters/nasa-veda/binder.values.yaml index 415310fbc..691e35a89 100644 --- a/config/clusters/nasa-veda/binder.values.yaml +++ b/config/clusters/nasa-veda/binder.values.yaml @@ -33,7 +33,7 @@ jupyterhub: nodeSelector: 2i2c/hub-name: "binder" extraTolerations: - - key: 2i2c.org/hub-name + - key: 2i2c/hub-name operator: Equal value: binder effect: NoSchedule From 263fc911b6ff5da4ed3cd22c244d2e00304248de Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 9 Aug 2024 10:38:29 +0100 Subject: [PATCH 07/14] Note nodegroup tainted for deletion --- eksctl/nasa-veda.jsonnet | 1 + 1 file changed, 1 insertion(+) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index 36b01cc19..7fb3a9020 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -26,6 +26,7 @@ local nodeAz = "us-west-2a"; // can request a particular kind of node with a nodeSelector local notebookNodes = [ { instanceType: "r5.xlarge" }, + { instanceType: "r5.xlarge", nameSuffix: "b" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade { instanceType: "r5.4xlarge" }, { instanceType: "r5.16xlarge" }, { From 49b005457ebfb2acea25031449c71c731dbc3fa6 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 9 Aug 2024 13:14:15 +0100 Subject: [PATCH 08/14] Add node-purpose tags to nodegroups --- eksctl/nasa-veda.jsonnet | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index 7fb3a9020..ec43bb75d 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -103,6 +103,9 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": 
"core", "k8s.dask.org/node-purpose": "core" }, + tags+: { + "2i2c:node-purpose": "core" + }, }, ] + [ ng + { @@ -118,6 +121,9 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "user", "k8s.dask.org/node-purpose": "scheduler" }, + tags+: { + "2i2c:node-purpose": "user" + }, taints+: { "hub.jupyter.org_dedicated": "user:NoSchedule", "hub.jupyter.org/dedicated": "user:NoSchedule" @@ -136,6 +142,9 @@ local daskNodes = [ labels+: { "k8s.dask.org/node-purpose": "worker" }, + tags+: { + "2i2c:node-purpose": "worker" + }, taints+: { "k8s.dask.org_dedicated" : "worker:NoSchedule", "k8s.dask.org/dedicated" : "worker:NoSchedule" From 8b4b2e2bde32d7addb8f112cc84dcd4d027b659a Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 9 Aug 2024 13:15:15 +0100 Subject: [PATCH 09/14] Put each hub on it's own nodegroup(s) with the hub-name tag --- eksctl/nasa-veda.jsonnet | 53 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index ec43bb75d..baef7e44a 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -25,10 +25,44 @@ local nodeAz = "us-west-2a"; // A `node.kubernetes.io/instance-type label is added, so pods // can request a particular kind of node with a nodeSelector local notebookNodes = [ - { instanceType: "r5.xlarge" }, + { instanceType: "r5.xlarge" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade { instanceType: "r5.xlarge", nameSuffix: "b" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade - { instanceType: "r5.4xlarge" }, - { instanceType: "r5.16xlarge" }, + { + instanceType: "r5.xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" } + }, + { + instanceType: 
"r5.16xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" } + }, + { + instanceType: "r5.xlarge", + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod" } + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod" } + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod" } + }, { instanceType: "r5.xlarge", namePrefix: "nb-binder", @@ -51,7 +85,18 @@ local daskNodes = [ // A not yet fully established policy is being developed about using a single // node pool, see https://github.com/2i2c-org/infrastructure/issues/2687. // - { instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }}, + { + namePrefix: "dask-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, + { + namePrefix: "dask-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod" }, + instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } + }, ]; From 3cbe9385867543b7e6ce8249201dbad9b2d50bc5 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 9 Aug 2024 13:15:47 +0100 Subject: [PATCH 10/14] Remove taint from binder nodegroup --- eksctl/nasa-veda.jsonnet | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index baef7e44a..b20e91631 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -67,11 +67,8 @@ local notebookNodes = [ instanceType: "r5.xlarge", namePrefix: "nb-binder", labels+: { "2i2c/hub-name": "binder" }, - tags+: { "2i2c:hub-name": "binder" }, - taints+: { - "2i2c/hub-name": "binder:NoSchedule" - }, - }, + tags+: { "2i2c:hub-name": "binder" } + } ]; local daskNodes = [ From 
ae188cc30e055a272237792a6066d6c2062a6b23 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 9 Aug 2024 13:15:56 +0100 Subject: [PATCH 11/14] Cycle core nodegroup --- eksctl/nasa-veda.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index b20e91631..542e49447 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -132,7 +132,7 @@ local daskNodes = [ nodeGroups: [ ng + { namePrefix: 'core', - nameSuffix: 'b', + nameSuffix: 'a', nameIncludeInstanceType: false, availabilityZones: [nodeAz], ssh: { From 1db4e11fcdc9374dc5329b0cbb4c02afb4ee97f0 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 9 Aug 2024 13:16:33 +0100 Subject: [PATCH 12/14] Add hub-name specific labels to nodeselectors --- config/clusters/nasa-veda/binder.values.yaml | 1 + config/clusters/nasa-veda/prod.values.yaml | 15 +++++++++++++++ config/clusters/nasa-veda/staging.values.yaml | 14 ++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/config/clusters/nasa-veda/binder.values.yaml b/config/clusters/nasa-veda/binder.values.yaml index 691e35a89..80e418b2a 100644 --- a/config/clusters/nasa-veda/binder.values.yaml +++ b/config/clusters/nasa-veda/binder.values.yaml @@ -113,6 +113,7 @@ binderhub-service: # Schedule builder pods to run on the smallest user nodes only # https://github.com/2i2c-org/infrastructure/issues/4241 node.kubernetes.io/instance-type: r5.xlarge + 2i2c/hub-name: "binder" BinderHub: base_url: / hub_url: https://hub.binder.nasa-veda.2i2c.cloud diff --git a/config/clusters/nasa-veda/prod.values.yaml b/config/clusters/nasa-veda/prod.values.yaml index 24d6bc29f..ebf0c5502 100644 --- a/config/clusters/nasa-veda/prod.values.yaml +++ b/config/clusters/nasa-veda/prod.values.yaml @@ -12,7 +12,22 @@ basehub: homepage: gitRepoBranch: "master" gitRepoUrl: "https://github.com/NASA-IMPACT/veda-hub-homepage" + singleuser: + nodeSelector: + 2i2c/hub-name: prod hub: config: GitHubOAuthenticator: 
oauth_callback_url: https://hub.openveda.cloud/hub/oauth_callback + +dask-gateway: + gateway: + backend: + scheduler: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: prod + worker: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: prod diff --git a/config/clusters/nasa-veda/staging.values.yaml b/config/clusters/nasa-veda/staging.values.yaml index 1ec06f776..18c9622b5 100644 --- a/config/clusters/nasa-veda/staging.values.yaml +++ b/config/clusters/nasa-veda/staging.values.yaml @@ -4,6 +4,8 @@ basehub: eks.amazonaws.com/role-arn: arn:aws:iam::444055461661:role/nasa-veda-staging jupyterhub: singleuser: + nodeSelector: + 2i2c/hub-name: staging initContainers: - &volume_ownership_fix_initcontainer name: volume-mount-ownership-fix @@ -39,3 +41,15 @@ basehub: homepage: gitRepoBranch: "staging" gitRepoUrl: "https://github.com/NASA-IMPACT/veda-hub-homepage" + +dask-gateway: + gateway: + backend: + scheduler: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: staging + worker: + extraPodConfig: + nodeSelector: + 2i2c/hub-name: staging From 5a905090a2df06938a27cd1ded54c9c30db02d1a Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Fri, 9 Aug 2024 13:16:47 +0100 Subject: [PATCH 13/14] Remove toleration for taints that no longer exist --- config/clusters/nasa-veda/binder.values.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/config/clusters/nasa-veda/binder.values.yaml b/config/clusters/nasa-veda/binder.values.yaml index 80e418b2a..61f9687b7 100644 --- a/config/clusters/nasa-veda/binder.values.yaml +++ b/config/clusters/nasa-veda/binder.values.yaml @@ -32,11 +32,6 @@ jupyterhub: singleuser: nodeSelector: 2i2c/hub-name: "binder" - extraTolerations: - - key: 2i2c/hub-name - operator: Equal - value: binder - effect: NoSchedule memory: guarantee: 1G limit: 2G From 2e5cbaa165bfb00ebd653e4e4129335b8b6a1c4f Mon Sep 17 00:00:00 2001 From: Sarah Gibson <44771837+sgibson91@users.noreply.github.com> Date: Fri, 9 Aug 2024 13:18:25 +0100 Subject: [PATCH 14/14] Clarify comment
--- eksctl/nasa-veda.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eksctl/nasa-veda.jsonnet b/eksctl/nasa-veda.jsonnet index 542e49447..a62b37989 100644 --- a/eksctl/nasa-veda.jsonnet +++ b/eksctl/nasa-veda.jsonnet @@ -25,8 +25,8 @@ local nodeAz = "us-west-2a"; // A `node.kubernetes.io/instance-type label is added, so pods // can request a particular kind of node with a nodeSelector local notebookNodes = [ - { instanceType: "r5.xlarge" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade - { instanceType: "r5.xlarge", nameSuffix: "b" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade + { instanceType: "r5.xlarge" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent + { instanceType: "r5.xlarge", nameSuffix: "b" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent { instanceType: "r5.xlarge", namePrefix: "nb-staging",