Skip to content

Commit

Permalink
Merge pull request #2974 from consideRatio/pr/towards-single-dask-wor…
Browse files Browse the repository at this point in the history
…ker-node-pool

gcp/aws, dask worker nodes: towards single r5.4xlarge/n2-highmem-16 dask worker node pool
  • Loading branch information
consideRatio authored Aug 23, 2023
2 parents 2d910a1 + 19e15ed commit 2132301
Show file tree
Hide file tree
Showing 20 changed files with 138 additions and 65 deletions.
4 changes: 4 additions & 0 deletions eksctl/2i2c-aws-us.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

Expand Down
21 changes: 11 additions & 10 deletions eksctl/carbonplan.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,18 @@ local notebookNodes = [
},
];

// Node definitions for dask worker nodes. Config here is merged
// with our dask worker node definition, which uses spot instances.
// A `node.kubernetes.io/instance-type` label is set to the name of the
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
local daskNodes = [
{ instancesDistribution+: { instanceTypes: ["r5.large"] }},
{ instancesDistribution+: { instanceTypes: ["r5.xlarge"] }},
{ instancesDistribution+: { instanceTypes: ["r5.2xlarge"] }},
{ instancesDistribution+: { instanceTypes: ["r5.8xlarge"] }},
// Node definitions for dask worker nodes. Config here is merged
// with our dask worker node definition, which uses spot instances.
// A `node.kubernetes.io/instance-type` label is set to the name of the
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

{
Expand Down
4 changes: 4 additions & 0 deletions eksctl/gridsst.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

Expand Down
8 changes: 5 additions & 3 deletions eksctl/jupyter-meets-the-earth.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
{ instancesDistribution+: { instanceTypes: ["m5.large"] }},
{ instancesDistribution+: { instanceTypes: ["m5.4xlarge"] }},
{ instancesDistribution+: { instanceTypes: ["m5.16xlarge"] }},
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];


Expand Down
4 changes: 4 additions & 0 deletions eksctl/nasa-cryo.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

Expand Down
4 changes: 4 additions & 0 deletions eksctl/nasa-ghg.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

Expand Down
4 changes: 4 additions & 0 deletions eksctl/nasa-veda.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

Expand Down
6 changes: 4 additions & 2 deletions eksctl/openscapes.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
{ instancesDistribution+: { instanceTypes: ["r5.xlarge"] }},
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
{ instancesDistribution+: { instanceTypes: ["r5.16xlarge"] }},
];


Expand Down
4 changes: 4 additions & 0 deletions eksctl/smithsonian.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

Expand Down
4 changes: 4 additions & 0 deletions eksctl/template.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];
<% else %>
Expand Down
4 changes: 4 additions & 0 deletions eksctl/victor.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ local daskNodes = [
// *first* item in instancesDistribution.instanceTypes, to match
// what we do with notebook nodes. Pods can request a particular
// kind of node with a nodeSelector
//
// The policy of using a single node pool is still being developed, see
// https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
];

Expand Down
5 changes: 5 additions & 0 deletions terraform/gcp/projects/awi-ciroh.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ notebook_nodes = {
},
}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"medium" : {
min : 0,
Expand Down
7 changes: 6 additions & 1 deletion terraform/gcp/projects/cloudbank.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@ notebook_nodes = {
},
}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"worker" : {
min : 0,
max : 100,
machine_type : "n1-highmem-4"
machine_type : "n2-highmem-16"
},
}

Expand Down
7 changes: 6 additions & 1 deletion terraform/gcp/projects/daskhub-template.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,13 @@ notebook_nodes = {
},
}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"medium" : {
"worker" : {
min : 0,
max : 200,
machine_type : "n2-highmem-16",
Expand Down
5 changes: 5 additions & 0 deletions terraform/gcp/projects/leap.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ notebook_nodes = {
},
}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"medium" : {
min : 0,
Expand Down
11 changes: 8 additions & 3 deletions terraform/gcp/projects/linked-earth.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,17 @@ notebook_nodes = {
},
}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"medium" : {
"worker" : {
min : 0,
max : 100,
machine_type : "e2-highmem-16"
},
machine_type : "n2-highmem-16",
}
}

hub_cloud_permissions = {
Expand Down
26 changes: 8 additions & 18 deletions terraform/gcp/projects/m2lines.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -70,27 +70,17 @@ notebook_nodes = {
}
}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"small" : {
min : 0,
max : 100,
machine_type : "n1-standard-2",
},
"medium" : {
"worker" : {
min : 0,
max : 100,
machine_type : "n1-standard-4",
},
"large" : {
min : 0,
max : 100,
machine_type : "n1-standard-8",
},
"huge" : {
min : 0,
max : 100,
machine_type : "n1-standard-16",
},
machine_type : "n2-highmem-16",
}
}

hub_cloud_permissions = {
Expand Down
34 changes: 9 additions & 25 deletions terraform/gcp/projects/meom-ige.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -40,33 +40,17 @@ notebook_nodes = {

}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"small" : {
min : 0,
max : 20,
machine_type : "n1-standard-2",
},
"medium" : {
"worker" : {
min : 0,
max : 20,
machine_type : "n1-standard-8",
},
"large" : {
min : 0,
max : 20,
machine_type : "n1-standard-16",
},
"very-large" : {
min : 0,
max : 20,
machine_type : "n1-standard-32",
},
"huge" : {
min : 0,
max : 20,
machine_type : "n1-standard-64",
},

max : 100,
machine_type : "n2-highmem-16",
}
}

user_buckets = {
Expand Down
32 changes: 32 additions & 0 deletions terraform/gcp/projects/pangeo-hubs.tfvars
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# SETTING UP TO WORK WITH THIS FILE:
# -------------------------------------------------------------------------------
#
# The terraform state associated with this file is stored in a dedicated GCP
# bucket, so in order to work with this file you need to do the following after
# clearing a local .terraform folder.
#
# terraform init -backend-config backends/pangeo-backend.hcl
# terraform workspace list
# terraform workspace select <...>
#
# The GCP project containing the bucket is https://console.cloud.google.com/?project=columbia
#

prefix = "pangeo-hubs"
project_id = "pangeo-integration-te-3eea"
zone = "us-central1-b"
Expand Down Expand Up @@ -75,6 +89,24 @@ notebook_nodes = {
},
}

# Node pools for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
# TODO: Transition to a single n2-highmem-16 worker node pool to be able to
# provide standardized worker pod config for all daskhubs.
#
# Tracked in https://github.com/2i2c-org/infrastructure/issues/2687
#
# The node pool to set up should look like this:
#
# "worker" : {
# min : 0,
# max : 100,
# machine_type : "n2-highmem-16",
# },
#
dask_nodes = {
"small" : {
min : 0,
Expand Down
9 changes: 7 additions & 2 deletions terraform/gcp/projects/pilot-hubs.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,17 @@ notebook_nodes = {
}
}

# Set up a single node pool for dask workers.
#
# The policy of using a single node pool is still being developed, see
# https://github.com/2i2c-org/infrastructure/issues/2687.
#
dask_nodes = {
"worker" : {
min : 0,
max : 100,
machine_type : "n1-highmem-4",
}
machine_type : "n2-highmem-16",
},
}

user_buckets = {}
Expand Down

0 comments on commit 2132301

Please sign in to comment.