add scaling benchmark

Signed-off-by: Dmitry Shmulevich <[email protected]>
NVIDIA · Aug 13, 2024 · 021dc94 · 021dc94
1 parent 28095af
commit 021dc94
Show file tree

Hide file tree

Showing 19 changed files with 412 additions and 154 deletions.
diff --git a/resources/benchmarks/README.md b/resources/benchmarks/README.md
@@ -0,0 +1,53 @@
+# Benchmark Tests
+
+This directory contains benchmark tests for the following workload managers and schedulers:
+
+- Kueue
+- Volcano
+- Yunikorn
+- Run:ai
+
+The benchmark tests involve submitting workloads intended to evaluate the scheduler's performance under specific scenarios.
+
+These workloads are designed to fully utilize the cluster under optimal scheduling conditions.
+
+One approach to benchmarking is to run these workloads on clusters with different schedulers and then compare the average GPU occupancy of the nodes.
+
+For all workload managers except Run:ai, the benchmark test involves two sequential workflows. The first workflow registers the CRDs, and the second workflow runs the common part of the test.
+Run:ai requires additional customization and thus has a separate workflow.
+
+## Gang Scheduling Benchmark Test
+
+The gang-scheduling benchmark workflow operates on 32 virtual GPU nodes, submitting a burst of 53 jobs with replica numbers ranging from 1 to 32 in a [predetermined order](gang-scheduling/workflows/run-test-common.yml).
+
+### Example
+
+To run the benchmark test for Kueue:
+
+```bash
+./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-kueue.yml,run-test-common.yml}'
+```
+
+### Run:ai
+
+```bash
+./bin/knavigator -workflow resources/benchmarks/gang-scheduling/workflows/run-test-runai.yml
+```
+
+## Scaling Benchmark Test
+
+The scaling benchmark workflow operates on 500 virtual GPU nodes, submitting [two workloads](scaling/workflows/run-test-common.yml) one after another. The first workload is a job with 500 replicas; the second workload is 500 single-node jobs started simultaneously.
+
+### Example
+
+To run the benchmark test for Volcano:
+
+```bash
+./bin/knavigator -workflow 'resources/benchmarks/scaling/workflows/{config-volcano.yml,run-test-common.yml}'
+```
+
+### Run:ai
+
+```bash
+./bin/knavigator -workflow resources/benchmarks/scaling/workflows/run-test-runai.yml
+```
diff --git a/resources/benchmarks/gang-scheduling/README.md b/resources/benchmarks/gang-scheduling/README.md
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-jobset.yml b/resources/benchmarks/gang-scheduling/workflows/config-jobset.yml
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-kueue.yml b/resources/benchmarks/gang-scheduling/workflows/config-kueue.yml
@@ -38,6 +38,7 @@ tasks:
       flavor: gpu-node
       cpu: 8
       memory: 36Gi
+      pods: 32
       gpu: 256
 - id: create-local-queue
   type: SubmitObj
@@ -48,3 +49,80 @@ tasks:
       name: team-queue
       namespace: default
       clusterQueue: team
+- id: configure
+  type: Configure
+  params:
+    configmaps:
+    - name: kueue-manager-config
+      namespace: kueue-system
+      op: create
+      data:
+        controller_manager_config.yaml: |
+          apiVersion: config.kueue.x-k8s.io/v1beta1
+          kind: Configuration
+          health:
+            healthProbeBindAddress: :8081
+          metrics:
+            bindAddress: :8080
+          # enableClusterQueueResources: true
+          webhook:
+            port: 9443
+          leaderElection:
+            leaderElect: true
+            resourceName: c1f6bfd2.kueue.x-k8s.io
+          controller:
+            groupKindConcurrency:
+              Job.batch: 5
+              Pod: 5
+              Workload.kueue.x-k8s.io: 5
+              LocalQueue.kueue.x-k8s.io: 1
+              ClusterQueue.kueue.x-k8s.io: 1
+              ResourceFlavor.kueue.x-k8s.io: 1
+          clientConnection:
+            qps: 50
+            burst: 100
+          #pprofBindAddress: :8083
+          waitForPodsReady:
+            enable: true
+            timeout: 5m
+            blockAdmission: true
+            requeuingStrategy:
+              timestamp: Eviction
+              backoffLimitCount: null # null indicates infinite requeuing
+              backoffBaseSeconds: 60
+              backoffMaxSeconds: 3600
+          #manageJobsWithoutQueueName: true
+          #internalCertManagement:
+          #  enable: false
+          #  webhookServiceName: ""
+          #  webhookSecretName: ""
+          integrations:
+            frameworks:
+            - "batch/job"
+            - "kubeflow.org/mpijob"
+            - "ray.io/rayjob"
+            - "ray.io/raycluster"
+            - "jobset.x-k8s.io/jobset"
+            - "kubeflow.org/mxjob"
+            - "kubeflow.org/paddlejob"
+            - "kubeflow.org/pytorchjob"
+            - "kubeflow.org/tfjob"
+            - "kubeflow.org/xgboostjob"
+          #  - "pod"
+          #  externalFrameworks:
+          #  - "Foo.v1.example.com"
+          #  podOptions:
+          #    namespaceSelector:
+          #      matchExpressions:
+          #        - key: kubernetes.io/metadata.name
+          #          operator: NotIn
+          #          values: [ kube-system, kueue-system ]
+          #fairSharing:
+          #  enable: true
+          #  preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
+          #resources:
+          #  excludeResourcePrefixes: []
+    deploymentRestarts:
+    - namespace: kueue-system
+      name: kueue-controller-manager
+    timeout: 1m
diff --git a/resources/benchmarks/scaling/workflows/config-kueue.yml b/resources/benchmarks/scaling/workflows/config-kueue.yml
@@ -0,0 +1,128 @@
+name: config-kueue
+tasks:
+- id: register-cluster-queue
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/cluster-queue.yml"
+- id: register-local-queue
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/local-queue.yml"
+- id: register-resource-flavor
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/resource-flavor.yml"
+- id: register
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/kueue/job.yml"
+    nameFormat: "job{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-[0-9]-.*"
+    podCount: "{{.replicas}}"
+- id: create-resource-flavor
+  type: SubmitObj
+  params:
+    refTaskId: register-resource-flavor
+    canExist: true
+    params:
+      name: "gpu-node"
+      nodeLabels:
+        nvidia.com/gpu.count: "8"
+- id: create-cluster-queue
+  type: SubmitObj
+  params:
+    refTaskId: register-cluster-queue
+    canExist: true
+    params:
+      name: team
+      flavor: gpu-node
+      cpu: 50
+      memory: 360Gi
+      pods: 500
+      gpu: 4000
+- id: create-local-queue
+  type: SubmitObj
+  params:
+    refTaskId: register-local-queue
+    canExist: true
+    params:
+      name: team-queue
+      namespace: default
+      clusterQueue: team
+- id: configure
+  type: Configure
+  params:
+    configmaps:
+    - name: kueue-manager-config
+      namespace: kueue-system
+      op: create
+      data:
+        controller_manager_config.yaml: |
+          apiVersion: config.kueue.x-k8s.io/v1beta1
+          kind: Configuration
+          health:
+            healthProbeBindAddress: :8081
+          metrics:
+            bindAddress: :8080
+          # enableClusterQueueResources: true
+          webhook:
+            port: 9443
+          leaderElection:
+            leaderElect: true
+            resourceName: c1f6bfd2.kueue.x-k8s.io
+          controller:
+            groupKindConcurrency:
+              Job.batch: 5
+              Pod: 5
+              Workload.kueue.x-k8s.io: 5
+              LocalQueue.kueue.x-k8s.io: 1
+              ClusterQueue.kueue.x-k8s.io: 1
+              ResourceFlavor.kueue.x-k8s.io: 1
+          clientConnection:
+            qps: 50
+            burst: 100
+          #pprofBindAddress: :8083
+          waitForPodsReady:
+            enable: true
+            timeout: 5m
+            blockAdmission: true
+            requeuingStrategy:
+              timestamp: Eviction
+              backoffLimitCount: null # null indicates infinite requeuing
+              backoffBaseSeconds: 60
+              backoffMaxSeconds: 3600
+          #manageJobsWithoutQueueName: true
+          #internalCertManagement:
+          #  enable: false
+          #  webhookServiceName: ""
+          #  webhookSecretName: ""
+          integrations:
+            frameworks:
+            - "batch/job"
+            - "kubeflow.org/mpijob"
+            - "ray.io/rayjob"
+            - "ray.io/raycluster"
+            - "jobset.x-k8s.io/jobset"
+            - "kubeflow.org/mxjob"
+            - "kubeflow.org/paddlejob"
+            - "kubeflow.org/pytorchjob"
+            - "kubeflow.org/tfjob"
+            - "kubeflow.org/xgboostjob"
+          #  - "pod"
+          #  externalFrameworks:
+          #  - "Foo.v1.example.com"
+          #  podOptions:
+          #    namespaceSelector:
+          #      matchExpressions:
+          #        - key: kubernetes.io/metadata.name
+          #          operator: NotIn
+          #          values: [ kube-system, kueue-system ]
+          #fairSharing:
+          #  enable: true
+          #  preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
+          #resources:
+          #  excludeResourcePrefixes: []
+    deploymentRestarts:
+    - namespace: kueue-system
+      name: kueue-controller-manager
+    timeout: 1m
diff --git a/resources/benchmarks/scaling/workflows/config-volcano.yml b/resources/benchmarks/scaling/workflows/config-volcano.yml
@@ -0,0 +1,31 @@
+name: config-volcano
+tasks:
+- id: register
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/volcano/job.yml"
+    nameFormat: "j{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-test-[0-9]+"
+    podCount: "{{.replicas}}"
+- id: configure
+  type: Configure
+  params:
+    configmaps:
+    - name: volcano-scheduler-configmap
+      namespace: volcano-system
+      op: create
+      data:
+        volcano-scheduler.conf: |
+          actions: "enqueue, allocate, backfill"
+          tiers:
+          - plugins:
+            - name: priority
+            - name: gang
+            - name: conformance
+          - plugins:
+            - name: drf
+            - name: predicates
+            - name: proportion
+            - name: nodeorder
+            - name: binpack
+    timeout: 1m
diff --git a/resources/benchmarks/scaling/workflows/config-yunikorn.yml b/resources/benchmarks/scaling/workflows/config-yunikorn.yml
@@ -0,0 +1,29 @@
+name: config-yunikorn
+tasks:
+- id: register
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/yunikorn/job.yml"
+    nameFormat: "job{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-.*"
+    podCount: "{{.replicas}}"
+- id: configure
+  type: Configure
+  params:
+    configmaps:
+    - name: yunikorn-configs
+      namespace: yunikorn
+      op: create
+      data:
+        queues.yaml: |
+          partitions:
+            - name: default
+              queues:
+              - name: root
+                queues:
+                - name: sandbox
+                  submitacl: '*'
+                  resources:
+                    max:
+                      {memory: 360Gi, vcore: 50000m, nvidia.com/gpu: 4000}
+    timeout: 1m