From 92b1dd124d55d3394820e4b9a5b70a6fdc8cae27 Mon Sep 17 00:00:00 2001 From: Yuki Iwai Date: Wed, 28 Aug 2024 07:53:44 +0900 Subject: [PATCH] WIP Signed-off-by: Yuki Iwai --- cmd/training-operator.v2alpha1/main.go | 23 +- go.mod | 66 ++-- go.sum | 155 ++++----- hack/swagger/go.mod | 28 +- hack/swagger/go.sum | 89 +++--- pkg/controller.v2/setup.go | 10 +- pkg/controller.v2/trainjob_controller.go | 16 +- pkg/runtime.v2/core/clustertrainingruntime.go | 58 ++++ pkg/runtime.v2/core/registry.go | 36 +++ pkg/runtime.v2/core/runtime.go | 41 +++ pkg/runtime.v2/core/trainingruntime.go | 129 ++++++++ pkg/runtime.v2/framework/builder/jobset.go | 67 ++++ pkg/runtime.v2/framework/core/framework.go | 160 ++++++++++ .../framework/core/framework_test.go | 81 +++++ pkg/runtime.v2/framework/interface.go | 66 ++++ .../plugins/coscheduling/coscheduling.go | 302 ++++++++++++++++++ .../plugins/coscheduling/coscheduling_test.go | 1 + .../framework/plugins/coscheduling/indexer.go | 56 ++++ .../framework/plugins/jobset/jobset.go | 78 +++++ pkg/runtime.v2/framework/plugins/mpi/mpi.go | 62 ++++ .../framework/plugins/plainml/plainml.go | 58 ++++ .../framework/plugins/plainml/plainml_test.go | 1 + pkg/runtime.v2/framework/plugins/registry.go | 43 +++ .../framework/plugins/torch/torch.go | 58 ++++ pkg/runtime.v2/indexer/indexer.go | 45 +++ pkg/runtime.v2/interface.go | 33 ++ pkg/util.v2/testing/client.go | 43 +++ pkg/util.v2/testing/wrapper.go | 104 ++++++ pkg/webhook.v2/webhook.go | 8 +- test/integration/framework/framework.go | 8 +- 30 files changed, 1727 insertions(+), 198 deletions(-) create mode 100644 pkg/runtime.v2/core/clustertrainingruntime.go create mode 100644 pkg/runtime.v2/core/registry.go create mode 100644 pkg/runtime.v2/core/runtime.go create mode 100644 pkg/runtime.v2/core/trainingruntime.go create mode 100644 pkg/runtime.v2/framework/builder/jobset.go create mode 100644 pkg/runtime.v2/framework/core/framework.go create mode 100644 pkg/runtime.v2/framework/core/framework_test.go 
create mode 100644 pkg/runtime.v2/framework/interface.go create mode 100644 pkg/runtime.v2/framework/plugins/coscheduling/coscheduling.go create mode 100644 pkg/runtime.v2/framework/plugins/coscheduling/coscheduling_test.go create mode 100644 pkg/runtime.v2/framework/plugins/coscheduling/indexer.go create mode 100644 pkg/runtime.v2/framework/plugins/jobset/jobset.go create mode 100644 pkg/runtime.v2/framework/plugins/mpi/mpi.go create mode 100644 pkg/runtime.v2/framework/plugins/plainml/plainml.go create mode 100644 pkg/runtime.v2/framework/plugins/plainml/plainml_test.go create mode 100644 pkg/runtime.v2/framework/plugins/registry.go create mode 100644 pkg/runtime.v2/framework/plugins/torch/torch.go create mode 100644 pkg/runtime.v2/indexer/indexer.go create mode 100644 pkg/runtime.v2/interface.go create mode 100644 pkg/util.v2/testing/client.go create mode 100644 pkg/util.v2/testing/wrapper.go diff --git a/cmd/training-operator.v2alpha1/main.go b/cmd/training-operator.v2alpha1/main.go index 08bb9d4791..ab816b450a 100644 --- a/cmd/training-operator.v2alpha1/main.go +++ b/cmd/training-operator.v2alpha1/main.go @@ -17,15 +17,17 @@ limitations under the License. package main import ( + "context" "crypto/tls" "errors" "flag" + runtimecore "github.com/kubeflow/training-operator/pkg/runtime.v2/core" "net/http" "os" zaplog "go.uber.org/zap" "go.uber.org/zap/zapcore" - "k8s.io/apimachinery/pkg/runtime" + apiruntime "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" @@ -42,7 +44,7 @@ import ( ) var ( - scheme = runtime.NewScheme() + scheme = apiruntime.NewScheme() setupLog = ctrl.Log.WithName("setup") ) @@ -127,27 +129,34 @@ func main() { os.Exit(1) } + ctx := ctrl.SetupSignalHandler() + setupProbeEndpoints(mgr, certsReady) // Set up controllers using goroutines to start the manager quickly. 
- go setupControllers(mgr, certsReady) + go setupControllers(ctx, mgr, certsReady) setupLog.Info("Starting manager") - if err = mgr.Start(ctrl.SetupSignalHandler()); err != nil { + if err = mgr.Start(ctx); err != nil { setupLog.Error(err, "Could not run manager") os.Exit(1) } } -func setupControllers(mgr ctrl.Manager, certsReady <-chan struct{}) { +func setupControllers(ctx context.Context, mgr ctrl.Manager, certsReady <-chan struct{}) { setupLog.Info("Waiting for certificate generation to complete") <-certsReady setupLog.Info("Certs ready") - if failedCtrlName, err := controllerv2.SetupControllers(mgr); err != nil { + runtimes, err := runtimecore.New(ctx, mgr.GetClient(), mgr.GetRESTMapper(), mgr.GetFieldIndexer(), mgr.GetScheme()) + if err != nil { + setupLog.Error(err, "Could not initialize runtimes") + os.Exit(1) + } + if failedCtrlName, err := controllerv2.SetupControllers(mgr, runtimes); err != nil { setupLog.Error(err, "Could not create controller", "controller", failedCtrlName) os.Exit(1) } - if failedWebhook, err := webhookv2.Setup(mgr); err != nil { + if failedWebhook, err := webhookv2.Setup(mgr, runtimes); err != nil { setupLog.Error(err, "Could not create webhook", "webhook", failedWebhook) os.Exit(1) } diff --git a/go.mod b/go.mod index eb2f0afcbc..f55eda8536 100644 --- a/go.mod +++ b/go.mod @@ -3,24 +3,25 @@ module github.com/kubeflow/training-operator go 1.22 require ( - github.com/go-logr/logr v1.4.1 + github.com/go-logr/logr v1.4.2 github.com/google/go-cmp v0.6.0 - github.com/onsi/ginkgo/v2 v2.17.1 - github.com/onsi/gomega v1.32.0 + github.com/onsi/ginkgo/v2 v2.19.0 + github.com/onsi/gomega v1.33.1 github.com/open-policy-agent/cert-controller v0.10.1 github.com/prometheus/client_golang v1.18.0 - github.com/sirupsen/logrus v1.9.0 + github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 go.uber.org/zap v1.27.0 - k8s.io/api v0.29.3 - k8s.io/apimachinery v0.29.3 - k8s.io/client-go v0.29.3 - k8s.io/code-generator v0.29.3 - k8s.io/klog/v2 
v2.110.1 + k8s.io/api v0.29.5 + k8s.io/apimachinery v0.29.5 + k8s.io/client-go v0.29.5 + k8s.io/code-generator v0.29.5 + k8s.io/klog/v2 v2.120.1 k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 - k8s.io/utils v0.0.0-20230726121419-3b25d923346b + k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 sigs.k8s.io/controller-runtime v0.17.3 sigs.k8s.io/jobset v0.5.2 + sigs.k8s.io/kueue v0.6.3 sigs.k8s.io/scheduler-plugins v0.28.9 sigs.k8s.io/yaml v1.4.0 volcano.sh/apis v1.9.0 @@ -28,24 +29,24 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/evanphx/json-patch v5.6.0+incompatible // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/evanphx/json-patch v5.9.0+incompatible // indirect github.com/evanphx/json-patch/v5 v5.8.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/zapr v1.3.0 // indirect - github.com/go-openapi/jsonpointer v0.19.6 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.22.3 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect - github.com/google/uuid v1.3.1 // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect 
+ github.com/google/uuid v1.6.0 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -56,30 +57,29 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_model v0.5.0 // indirect + github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/spf13/pflag v1.0.5 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect - golang.org/x/mod v0.16.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/term v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.19.0 // indirect + golang.org/x/exp v0.0.0-20240530194437-404ba88c7ed0 // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/oauth2 v0.20.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + golang.org/x/time v0.5.0 // indirect + golang.org/x/tools v0.21.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/protobuf v1.33.0 // indirect + google.golang.org/protobuf v1.34.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.29.2 // indirect - k8s.io/component-base v0.29.2 // indirect - k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect + k8s.io/component-base v0.29.5 // indirect + k8s.io/gengo 
v0.0.0-20240404160639-a0386bf69313 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/go.sum b/go.sum index da8a571436..7e8820b975 100644 --- a/go.sum +++ b/go.sum @@ -1,41 +1,35 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod 
h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= +github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1u0KQro= github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig 
v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= @@ -48,11 +42,10 @@ github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/ github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 
h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -62,7 +55,6 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -80,10 +72,10 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg 
v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= -github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= -github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk= -github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/open-policy-agent/cert-controller v0.10.1 h1:RXSYoyn8FdCenWecRP//UV5nbVfmstNpj4kHQFkvPK4= github.com/open-policy-agent/cert-controller v0.10.1/go.mod h1:4uRbBLY5DsPOog+a9pqk3JLxuuhrWsbUedQW65HcLTI= github.com/open-policy-agent/frameworks/constraint v0.0.0-20230822235116-f0b62fe1e4c4 h1:5dum5SLEz+95JDLkMls7Z7IDPjvSq3UhJSFe4f5einQ= @@ -94,27 +86,21 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= 
github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= -github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod 
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -130,57 +116,54 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= -golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/exp v0.0.0-20240530194437-404ba88c7ed0 h1:Mi0bCswbz+9cXmwFAdxoo5GPFMKONUpua6iUdtQS7lk= +golang.org/x/exp v0.0.0-20240530194437-404ba88c7ed0/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= -golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net 
v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= -golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/oauth2 v0.20.0 h1:4mQdhULixXKP1rwYBW0vAijoXnkTG0BLCDRzfe1idMo= +golang.org/x/oauth2 v0.20.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 
-golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= -golang.org/x/tools v0.19.0/go.mod 
h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= @@ -193,35 +176,37 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw= -k8s.io/api v0.29.3/go.mod h1:y2yg2NTyHUUkIoTC+phinTnEa3KFM6RZ3szxt014a80= +k8s.io/api v0.29.5 h1:levS+umUigHCfI3riD36pMY1vQEbrzh4r1ivVWAhHaI= +k8s.io/api v0.29.5/go.mod h1:7b18TtPcJzdjk7w5zWyIHgoAtpGeRvGGASxlS7UZXdQ= k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2Iu+btg= k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8= -k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU= -k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU= -k8s.io/client-go v0.29.3 h1:R/zaZbEAxqComZ9FHeQwOh3Y1ZUs7FaHKZdQtIc2WZg= -k8s.io/client-go v0.29.3/go.mod h1:tkDisCvgPfiRpxGnOORfkljmS+UrW+WtXAy2fTvXJB0= -k8s.io/code-generator v0.29.3 h1:m7E25/t9R9NvejspO2zBdyu+/Gl0Z5m7dCRc680KS14= -k8s.io/code-generator v0.29.3/go.mod h1:x47ofBhN4gxYFcxeKA1PYXeaPreAGaDN85Y/lNUsPoM= -k8s.io/component-base v0.29.2 h1:lpiLyuvPA9yV1aQwGLENYyK7n/8t6l3nn3zAtFTJYe8= -k8s.io/component-base v0.29.2/go.mod h1:BfB3SLrefbZXiBfbM+2H1dlat21Uewg/5qtKOl8degM= -k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 h1:pWEwq4Asjm4vjW7vcsmijwBhOr1/shsbSYiWXmNGlks= -k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= +k8s.io/apimachinery v0.29.5 h1:Hofa2BmPfpoT+IyDTlcPdCHSnHtEQMoJYGVoQpRTfv4= +k8s.io/apimachinery v0.29.5/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y= +k8s.io/client-go v0.29.5 h1:nlASXmPQy190qTteaVP31g3c/wi2kycznkTP7Sv1zPc= +k8s.io/client-go v0.29.5/go.mod h1:aY5CnqUUvXYccJhm47XHoPcRyX6vouHdIBHaKZGTbK4= +k8s.io/code-generator v0.29.5 h1:WqSdBPVV1B3jsPnKtPS39U02zj6Q7+FsjhAj1EPBJec= +k8s.io/code-generator v0.29.5/go.mod h1:7TYnI0dYItL2cKuhhgPSuF3WED9uMdELgbVXFfn/joE= +k8s.io/component-base v0.29.5 h1:Ptj8AzG+p8c2a839XriHwxakDpZH9uvIgYz+o1agjg8= +k8s.io/component-base v0.29.5/go.mod 
h1:9nBUoPxW/yimISIgAG7sJDrUGJlu7t8HnDafIrOdU8Q= +k8s.io/gengo v0.0.0-20240404160639-a0386bf69313 h1:wBIDZID8ju9pwOiLlV22YYKjFGtiNSWgHf5CnKLRUuM= +k8s.io/gengo v0.0.0-20240404160639-a0386bf69313/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= -k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-aggregator v0.28.1 h1:rvG4llYnQKHjj6YjjoBPEJxfD1uH0DJwkrJTNKGAaCs= k8s.io/kube-aggregator v0.28.1/go.mod h1:JaLizMe+AECSpO2OmrWVsvnG0V3dX1RpW+Wq/QHbu18= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= sigs.k8s.io/jobset v0.5.2 h1:276q5Pi/ErLYj+GQ0ydEXR6tx3LwBhEzHLQv+k8bYF4= sigs.k8s.io/jobset v0.5.2/go.mod h1:Vg99rj/6OoGvy1uvywGEHOcVLCWWJYkJtisKqdWzcFw= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/kueue v0.6.3 
h1:PmccdKPDFQIaAboyuSG6M0w6hXtxVA51RV+DjCUtBtQ= +sigs.k8s.io/kueue v0.6.3/go.mod h1:rliYfK/K7pJ7CT4ReV1szzciNkAo3sBn5Bmr5Sn6uCY= sigs.k8s.io/scheduler-plugins v0.28.9 h1:1/bXRoXuSUFr1FLqxrzScdyZMl/G1psuDJcDKYxTo+Q= sigs.k8s.io/scheduler-plugins v0.28.9/go.mod h1:32+kIPGT0aTRsEDzKNga7zCbcCHK0dSk5UFCY+gzCLE= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= diff --git a/hack/swagger/go.mod b/hack/swagger/go.mod index f45fedaf0b..6635b8079c 100644 --- a/hack/swagger/go.mod +++ b/hack/swagger/go.mod @@ -4,18 +4,18 @@ go 1.22 require ( github.com/kubeflow/training-operator v0.0.0-00010101000000-000000000000 - k8s.io/klog/v2 v2.110.1 + k8s.io/klog/v2 v2.120.1 k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 ) replace github.com/kubeflow/training-operator => ../../ require ( - github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/go-logr/logr v1.4.1 // indirect - github.com/go-openapi/jsonpointer v0.19.6 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.22.3 // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect @@ -25,17 +25,17 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/sirupsen/logrus v1.9.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/protobuf v1.33.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect + golang.org/x/net v0.25.0 // 
indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + google.golang.org/protobuf v1.34.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.29.3 // indirect - k8s.io/apimachinery v0.29.3 // indirect - k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + k8s.io/api v0.29.5 // indirect + k8s.io/apimachinery v0.29.5 // indirect + k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 // indirect sigs.k8s.io/controller-runtime v0.17.3 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect diff --git a/hack/swagger/go.sum b/hack/swagger/go.sum index 307010aea6..7980dca80f 100644 --- a/hack/swagger/go.sum +++ b/hack/swagger/go.sum @@ -1,20 +1,19 @@ -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod 
h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= @@ -27,19 +26,16 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -49,26 +45,21 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod 
h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8= -github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= -github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk= -github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= -github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -82,8 +73,8 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ 
-91,24 +82,24 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= -golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors 
v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -120,16 +111,16 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw= -k8s.io/api v0.29.3/go.mod h1:y2yg2NTyHUUkIoTC+phinTnEa3KFM6RZ3szxt014a80= -k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU= -k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU= -k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= -k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/api v0.29.5 h1:levS+umUigHCfI3riD36pMY1vQEbrzh4r1ivVWAhHaI= +k8s.io/api 
v0.29.5/go.mod h1:7b18TtPcJzdjk7w5zWyIHgoAtpGeRvGGASxlS7UZXdQ= +k8s.io/apimachinery v0.29.5 h1:Hofa2BmPfpoT+IyDTlcPdCHSnHtEQMoJYGVoQpRTfv4= +k8s.io/apimachinery v0.29.5/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= diff --git a/pkg/controller.v2/setup.go b/pkg/controller.v2/setup.go index 79e89fa0c5..e2fadd3a96 100644 --- a/pkg/controller.v2/setup.go +++ b/pkg/controller.v2/setup.go @@ -16,13 +16,17 @@ limitations under the License. 
package controllerv2 -import ctrl "sigs.k8s.io/controller-runtime" +import ( + ctrl "sigs.k8s.io/controller-runtime" -func SetupControllers(mgr ctrl.Manager) (string, error) { + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" +) + +func SetupControllers(mgr ctrl.Manager, runtimes map[string]runtime.Runtime) (string, error) { if err := NewTrainJobReconciler( mgr.GetClient(), mgr.GetEventRecorderFor("training-operator-trainjob-controller"), - ).SetupWithManager(mgr); err != nil { + ).SetupWithManager(mgr, runtimes); err != nil { return "TrainJob", err } return "", nil diff --git a/pkg/controller.v2/trainjob_controller.go b/pkg/controller.v2/trainjob_controller.go index e12cc3c2d7..ef2f3242ce 100644 --- a/pkg/controller.v2/trainjob_controller.go +++ b/pkg/controller.v2/trainjob_controller.go @@ -26,6 +26,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" ) type TrainJobReconciler struct { @@ -53,8 +54,15 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c return ctrl.Result{}, nil } -func (r *TrainJobReconciler) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&kubeflowv2.TrainJob{}). - Complete(r) +func (r *TrainJobReconciler) SetupWithManager(mgr ctrl.Manager, runtimes map[string]runtime.Runtime) error { + b := ctrl.NewControllerManagedBy(mgr). 
+ For(&kubeflowv2.TrainJob{}) + for _, run := range runtimes { + for _, registrar := range run.EventHandlerRegistrars() { + if registrar != nil { + b = registrar(b, mgr.GetClient()) + } + } + } + return b.Complete(r) } diff --git a/pkg/runtime.v2/core/clustertrainingruntime.go b/pkg/runtime.v2/core/clustertrainingruntime.go new file mode 100644 index 0000000000..01640dd845 --- /dev/null +++ b/pkg/runtime.v2/core/clustertrainingruntime.go @@ -0,0 +1,58 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package core + +import ( + "context" + + "k8s.io/apimachinery/pkg/api/meta" + apiruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" +) + +type ClusterTrainingRuntime struct { + *TrainingRuntime +} + +var _ runtime.Runtime = (*ClusterTrainingRuntime)(nil) + +var ClusterTrainingRuntimeGroupKind = schema.GroupKind{ + Group: kubeflowv2.GroupVersion.Group, + Kind: "ClusterTrainingRuntime", +}.String() + +func NewClusterTrainingRuntime(context.Context, client.Client, meta.RESTMapper, client.FieldIndexer, *apiruntime.Scheme) (runtime.Runtime, error) { + return &ClusterTrainingRuntime{ + TrainingRuntime: trainingRuntimeFactory, + }, nil +} + +func (r *ClusterTrainingRuntime) NewObjects(ctx context.Context, trainJob *kubeflowv2.TrainJob) ([]client.Object, error) { + var clTrainingRuntime kubeflowv2.ClusterTrainingRuntime + if err := r.client.Get(ctx, client.ObjectKey{Name: trainJob.Spec.TrainingRuntimeRef.Name}, &clTrainingRuntime); err != nil { + return nil, err + } + return r.buildObjects(ctx, trainJob, clTrainingRuntime.Spec.Template, clTrainingRuntime.Spec.MLPolicy, clTrainingRuntime.Spec.PodGroupPolicy) +} + +func (r *ClusterTrainingRuntime) EventHandlerRegistrars() []runtime.ReconcilerBuilder { + return nil +} diff --git a/pkg/runtime.v2/core/registry.go b/pkg/runtime.v2/core/registry.go new file mode 100644 index 0000000000..003ec5b723 --- /dev/null +++ b/pkg/runtime.v2/core/registry.go @@ -0,0 +1,36 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "context" + + "k8s.io/apimachinery/pkg/api/meta" + apiruntime "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" +) + +type Registry map[string]func(ctx context.Context, client client.Client, restMapper meta.RESTMapper, indexer client.FieldIndexer, scheme *apiruntime.Scheme) (runtime.Runtime, error) + +func NewRuntimeRegistry() Registry { + return Registry{ + TrainingRuntimeGroupKind: NewTrainingRuntime, + ClusterTrainingRuntimeGroupKind: NewClusterTrainingRuntime, + } +} diff --git a/pkg/runtime.v2/core/runtime.go b/pkg/runtime.v2/core/runtime.go new file mode 100644 index 0000000000..64c15d2904 --- /dev/null +++ b/pkg/runtime.v2/core/runtime.go @@ -0,0 +1,41 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package core + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/api/meta" + apiruntime "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" +) + +func New(ctx context.Context, client client.Client, restMapper meta.RESTMapper, indexer client.FieldIndexer, scheme *apiruntime.Scheme) (map[string]runtime.Runtime, error) { + registry := NewRuntimeRegistry() + runtimes := make(map[string]runtime.Runtime, len(registry)) + for name, factory := range registry { + r, err := factory(ctx, client, restMapper, indexer, scheme) + if err != nil { + return nil, fmt.Errorf("initializing runtime %q: %w", name, err) + } + runtimes[name] = r + } + return runtimes, nil +} diff --git a/pkg/runtime.v2/core/trainingruntime.go b/pkg/runtime.v2/core/trainingruntime.go new file mode 100644 index 0000000000..5039d12d11 --- /dev/null +++ b/pkg/runtime.v2/core/trainingruntime.go @@ -0,0 +1,129 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package core + +import ( + "context" + "fmt" + runtimebuilder "github.com/kubeflow/training-operator/pkg/runtime.v2/framework/builder" + fwkcore "github.com/kubeflow/training-operator/pkg/runtime.v2/framework/core" + fwkplugins "github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins" + collectionsidxer "github.com/kubeflow/training-operator/pkg/runtime.v2/indexer" + "maps" + + "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + apiruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" +) + +type TrainingRuntime struct { + framework *fwkcore.Framework + client client.Client + scheme *apiruntime.Scheme +} + +var TrainingRuntimeGroupKind = schema.GroupKind{ + Group: kubeflowv2.GroupVersion.Group, + Kind: "TrainingRuntime", +}.String() + +var _ runtime.Runtime = (*TrainingRuntime)(nil) + +var trainingRuntimeFactory *TrainingRuntime + +func NewTrainingRuntime(ctx context.Context, client client.Client, restMapper meta.RESTMapper, indexer client.FieldIndexer, scheme *apiruntime.Scheme) (runtime.Runtime, error) { + if err := indexer.IndexField(ctx, &kubeflowv2.TrainJob{}, collectionsidxer.TrainJobTrainingRuntimeRefKey, collectionsidxer.IndexTrainJobTrainingRuntimes); err != nil { + return nil, fmt.Errorf("setting index on TrainingRuntime and ClusterTrainigRuntime for TrainJob: %w", err) + } + fwk, err := fwkcore.New(ctx, client, restMapper, fwkplugins.NewRegistry(), indexer) + if err != nil { + return nil, err + } + trainingRuntimeFactory = &TrainingRuntime{ + framework: fwk, + client: client, + scheme: scheme, + } + return 
trainingRuntimeFactory, nil +} + +func (r *TrainingRuntime) NewObjects(ctx context.Context, trainJob *kubeflowv2.TrainJob) ([]client.Object, error) { + var trainingRuntime kubeflowv2.TrainingRuntime + err := r.client.Get(ctx, client.ObjectKey{Namespace: trainJob.Namespace, Name: trainJob.Spec.TrainingRuntimeRef.Name}, &trainingRuntime) + if err != nil { + return nil, err + } + return r.buildObjects(ctx, trainJob, trainingRuntime.Spec.Template, trainingRuntime.Spec.MLPolicy, trainingRuntime.Spec.PodGroupPolicy) +} + +func (r *TrainingRuntime) buildObjects(ctx context.Context, trainJob *kubeflowv2.TrainJob, jobSetTemplateSpec kubeflowv2.JobSetTemplateSpec, + mlPolicy *kubeflowv2.MLPolicy, podGroupPolicy *kubeflowv2.PodGroupPolicy) ([]client.Object, error) { + jobSet := runtimebuilder.NewJobSetBuilder(client.ObjectKeyFromObject(trainJob), jobSetTemplateSpec). + ContainerImage(trainJob.Spec.Trainer.Image). + Build() + jobSet, err := r.framework.RunEnforceMLPolicyPlugins(mlPolicy, jobSet) + if err != nil { + return nil, err + } + var podGroup client.Object + jobSet, podGroup, err = r.framework.RunEnforcePodGroupPolicyPlugins(ctx, client.ObjectKeyFromObject(trainJob), podGroupPolicy, jobSet) + if err != nil { + return nil, err + } + + var oldJobSet jobsetv1alpha2.JobSet + needsJobSetUpdate := false + if err = r.client.Get(ctx, client.ObjectKeyFromObject(jobSet), &oldJobSet); err != nil { + if apierrors.IsNotFound(err) { + needsJobSetUpdate = true + } + return nil, err + } else { + needsJobSetUpdate = !equality.Semantic.DeepEqual(oldJobSet.Spec, jobSet.Spec) || + !maps.Equal(oldJobSet.Labels, jobSet.Labels) || !maps.Equal(oldJobSet.Annotations, jobSet.Annotations) + } + + var objs []client.Object + if needsJobSetUpdate { + objs = append(objs, jobSet) + } + if podGroup != nil { + objs = append(objs, podGroup) + } + for _, obj := range objs { + if err = ctrlutil.SetControllerReference(trainJob, obj, r.scheme); err != nil { + return nil, err + } + } + return objs, nil +} + 
+func (r *TrainingRuntime) EventHandlerRegistrars() []runtime.ReconcilerBuilder { + var builders []runtime.ReconcilerBuilder + for _, ex := range r.framework.WatchExtensionPlugins() { + builders = append(builders, ex.ReconcilerBuilders()...) + } + return builders +} diff --git a/pkg/runtime.v2/framework/builder/jobset.go b/pkg/runtime.v2/framework/builder/jobset.go new file mode 100644 index 0000000000..61c333d36f --- /dev/null +++ b/pkg/runtime.v2/framework/builder/jobset.go @@ -0,0 +1,67 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package builder + +import ( + "maps" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" +) + +type JobSetBuilder struct { + *jobsetv1alpha2.JobSet +} + +func NewJobSetBuilder(objectKey client.ObjectKey, jobSetTemplateSpec kubeflowv2.JobSetTemplateSpec) *JobSetBuilder { + return &JobSetBuilder{ + &jobsetv1alpha2.JobSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: jobsetv1alpha2.SchemeGroupVersion.String(), + Kind: "JobSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: objectKey.Namespace, + Name: objectKey.Name, + Labels: maps.Clone(jobSetTemplateSpec.Labels), + Annotations: maps.Clone(jobSetTemplateSpec.Annotations), + }, + Spec: *jobSetTemplateSpec.Spec.DeepCopy(), + }, + } +} + +func (b *JobSetBuilder) ContainerImage(image *string) *JobSetBuilder { + if image == nil || *image == "" { + return b + } + for i, rJob := range b.Spec.ReplicatedJobs { + for j := range rJob.Template.Spec.Template.Spec.Containers { + b.Spec.ReplicatedJobs[i].Template.Spec.Template.Spec.Containers[j].Image = *image + } + } + return b +} + +// TODO: Need to support all TrainJob field. + +func (b *JobSetBuilder) Build() *jobsetv1alpha2.JobSet { + return b.JobSet +} diff --git a/pkg/runtime.v2/framework/core/framework.go b/pkg/runtime.v2/framework/core/framework.go new file mode 100644 index 0000000000..dee642ebf3 --- /dev/null +++ b/pkg/runtime.v2/framework/core/framework.go @@ -0,0 +1,160 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/util/validation/field" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + "github.com/kubeflow/training-operator/pkg/runtime.v2/framework" + fwkplugins "github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins" +) + +type Framework struct { + pluginRegistry fwkplugins.Registry + plugins map[string]framework.Plugin + enforceMLPlugins []framework.EnforceMLPolicyPlugin + enforcePodGroupPolicyPlugins []framework.EnforcePodGroupPolicyPlugin + customValidationPlugins []framework.CustomValidationPlugin + watchExtensionPlugins []framework.WatchExtensionPlugin + podTemplateSpecPlugins []framework.PodTemplateSpecsPlugin +} + +func New(ctx context.Context, client client.Client, restMapper meta.RESTMapper, pluginRegistry fwkplugins.Registry, indexer client.FieldIndexer) (*Framework, error) { + f := &Framework{ + pluginRegistry: pluginRegistry, + } + plugins := make(map[string]framework.Plugin, len(pluginRegistry)) + + for name, factory := range pluginRegistry { + plugin, err := factory(ctx, client, restMapper, indexer) + if err != nil { + return nil, err + } + plugins[name] = plugin + if p, ok := plugin.(framework.EnforceMLPolicyPlugin); ok { + f.enforceMLPlugins = append(f.enforceMLPlugins, p) + } + if p, ok := 
plugin.(framework.EnforcePodGroupPolicyPlugin); ok { + f.enforcePodGroupPolicyPlugins = append(f.enforcePodGroupPolicyPlugins, p) + } + if p, ok := plugin.(framework.CustomValidationPlugin); ok { + f.customValidationPlugins = append(f.customValidationPlugins, p) + } + if p, ok := plugin.(framework.WatchExtensionPlugin); ok { + f.watchExtensionPlugins = append(f.watchExtensionPlugins, p) + } + if p, ok := plugin.(framework.PodTemplateSpecsPlugin); ok { + f.podTemplateSpecPlugins = append(f.podTemplateSpecPlugins, p) + } + } + f.plugins = plugins + return f, nil +} + +func (f *Framework) RunEnforceMLPolicyPlugins(mlPolicy *kubeflowv2.MLPolicy, jobSet *jobsetv1alpha2.JobSet) (*jobsetv1alpha2.JobSet, error) { + for _, plugin := range f.enforceMLPlugins { + var err error + jobSet, err = plugin.EnforceMLPolicy(mlPolicy, jobSet) + if err != nil { + return nil, err + } + } + return jobSet, nil +} + +func (f *Framework) RunEnforcePodGroupPolicyPlugins( + ctx context.Context, + trainJobKey client.ObjectKey, + podGroupPolicy *kubeflowv2.PodGroupPolicy, + podTemplateSpecs []corev1.PodTemplateSpec, + podResourceTotals map[string]framework.PodResourceRequestTotal, +) ([]corev1.PodTemplateSpec, client.Object, error) { + var podGroup client.Object + for _, plugin := range f.enforcePodGroupPolicyPlugins { + var err error + var pg client.Object + podTemplateSpecs, pg, err = plugin.EnforcePodGroupPolicy(ctx, trainJobKey, podGroupPolicy, podTemplateSpecs, podResourceTotals) + if err != nil { + return nil, nil, err + } + if pg != nil { + if podGroup != nil { + return nil, nil, fmt.Errorf("output PodGroup must be only one") + } + podGroup = pg + } + } + return podTemplateSpecs, podGroup, nil +} + +func (f *Framework) RunCustomValidationPlugins(oldObj, newObj client.Object) (admission.Warnings, error) { + var aggregatedWarnings admission.Warnings + var aggregatedErrors field.ErrorList + for _, plugin := range f.customValidationPlugins { + warnings, errs := plugin.Validate(oldObj, newObj) 
+ if len(warnings) != 0 { + aggregatedWarnings = append(aggregatedWarnings, warnings...) + } + if errs != nil { + aggregatedErrors = append(aggregatedErrors, errs...) + } + } + if len(aggregatedErrors) == 0 { + return aggregatedWarnings, nil + } + return aggregatedWarnings, aggregatedErrors.ToAggregate() +} + +func (f *Framework) RunPodTemplateSpecPlugins(object client.Object) ([]corev1.PodTemplateSpec, map[string]framework.PodResourceRequestTotal, error) { + var podTemplateSpecs []corev1.PodTemplateSpec + totalResourceRequest := make(map[string]framework.PodResourceRequestTotal) + for _, plugin := range f.podTemplateSpecPlugins { + podTemplateSpec, err := plugin.PodTemplateSpecs(object) + if err != nil { + return nil, nil, err + } + // TODO: Implement a mechanism to merge multiple podTemplateSpecs to a single one. + if len(podTemplateSpec) != 0 { + if len(podTemplateSpecs) != 0 { + return nil, nil, fmt.Errorf("output PodTemplateSpec must be only one") + } + podTemplateSpecs = podTemplateSpec + } + total, err := plugin.PodResourceRequestTotals(object) + if err != nil { + return nil, nil, err + } + // TODO: Allow merging multiple resources into one. 
+ if total != nil { + totalResourceRequest = total + } + } + return podTemplateSpecs, totalResourceRequest, nil +} + +func (f *Framework) WatchExtensionPlugins() []framework.WatchExtensionPlugin { + return f.watchExtensionPlugins +} diff --git a/pkg/runtime.v2/framework/core/framework_test.go b/pkg/runtime.v2/framework/core/framework_test.go new file mode 100644 index 0000000000..eb92a1f83f --- /dev/null +++ b/pkg/runtime.v2/framework/core/framework_test.go @@ -0,0 +1,81 @@ +package core + +import ( + "context" + fwkplugins "github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "k8s.io/apimachinery/pkg/api/meta" + "sigs.k8s.io/controller-runtime/pkg/client" + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + testingutil "github.com/kubeflow/training-operator/pkg/util.v2/testing" +) + +func TestRunEnforceMLPolicyPlugins(t *testing.T) { + cases := map[string]struct { + pluginRegistry fwkplugins.Registry + mlPolicy *kubeflowv2.MLPolicy + jobSet *jobsetv1alpha2.JobSet + wantJobSet *jobsetv1alpha2.JobSet + wantError error + }{} + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + clientBuilder := testingutil.NewClientBuilder() + restMapper := meta.NewDefaultRESTMapper(nil) + + fwk, err := New(ctx, clientBuilder.Build(), restMapper, tc.pluginRegistry, testingutil.AsIndex(clientBuilder)) + if err != nil { + t.Fatal(err) + } + gotJobSet, gotErr := fwk.RunEnforceMLPolicyPlugins(tc.mlPolicy, tc.jobSet) + if diff := cmp.Diff(tc.wantError, gotErr, cmpopts.EquateErrors()); len(diff) != 0 { + t.Errorf("Unexpected error (-want,+got): %s", diff) + } + if diff := cmp.Diff(tc.wantJobSet, gotJobSet); len(diff) != 0 { + t.Errorf("Unexpected JobSet (-want,+got): %s", diff) + } + }) + } +} + 
+func TestRunEnforcePodGroupPolicyPlugins(t *testing.T) { + cases := map[string]struct { + trainJobKey client.ObjectKey + podGroupPolicy *kubeflowv2.PodGroupPolicy + jobSet *jobsetv1alpha2.JobSet + pluginRegistry fwkplugins.Registry + wantJobSet *jobsetv1alpha2.JobSet + wantPodGroup client.Object + wantError error + }{} + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + clientBuilder := testingutil.NewClientBuilder() + restMapper := meta.NewDefaultRESTMapper(nil) + + fwk, err := New(ctx, clientBuilder.Build(), restMapper, tc.pluginRegistry, testingutil.AsIndex(clientBuilder)) + if err != nil { + t.Fatal(err) + } + gotJobSet, gotPodGroup, gotErr := fwk.RunEnforcePodGroupPolicyPlugins(ctx, tc.trainJobKey, tc.podGroupPolicy, tc.jobSet) + if diff := cmp.Diff(tc.wantError, gotErr, cmpopts.EquateErrors()); len(diff) != 0 { + t.Errorf("Unexpected error (-want,+got): %s", diff) + } + if diff := cmp.Diff(tc.wantPodGroup, gotPodGroup); len(diff) != 0 { + t.Errorf("Unexpected PodGroup (-want,+got): %s", diff) + } + if diff := cmp.Diff(tc.wantJobSet, gotJobSet); len(diff) != 0 { + t.Errorf("Unexpected JobSet (-want,+got): %s", diff) + } + }) + } +} diff --git a/pkg/runtime.v2/framework/interface.go b/pkg/runtime.v2/framework/interface.go new file mode 100644 index 0000000000..bcf9fb45a8 --- /dev/null +++ b/pkg/runtime.v2/framework/interface.go @@ -0,0 +1,66 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package framework
+
+import (
+	"context"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/validation/field"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+	jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2"
+
+	kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
+	runtime "github.com/kubeflow/training-operator/pkg/runtime.v2"
+)
+
+// Plugin is the base interface that every extension-point interface in this
+// file embeds.
+type Plugin interface {
+	// Name returns the plugin's name.
+	Name() string
+}
+
+// WatchExtensionPlugin is implemented by plugins that contribute
+// ReconcilerBuilder functions (for example extra Owns/Watches registrations).
+type WatchExtensionPlugin interface {
+	Plugin
+	ReconcilerBuilders() []runtime.ReconcilerBuilder
+}
+
+// EnforcePodGroupPolicyPlugin is implemented by plugins that apply a
+// PodGroupPolicy. EnforcePodGroupPolicy receives the pod templates and the
+// per-entry resource totals and returns the (possibly modified) pod templates
+// together with an optional object; a nil object means the plugin produced
+// nothing.
+type EnforcePodGroupPolicyPlugin interface {
+	Plugin
+	EnforcePodGroupPolicy(ctx context.Context, podGroupKey client.ObjectKey, pgPolicy *kubeflowv2.PodGroupPolicy, podTemplateSpec []corev1.PodTemplateSpec,
+		podResourceTotal map[string]PodResourceRequestTotal) ([]corev1.PodTemplateSpec, client.Object, error)
+}
+
+// EnforceMLPolicyPlugin is implemented by plugins that apply an MLPolicy to a
+// JobSet and return the resulting JobSet.
+type EnforceMLPolicyPlugin interface {
+	Plugin
+	EnforceMLPolicy(mlPolicy *kubeflowv2.MLPolicy, jobSet *jobsetv1alpha2.JobSet) (*jobsetv1alpha2.JobSet, error)
+}
+
+// CustomValidationPlugin is implemented by plugins that validate an object
+// transition and report admission warnings and field errors.
+type CustomValidationPlugin interface {
+	Plugin
+	Validate(oldObj, newObj client.Object) (admission.Warnings, field.ErrorList)
+}
+
+// PodTemplateSpecsPlugin is implemented by plugins that can extract pod
+// templates and per-entry resource request totals from an object.
+type PodTemplateSpecsPlugin interface {
+	Plugin
+	PodTemplateSpecs(object client.Object) ([]corev1.PodTemplateSpec, error)
+	PodResourceRequestTotals(object client.Object) (map[string]PodResourceRequestTotal, error)
+}
+
+// PodResourceRequestTotal pairs a replica count with the resource requests of
+// a single pod of that group.
+type PodResourceRequestTotal struct {
+	Replicas            int32
+	PodResourceRequests corev1.ResourceList
+}
diff --git a/pkg/runtime.v2/framework/plugins/coscheduling/coscheduling.go b/pkg/runtime.v2/framework/plugins/coscheduling/coscheduling.go
new file mode 100644
index 0000000000..9bc3b81c4d
--- /dev/null
+++ b/pkg/runtime.v2/framework/plugins/coscheduling/coscheduling.go
@@ -0,0 +1,302 @@
+/*
+Copyright 2024 The Kubeflow Authors.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package coscheduling + +import ( + "context" + "fmt" + "maps" + + corev1 "k8s.io/api/core/v1" + nodev1 "k8s.io/api/node/v1" + "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" + "github.com/kubeflow/training-operator/pkg/runtime.v2/framework" + runtimeindexer "github.com/kubeflow/training-operator/pkg/runtime.v2/indexer" +) + +type CoScheduling struct { + client client.Client + restMapper meta.RESTMapper +} + +var _ framework.EnforcePodGroupPolicyPlugin = (*CoScheduling)(nil) +var _ framework.WatchExtensionPlugin = (*CoScheduling)(nil) + +const Name = "CoScheduling" + +func New(ctx context.Context, client client.Client, restMapper meta.RESTMapper, indexer client.FieldIndexer) (framework.Plugin, error) { + if err := indexer.IndexField(ctx, 
&kubeflowv2.TrainingRuntime{}, TrainingRuntimeContainerRuntimeClassKey,
+		IndexTrainingRuntimeContainerRuntimeClass); err != nil {
+		return nil, fmt.Errorf("setting index on runtimeClass for TrainingRuntime: %w", err)
+	}
+	if err := indexer.IndexField(ctx, &kubeflowv2.ClusterTrainingRuntime{}, ClusterTrainingRuntimeContainerRuntimeClassKey,
+		IndexClusterTrainingRuntimeContainerRuntimeClass); err != nil {
+		return nil, fmt.Errorf("setting index on runtimeClass for ClusterTrainingRuntime: %w", err)
+	}
+	return &CoScheduling{
+		client:     client,
+		restMapper: restMapper,
+	}, nil
+}
+
+// Name returns the plugin name.
+func (c *CoScheduling) Name() string {
+	return Name
+}
+
+// EnforcePodGroupPolicy applies the coscheduling part of pgPolicy.
+//
+// When pgPolicy, its Coscheduling section, or podTemplatesSpecs is nil the
+// input is returned untouched. Otherwise every pod template is labelled with
+// the PodGroup it belongs to and the desired PodGroup is computed from
+// podResourceTotal. The PodGroup is returned only when it does not exist yet
+// or when its spec, labels, or annotations differ from the stored object;
+// a nil object means nothing has to be written.
+func (c *CoScheduling) EnforcePodGroupPolicy(
+	ctx context.Context,
+	podGroupKey client.ObjectKey,
+	pgPolicy *kubeflowv2.PodGroupPolicy,
+	podTemplatesSpecs []corev1.PodTemplateSpec,
+	podResourceTotal map[string]framework.PodResourceRequestTotal,
+) ([]corev1.PodTemplateSpec, client.Object, error) {
+	if pgPolicy == nil || pgPolicy.Coscheduling == nil || podTemplatesSpecs == nil {
+		return podTemplatesSpecs, nil, nil
+	}
+
+	// Index into the slice: ranging by value would attach a freshly created
+	// label map to a copy and silently drop it.
+	for i := range podTemplatesSpecs {
+		if podTemplatesSpecs[i].Labels == nil {
+			podTemplatesSpecs[i].Labels = make(map[string]string, 1)
+		}
+		// The label must carry the PodGroup's name, which newPodGroup sets to podGroupKey.Name.
+		podTemplatesSpecs[i].Labels[schedulerpluginsv1alpha1.PodGroupLabel] = podGroupKey.Name
+	}
+	newPG := newPodGroup(podGroupKey, pgPolicy, podResourceTotal)
+	var oldPG schedulerpluginsv1alpha1.PodGroup
+	needsUpdate := false
+	if err := c.client.Get(ctx, client.ObjectKeyFromObject(newPG), &oldPG); err != nil {
+		if !apierrors.IsNotFound(err) {
+			return nil, nil, err
+		}
+		// No PodGroup yet: it has to be created.
+		needsUpdate = true
+	} else {
+		needsUpdate = !equality.Semantic.DeepEqual(newPG.Spec, oldPG.Spec) ||
+			!maps.Equal(newPG.Labels, oldPG.Labels) || !maps.Equal(newPG.Annotations, oldPG.Annotations)
+	}
+	if needsUpdate {
+		return podTemplatesSpecs, newPG, nil
+	}
+	return podTemplatesSpecs, nil, nil
+}
+
+func newPodGroup(
+	podGroupKey client.ObjectKey,
+	policy *kubeflowv2.PodGroupPolicy,
+ podResourceTotal map[string]framework.PodResourceRequestTotal, +) *schedulerpluginsv1alpha1.PodGroup { + var totalMembers int32 + totalResources := make(corev1.ResourceList) + for _, rJobResource := range podResourceTotal { + totalMembers += rJobResource.Replicas + for resName, quantity := range rJobResource.PodResourceRequests { + quantity.Mul(int64(rJobResource.Replicas)) + current := totalResources[resName] + current.Add(quantity) + totalResources[resName] = current + } + } + return &schedulerpluginsv1alpha1.PodGroup{ + TypeMeta: metav1.TypeMeta{ + APIVersion: schedulerpluginsv1alpha1.SchemeGroupVersion.String(), + Kind: "PodGroup", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: podGroupKey.Name, + Namespace: podGroupKey.Namespace, + }, + Spec: schedulerpluginsv1alpha1.PodGroupSpec{ + ScheduleTimeoutSeconds: policy.Coscheduling.ScheduleTimeoutSeconds, + MinMember: totalMembers, + MinResources: totalResources, + }, + } +} + +type PodGroupRuntimeClassHandler struct { + client client.Client +} + +var _ handler.EventHandler = (*PodGroupRuntimeClassHandler)(nil) + +func (h *PodGroupRuntimeClassHandler) Create(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) { + containerRuntimeClass, ok := e.Object.(*nodev1.RuntimeClass) + if !ok { + return + } + log := ctrl.LoggerFrom(ctx).WithValues("runtimeClass", klog.KObj(containerRuntimeClass)) + if err := h.queueSuspendedTrainJob(ctx, containerRuntimeClass, q); err != nil { + log.Error(err, "could not queue suspended TrainJob to reconcile queue") + } +} + +func (h *PodGroupRuntimeClassHandler) Update(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { + _, ok := e.ObjectOld.(*nodev1.RuntimeClass) + if !ok { + return + } + newContainerRuntimeClass, ok := e.ObjectNew.(*nodev1.RuntimeClass) + if !ok { + return + } + log := ctrl.LoggerFrom(ctx).WithValues("runtimeClass", klog.KObj(newContainerRuntimeClass)) + if err := h.queueSuspendedTrainJob(ctx, newContainerRuntimeClass, 
q); err != nil { + log.Error(err, "could not queue suspended TrainJob to reconcile queue") + } +} + +func (h *PodGroupRuntimeClassHandler) Delete(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) { + containerRuntimeClass, ok := e.Object.(*nodev1.RuntimeClass) + if !ok { + return + } + log := ctrl.LoggerFrom(ctx).WithValues("runtimeClass", klog.KObj(containerRuntimeClass)) + if err := h.queueSuspendedTrainJob(ctx, containerRuntimeClass, q); err != nil { + log.Error(err, "could not queue suspended TrainJob to reconcile queue") + } +} + +func (h *PodGroupRuntimeClassHandler) Generic(context.Context, event.GenericEvent, workqueue.RateLimitingInterface) { +} + +func (h *PodGroupRuntimeClassHandler) queueSuspendedTrainJob(ctx context.Context, runtimeClass *nodev1.RuntimeClass, q workqueue.RateLimitingInterface) error { + var trainingRuntimes kubeflowv2.TrainingRuntimeList + if err := h.client.List(ctx, &trainingRuntimes, client.MatchingFields{TrainingRuntimeContainerRuntimeClassKey: runtimeClass.Name}); err != nil { + return err + } + var clusterTrainingRuntimes kubeflowv2.ClusterTrainingRuntimeList + if err := h.client.List(ctx, &clusterTrainingRuntimes, client.MatchingFields{ClusterTrainingRuntimeContainerRuntimeClassKey: runtimeClass.Name}); err != nil { + return err + } + + var runtimeNames []string + for _, trainingRuntime := range trainingRuntimes.Items { + runtimeNames = append(runtimeNames, trainingRuntime.Name) + } + for _, clusterTrainingRuntime := range clusterTrainingRuntimes.Items { + runtimeNames = append(runtimeNames, clusterTrainingRuntime.Name) + } + for _, runtimeName := range runtimeNames { + var trainJobs kubeflowv2.TrainJobList + if err := h.client.List(ctx, &trainJobs, client.MatchingFields{runtimeindexer.TrainJobTrainingRuntimeRefKey: runtimeName}); err != nil { + return err + } + for _, trainJob := range trainJobs.Items { + if ptr.Deref(trainJob.Spec.Suspend, false) { + q.Add(client.ObjectKeyFromObject(&trainJob)) + } + 
} + } + return nil +} + +type PodGroupLimitRangeHandler struct { + client client.Client +} + +var _ handler.EventHandler = (*PodGroupLimitRangeHandler)(nil) + +func (h *PodGroupLimitRangeHandler) Create(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) { + limitRange, ok := e.Object.(*corev1.LimitRange) + if !ok { + return + } + log := ctrl.LoggerFrom(ctx).WithValues("limitRange", klog.KObj(limitRange)) + if err := h.queueSuspendedTrainJob(ctx, limitRange.Namespace, q); err != nil { + log.Error(err, "could not queue suspended TrainJob to reconcile queue") + } +} + +func (h *PodGroupLimitRangeHandler) Update(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { + _, ok := e.ObjectOld.(*corev1.LimitRange) + if !ok { + return + } + newLimitRange, ok := e.ObjectNew.(*corev1.LimitRange) + if !ok { + return + } + log := ctrl.LoggerFrom(ctx).WithValues("limitRange", klog.KObj(newLimitRange)) + if err := h.queueSuspendedTrainJob(ctx, newLimitRange.Namespace, q); err != nil { + log.Error(err, "could not queue suspended TrainJob to reconcile queue") + } +} + +func (h *PodGroupLimitRangeHandler) Delete(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) { + limitRange, ok := e.Object.(*corev1.LimitRange) + if !ok { + return + } + log := ctrl.LoggerFrom(ctx).WithValues("limitRange", klog.KObj(limitRange)) + if err := h.queueSuspendedTrainJob(ctx, limitRange.Namespace, q); err != nil { + log.Error(err, "could not queue suspended TrainJob to reconcile queue") + } +} + +func (h *PodGroupLimitRangeHandler) Generic(context.Context, event.GenericEvent, workqueue.RateLimitingInterface) { +} + +func (h *PodGroupLimitRangeHandler) queueSuspendedTrainJob(ctx context.Context, ns string, q workqueue.RateLimitingInterface) error { + var trainJobs kubeflowv2.TrainJobList + if err := h.client.List(ctx, &trainJobs, client.InNamespace(ns)); err != nil { + return err + } + for _, trainJob := range trainJobs.Items { + 
if ptr.Deref(trainJob.Spec.Suspend, false) { + q.Add(client.ObjectKeyFromObject(&trainJob)) + } + } + return nil +} + +func (c *CoScheduling) ReconcilerBuilders() []runtime.ReconcilerBuilder { + if _, err := c.restMapper.RESTMapping( + schema.GroupKind{Group: schedulerpluginsv1alpha1.SchemeGroupVersion.Group, Kind: "PodGroup"}, + schedulerpluginsv1alpha1.SchemeGroupVersion.Version, + ); err != nil { + return nil + } + return []runtime.ReconcilerBuilder{ + func(b *builder.Builder, c client.Client) *builder.Builder { + return b.Owns(&schedulerpluginsv1alpha1.PodGroup{}) + }, + func(b *builder.Builder, c client.Client) *builder.Builder { + return b.Watches(&corev1.LimitRange{}, &PodGroupLimitRangeHandler{ + client: c, + }) + }, + func(b *builder.Builder, c client.Client) *builder.Builder { + return b.Watches(&nodev1.RuntimeClass{}, &PodGroupRuntimeClassHandler{ + client: c, + }) + }, + } +} diff --git a/pkg/runtime.v2/framework/plugins/coscheduling/coscheduling_test.go b/pkg/runtime.v2/framework/plugins/coscheduling/coscheduling_test.go new file mode 100644 index 0000000000..9e56da09ae --- /dev/null +++ b/pkg/runtime.v2/framework/plugins/coscheduling/coscheduling_test.go @@ -0,0 +1 @@ +package coscheduling diff --git a/pkg/runtime.v2/framework/plugins/coscheduling/indexer.go b/pkg/runtime.v2/framework/plugins/coscheduling/indexer.go new file mode 100644 index 0000000000..723f3fa708 --- /dev/null +++ b/pkg/runtime.v2/framework/plugins/coscheduling/indexer.go @@ -0,0 +1,56 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package coscheduling + +import ( + "sigs.k8s.io/controller-runtime/pkg/client" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" +) + +const ( + TrainingRuntimeContainerRuntimeClassKey = ".trainingRuntimeSpec.jobSetTemplateSpec.replicatedJobs.podTemplateSpec.runtimeClassName" + ClusterTrainingRuntimeContainerRuntimeClassKey = ".clusterTrainingRuntimeSpec.jobSetTemplateSpec.replicatedJobs.podTemplateSpec.runtimeClassName" +) + +func IndexTrainingRuntimeContainerRuntimeClass(obj client.Object) []string { + runtime, ok := obj.(*kubeflowv2.TrainingRuntime) + if !ok { + return nil + } + var runtimeClasses []string + for _, rJob := range runtime.Spec.Template.Spec.ReplicatedJobs { + if rJob.Template.Spec.Template.Spec.RuntimeClassName != nil { + runtimeClasses = append(runtimeClasses, *rJob.Template.Spec.Template.Spec.RuntimeClassName) + } + } + return runtimeClasses +} + +func IndexClusterTrainingRuntimeContainerRuntimeClass(obj client.Object) []string { + clRuntime, ok := obj.(*kubeflowv2.ClusterTrainingRuntime) + if !ok { + return nil + } + var runtimeClasses []string + for _, rJob := range clRuntime.Spec.Template.Spec.ReplicatedJobs { + if rJob.Template.Spec.Template.Spec.RuntimeClassName != nil { + runtimeClasses = append(runtimeClasses, *rJob.Template.Spec.Template.Spec.RuntimeClassName) + } + } + return runtimeClasses +} diff --git a/pkg/runtime.v2/framework/plugins/jobset/jobset.go b/pkg/runtime.v2/framework/plugins/jobset/jobset.go new file mode 100644 index 0000000000..dff48d3c77 --- /dev/null +++ b/pkg/runtime.v2/framework/plugins/jobset/jobset.go @@ -0,0 +1,78 @@ +package jobset + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/builder" + 
"sigs.k8s.io/controller-runtime/pkg/client" + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + kueuelr "sigs.k8s.io/kueue/pkg/util/limitrange" + + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" + "github.com/kubeflow/training-operator/pkg/runtime.v2/framework" +) + +type JobSet struct { + restMapper meta.RESTMapper +} + +var _ framework.WatchExtensionPlugin = (*JobSet)(nil) +var _ framework.PodTemplateSpecsPlugin = (*JobSet)(nil) + +const Name = "JobSet" + +func New(_ context.Context, _ client.Client, restMapper meta.RESTMapper, _ client.FieldIndexer) (framework.Plugin, error) { + return &JobSet{ + restMapper: restMapper, + }, nil +} + +func (j *JobSet) Name() string { + return Name +} + +func (j *JobSet) PodTemplateSpecs(object client.Object) ([]corev1.PodTemplateSpec, error) { + jobSet, ok := object.(*jobsetv1alpha2.JobSet) + if !ok { + return nil, nil + } + var podTemplateSpecs []corev1.PodTemplateSpec + for _, rJob := range jobSet.Spec.ReplicatedJobs { + podTemplateSpecs = append(podTemplateSpecs, rJob.Template.Spec.Template) + } + return podTemplateSpecs, nil +} + +func (j *JobSet) PodResourceRequestTotals(object client.Object) (map[string]framework.PodResourceRequestTotal, error) { + jobSet, ok := object.(*jobsetv1alpha2.JobSet) + if !ok { + return nil, nil + } + podResourceTotals := make(map[string]framework.PodResourceRequestTotal, len(jobSet.Spec.ReplicatedJobs)) + for _, rJob := range jobSet.Spec.ReplicatedJobs { + podResourceTotals[rJob.Name] = framework.PodResourceRequestTotal{ + Replicas: rJob.Replicas * ptr.Deref(rJob.Template.Spec.Completions, 1), + // TODO: Need to address LimitRange and RuntimeClass. 
+ PodResourceRequests: kueuelr.TotalRequests(&rJob.Template.Spec.Template.Spec), + } + } + return podResourceTotals, nil +} + +func (j *JobSet) ReconcilerBuilders() []runtime.ReconcilerBuilder { + if _, err := j.restMapper.RESTMapping( + schema.GroupKind{Group: jobsetv1alpha2.GroupVersion.Group, Kind: "JobSet"}, + jobsetv1alpha2.SchemeGroupVersion.Version, + ); err != nil { + return nil + } + return []runtime.ReconcilerBuilder{ + func(b *builder.Builder, c client.Client) *builder.Builder { + return b.Owns(&jobsetv1alpha2.JobSet{}) + }, + } +} diff --git a/pkg/runtime.v2/framework/plugins/mpi/mpi.go b/pkg/runtime.v2/framework/plugins/mpi/mpi.go new file mode 100644 index 0000000000..c360877ebc --- /dev/null +++ b/pkg/runtime.v2/framework/plugins/mpi/mpi.go @@ -0,0 +1,62 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package mpi
+
+import (
+	"context"
+
+	"k8s.io/apimachinery/pkg/api/meta"
+	"k8s.io/apimachinery/pkg/util/validation/field"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+	jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2"
+
+	kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework"
+)
+
+// MPI is the plugin for the MPI section of an MLPolicy.
+type MPI struct {
+	client client.Client
+}
+
+var _ framework.EnforceMLPolicyPlugin = (*MPI)(nil)
+var _ framework.CustomValidationPlugin = (*MPI)(nil)
+
+// Name is the key under which this plugin is registered.
+const Name = "MPI"
+
+// New creates the MPI plugin. Only the client is retained; the context,
+// REST mapper and field indexer are unused.
+func New(_ context.Context, client client.Client, _ meta.RESTMapper, _ client.FieldIndexer) (framework.Plugin, error) {
+	return &MPI{
+		client: client,
+	}, nil
+}
+
+// Name returns the plugin name.
+func (m *MPI) Name() string {
+	return Name
+}
+
+// EnforceMLPolicy applies the MPI section of mlPolicy to jobSet.
+// The JobSet is returned as-is when mlPolicy, jobSet, or mlPolicy.MPI is nil.
+// The MPI-specific mutation is not implemented yet, so the JobSet is also
+// returned unchanged in that case rather than being replaced by nil, which
+// would hand a nil JobSet (with a nil error) to the rest of the plugin chain.
+func (m *MPI) EnforceMLPolicy(mlPolicy *kubeflowv2.MLPolicy, jobSet *jobsetv1alpha2.JobSet) (*jobsetv1alpha2.JobSet, error) {
+	if mlPolicy == nil || jobSet == nil || mlPolicy.MPI == nil {
+		return jobSet, nil
+	}
+	// TODO: Need to implement main logic.
+	return jobSet, nil
+}
+
+// Validate currently accepts every object and reports no warnings.
+// TODO: Need to implement validations for MPIJob.
+func (m *MPI) Validate(oldObj, newObj client.Object) (admission.Warnings, field.ErrorList) {
+	return nil, nil
+}
diff --git a/pkg/runtime.v2/framework/plugins/plainml/plainml.go b/pkg/runtime.v2/framework/plugins/plainml/plainml.go
new file mode 100644
index 0000000000..325a584534
--- /dev/null
+++ b/pkg/runtime.v2/framework/plugins/plainml/plainml.go
@@ -0,0 +1,58 @@
+/*
+Copyright 2024 The Kubeflow Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package plainml
+
+import (
+	"context"
+
+	batchv1 "k8s.io/api/batch/v1"
+	"k8s.io/apimachinery/pkg/api/meta"
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2"
+
+	kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework"
+)
+
+var _ framework.EnforceMLPolicyPlugin = (*PlainML)(nil)
+
+// PlainML is the plugin that handles an MLPolicy carrying neither a Torch nor
+// an MPI section.
+type PlainML struct{}
+
+// Name is the key under which this plugin is registered.
+const Name = "PlainML"
+
+// New creates the PlainML plugin. None of the arguments are used.
+func New(context.Context, client.Client, meta.RESTMapper, client.FieldIndexer) (framework.Plugin, error) {
+	return &PlainML{}, nil
+}
+
+// Name returns the plugin name.
+func (p *PlainML) Name() string {
+	return Name
+}
+
+// EnforceMLPolicy sizes every replicated job from mlPolicy.NumNodes
+// (1 when unset): one replica running an Indexed job whose completions and
+// parallelism both equal the node count. The JobSet is returned untouched
+// when mlPolicy or jobSet is nil, or when the policy has a Torch or MPI
+// section. The JobSet is modified in place and returned.
+func (p *PlainML) EnforceMLPolicy(mlPolicy *kubeflowv2.MLPolicy, jobSet *jobsetv1alpha2.JobSet) (*jobsetv1alpha2.JobSet, error) {
+	if mlPolicy == nil || jobSet == nil || mlPolicy.Torch != nil || mlPolicy.MPI != nil {
+		return jobSet, nil
+	}
+	numNodes := ptr.Deref(mlPolicy.NumNodes, 1)
+	for i := range jobSet.Spec.ReplicatedJobs {
+		jobSpec := &jobSet.Spec.ReplicatedJobs[i].Template.Spec
+		jobSet.Spec.ReplicatedJobs[i].Replicas = 1
+		jobSpec.CompletionMode = ptr.To(batchv1.IndexedCompletion)
+		// Give every field its own pointer; sharing one address across all
+		// jobs would make a later write to one field change all of them.
+		jobSpec.Completions = ptr.To(numNodes)
+		jobSpec.Parallelism = ptr.To(numNodes)
+	}
+	return jobSet, nil
+}
diff --git a/pkg/runtime.v2/framework/plugins/plainml/plainml_test.go b/pkg/runtime.v2/framework/plugins/plainml/plainml_test.go
new file mode 100644
index 0000000000..3c1762ed77
--- /dev/null
+++ 
b/pkg/runtime.v2/framework/plugins/plainml/plainml_test.go
@@ -0,0 +1 @@
+package plainml
diff --git a/pkg/runtime.v2/framework/plugins/registry.go b/pkg/runtime.v2/framework/plugins/registry.go
new file mode 100644
index 0000000000..7d7b08b59d
--- /dev/null
+++ b/pkg/runtime.v2/framework/plugins/registry.go
@@ -0,0 +1,43 @@
+/*
+Copyright 2024 The Kubeflow Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package plugins
+
+import (
+	"context"
+
+	"k8s.io/apimachinery/pkg/api/meta"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins/coscheduling"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins/jobset"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins/mpi"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins/plainml"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework/plugins/torch"
+)
+
+// Registry maps a plugin name to the factory function that constructs it.
+type Registry map[string]func(ctx context.Context, client client.Client, restMapper meta.RESTMapper, indexer client.FieldIndexer) (framework.Plugin, error)
+
+// NewRegistry returns a registry holding the factories of the five in-tree
+// plugins: CoScheduling, MPI, PlainML, Torch and JobSet.
+func NewRegistry() Registry {
+	registry := make(Registry, 5)
+	registry[coscheduling.Name] = coscheduling.New
+	registry[mpi.Name] = mpi.New
+	registry[plainml.Name] = plainml.New
+	registry[torch.Name] = torch.New
+	registry[jobset.Name] = jobset.New
+	return registry
+}
diff --git a/pkg/runtime.v2/framework/plugins/torch/torch.go b/pkg/runtime.v2/framework/plugins/torch/torch.go
new file 
mode 100644
index 0000000000..23e52120a8
--- /dev/null
+++ b/pkg/runtime.v2/framework/plugins/torch/torch.go
@@ -0,0 +1,58 @@
+/*
+Copyright 2024 The Kubeflow Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package torch
+
+import (
+	"context"
+
+	"k8s.io/apimachinery/pkg/api/meta"
+	"k8s.io/apimachinery/pkg/util/validation/field"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+	jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2"
+
+	kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
+	"github.com/kubeflow/training-operator/pkg/runtime.v2/framework"
+)
+
+// Torch is the plugin for the Torch section of an MLPolicy.
+type Torch struct{}
+
+var _ framework.EnforceMLPolicyPlugin = (*Torch)(nil)
+var _ framework.CustomValidationPlugin = (*Torch)(nil)
+
+// Name is the key under which this plugin is registered.
+const Name = "Torch"
+
+// New creates the Torch plugin. None of the arguments are used.
+func New(context.Context, client.Client, meta.RESTMapper, client.FieldIndexer) (framework.Plugin, error) {
+	return &Torch{}, nil
+}
+
+// Name returns the plugin name.
+func (t *Torch) Name() string {
+	return Name
+}
+
+// EnforceMLPolicy applies the Torch section of mlPolicy to jobSet.
+// The JobSet is returned as-is when mlPolicy, jobSet, or mlPolicy.Torch is
+// nil. The Torch-specific mutation is not implemented yet, so the JobSet is
+// also returned unchanged in that case rather than being replaced by nil,
+// which would hand a nil JobSet (with a nil error) to the rest of the chain.
+func (t *Torch) EnforceMLPolicy(mlPolicy *kubeflowv2.MLPolicy, jobSet *jobsetv1alpha2.JobSet) (*jobsetv1alpha2.JobSet, error) {
+	if mlPolicy == nil || jobSet == nil || mlPolicy.Torch == nil {
+		return jobSet, nil
+	}
+	// TODO: Need to implement main logic.
+	return jobSet, nil
+}
+
+// TODO: Need to implement validations for TorchJob.
+func (t *Torch) Validate(oldObj, newObj client.Object) (admission.Warnings, field.ErrorList) { + return nil, nil +} diff --git a/pkg/runtime.v2/indexer/indexer.go b/pkg/runtime.v2/indexer/indexer.go new file mode 100644 index 0000000000..9ba2c057f7 --- /dev/null +++ b/pkg/runtime.v2/indexer/indexer.go @@ -0,0 +1,45 @@ +/* +Copyright 2024 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package indexer + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" +) + +const ( + TrainJobTrainingRuntimeRefKey = ".spec.trainingRuntimeRef" +) + +func IndexTrainJobTrainingRuntimes(obj client.Object) []string { + trainJob, ok := obj.(*kubeflowv2.TrainJob) + if !ok { + return nil + } + runtimeRefGroupKind := schema.GroupKind{ + Group: ptr.Deref(trainJob.Spec.TrainingRuntimeRef.APIGroup, ""), + Kind: ptr.Deref(trainJob.Spec.TrainingRuntimeRef.Kind, ""), + } + if runtimeRefGroupKind.Group == kubeflowv2.GroupVersion.Group && + (runtimeRefGroupKind.Kind == "TrainingRuntime" || runtimeRefGroupKind.Kind == "ClusterTrainingRuntime") { + return []string{trainJob.Spec.TrainingRuntimeRef.Name} + } + return nil +} diff --git a/pkg/runtime.v2/interface.go b/pkg/runtime.v2/interface.go new file mode 100644 index 0000000000..d7b84e3f46 --- /dev/null +++ b/pkg/runtime.v2/interface.go @@ -0,0 +1,33 @@ +/* +Copyright 2024 The Kubeflow 
Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package runtimev2 + +import ( + "context" + + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" +) + +type ReconcilerBuilder func(*builder.Builder, client.Client) *builder.Builder + +type Runtime interface { + NewObjects(ctx context.Context, trainJob *kubeflowv2.TrainJob) ([]client.Object, error) + EventHandlerRegistrars() []ReconcilerBuilder +} diff --git a/pkg/util.v2/testing/client.go b/pkg/util.v2/testing/client.go new file mode 100644 index 0000000000..51f450c69a --- /dev/null +++ b/pkg/util.v2/testing/client.go @@ -0,0 +1,43 @@ +package testing + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + collectionsidxer "github.com/kubeflow/training-operator/pkg/runtime.v2/indexer" +) + +func NewClientBuilder(addToSchemes ...func(s *runtime.Scheme) error) *fake.ClientBuilder { + scm := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scm)) + utilruntime.Must(kubeflowv2.AddToScheme(scm)) +
utilruntime.Must(jobsetv1alpha2.AddToScheme(scm)) + for i := range addToSchemes { + utilruntime.Must(addToSchemes[i](scm)) + } + return fake.NewClientBuilder(). + WithScheme(scm). + WithIndex(&kubeflowv2.TrainJob{}, collectionsidxer.TrainJobTrainingRuntimeRefKey, collectionsidxer.IndexTrainJobTrainingRuntimes) +} + +type builderIndexer struct { + *fake.ClientBuilder +} + +var _ client.FieldIndexer = (*builderIndexer)(nil) + +func (b *builderIndexer) IndexField(_ context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error { + b.ClientBuilder = b.ClientBuilder.WithIndex(obj, field, extractValue) + return nil +} + +func AsIndex(builder *fake.ClientBuilder) client.FieldIndexer { + return &builderIndexer{ClientBuilder: builder} +} diff --git a/pkg/util.v2/testing/wrapper.go b/pkg/util.v2/testing/wrapper.go new file mode 100644 index 0000000000..0938e8806f --- /dev/null +++ b/pkg/util.v2/testing/wrapper.go @@ -0,0 +1,104 @@ +package testing + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + jobsetv1alpha2 "sigs.k8s.io/jobset/api/jobset/v1alpha2" + + kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" + "github.com/kubeflow/training-operator/pkg/runtime.v2/framework/builder" +) + +type JobSetWrapper struct { + *builder.JobSetBuilder +} + +func MakeJobSetWrapper(t *testing.T, namespace, name string) *JobSetWrapper { + t.Helper() + jobSetTemplateSpec := kubeflowv2.JobSetTemplateSpec{ + Spec: jobsetv1alpha2.JobSetSpec{ + ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{}, + }, + } + return &JobSetWrapper{ + JobSetBuilder: builder.NewJobSetBuilder(client.ObjectKey{ + Namespace: namespace, + Name: name, + }, jobSetTemplateSpec), + } +} + +type TrainJobWrapper struct { + *kubeflowv2.TrainJob +} + +func MakeTrainJobWrapper(t *testing.T, namespace, name string) *TrainJobWrapper { + t.Helper() + return &TrainJobWrapper{ + TrainJob: &kubeflowv2.TrainJob{ +
TypeMeta: metav1.TypeMeta{ + APIVersion: kubeflowv2.SchemeGroupVersion.String(), + Kind: "TrainJob", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: kubeflowv2.TrainJobSpec{}, + }, + } +} + +type TrainingRuntimeWrapper struct { + *kubeflowv2.TrainingRuntime +} + +func MakeTrainingRuntimeWrapper(t *testing.T, namespace, name string) *TrainingRuntimeWrapper { + t.Helper() + return &TrainingRuntimeWrapper{ + TrainingRuntime: &kubeflowv2.TrainingRuntime{ + TypeMeta: metav1.TypeMeta{ + APIVersion: kubeflowv2.SchemeGroupVersion.String(), + Kind: "TrainingRuntime", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: kubeflowv2.TrainingRuntimeSpec{}, + }, + } +} + +type ClusterTrainingRuntimeWrapper struct { + *kubeflowv2.ClusterTrainingRuntime +} + +func MakeClusterTrainingRuntimeWrapper(t *testing.T, name string) *ClusterTrainingRuntimeWrapper { + t.Helper() + return &ClusterTrainingRuntimeWrapper{ + ClusterTrainingRuntime: &kubeflowv2.ClusterTrainingRuntime{ + TypeMeta: metav1.TypeMeta{ + APIVersion: kubeflowv2.SchemeGroupVersion.String(), + Kind: "ClusterTrainingRuntime", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: kubeflowv2.TrainingRuntimeSpec{}, + }, + } +} + +type TrainingRuntimeSpecWrapper struct { + *kubeflowv2.TrainingRuntimeSpec +} + +func MakeTrainingRuntimeSpecWrapper(t *testing.T, spec kubeflowv2.TrainingRuntimeSpec) *TrainingRuntimeSpecWrapper { + t.Helper() + return &TrainingRuntimeSpecWrapper{ + TrainingRuntimeSpec: &spec, + } +} diff --git a/pkg/webhook.v2/webhook.go b/pkg/webhook.v2/webhook.go index 3d4970ef45..886fa8c65a 100644 --- a/pkg/webhook.v2/webhook.go +++ b/pkg/webhook.v2/webhook.go @@ -16,8 +16,12 @@ limitations under the License.
package webhookv2 -import ctrl "sigs.k8s.io/controller-runtime" +import ( + ctrl "sigs.k8s.io/controller-runtime" -func Setup(ctrl.Manager) (string, error) { + runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" +) + +func Setup(ctrl.Manager, map[string]runtime.Runtime) (string, error) { return "", nil } diff --git a/test/integration/framework/framework.go b/test/integration/framework/framework.go index 97d15246dd..a0ea44f83c 100644 --- a/test/integration/framework/framework.go +++ b/test/integration/framework/framework.go @@ -18,6 +18,7 @@ package framework import ( "context" + runtimecore "github.com/kubeflow/training-operator/pkg/runtime.v2/core" "path/filepath" "github.com/onsi/ginkgo/v2" @@ -73,8 +74,13 @@ func (f *Framework) RunManager(cfg *rest.Config) (context.Context, client.Client }) gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred(), "failed to create manager") - failedCtrlName, err := controllerv2.SetupControllers(mgr) + runtimes, err := runtimecore.New(ctx, mgr.GetClient(), mgr.GetRESTMapper(), mgr.GetFieldIndexer(), mgr.GetScheme()) + gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred()) + gomega.ExpectWithOffset(1, runtimes).NotTo(gomega.BeNil()) + + failedCtrlName, err := controllerv2.SetupControllers(mgr, runtimes) gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred(), "controller", failedCtrlName) + gomega.ExpectWithOffset(1, failedCtrlName).To(gomega.BeEmpty()) go func() { defer ginkgo.GinkgoRecover()