From 9edc66685ccdbe3b3d2952df7768847950356891 Mon Sep 17 00:00:00 2001 From: Jeff Ohrstrom Date: Fri, 9 Aug 2024 11:29:50 -0400 Subject: [PATCH 1/3] add support for specifying cores for the adapters that can support it --- lib/ood_core/job/adapters/lsf/helper.rb | 1 + lib/ood_core/job/adapters/pbspro.rb | 8 ++++++++ lib/ood_core/job/adapters/slurm.rb | 1 + lib/ood_core/job/adapters/torque.rb | 8 ++++++++ lib/ood_core/job/script.rb | 11 +++++++++-- 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/lib/ood_core/job/adapters/lsf/helper.rb b/lib/ood_core/job/adapters/lsf/helper.rb index 0c74a95c7..7c368c3de 100644 --- a/lib/ood_core/job/adapters/lsf/helper.rb +++ b/lib/ood_core/job/adapters/lsf/helper.rb @@ -91,6 +91,7 @@ def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: args.concat ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil? args.concat ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil? args.concat ["-L", script.shell_path.to_s] unless script.shell_path.nil? + args.concat ['-n', script.cores] unless script.cores.nil? # environment env = script.job_environment || {} diff --git a/lib/ood_core/job/adapters/pbspro.rb b/lib/ood_core/job/adapters/pbspro.rb index 3e57e6399..f51325675 100644 --- a/lib/ood_core/job/adapters/pbspro.rb +++ b/lib/ood_core/job/adapters/pbspro.rb @@ -269,6 +269,7 @@ def submit(script, after: [], afterok: [], afternotok: [], afterany: []) args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil? args.concat ["-A", script.accounting_id] unless script.accounting_id.nil? args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil? + ars.concat ppn(script) # Set dependencies depend = [] @@ -422,6 +423,13 @@ def directive_prefix '#PBS' end + # place holder for when we support both nodes and cpus. + def ppn(script) + return [] if script.cores.nil? 
+ + ['-l', "ncpus=#{script.cores}"] + end + private # Convert duration to seconds def duration_in_seconds(time) diff --git a/lib/ood_core/job/adapters/slurm.rb b/lib/ood_core/job/adapters/slurm.rb index 72f111210..a927f269b 100644 --- a/lib/ood_core/job/adapters/slurm.rb +++ b/lib/ood_core/job/adapters/slurm.rb @@ -526,6 +526,7 @@ def submit(script, after: [], afterok: [], afternotok: [], afterany: []) args.concat ['-a', script.job_array_request] unless script.job_array_request.nil? args.concat ['--qos', script.qos] unless script.qos.nil? args.concat ['--gpus-per-node', script.gpus_per_node] unless script.gpus_per_node.nil? + args.concat ['-n', script.cores] unless script.cores.nil? # ignore nodes, don't know how to do this for slurm # Set dependencies diff --git a/lib/ood_core/job/adapters/torque.rb b/lib/ood_core/job/adapters/torque.rb index f018ab111..e1b3e3346 100644 --- a/lib/ood_core/job/adapters/torque.rb +++ b/lib/ood_core/job/adapters/torque.rb @@ -160,6 +160,7 @@ def submit(script, after: [], afterok: [], afternotok: [], afterany: []) args.concat ['-t', script.job_array_request] unless script.job_array_request.nil? args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil? args.concat ['-l', "gpus=#{script.gpus_per_node}"] unless script.gpus_per_node.nil? + args.concat ppn(script) # Set environment variables env = script.job_environment.to_h @@ -302,6 +303,13 @@ def directive_prefix '#QSUB' end + # place holder for when we support both nodes and cpus. + def ppn(script) + return [] if script.cores.nil? 
+ + ['-l', "procs=#{script.cores}"] + end + private # Convert duration to seconds def duration_in_seconds(time) diff --git a/lib/ood_core/job/script.rb b/lib/ood_core/job/script.rb index 1df265659..d0f961074 100644 --- a/lib/ood_core/job/script.rb +++ b/lib/ood_core/job/script.rb @@ -108,6 +108,10 @@ class Script # @return [Integer, nil] gpus per node attr_reader :gpus_per_node + # The core request for this job + # @return [Integer, nil] cores + attr_reader :cores + # Object detailing any native specifications that are implementation specific # @note Should not be used at all costs. # @return [Object, nil] native specifications @@ -151,7 +155,8 @@ def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil, output_path: nil, error_path: nil, reservation_id: nil, queue_name: nil, priority: nil, start_time: nil, wall_time: nil, accounting_id: nil, job_array_request: nil, - qos: nil, gpus_per_node: nil, native: nil, copy_environment: nil, **_) + qos: nil, gpus_per_node: nil, native: nil, copy_environment: nil, + cores: nil, **_) @content = content.to_s @submit_as_hold = submit_as_hold @@ -179,6 +184,7 @@ def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil, @gpus_per_node = gpus_per_node && gpus_per_node.to_i @native = native @copy_environment = (copy_environment.nil?) ? nil : !! 
copy_environment + @cores = cores&.to_i end # Convert object to hash @@ -209,7 +215,8 @@ def to_h qos: qos, gpus_per_node: gpus_per_node, native: native, - copy_environment: copy_environment + cores: cores, + copy_environment: copy_environment, } end From b8981aab575fb313c6c2743c56116df755a4d305 Mon Sep 17 00:00:00 2001 From: Jeff Ohrstrom Date: Fri, 9 Aug 2024 11:37:39 -0400 Subject: [PATCH 2/3] correct this variable name --- lib/ood_core/job/adapters/pbspro.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ood_core/job/adapters/pbspro.rb b/lib/ood_core/job/adapters/pbspro.rb index f51325675..0fd780334 100644 --- a/lib/ood_core/job/adapters/pbspro.rb +++ b/lib/ood_core/job/adapters/pbspro.rb @@ -269,7 +269,7 @@ def submit(script, after: [], afterok: [], afternotok: [], afterany: []) args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil? args.concat ["-A", script.accounting_id] unless script.accounting_id.nil? args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil? - ars.concat ppn(script) + args.concat ppn(script) # Set dependencies depend = [] From 1f2b8ae1b46d7b4d8785abf61b80e06b92329b0a Mon Sep 17 00:00:00 2001 From: Jeff Ohrstrom Date: Fri, 6 Sep 2024 10:10:47 -0400 Subject: [PATCH 3/3] fujitsu support --- lib/ood_core/job/adapters/fujitsu_tcs.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ood_core/job/adapters/fujitsu_tcs.rb b/lib/ood_core/job/adapters/fujitsu_tcs.rb index eeccfb915..b4dbf230d 100644 --- a/lib/ood_core/job/adapters/fujitsu_tcs.rb +++ b/lib/ood_core/job/adapters/fujitsu_tcs.rb @@ -233,6 +233,7 @@ def submit(script, after: [], afterok: [], afternotok: [], afterany: []) args.concat ["-N", script.job_name] unless script.job_name.nil? args.concat ["-o", script.output_path] unless script.output_path.nil? + args.concat ['--mpi', "proc=#{script.cores}"] unless script.cores.nil? if script.error_path.nil? 
args.concat ["-j"] else