From d47881f586b3b2841b51ee74dd408f6fbd3e2480 Mon Sep 17 00:00:00 2001 From: James Cave Date: Wed, 16 Oct 2024 11:25:36 +0100 Subject: [PATCH] v2.2.1 --- README.md | 2 +- communicatingJobWrapper.sh | 17 +++++++++++------ communicatingJobWrapperSmpd.sh | 15 ++++++++++----- communicatingSubmitFcn.m | 8 ++++++-- getJobStateFcn.m | 14 ++++++++------ independentJobWrapper.sh | 2 +- independentSubmitFcn.m | 3 ++- private/createEnvironmentWrapper.m | 4 ---- private/createSubmitScript.m | 2 +- private/getRemoteConnection.m | 27 ++++++++++++++++++++------- 10 files changed, 60 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index a56819c..da31937 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ By modifying the plugins, you can add support for your own custom `AdditionalPro To manage work on the cluster, MATLAB calls the LSF command line utilities. For example, the `bsub` command to submit work and `bjobs` to query the state of submitted jobs. If your MATLAB session is running on a machine with the scheduler utilities available, the plugin scripts can call the utilities on the command line. -Scheduler utilties are typically available if your MATLAB session is running on the LSF cluster to which you want to submit. +Scheduler utilities are typically available if your MATLAB session is running on the LSF cluster to which you want to submit. If MATLAB cannot directly access the scheduler utilities on the command line, the plugin scripts create an SSH session to the cluster and run scheduler commands over that connection. To configure your cluster to submit scheduler commands via SSH, set the `ClusterHost` field of `AdditionalProperties` to the name of the cluster node to which MATLAB connects via SSH. diff --git a/communicatingJobWrapper.sh b/communicatingJobWrapper.sh index 9ceed9d..5f1a58a 100755 --- a/communicatingJobWrapper.sh +++ b/communicatingJobWrapper.sh @@ -19,7 +19,7 @@ # The following environment variables are set by LSF: # LSB_MCPU_HOSTS - list of hostnames with their associated number of processors allocated to this LSF job -# Copyright 2006-2022 The MathWorks, Inc. +# Copyright 2006-2024 The MathWorks, Inc. # If PARALLEL_SERVER_ environment variables are not set, assign any # available values with form MDCE_ for backwards compatibility @@ -29,6 +29,9 @@ PARALLEL_SERVER_MATLAB_ARGS=${PARALLEL_SERVER_MATLAB_ARGS:="${MDCE_MATLAB_ARGS}" PARALLEL_SERVER_NUM_THREADS=${PARALLEL_SERVER_NUM_THREADS:="${MDCE_NUM_THREADS}"} PARALLEL_SERVER_DEBUG=${PARALLEL_SERVER_DEBUG:="${MDCE_DEBUG}"} +# Other environment variables to forward +PARALLEL_SERVER_GENVLIST="${PARALLEL_SERVER_GENVLIST},HOME,USER" + # Echo the resources that the scheduler has allocated to this job: echo -e "The scheduler has allocated the following resources to this job (format is [hostname] [number of processors on host]):\n${LSB_MCPU_HOSTS:?"Host list undefined"}" @@ -43,12 +46,14 @@ if [ ! -z "${PARALLEL_SERVER_DEBUG}" ] && [ "${PARALLEL_SERVER_DEBUG}" != "false MPI_VERBOSE="${MPI_VERBOSE} -v -print-all-exitcodes" fi -# Unset the hostname variables to ensure they don't get forwarded by mpiexec -unset HOST HOSTNAME - # Construct the command to run. -CMD="\"${FULL_MPIEXEC}\" -bind-to core:${PARALLEL_SERVER_NUM_THREADS} ${MPI_VERBOSE} -n ${PARALLEL_SERVER_TOTAL_TASKS} \ - \"${PARALLEL_SERVER_MATLAB_EXE}\" ${PARALLEL_SERVER_MATLAB_ARGS}" +CMD="\"${FULL_MPIEXEC}\" \ + -genvlist ${PARALLEL_SERVER_GENVLIST} \ + -bind-to core:${PARALLEL_SERVER_NUM_THREADS} \ + ${MPI_VERBOSE} \ + -n ${PARALLEL_SERVER_TOTAL_TASKS} \ + \"${PARALLEL_SERVER_MATLAB_EXE}\" \ + ${PARALLEL_SERVER_MATLAB_ARGS}" # Echo the command so that it is shown in the output log. echo $CMD diff --git a/communicatingJobWrapperSmpd.sh b/communicatingJobWrapperSmpd.sh index 9de29ac..e3a1603 100755 --- a/communicatingJobWrapperSmpd.sh +++ b/communicatingJobWrapperSmpd.sh @@ -21,6 +21,9 @@ PARALLEL_SERVER_CMR=${PARALLEL_SERVER_CMR:="${MDCE_CMR}"} PARALLEL_SERVER_MATLAB_EXE=${PARALLEL_SERVER_MATLAB_EXE:="${MDCE_MATLAB_EXE}"} PARALLEL_SERVER_MATLAB_ARGS=${PARALLEL_SERVER_MATLAB_ARGS:="${MDCE_MATLAB_ARGS}"} +# Other environment variables to forward +PARALLEL_SERVER_GENVLIST="${PARALLEL_SERVER_GENVLIST},HOME,USER" + # Create full paths to mw_smpd/mw_mpiexec if needed FULL_SMPD=${PARALLEL_SERVER_CMR:+${PARALLEL_SERVER_CMR}/bin/}mw_smpd FULL_MPIEXEC=${PARALLEL_SERVER_CMR:+${PARALLEL_SERVER_CMR}/bin/}mw_mpiexec @@ -124,11 +127,13 @@ launchSmpds() { ######################################################################################### runMpiexec() { - ENVS_TO_FORWARD="PARALLEL_SERVER_DECODE_FUNCTION,PARALLEL_SERVER_STORAGE_LOCATION,PARALLEL_SERVER_STORAGE_CONSTRUCTOR,PARALLEL_SERVER_JOB_LOCATION,PARALLEL_SERVER_DEBUG,PARALLEL_SERVER_LICENSE_NUMBER,MLM_WEB_LICENSE,MLM_WEB_USER_CRED,MLM_WEB_ID" - LEGACY_ENVS_TO_FORWARD="MDCE_DECODE_FUNCTION,MDCE_STORAGE_LOCATION,MDCE_STORAGE_CONSTRUCTOR,MDCE_JOB_LOCATION,MDCE_DEBUG,MDCE_LICENSE_NUMBER" - CMD="\"${FULL_MPIEXEC}\" -smpd -phrase MATLAB -port ${SMPD_PORT} \ - -l ${MACHINE_ARG} -genvlist $ENVS_TO_FORWARD,$LEGACY_ENVS_TO_FORWARD \ - \"${PARALLEL_SERVER_MATLAB_EXE}\" ${PARALLEL_SERVER_MATLAB_ARGS}" + CMD="\"${FULL_MPIEXEC}\" -smpd \ + -phrase MATLAB \ + -port ${SMPD_PORT} \ + -l ${MACHINE_ARG} \ + -genvlist $PARALLEL_SERVER_GENVLIST \ + \"${PARALLEL_SERVER_MATLAB_EXE}\" \ + ${PARALLEL_SERVER_MATLAB_ARGS}" # As a debug stage: echo the command ... echo $CMD diff --git a/communicatingSubmitFcn.m b/communicatingSubmitFcn.m index 313478b..1047aa4 100644 --- a/communicatingSubmitFcn.m +++ b/communicatingSubmitFcn.m @@ -6,7 +6,7 @@ function communicatingSubmitFcn(cluster, job, environmentProperties) % % See also parallel.cluster.generic.communicatingDecodeFcn. -% Copyright 2010-2023 The MathWorks, Inc. +% Copyright 2010-2024 The MathWorks, Inc. % Store the current filename for the errors, warnings and dctSchedulerMessages. currFilename = mfilename; @@ -70,7 +70,8 @@ function communicatingSubmitFcn(cluster, job, environmentProperties) storageLocation = [storageLocation, fileSeparator]; end end -variables = {'PARALLEL_SERVER_DECODE_FUNCTION', decodeFunction; ... +variables = { ... + 'PARALLEL_SERVER_DECODE_FUNCTION', decodeFunction; ... 'PARALLEL_SERVER_STORAGE_CONSTRUCTOR', environmentProperties.StorageConstructor; ... 'PARALLEL_SERVER_JOB_LOCATION', environmentProperties.JobLocation; ... 'PARALLEL_SERVER_MATLAB_EXE', environmentProperties.MatlabExecutable; ... @@ -91,6 +92,9 @@ function communicatingSubmitFcn(cluster, job, environmentProperties) % Trim the environment variables of empty values. nonEmptyValues = cellfun(@(x) ~isempty(strtrim(x)), variables(:,2)); variables = variables(nonEmptyValues, :); +% List of all the variables to forward through mpiexec to the workers +variables = [variables; ... + {'PARALLEL_SERVER_GENVLIST', strjoin(variables(:,1), ',')}]; % The job directory as accessed by this machine localJobDirectory = cluster.getJobFolder(job); diff --git a/getJobStateFcn.m b/getJobStateFcn.m index ac5746e..b90667c 100644 --- a/getJobStateFcn.m +++ b/getJobStateFcn.m @@ -4,7 +4,7 @@ % Set your cluster's PluginScriptsLocation to the parent folder of this % function to run it when you query the state of a job. -% Copyright 2010-2022 The MathWorks, Inc. +% Copyright 2010-2024 The MathWorks, Inc. % Store the current filename for the errors, warnings and % dctSchedulerMessages @@ -61,18 +61,20 @@ clusterState = iExtractJobState(cmdOut, numSubmittedTasks); dctSchedulerMessage(6, '%s: State %s was extracted from cluster output.', currFilename, clusterState); -% If we could determine the cluster's state, we'll use that, otherwise -% stick with MATLAB's job state. +% If we could determine the cluster's state, we'll use that. Otherwise, we assume +% the scheduler is no longer tracking the job because the job has terminated. if ~strcmp(clusterState, 'unknown') state = clusterState; +else + state = 'finished'; end if ~cluster.HasSharedFilesystem - % Decide what to do with mirroring based on the cluster's version of job state and whether or not - % the job is currently being mirrored: + % Decide what to do with mirroring based on the cluster's version of job + % state and whether or not the job is currently being mirrored: % If job is not being mirrored, and job is not finished, resume the mirror % If job is not being mirrored, and job is finished, do the last mirror - % If the job is being mirrored, and job is finished, do the last mirror. + % If the job is being mirrored, and job is finished, do the last mirror % Otherwise (if job is not finished, and we are mirroring), do nothing remoteConnection = getRemoteConnection(cluster); isBeingMirrored = remoteConnection.isJobUsingConnection(job.ID); diff --git a/independentJobWrapper.sh b/independentJobWrapper.sh index bf45b5e..e661ade 100755 --- a/independentJobWrapper.sh +++ b/independentJobWrapper.sh @@ -5,7 +5,7 @@ # PARALLEL_SERVER_MATLAB_EXE - the MATLAB executable to use # PARALLEL_SERVER_MATLAB_ARGS - the MATLAB args to use -# Copyright 2010-2022 The MathWorks, Inc. +# Copyright 2010-2024 The MathWorks, Inc. # If PARALLEL_SERVER_ environment variables are not set, assign any # available values with form MDCE_ for backwards compatibility diff --git a/independentSubmitFcn.m b/independentSubmitFcn.m index df2f999..d7d031c 100644 --- a/independentSubmitFcn.m +++ b/independentSubmitFcn.m @@ -77,7 +77,8 @@ function independentSubmitFcn(cluster, job, environmentProperties) storageLocation = [storageLocation, fileSeparator]; end end -variables = {'PARALLEL_SERVER_DECODE_FUNCTION', decodeFunction; ... +variables = { ... + 'PARALLEL_SERVER_DECODE_FUNCTION', decodeFunction; ... 'PARALLEL_SERVER_STORAGE_CONSTRUCTOR', environmentProperties.StorageConstructor; ... 'PARALLEL_SERVER_JOB_LOCATION', environmentProperties.JobLocation; ... 'PARALLEL_SERVER_MATLAB_EXE', environmentProperties.MatlabExecutable; ... diff --git a/private/createEnvironmentWrapper.m b/private/createEnvironmentWrapper.m index d258155..4f4fc68 100644 --- a/private/createEnvironmentWrapper.m +++ b/private/createEnvironmentWrapper.m @@ -31,10 +31,6 @@ function createEnvironmentWrapper(outputFilename, quotedWrapperPath, environment % Turn off command echoing fprintf(fid, '@echo off\n'); - % On Windows must specify the list of variables to forward via mpiexec - genvlist = strjoin(environmentVariables(:,1), ','); - environmentVariables = [environmentVariables; {'PARALLEL_SERVER_GENVLIST', genvlist}]; - formatSpec = 'set %s=%s\n'; end diff --git a/private/createSubmitScript.m b/private/createSubmitScript.m index 1919f62..71cb555 100644 --- a/private/createSubmitScript.m +++ b/private/createSubmitScript.m @@ -2,7 +2,7 @@ function createSubmitScript(outputFilename, jobName, quotedLogFile, ... quotedWrapperPath, additionalSubmitArgs, clusterOS, jobArrayString) % Create a script that runs the LSF bsub command. -% Copyright 2010-2023 The MathWorks, Inc. +% Copyright 2010-2024 The MathWorks, Inc. if nargin < 7 jobArrayString = []; diff --git a/private/getRemoteConnection.m b/private/getRemoteConnection.m index 137132d..d5fa08c 100644 --- a/private/getRemoteConnection.m +++ b/private/getRemoteConnection.m @@ -4,7 +4,7 @@ % getRemoteConnection will either retrieve a RemoteClusterAccess from the % cluster's UserData or it will create a new RemoteClusterAccess. -% Copyright 2010-2023 The MathWorks, Inc. +% Copyright 2010-2024 The MathWorks, Inc. % Store the current filename for the dctSchedulerMessages currFilename = mfilename; @@ -70,7 +70,7 @@ ~strcmpi(remoteConnection.Hostname, clusterHost) % The connection stored in the user data does not match the cluster host requested warning('parallelexamples:GenericLSF:DifferentRemoteParameters', ... - ['The current cluster is already using cluster host.\n', ... + ['The current cluster is already using cluster host %s.\n', ... 'The existing connection to %s will be replaced.'], ... remoteConnection.Hostname, remoteConnection.Hostname); cluster.UserData.RemoteConnection = []; @@ -123,7 +123,7 @@ % (true/false). % Use the UI for prompts if MATLAB has been started with the desktop enabled -useUI = desktop('-inuse'); +useUI = iShouldUseUI(); username = iGetUsername(cluster, useUI); % Decide which authentication mode to use @@ -192,6 +192,17 @@ end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +function useUI = iShouldUseUI() +if verLessThan('matlab', '9.11') + % Prior to R2021b, check for Java AWT components + useUI = isempty(javachk('awt')); +else + % From R2021b onwards, can use the desktop function + useUI = desktop('-inuse'); +end +end + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function username = iGetUsername(cluster, useUI) @@ -217,7 +228,8 @@ end % useUI == false -username = input(sprintf('Enter the username for %s:\n', cluster.AdditionalProperties.ClusterHost), 's'); +msg = sprintf('Enter the username for %s:\n ', cluster.AdditionalProperties.ClusterHost); +username = input(msg, 's'); end @@ -244,7 +256,7 @@ % useUI == false validYesNoResponse = {'y', 'n'}; -passphraseMessage = 'Does the identity file require a password? (y or n)\n'; +passphraseMessage = sprintf('Does the identity file require a password? (y or n)\n '); passphraseResponse = iLoopUntilValidStringInput(passphraseMessage, validYesNoResponse); identityFileHasPassphrase = strcmpi(passphraseResponse, 'y'); @@ -269,7 +281,8 @@ end identityFile = fullfile(pathname, filename); else - identityFile = input(sprintf('Please enter the full path to the Identity File to use:\n'), 's'); + msg = sprintf('Please enter the full path to the Identity File to use:\n '); + identityFile = input(msg, 's'); end end @@ -293,7 +306,7 @@ else validResponses = {'1', '2', '3'}; displayItems = [validResponses; options]; - identityFileMessage = [promptMessage, newline, sprintf('%s) %s\n', displayItems{:})]; + identityFileMessage = [promptMessage, newline, sprintf('%s) %s\n', displayItems{:}), ' ']; response = iLoopUntilValidStringInput(identityFileMessage, validResponses); switch response case '1'