Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Improve how movie and batch seek to clean up on premature exits #6016

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/rst/source/batch.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ The **batch** module creates several hidden script files that are used in the ge
(initializes variables related to the overall batch job and includes the contents of the optional *includefile*),
*batch_preflight* (optional since it derives from **-Sb** and computes or prepares needed data files), *batch_postflight*
(optional since it derives from **-Sf** and processes files once all the batch job complete), *batch_job*
(accepts a job counter argument and processes data for those parameters), and *batch_cleanup* (removes temporary
(accepts a job counter argument and processes data for those parameters), and a hidden *batch_cleanup* (removes temporary
files at the end of the process). For each job, there is a separate *batch_params_######* script that provides
job-specific variables (e.g., job number and anything given via **-T**). The *preflight* and *postflight* scripts
have access to the information in *batch_init*, while the *batch_job* script in addition has access to the job-specific
Expand Down
2 changes: 1 addition & 1 deletion doc/rst/source/movie.rst
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ scripting language used): *movie_init* (initializes variables related to canvas
and includes the contents of the optional *includefile*), *movie_preflight* (optional since it derives
from **-Sb** and computes needed data files and possibly a background layer), *movie_postflight*
(optional since it derives from **-Sf** and builds a foreground layer), *movie_frame* (accepts a frame counter
argument and builds the frame image), and *movie_cleanup* (removes temporary files at the end of the
argument and builds the frame image), and a hidden *movie_cleanup* (removes temporary files at the end of the
run). For each frame there is a separate *movie_params_######* script that provides frame-specific
variables (e.g., frame number and anything given via **-T**). The pre- and post-flight scripts have
access to the information in *movie_init* while the frame script in addition has access to the frame-specific
Expand Down
132 changes: 75 additions & 57 deletions src/batch.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ static int parse (struct GMT_CTRL *GMT, struct BATCH_CTRL *Ctrl, struct GMT_OPTI
#define Return(code) {Free_Ctrl (GMT, Ctrl); gmt_end_module (GMT, GMT_cpy); bailout (code);}

EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
int error = 0, precision;
int error = GMT_NOERROR, current_error, precision;
int (*run_script)(const char *); /* pointer to system function or a dummy */

unsigned int n_values = 0, n_jobs = 0, job, i_job, col, k, n_cores_unused, n_to_run;
Expand All @@ -432,7 +432,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
char init_file[PATH_MAX] = {""}, state_tag[GMT_LEN16] = {""}, state_prefix[GMT_LEN64] = {""}, param_file[PATH_MAX] = {""};
char pre_file[PATH_MAX] = {""}, post_file[PATH_MAX] = {""}, main_file[PATH_MAX] = {""}, line[PATH_MAX] = {""}, tmpwpath[PATH_MAX] = {""};
char string[GMT_LEN128] = {""}, cmd[GMT_LEN256] = {""}, cleanup_file[PATH_MAX] = {""}, cwd[PATH_MAX] = {""}, conf_file[PATH_MAX];
char completion_file[PATH_MAX] = {""}, topdir[PATH_MAX] = {""}, workdir[PATH_MAX] = {""}, datadir[PATH_MAX] = {""};
char completion_file[PATH_MAX] = {""}, topdir[PATH_MAX] = {""}, workdir[PATH_MAX] = {""}, datadir[PATH_MAX] = {""}, zap_workdir[PATH_MAX] = {""};

double percent = 0.0;

Expand Down Expand Up @@ -552,15 +552,58 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if (Ctrl->In.mode == GMT_DOS_MODE) /* On Windows to do remove a file in a subdir one need to use back slashes */
gmt_strrepc (tmpwpath, '/', '\\');

/* Build file names */
if (Ctrl->S[BATCH_PREFLIGHT].active)
sprintf (pre_file, "batch_preflight.%s", extension[Ctrl->In.mode]);
if (Ctrl->S[BATCH_POSTFLIGHT].active)
sprintf (post_file, "batch_postflight.%s", extension[Ctrl->In.mode]);
sprintf (init_file, "batch_init.%s", extension[Ctrl->In.mode]);
sprintf (main_file, "batch_job.%s", extension[Ctrl->In.mode]);

/* Prepare the cleanup script */
sprintf (cleanup_file, "%s/batch_cleanup_%d.%s", API->tmp_dir, (int)getpid(), extension[Ctrl->In.mode]);
GMT_Report (API, GMT_MSG_INFORMATION, "Create cleanup script %s\n", cleanup_file);
if ((fp = fopen (cleanup_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create cleanup file %s - exiting\n", cleanup_file);
Return (GMT_ERROR_ON_FOPEN);
}
gmt_set_script (fp, Ctrl->In.mode); /* Write 1st line of a script */
sprintf (zap_workdir, "%s %s", rmdir[Ctrl->In.mode], tmpwpath);
if (Ctrl->W.active) { /* Want to delete the entire work directory */
gmt_set_comment (fp, Ctrl->In.mode, "Cleanup script removes working directory with job files");
/* Delete the entire working directory with batch jobs and tmp files */
fprintf (fp, "%s %s\n", rmdir[Ctrl->In.mode], tmpwpath);
}
else { /* Just delete the remaining script files */
/* On Windows to do remove a file in a subdir one need to use back slashes */
char dir_sep_ = (Ctrl->In.mode == GMT_DOS_MODE) ? '\\' : '/';
GMT_Report (API, GMT_MSG_INFORMATION, "%u job product sets saved in directory: %s\n", n_jobs, workdir);
if (Ctrl->S[BATCH_PREFLIGHT].active) /* Remove the preflight script */
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, pre_file);
if (Ctrl->S[BATCH_POSTFLIGHT].active) /* Remove the postflight script */
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, post_file);
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, init_file); /* Delete the init script */
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, main_file); /* Delete the main script */
}
fclose (fp);
#ifndef WIN32 /* Set executable bit if not Windows cmd */
if (chmod (cleanup_file, S_IRWXU)) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to make cleanup script %s executable - exiting\n", cleanup_file);
Return (GMT_RUNTIME_ERROR);
}
#endif

/* Here the cleanup_script has been completed and from now on we do not Return but set error code and jump to
* the end of the program at tag clean_then_die: */

/* Create the initialization file with settings common to all jobs */

n_written = (n_jobs > 0); /* Know the number of jobs already */
sprintf (init_file, "batch_init.%s", extension[Ctrl->In.mode]);
GMT_Report (API, GMT_MSG_INFORMATION, "Create parameter initiation script %s\n", init_file);
if ((fp = fopen (init_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create file %s - exiting\n", init_file);
batch_close_files (Ctrl);
Return (GMT_ERROR_ON_FOPEN);
error = GMT_ERROR_ON_FOPEN; goto clean_then_die;
}

sprintf (string, "Static parameters set for processing sequence %s", Ctrl->N.prefix);
Expand Down Expand Up @@ -589,12 +632,13 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
GMT_Report (API, GMT_MSG_ERROR, "Your preflight file %s is not in GMT modern node - exiting\n", pre_file);
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
GMT_Report (API, GMT_MSG_INFORMATION, "Create preflight script %s and execute it\n", pre_file);
if ((fp = fopen (pre_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create preflight script %s - exiting\n", pre_file);
fclose (Ctrl->In.fp);
Return (GMT_ERROR_ON_FOPEN);
error = GMT_ERROR_ON_FOPEN; goto clean_then_die;
}
gmt_set_script (fp, Ctrl->In.mode); /* Write 1st line of a script */
gmt_set_comment (fp, Ctrl->In.mode, "Preflight script");
Expand Down Expand Up @@ -622,15 +666,15 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if (chmod (pre_file, S_IRWXU)) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to make preflight script %s executable - exiting.\n", pre_file);
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
#endif
/* Run the pre-flight now which may or may not create a <timefile> needed later via -T, as well as needed data files */
sprintf (cmd, "%s %s", sys_cmd_wait[Ctrl->In.mode], pre_file);
if ((error = system (cmd))) {
GMT_Report (API, GMT_MSG_ERROR, "Running preflight script %s returned error %d - exiting.\n", pre_file, error);
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
}

Expand All @@ -641,12 +685,12 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if ((D = GMT_Read_Data (API, GMT_IS_DATASET, GMT_IS_FILE, GMT_IS_NONE, GMT_READ_NORMAL, NULL, Ctrl->T.file, NULL)) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to read time file: %s - exiting\n", Ctrl->T.file);
fclose (Ctrl->In.fp);
Return (API->error);
error = API->error; goto clean_then_die;
}
if (D->n_segments > 1) { /* We insist on a simple file structure with a single segment */
GMT_Report (API, GMT_MSG_ERROR, "Your time file %s has more than one segment - reformat first\n", Ctrl->T.file);
fclose (Ctrl->In.fp);
Return (API->error);
error = API->error; goto clean_then_die;
}
n_jobs = (unsigned int)D->n_records; /* Number of records means number of jobs */
n_values = (unsigned int)D->n_columns; /* The number of per-job parameters we need to place into the per-job parameter files */
Expand All @@ -656,7 +700,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
char output[GMT_VF_LEN] = {""}, cmd[GMT_LEN128] = {""};
unsigned int V = GMT->current.setting.verbose;
if (GMT_Open_VirtualFile (API, GMT_IS_DATASET, GMT_IS_NONE, GMT_OUT|GMT_IS_REFERENCE, NULL, output) == GMT_NOTSET) {
Return (API->error);
error = API->error; goto clean_then_die;
}
if (GMT->common.f.active[GMT_IN])
sprintf (cmd, "-T%s -o1 -f%s --GMT_HISTORY=readonly T = %s", Ctrl->T.file, GMT->common.f.string, output);
Expand All @@ -665,11 +709,11 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
GMT_Report (API, GMT_MSG_INFORMATION, "Calling gmtmath with args %s\n", cmd);
GMT->current.setting.verbose = GMT_MSG_ERROR; /* So we don't get unwanted verbosity from gmtmath */
if (GMT_Call_Module (API, "gmtmath", GMT_MODULE_CMD, cmd)) {
Return (API->error); /* Some sort of failure */
error = API->error; goto clean_then_die;
}
GMT->current.setting.verbose = V; /* Restore */
if ((D = GMT_Read_VirtualFile (API, output)) == NULL) { /* Load in the data array */
Return (API->error); /* Some sort of failure */
error = API->error; goto clean_then_die;
}
n_jobs = (unsigned int)D->n_records; /* Number of records means number of jobs */
n_values = (unsigned int)D->n_columns; /* The number of per-job parameters we need to place into the per-job parameter files */
Expand All @@ -685,7 +729,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if (n_jobs == 0) { /* So not good... */
GMT_Report (API, GMT_MSG_ERROR, "No jobs specified! - exiting.\n");
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}

if (!n_written) { /* Rewrite the init file to place the BATCH_NJOBS there */
Expand All @@ -694,7 +738,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if ((fp = fopen (init_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create file %s - exiting\n", init_file);
batch_close_files (Ctrl);
Return (GMT_ERROR_ON_FOPEN);
error = GMT_ERROR_ON_FOPEN; goto clean_then_die;
}

sprintf (string, "Static parameters set for processing sequence %s", Ctrl->N.prefix);
Expand Down Expand Up @@ -730,7 +774,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if ((fp = fopen (post_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create postflight file %s - exiting\n", post_file);
fclose (Ctrl->In.fp);
Return (GMT_ERROR_ON_FOPEN);
error = GMT_ERROR_ON_FOPEN; goto clean_then_die;
}
gmt_set_script (fp, Ctrl->In.mode); /* Write 1st line of a script */
gmt_set_comment (fp, Ctrl->In.mode, "Postflight script");
Expand Down Expand Up @@ -759,7 +803,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if (chmod (post_file, S_IRWXU)) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to make postflight script %s executable - exiting\n", post_file);
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
#endif
}
Expand All @@ -776,7 +820,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if ((fp = fopen (param_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create job parameter file %s - exiting\n", param_file);
fclose (Ctrl->In.fp);
Return (GMT_ERROR_ON_FOPEN);
error = GMT_ERROR_ON_FOPEN; goto clean_then_die;
}
sprintf (state_prefix, "Parameter file for job %s", state_tag);
gmt_set_comment (fp, Ctrl->In.mode, state_prefix);
Expand Down Expand Up @@ -809,12 +853,11 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {

/* Now build the main loop script from the mainscript */

sprintf (main_file, "batch_job.%s", extension[Ctrl->In.mode]);
GMT_Report (API, GMT_MSG_INFORMATION, "Create main batch job script %s\n", main_file);
if ((fp = fopen (main_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create loop job script file %s - exiting\n", main_file);
fclose (Ctrl->In.fp);
Return (GMT_ERROR_ON_FOPEN);
error = GMT_ERROR_ON_FOPEN; goto clean_then_die;
}
gmt_set_script (fp, Ctrl->In.mode); /* Write 1st line of a script */
gmt_set_comment (fp, Ctrl->In.mode, "Main job loop script");
Expand Down Expand Up @@ -859,39 +902,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
#ifndef WIN32 /* Set executable bit if not Windows cmd */
if (chmod (main_file, S_IRWXU)) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to make script %s executable - exiting\n", main_file);
Return (GMT_RUNTIME_ERROR);
}
#endif

/* Prepare the cleanup script */
sprintf (cleanup_file, "batch_cleanup.%s", extension[Ctrl->In.mode]);
GMT_Report (API, GMT_MSG_INFORMATION, "Create cleanup script %s\n", cleanup_file);
if ((fp = fopen (cleanup_file, "w")) == NULL) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to create cleanup file %s - exiting\n", cleanup_file);
Return (GMT_ERROR_ON_FOPEN);
}
gmt_set_script (fp, Ctrl->In.mode); /* Write 1st line of a script */
if (Ctrl->W.active) { /* Want to delete the entire work directory */
gmt_set_comment (fp, Ctrl->In.mode, "Cleanup script removes working directory with job files");
/* Delete the entire working directory with batch jobs and tmp files */
fprintf (fp, "%s %s\n", rmdir[Ctrl->In.mode], tmpwpath);
}
else { /* Just delete the remaining script files */
/* On Windows to do remove a file in a subdir one need to use back slashes */
char dir_sep_ = (Ctrl->In.mode == GMT_DOS_MODE) ? '\\' : '/';
GMT_Report (API, GMT_MSG_INFORMATION, "%u job product sets saved in directory: %s\n", n_jobs, workdir);
if (Ctrl->S[BATCH_PREFLIGHT].active) /* Remove the preflight script */
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, pre_file);
if (Ctrl->S[BATCH_POSTFLIGHT].active) /* Remove the postflight script */
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, post_file);
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, init_file); /* Delete the init script */
fprintf (fp, "%s %s%c%s\n", rmfile[Ctrl->In.mode], tmpwpath, dir_sep_, main_file); /* Delete the main script */
}
fclose (fp);
#ifndef WIN32 /* Set executable bit if not Windows cmd */
if (chmod (cleanup_file, S_IRWXU)) {
GMT_Report (API, GMT_MSG_ERROR, "Unable to make cleanup script %s executable - exiting\n", cleanup_file);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
#endif

Expand All @@ -907,7 +918,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if ((error = run_script (cmd))) {
GMT_Report (API, GMT_MSG_ERROR, "Running master script %s for argument %s returned error %d - exiting.\n", main_file, state_tag, error);
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
Return (GMT_NOERROR); /* We are done */
}
Expand Down Expand Up @@ -935,7 +946,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
GMT_Report (API, GMT_MSG_DEBUG, "Launch script for job %*.*d\n", precision, precision, job);
if ((error = system (cmd))) {
GMT_Report (API, GMT_MSG_ERROR, "Running script %s returned error %d - aborting.\n", cmd, error);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
status[job].started = true; /* We have now launched this job job */
job++; /* Advance to next job for next launch */
Expand Down Expand Up @@ -972,10 +983,14 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
if ((error = run_script (cmd))) {
GMT_Report (API, GMT_MSG_ERROR, "Running postflight script %s returned error %d - exiting.\n", post_file, error);
fclose (Ctrl->In.fp);
Return (GMT_RUNTIME_ERROR);
error = GMT_RUNTIME_ERROR; goto clean_then_die;
}
}

clean_then_die:

current_error = error; /* Since we may jump here because of an issue earlier */

if (!Ctrl->Q.active) {
/* Run cleanup script at the end */
if (Ctrl->In.mode == GMT_DOS_MODE)
Expand All @@ -991,7 +1006,7 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
}

/* Finally, delete the clean-up script separately since under DOS we got complaints when we had it delete itself (which works under *nix) */
if (!Ctrl->Q.active && gmt_remove_file (GMT, cleanup_file)) { /* Delete the cleanup script itself */
if (gmt_remove_file (GMT, cleanup_file)) { /* Delete the cleanup script itself */
GMT_Report (API, GMT_MSG_ERROR, "Unable to delete the cleanup script %s.\n", cleanup_file);
Return (GMT_RUNTIME_ERROR);
}
Expand All @@ -1008,5 +1023,8 @@ EXTERN_MSC int GMT_batch (void *V_API, int mode, void *args) {
Return (error);
}

Return (GMT_NOERROR);
if (current_error) /* Force removal of work dir since job failed */
system (zap_workdir);

Return (current_error);
}
Loading