From 7d82c8495eaa598d2094a6df4778cdb3750cafee Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Tue, 26 Dec 2023 09:19:06 +0100 Subject: [PATCH] fix: Correct vacuuming scheduler logic * Ensure we run vacuuming once a week on Monday after 2hr * Strip newlines and spaces from lastjobsupdate file * Change most of log lines to debug mode for DB update Signed-off-by: Mahendra Paipuri --- pkg/jobstats/db.go | 17 +++++++++-------- pkg/jobstats/slurm.go | 10 +++++----- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/pkg/jobstats/db.go b/pkg/jobstats/db.go index da4bf116..505ff8d2 100644 --- a/pkg/jobstats/db.go +++ b/pkg/jobstats/db.go @@ -162,7 +162,7 @@ func NewJobStatsDB( if _, err := os.Stat(dataPath); err != nil { level.Info(logger).Log("msg", "Data path directory does not exist. Creating...", "path", dataPath) if err := os.Mkdir(dataPath, 0750); err != nil { - level.Error(logger).Log("msg", "Could not create data path directory.", "path", dataPath, "err", err) + level.Error(logger).Log("msg", "Could not create data path directory", "path", dataPath, "err", err) return nil, err } goto updatetime @@ -174,7 +174,8 @@ func NewJobStatsDB( level.Error(logger).Log("msg", "Failed to read lastjobsupdatetime file", "err", err) goto updatetime } else { - lastJobsUpdateTime, err = time.Parse(dateFormat, string(lastUpdateTimeString)) + // Trim any spaces and new lines + lastJobsUpdateTime, err = time.Parse(dateFormat, strings.TrimSuffix(strings.TrimSpace(string(lastUpdateTimeString)), "\n")) if err != nil { level.Error(logger).Log("msg", "Failed to parse time string in lastjobsupdatetime file", "time", lastUpdateTimeString, "err", err) goto updatetime @@ -189,7 +190,7 @@ func NewJobStatsDB( updatetime: lastJobsUpdateTime, err = time.Parse("2006-01-02", lastJobsUpdateTimeString) if err != nil { - level.Error(logger).Log("msg", "Failed to parse time string.", "time", lastJobsUpdateTimeString, "err", err) + level.Error(logger).Log("msg", "Failed to parse time string", "time", lastJobsUpdateTimeString, "err", err) return nil, err } @@ -340,18 +341,18 @@ func (j *jobStatsDB) getJobStats(startTime, endTime time.Time) error { } // Insert data into DB - level.Info(j.logger).Log("msg", "Inserting jobs into DB") + level.Debug(j.logger).Log("msg", "Inserting jobs into DB") j.insertJobsInDB(stmt, jobs) - level.Info(j.logger).Log("msg", "Finished inserting jobs into DB") + level.Debug(j.logger).Log("msg", "Finished inserting jobs into DB") // Delete older entries - level.Info(j.logger).Log("msg", "Deleting old jobs") + level.Debug(j.logger).Log("msg", "Deleting old jobs") err = j.deleteOldJobs(tx) if err != nil { level.Error(j.logger).Log("msg", "Failed to delete old job entries", "err", err) return err } - level.Info(j.logger).Log("msg", "Finished deleting old jobs in DB") + level.Debug(j.logger).Log("msg", "Finished deleting old jobs in DB") // Commit changes err = tx.Commit() @@ -383,7 +384,7 @@ func (j *jobStatsDB) vacuumDB() error { nextVacuumTime := j.lastDBVacuumTime.Add(time.Duration(168) * time.Hour) // Check if we are on Monday at 02hr and **after** nextVacuumTime - if weekday != "Monday" && hours != 2 && time.Now().Compare(nextVacuumTime) == -1 { + if weekday != "Monday" || hours != 02 || time.Now().Compare(nextVacuumTime) == -1 { return nil } diff --git a/pkg/jobstats/slurm.go b/pkg/jobstats/slurm.go index 2b536ff7..d7398a9c 100644 --- a/pkg/jobstats/slurm.go +++ b/pkg/jobstats/slurm.go @@ -99,10 +99,10 @@ sudomode: func NewSlurmScheduler(logger log.Logger) (Batch, error) { execMode, err := preflightChecks(logger) if err != nil { - level.Error(logger).Log("msg", "Failed to create Slurm batch scheduler for retreiving jobs.", "err", err) + level.Error(logger).Log("msg", "Failed to setup Slurm batch scheduler for retreiving jobs", "err", err) return nil, err } - level.Info(logger).Log("msg", "Jobs from slurm batch scheduler will be retrieved.") + level.Info(logger).Log("msg", "Jobs from slurm batch scheduler will be retrieved") return &slurmScheduler{ logger: logger, execMode: execMode, @@ -115,16 +115,16 @@ func (s *slurmScheduler) GetJobs(start time.Time, end time.Time) ([]BatchJob, er startTime := start.Format(s.slurmDateFormat) endTime := end.Format(s.slurmDateFormat) - level.Info(s.logger).Log("msg", "Retrieving Slurm jobs", "start", startTime, "end", endTime) - + // Execute sacct command between start and end times sacctOutput, err := runSacctCmd(s.execMode, startTime, endTime, s.logger) if err != nil { level.Error(s.logger).Log("msg", "Failed to execute SLURM sacct command", "err", err) return []BatchJob{}, err } + // Parse sacct output and create BatchJob structs slice jobs, numJobs := parseSacctCmdOutput(string(sacctOutput), *slurmWalltimeCutoff, s.logger) - level.Info(s.logger).Log("msg", "Number of Slurm jobs.", "njobs", numJobs) + level.Info(s.logger).Log("msg", "Retrieved Slurm jobs", "start", startTime, "end", endTime, "njobs", numJobs) return jobs, nil }