Skip to content

Commit

Permalink
fix: Correct vacuuming scheduler logic
Browse files Browse the repository at this point in the history
* Ensure we run vacuuming once a week on Monday after 2hr

* Strip newlines and spaces from lastjobsupdate file

* Change most of log lines to debug mode for DB update

Signed-off-by: Mahendra Paipuri <[email protected]>
  • Loading branch information
mahendrapaipuri committed Dec 26, 2023
1 parent b7d7397 commit 7d82c84
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 13 deletions.
17 changes: 9 additions & 8 deletions pkg/jobstats/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func NewJobStatsDB(
if _, err := os.Stat(dataPath); err != nil {
level.Info(logger).Log("msg", "Data path directory does not exist. Creating...", "path", dataPath)
if err := os.Mkdir(dataPath, 0750); err != nil {
level.Error(logger).Log("msg", "Could not create data path directory.", "path", dataPath, "err", err)
level.Error(logger).Log("msg", "Could not create data path directory", "path", dataPath, "err", err)
return nil, err
}
goto updatetime
Expand All @@ -174,7 +174,8 @@ func NewJobStatsDB(
level.Error(logger).Log("msg", "Failed to read lastjobsupdatetime file", "err", err)
goto updatetime
} else {
lastJobsUpdateTime, err = time.Parse(dateFormat, string(lastUpdateTimeString))
// Trim any spaces and new lines
lastJobsUpdateTime, err = time.Parse(dateFormat, strings.TrimSuffix(strings.TrimSpace(string(lastUpdateTimeString)), "\n"))
if err != nil {
level.Error(logger).Log("msg", "Failed to parse time string in lastjobsupdatetime file", "time", lastUpdateTimeString, "err", err)
goto updatetime
Expand All @@ -189,7 +190,7 @@ func NewJobStatsDB(
updatetime:
lastJobsUpdateTime, err = time.Parse("2006-01-02", lastJobsUpdateTimeString)
if err != nil {
level.Error(logger).Log("msg", "Failed to parse time string.", "time", lastJobsUpdateTimeString, "err", err)
level.Error(logger).Log("msg", "Failed to parse time string", "time", lastJobsUpdateTimeString, "err", err)
return nil, err
}

Expand Down Expand Up @@ -340,18 +341,18 @@ func (j *jobStatsDB) getJobStats(startTime, endTime time.Time) error {
}

// Insert data into DB
level.Info(j.logger).Log("msg", "Inserting jobs into DB")
level.Debug(j.logger).Log("msg", "Inserting jobs into DB")
j.insertJobsInDB(stmt, jobs)
level.Info(j.logger).Log("msg", "Finished inserting jobs into DB")
level.Debug(j.logger).Log("msg", "Finished inserting jobs into DB")

// Delete older entries
level.Info(j.logger).Log("msg", "Deleting old jobs")
level.Debug(j.logger).Log("msg", "Deleting old jobs")
err = j.deleteOldJobs(tx)
if err != nil {
level.Error(j.logger).Log("msg", "Failed to delete old job entries", "err", err)
return err
}
level.Info(j.logger).Log("msg", "Finished deleting old jobs in DB")
level.Debug(j.logger).Log("msg", "Finished deleting old jobs in DB")

// Commit changes
err = tx.Commit()
Expand Down Expand Up @@ -383,7 +384,7 @@ func (j *jobStatsDB) vacuumDB() error {
nextVacuumTime := j.lastDBVacuumTime.Add(time.Duration(168) * time.Hour)

// Check if we are on Monday at 02hr and **after** nextVacuumTime
if weekday != "Monday" && hours != 2 && time.Now().Compare(nextVacuumTime) == -1 {
if weekday != "Monday" || hours != 02 || time.Now().Compare(nextVacuumTime) == -1 {
return nil
}

Expand Down
10 changes: 5 additions & 5 deletions pkg/jobstats/slurm.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@ sudomode:
func NewSlurmScheduler(logger log.Logger) (Batch, error) {
execMode, err := preflightChecks(logger)
if err != nil {
level.Error(logger).Log("msg", "Failed to create Slurm batch scheduler for retreiving jobs.", "err", err)
level.Error(logger).Log("msg", "Failed to setup Slurm batch scheduler for retreiving jobs", "err", err)
return nil, err
}
level.Info(logger).Log("msg", "Jobs from slurm batch scheduler will be retrieved.")
level.Info(logger).Log("msg", "Jobs from slurm batch scheduler will be retrieved")
return &slurmScheduler{
logger: logger,
execMode: execMode,
Expand All @@ -115,16 +115,16 @@ func (s *slurmScheduler) GetJobs(start time.Time, end time.Time) ([]BatchJob, er
startTime := start.Format(s.slurmDateFormat)
endTime := end.Format(s.slurmDateFormat)

level.Info(s.logger).Log("msg", "Retrieving Slurm jobs", "start", startTime, "end", endTime)

// Execute sacct command between start and end times
sacctOutput, err := runSacctCmd(s.execMode, startTime, endTime, s.logger)
if err != nil {
level.Error(s.logger).Log("msg", "Failed to execute SLURM sacct command", "err", err)
return []BatchJob{}, err
}

// Parse sacct output and create BatchJob structs slice
jobs, numJobs := parseSacctCmdOutput(string(sacctOutput), *slurmWalltimeCutoff, s.logger)
level.Info(s.logger).Log("msg", "Number of Slurm jobs.", "njobs", numJobs)
level.Info(s.logger).Log("msg", "Retrieved Slurm jobs", "start", startTime, "end", endTime, "njobs", numJobs)
return jobs, nil
}

Expand Down

0 comments on commit 7d82c84

Please sign in to comment.