From 041a0ac6dca11b12ab58598e9bd751d00df6879b Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Wed, 3 Jan 2024 17:18:19 +0100 Subject: [PATCH 01/11] fix: Correct json tag in struct def Signed-off-by: Mahendra Paipuri --- pkg/jobstats/base/base.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/jobstats/base/base.go b/pkg/jobstats/base/base.go index d9d989f9..eb259820 100644 --- a/pkg/jobstats/base/base.go +++ b/pkg/jobstats/base/base.go @@ -20,7 +20,7 @@ type BatchJob struct { Jobid string `json:"jobid"` Jobuuid string `json:"id"` Partition string `json:"partition"` - QoS string `jsob:"qos"` + QoS string `json:"qos"` Account string `json:"account"` Grp string `json:"group"` Gid string `json:"gid"` From bd3a949b4e1d225cfef686f69bf01248ef7897de Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Wed, 3 Jan 2024 17:18:36 +0100 Subject: [PATCH 02/11] chore: Remove unnecessary arg in receiver Signed-off-by: Mahendra Paipuri --- pkg/jobstats/db/db.go | 4 ++-- pkg/jobstats/db/db_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/jobstats/db/db.go b/pkg/jobstats/db/db.go index 1147cba0..9af8b87b 100644 --- a/pkg/jobstats/db/db.go +++ b/pkg/jobstats/db/db.go @@ -332,7 +332,7 @@ func (j *jobStatsDB) getJobStats(startTime, endTime time.Time) error { // Set pragma statements j.setPragmaDirectives() - stmt, err := j.prepareInsertStatement(tx, len(jobs)) + stmt, err := j.prepareInsertStatement(tx) if err != nil { level.Error(j.logger).Log("msg", "Failed to prepare insert job statement in DB", "err", err) return err @@ -415,7 +415,7 @@ func (j *jobStatsDB) deleteOldJobs(tx *sql.Tx) error { } // Make and return prepare statement for inserting entries -func (j *jobStatsDB) prepareInsertStatement(tx *sql.Tx, numJobs int) (*sql.Stmt, error) { +func (j *jobStatsDB) prepareInsertStatement(tx *sql.Tx) (*sql.Stmt, error) { placeHolderString := fmt.Sprintf( "(%s)", strings.Join(strings.Split(strings.Repeat("?", len(base.BatchJobFieldNames)), ""), ","), diff --git a/pkg/jobstats/db/db_test.go b/pkg/jobstats/db/db_test.go index f407caa0..5048b453 100644 --- a/pkg/jobstats/db/db_test.go +++ b/pkg/jobstats/db/db_test.go @@ -52,7 +52,7 @@ func prepareMockConfig(tmpDir string) *Config { func populateDBWithMockData(db *sql.DB, j *jobStatsDB) { tx, _ := db.Begin() - stmt, _ := j.prepareInsertStatement(tx, len(mockJobs)) + stmt, _ := j.prepareInsertStatement(tx) j.insertJobs(stmt, mockJobs) tx.Commit() } @@ -261,7 +261,7 @@ func TestJobStatsDeleteOldJobs(t *testing.T) { }, } tx, _ := j.db.Begin() - stmt, err := j.prepareInsertStatement(tx, len(jobs)) + stmt, err := j.prepareInsertStatement(tx) if err != nil { t.Errorf("Failed to prepare SQL statements") } From 575a0070fb0584615dca958183b579cd37782d2f Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Wed, 3 Jan 2024 17:20:09 +0100 Subject: [PATCH 03/11] feat: Use query builder for building queries * Simplified the logic of fetching jobs from DB * Admin user can fetch all jobs of any user by setting special X-Dashboard-User header * Use ISO date format used by Grafana * Update and add new tests Signed-off-by: Mahendra Paipuri --- Makefile | 1 + .../e2e-test-stats-server-admin-query-all.txt | 1 + .../e2e-test-stats-server-admin-query.txt | 2 +- .../e2e-test-stats-server-jobid-query.txt | 2 +- ...-test-stats-server-jobuuid-jobid-query.txt | 2 +- .../e2e-test-stats-server-jobuuid-query.txt | 2 +- pkg/jobstats/server/server.go | 264 ++++++++---------- pkg/jobstats/server/server_test.go | 29 +- scripts/e2e-test.sh | 15 +- 9 files changed, 147 insertions(+), 171 deletions(-) create mode 100644 pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt diff --git a/Makefile b/Makefile index 8d5be796..50fa7511 100644 --- a/Makefile +++ b/Makefile @@ -127,6 +127,7 @@ test-e2e: build pkg/collector/fixtures/sys/.unpacked pkg/collector/fixtures/proc ./scripts/e2e-test.sh -s stats-jobid-query ./scripts/e2e-test.sh -s stats-jobuuid-jobid-query ./scripts/e2e-test.sh -s stats-admin-query + ./scripts/e2e-test.sh -s stats-admin-query-all endif .PHONY: skip-test-e2e diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt new file mode 100644 index 00000000..c54dfa1c --- /dev/null +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt @@ -0,0 +1 @@ +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"},{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"14508","id":"88a46e84-ffce-52ea-37e9-47b39d9ccfb3","partition":"part1","qos":"qos1","account":"acc4","group":"grp4","gid":"1004","user":"usr4","uid":"1004","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1004","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr4"},{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"},{"jobid":"11508","id":"d4956307-af17-870a-2fa0-38375105d257","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr15"},{"jobid":"81510","id":"938832b4-33b4-3303-b002-8150f737de7e","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr23"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt index f6f8f998..fdb3df92 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","QoS":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","QoS":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt index acea15b9..1c4637a6 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","QoS":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-jobid-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-jobid-query.txt index e355419a..d75868ae 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-jobid-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-jobid-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"11508","id":"d4956307-af17-870a-2fa0-38375105d257","partition":"part1","QoS":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr15"},{"jobid":"81510","id":"938832b4-33b4-3303-b002-8150f737de7e","partition":"part1","QoS":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr23"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt index 3a8dcd02..ff19904e 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","QoS":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"}]} diff --git a/pkg/jobstats/server/server.go b/pkg/jobstats/server/server.go index 6e84e80b..9d609462 100644 --- a/pkg/jobstats/server/server.go +++ b/pkg/jobstats/server/server.go @@ -39,14 +39,39 @@ type JobstatsServer struct { dbConfig db.Config adminUsers []string Accounts func(string, string, log.Logger) ([]base.Account, error) - Jobs func(string, string, []string, []string, []string, string, string, log.Logger) ([]base.BatchJob, error) + Jobs func(Query, log.Logger) ([]base.BatchJob, error) HealthCheck func(log.Logger) bool } -var ( - dbConn *sql.DB - dateFormat = "2006-01-02T15:04:05" -) +// Query builder struct +type Query struct { + builder strings.Builder + params []string +} + +// Add query to builder +func (q *Query) query(s string) { + q.builder.WriteString(s) +} + +// Add parameter and its placeholder +func (q *Query) param(val string) { + q.builder.WriteString("?") + q.params = append(q.params, val) +} + +// Add multiple parameters and its placeholder +func (q *Query) multiParam(val []string) { + q.builder.WriteString(fmt.Sprintf("(%s)", strings.Join(strings.Split(strings.Repeat("?", len(val)), ""), ","))) + q.params = append(q.params, val...) +} + +// Get current query string and its parameters +func (q *Query) get() (string, []string) { + return q.builder.String(), q.params +} + +var dbConn *sql.DB // Create new Jobstats server func NewJobstatsServer(c *Config) (*JobstatsServer, func(), error) { @@ -122,12 +147,12 @@ func (s *JobstatsServer) Shutdown(ctx context.Context, wg *sync.WaitGroup) error } // Get current user from the header -func (s *JobstatsServer) getUser(r *http.Request) string { +func (s *JobstatsServer) getUser(r *http.Request) (string, string) { // Check if username header is available - user := r.Header.Get("X-Grafana-User") - if user == "" { + loggedUser := r.Header.Get("X-Grafana-User") + if loggedUser == "" { level.Warn(s.logger).Log("msg", "Header X-Grafana-User not found") - return user + return loggedUser, loggedUser } // If current user is in list of admin users, get "actual" user from @@ -135,15 +160,17 @@ func (s *JobstatsServer) getUser(r *http.Request) string { // as their username. // For admin users who can look at dashboard of "any" user this will be the // username of the "impersonated" user and we take it into account - if slices.Contains(s.adminUsers, user) { + // Besides, X-Dashboard-User can have a special user "all" that will return jobs + // of all users + if slices.Contains(s.adminUsers, loggedUser) { if dashboardUser := r.Header.Get("X-Dashboard-User"); dashboardUser != "" { level.Info(s.logger).Log( - "msg", "Admin user accessing dashboards", "currentUser", user, "impersonatedUser", dashboardUser, + "msg", "Admin user accessing dashboards", "loggedUser", loggedUser, "dashboardUser", dashboardUser, ) - user = dashboardUser + return loggedUser, dashboardUser } } - return user + return loggedUser, loggedUser } // Set response headers @@ -160,7 +187,7 @@ func (s *JobstatsServer) accounts(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) // Get current user from header - user := s.getUser(r) + user, _ := s.getUser(r) // If no user found, return empty response if user == "" { response = base.AccountsResponse{ @@ -237,60 +264,76 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { s.setHeaders(w) w.WriteHeader(http.StatusOK) - // Get current user from header - user := s.getUser(r) + // Initialise utility vars + checkQueryWindow := true // Check query window size + defaultQueryWindow := time.Duration(168) * time.Hour // One week + + // Get current logged user and dashboard user from headers + loggedUser, dashboardUser := s.getUser(r) // If no user found, return empty response - if user == "" { + if loggedUser == "" { s.jobsErrorResponse("User Error", "No user identified", w) return } - // Get query parameters - accounts := r.URL.Query()["account"] - // Add accounts from var-account query parameter as well - // This is to take into account query string made by grafana when getting - // from variables - accounts = append(accounts, r.URL.Query()["var-account"]...) + // Initialise query builder + q := Query{} + q.query(fmt.Sprintf("SELECT * FROM %s", s.dbConfig.JobstatsDBTable)) - // Check if jobuuid or var-jobuuid is present in query params - jobuuids := r.URL.Query()["jobuuid"] - jobuuids = append(jobuuids, r.URL.Query()["var-jobuuid"]...) + // Add dummy condition at the beginning + q.query(" WHERE id > ") + q.param("0") - // Similarly check for jobid or var-jobid - jobids := r.URL.Query()["jobid"] - jobids = append(jobids, r.URL.Query()["var-jobid"]...) + // Check if logged user is in admin users and if we are quering for "all" jobs + // If that is not the case, add user condition in query + if !slices.Contains(s.adminUsers, loggedUser) || dashboardUser != "all" { + q.query(" AND Usr = ") + q.param(dashboardUser) + } + + // Get account query parameters if any + if accounts := r.URL.Query()["account"]; len(accounts) > 0 { + q.query(" AND Account IN ") + q.multiParam(accounts) + } + + // Check if jobuuid present in query params and add them + // If any of jobuuid or jobid query params are present + // do not check query window as we are fetching a specific job(s) + // Consequently set defaultQueryWindow to maximum which is retention period + if jobuuids := r.URL.Query()["jobuuid"]; len(jobuuids) > 0 { + q.query(" AND Jobuuid IN ") + q.multiParam(jobuuids) + checkQueryWindow = false + defaultQueryWindow = s.dbConfig.RetentionPeriod + } + + // Similarly check for jobid + if jobids := r.URL.Query()["jobid"]; len(jobids) > 0 { + q.query(" AND Jobid IN ") + q.multiParam(jobids) + checkQueryWindow = false + defaultQueryWindow = s.dbConfig.RetentionPeriod + } var from, to string - var checkQueryWindow = true // If no from provided, use 1 week from now as from if from = r.URL.Query().Get("from"); from == "" { - // If query is made for particular jobs and from is not present, use the - // default retention period as from as we need to look in whole DB - if len(jobuuids) > 0 || len(jobids) > 0 { - from = time.Now().Add(-s.dbConfig.RetentionPeriod).Format(dateFormat) - checkQueryWindow = false - } else { - from = time.Now().Add(-time.Duration(168) * time.Hour).Format(dateFormat) - } + from = time.Now().Add(-defaultQueryWindow).Format(time.RFC3339) } if to = r.URL.Query().Get("to"); to == "" { - to = time.Now().Format(dateFormat) - } - - // If jobuuids or jobids is present, turn off checkQueryWindow check - if len(jobuuids) > 0 || len(jobids) > 0 { - checkQueryWindow = false + to = time.Now().Format(time.RFC3339) } // Convert "from" and "to" to time.Time and return error response if they // cannot be parsed - fromTime, err := time.Parse(dateFormat, from) + fromTime, err := time.Parse(time.RFC3339, from) if err != nil { level.Error(s.logger).Log("msg", "Failed to parse from datestring", "from", from, "err", err) s.jobsErrorResponse("Internal server error", "Malformed 'from' time string", w) return } - toTime, err := time.Parse(dateFormat, to) + toTime, err := time.Parse(time.RFC3339, to) if err != nil { level.Error(s.logger).Log("msg", "Failed to parse from datestring", "to", to, "err", err) s.jobsErrorResponse("Internal server error", "Malformed 'to' time string", w) @@ -309,10 +352,18 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { return } + // Add from and to to query + if checkQueryWindow { + q.query(" AND Start BETWEEN ") + q.param(from) + q.query(" AND ") + q.param(to) + } + // Get all user jobs in the given time window - jobs, err := s.Jobs(s.dbConfig.JobstatsDBTable, user, accounts, jobids, jobuuids, from, to, s.logger) + jobs, err := s.Jobs(q, s.logger) if err != nil { - level.Error(s.logger).Log("msg", "Failed to fetch jobs", "user", user, "err", err) + level.Error(s.logger).Log("msg", "Failed to fetch jobs", "loggedUser", loggedUser, "err", err) s.jobsErrorResponse("Internal server error", "Failed to fetch user jobs", w) return } @@ -377,131 +428,53 @@ func fetchAccounts(dbTable string, user string, logger log.Logger) ([]base.Accou // Get user jobs within a given time window func fetchJobs( - dbTable string, - user string, - accounts []string, - jobids []string, - jobuuids []string, - from string, - to string, + query Query, logger log.Logger, ) ([]base.BatchJob, error) { var numJobs int - - // Requested jobuuids for logging - queryAccounts := strings.Join(accounts, ",") - queryJobuuids := strings.Join(jobuuids, ",") - queryJobids := strings.Join(jobids, ",") - - // Placeholders - accountPlaceholder := strings.Join(strings.Split(strings.Repeat("?", len(accounts)), ""), ",") - jobuuidPlaceholder := strings.Join(strings.Split(strings.Repeat("?", len(jobuuids)), ""), ",") - jobidPlaceholder := strings.Join(strings.Split(strings.Repeat("?", len(jobids)), ""), ",") - - // Make correct query strings based on queried jobuuid and jobid - var queryStmtString string - var countStmtString string - if len(jobuuids) > 0 && len(jobids) > 0 { - countStmtString = fmt.Sprintf("SELECT COUNT(*) FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s) AND (Jobuuid IN (%s) OR Jobid IN (%s))", - dbTable, - accountPlaceholder, - jobuuidPlaceholder, - jobidPlaceholder, - ) - queryStmtString = fmt.Sprintf("SELECT %s FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s) AND (Jobuuid IN (%s) OR Jobid IN (%s))", - strings.Join(base.BatchJobFieldNames, ","), - dbTable, - accountPlaceholder, - jobuuidPlaceholder, - jobidPlaceholder, - ) - } else if len(jobuuids) > 0 && len(jobids) == 0 { - countStmtString = fmt.Sprintf("SELECT COUNT(*) FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s) AND Jobuuid IN (%s)", - dbTable, - accountPlaceholder, - jobuuidPlaceholder, - ) - queryStmtString = fmt.Sprintf("SELECT %s FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s) AND Jobuuid IN (%s)", - strings.Join(base.BatchJobFieldNames, ","), - dbTable, - accountPlaceholder, - jobuuidPlaceholder, - ) - } else if len(jobuuids) == 0 && len(jobids) > 0 { - countStmtString = fmt.Sprintf("SELECT COUNT(*) FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s) AND Jobid IN (%s)", - dbTable, - accountPlaceholder, - jobidPlaceholder, - ) - queryStmtString = fmt.Sprintf("SELECT %s FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s) AND Jobid IN (%s)", - strings.Join(base.BatchJobFieldNames, ","), - dbTable, - accountPlaceholder, - jobidPlaceholder, - ) - } else { - countStmtString = fmt.Sprintf( - "SELECT COUNT(*) FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s)", - dbTable, - accountPlaceholder, - ) - queryStmtString = fmt.Sprintf("SELECT %s FROM %s WHERE Usr = ? AND Start BETWEEN ? AND ? AND Account IN (%s)", - strings.Join(base.BatchJobFieldNames, ","), - dbTable, - accountPlaceholder, - ) - } + // Get query string and params + queryString, queryParams := query.get() // Prepare SQL statements - countStmt, err := dbConn.Prepare(countStmtString) + countStmt, err := dbConn.Prepare(strings.Replace(queryString, "*", "COUNT(*)", 1)) if err != nil { level.Error(logger).Log( - "msg", "Failed to prepare count SQL statement for jobs", "user", user, - "accounts", queryAccounts, "jobuuids", queryJobuuids, "jobids", queryJobids, - "from", from, "to", to, "err", err, + "msg", "Failed to prepare count SQL statement for jobs", "query", queryString, + "queryParams", strings.Join(queryParams, ","), "err", err, ) return nil, err } defer countStmt.Close() - queryStmt, err := dbConn.Prepare(queryStmtString) + queryStmt, err := dbConn.Prepare(strings.Replace(queryString, "*", strings.Join(base.BatchJobFieldNames, ","), 1)) if err != nil { level.Error(logger).Log( - "msg", "Failed to prepare query SQL statement for jobs", "user", user, - "accounts", queryAccounts, "jobuuids", queryJobuuids, "jobids", queryJobids, - "from", from, "to", to, "err", err, + "msg", "Failed to prepare query SQL statement for jobs", "query", queryString, + "queryParams", strings.Join(queryParams, ","), "err", err, ) return nil, err } defer queryStmt.Close() - // Prepare query args - var args = []any{user, from, to} - for _, account := range accounts { - args = append(args, account) - } - for _, jobuuid := range jobuuids { - args = append(args, jobuuid) - } - for _, jobid := range jobids { - args = append(args, jobid) + // queryParams has to be an inteface. Do casting here + qParams := make([]interface{}, len(queryParams)) + for i, v := range queryParams { + qParams[i] = v } // First make a query to get number of rows that will be returned by query - _ = countStmt.QueryRow(args...).Scan(&numJobs) + _ = countStmt.QueryRow(qParams...).Scan(&numJobs) if err != nil { level.Error(logger).Log("msg", "Failed to execute count SQL statement for jobs query", - "user", user, "accounts", queryAccounts, "jobuuids", queryJobuuids, "jobids", queryJobids, - "from", from, "to", to, "err", err, + "query", queryString, "queryParams", strings.Join(queryParams, ","), "err", err, ) return nil, err } - rows, err := queryStmt.Query(args...) + rows, err := queryStmt.Query(qParams...) if err != nil { level.Error(logger).Log("msg", "Failed to execute query SQL statement for jobs query", - "user", user, "accounts", queryAccounts, "jobuuids", queryJobuuids, "jobids", queryJobids, - "from", from, "to", to, "err", err, + "query", queryString, "queryParams", strings.Join(queryParams, ","), "err", err, ) return nil, err } @@ -568,9 +541,8 @@ func fetchJobs( rowIdx++ } level.Debug(logger).Log( - "msg", "Jobs found for user", "user", user, - "numjobs", len(jobs), "accounts", queryAccounts, "jobuuids", queryJobuuids, "jobids", queryJobids, - "from", from, "to", to, + "msg", "Jobs found for user", "numjobs", len(jobs), "query", queryString, + "queryParams", strings.Join(queryParams, ","), ) return jobs, nil } diff --git a/pkg/jobstats/server/server_test.go b/pkg/jobstats/server/server_test.go index 1f50a3ed..624f2221 100644 --- a/pkg/jobstats/server/server_test.go +++ b/pkg/jobstats/server/server_test.go @@ -10,11 +10,13 @@ import ( "github.com/go-kit/log" "github.com/mahendrapaipuri/batchjob_monitoring/pkg/jobstats/base" + "github.com/mahendrapaipuri/batchjob_monitoring/pkg/jobstats/db" ) func setupServer() *JobstatsServer { logger := log.NewNopLogger() server, _, _ := NewJobstatsServer(&Config{Logger: logger}) + server.dbConfig = db.Config{JobstatsDBTable: "jobs"} server.Accounts = getMockAccounts server.Jobs = getMockJobs return server @@ -25,16 +27,10 @@ func getMockAccounts(dbTable string, user string, logger log.Logger) ([]base.Acc } func getMockJobs( - dbTable string, - user string, - accounts []string, - jobids []string, - jobuuids []string, - from string, - to string, + query Query, logger log.Logger, ) ([]base.BatchJob, error) { - return []base.BatchJob{{Jobid: "1000", Usr: user}, {Jobid: "10001", Usr: user}}, nil + return []base.BatchJob{{Jobid: "1000", Usr: "user"}, {Jobid: "10001", Usr: "user"}}, nil } // Test /api/accounts when no user header found @@ -160,7 +156,7 @@ func TestJobsHandlerWithUserHeader(t *testing.T) { } // Expected result - expectedJobs, _ := getMockJobs("mockDB", currentUser, []string{"foo", "bar"}, []string{}, []string{}, "", "", server.logger) + expectedJobs, _ := getMockJobs(Query{}, server.logger) // Unmarshal byte into structs. var response base.JobsResponse @@ -181,9 +177,8 @@ func TestJobsHandlerWithUserHeaderAndAdmin(t *testing.T) { // Create request req := httptest.NewRequest(http.MethodGet, "/api/jobs", nil) // Add user header - currentUser := "foo" req.Header.Set("X-Grafana-User", server.adminUsers[0]) - req.Header.Set("X-Dashboard-User", currentUser) + req.Header.Set("X-Dashboard-User", "foo") // Start recorder w := httptest.NewRecorder() @@ -198,7 +193,7 @@ func TestJobsHandlerWithUserHeaderAndAdmin(t *testing.T) { } // Expected result - expectedJobs, _ := getMockJobs("mockDB", currentUser, []string{"foo", "bar"}, []string{}, []string{}, "", "", server.logger) + expectedJobs, _ := getMockJobs(Query{}, server.logger) // Unmarshal byte into structs. var response base.JobsResponse @@ -260,8 +255,8 @@ func TestJobsHandlerWithQueryWindowExceeded(t *testing.T) { req.Header.Set("X-Grafana-User", "foo") // Add from query parameter q := req.URL.Query() - q.Add("from", "2023-01-01T00:00:00") - q.Add("to", "2023-06-01T00:00:00") + q.Add("from", "2023-01-01T00:00:00Z") + q.Add("to", "2023-06-01T00:00:00Z") req.URL.RawQuery = q.Encode() // Start recorder @@ -301,8 +296,8 @@ func TestJobsHandlerWithJobuuidsQueryParams(t *testing.T) { req.Header.Set("X-Grafana-User", "foo") // Add from query parameter q := req.URL.Query() - q.Add("from", "2023-01-01T00:00:00") - q.Add("to", "2023-06-01T00:00:00") + q.Add("from", "2023-01-01T00:00:00Z") + q.Add("to", "2023-06-01T00:00:00Z") q.Add("jobuuid", "foo-bar") req.URL.RawQuery = q.Encode() @@ -319,7 +314,7 @@ func TestJobsHandlerWithJobuuidsQueryParams(t *testing.T) { } // Expected result - expectedJobs, _ := getMockJobs("mockDB", "foo", []string{"foo", "bar"}, []string{}, []string{}, "", "", server.logger) + expectedJobs, _ := getMockJobs(Query{}, server.logger) // Unmarshal byte into structs. var response base.JobsResponse diff --git a/scripts/e2e-test.sh b/scripts/e2e-test.sh index 2ea0ba74..940c1392 100755 --- a/scripts/e2e-test.sh +++ b/scripts/e2e-test.sh @@ -102,6 +102,10 @@ then then desc="/api/jobs end point test for admin query" fixture='pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt' + elif [ "${scenario}" = "stats-admin-query-all" ] + then + desc="/api/jobs end point test for admin query for all jobs" + fixture='pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt' fi logfile="${tmpdir}/batchjob_stats_server.log" @@ -264,16 +268,19 @@ then get -H "X-Grafana-User: usr1" "127.0.0.1:${port}/api/accounts" > "${fixture_output}" elif [ "${scenario}" = "stats-jobuuid-query" ] then - get -H "X-Grafana-User: usr2" "127.0.0.1:${port}/api/jobs?jobuuid=baee651d-df44-af2c-fa09-50f5523b5e19&account=acc2&from=2023-02-21T00:00:00&to=2023-02-21T23:59:59" > "${fixture_output}" + get -H "X-Grafana-User: usr2" "127.0.0.1:${port}/api/jobs?jobuuid=baee651d-df44-af2c-fa09-50f5523b5e19&account=acc2&from=2023-02-21T00:00:00Z&to=2023-02-21T23:59:59Z" > "${fixture_output}" elif [ "${scenario}" = "stats-jobid-query" ] then - get -H "X-Grafana-User: usr8" "127.0.0.1:${port}/api/jobs?jobid=1479763&account=acc1&from=2023-02-21T00:00:00&to=2023-02-22T00:00:00" > "${fixture_output}" + get -H "X-Grafana-User: usr8" "127.0.0.1:${port}/api/jobs?jobid=1479763&account=acc1&from=2023-02-21T00:00:00Z&to=2023-02-22T00:00:00Z" > "${fixture_output}" elif [ "${scenario}" = "stats-jobuuid-jobid-query" ] then - get -H "X-Grafana-User: usr15" "127.0.0.1:${port}/api/jobs?jobuuid=e653f045-73b7-c928-e8df-00c4083cb9bc&jobid=11508&jobid=81510&account=acc1&from=2023-02-21T00:00:00&to=2023-02-22T00:00:00" > "${fixture_output}" + get -H "X-Grafana-User: usr15" "127.0.0.1:${port}/api/jobs?jobuuid=e653f045-73b7-c928-e8df-00c4083cb9bc&jobid=11508&jobid=81510&account=acc1&from=2023-02-21T00:00:00Z&to=2023-02-22T00:00:00Z" > "${fixture_output}" elif [ "${scenario}" = "stats-admin-query" ] then - get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: usr3" "127.0.0.1:${port}/api/jobs?account=acc3&from=2023-01-21T15:49:06&to=2023-02-27T15:49:06" > "${fixture_output}" + get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: usr3" "127.0.0.1:${port}/api/jobs?account=acc3&from=2023-01-21T15:49:06Z&to=2023-02-27T15:49:06Z" > "${fixture_output}" + elif [ "${scenario}" = "stats-admin-query-all" ] + then + get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: all" "127.0.0.1:${port}/api/jobs?from=2023-01-21T15:49:06Z&to=2023-02-27T15:49:06Z" > "${fixture_output}" fi fi From b574eb92210247fa7d89b95d6d50b8ab85cde34d Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Thu, 4 Jan 2024 12:59:21 +0100 Subject: [PATCH 04/11] refactor: Include time zone in slurm job times * We need to have time zone information to be able to convert times between unix and dateformats easily * Set SLURM_TIME_FORMAT env var for sacct command to include timezone in output * Include time stamps in BatchJob struct as we need them to generate Grafana URLs * Add env arg to os execute helpers to be able to set env vars to subprocesses * Update functions using Execute calls to include env vars arg Signed-off-by: Mahendra Paipuri --- internal/helpers/helpers.go | 44 ++++++++++++++++++++++++--- internal/helpers/helpers_test.go | 10 +++--- pkg/collector/helper.go | 2 +- pkg/collector/ipmi.go | 12 ++++---- pkg/jobstats/base/base.go | 3 ++ pkg/jobstats/helper/helper.go | 9 ++++++ pkg/jobstats/schedulers/slurm.go | 36 +++++++++++++--------- pkg/jobstats/schedulers/slurm_test.go | 24 +++++++++------ 8 files changed, 101 insertions(+), 39 deletions(-) diff --git a/internal/helpers/helpers.go b/internal/helpers/helpers.go index fe30f243..55458659 100644 --- a/internal/helpers/helpers.go +++ b/internal/helpers/helpers.go @@ -2,6 +2,7 @@ package helpers import ( "context" + "os" "os/exec" "strings" "syscall" @@ -22,13 +23,24 @@ func GetUuidFromString(stringSlice []string) (string, error) { } // Execute command and return stdout/stderr -func Execute(cmd string, args []string, logger log.Logger) ([]byte, error) { +func Execute(cmd string, args []string, env []string, logger log.Logger) ([]byte, error) { level.Debug(logger).Log("msg", "Executing", "command", cmd, "args", strings.Join(args, " ")) execCmd := exec.Command(cmd, args...) + + // If env is not nil pointer, add env vars into subprocess cmd + if env != nil { + execCmd.Env = append(os.Environ(), env...) + } + + // Attach a separate terminal less session to the subprocess + // This is to avoid prompting for password when we run command with sudo + // Ref: https://stackoverflow.com/questions/13432947/exec-external-program-script-and-detect-if-it-requests-user-input if cmd == "sudo" { execCmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} } + + // Execute command out, err := execCmd.CombinedOutput() if err != nil { level.Error(logger). @@ -38,12 +50,21 @@ func Execute(cmd string, args []string, logger log.Logger) ([]byte, error) { } // Execute command as a given UID and GID and return stdout/stderr -func ExecuteAs(cmd string, args []string, uid int, gid int, logger log.Logger) ([]byte, error) { +func ExecuteAs(cmd string, args []string, uid int, gid int, env []string, logger log.Logger) ([]byte, error) { level.Debug(logger). Log("msg", "Executing as user", "command", cmd, "args", strings.Join(args, " "), "uid", uid, "gid", gid) execCmd := exec.Command(cmd, args...) + + // Set uid and gid for process execCmd.SysProcAttr = &syscall.SysProcAttr{} execCmd.SysProcAttr.Credential = &syscall.Credential{Uid: uint32(uid), Gid: uint32(gid)} + + // If env is not nil pointer, add env vars into subprocess cmd + if env != nil { + execCmd.Env = append(os.Environ(), env...) + } + + // Execute command out, err := execCmd.CombinedOutput() if err != nil { level.Error(logger). @@ -53,7 +74,7 @@ func ExecuteAs(cmd string, args []string, uid int, gid int, logger log.Logger) ( } // Execute command with timeout and return stdout/stderr -func ExecuteWithTimeout(cmd string, args []string, timeout int, logger log.Logger) ([]byte, error) { +func ExecuteWithTimeout(cmd string, args []string, timeout int, env []string, logger log.Logger) ([]byte, error) { level.Debug(logger). Log("msg", "Executing with timeout", "command", cmd, "args", strings.Join(args, " "), "timeout", timeout) @@ -65,6 +86,12 @@ func ExecuteWithTimeout(cmd string, args []string, timeout int, logger log.Logge } execCmd := exec.CommandContext(ctx, cmd, args...) + + // If env is not nil pointer, add env vars into subprocess cmd + if env != nil { + execCmd.Env = append(os.Environ(), env...) + } + // Attach a separate terminal less session to the subprocess // This is to avoid prompting for password when we run command with sudo // Ref: https://stackoverflow.com/questions/13432947/exec-external-program-script-and-detect-if-it-requests-user-input @@ -75,6 +102,7 @@ func ExecuteWithTimeout(cmd string, args []string, timeout int, logger log.Logge // The signal to send to the children when parent receives a kill signal // execCmd.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGTERM} + // Execute command out, err := execCmd.CombinedOutput() if err != nil { level.Error(logger). @@ -84,7 +112,7 @@ func ExecuteWithTimeout(cmd string, args []string, timeout int, logger log.Logge } // Execute command with timeout as a given UID and GID and return stdout/stderr -func ExecuteAsWithTimeout(cmd string, args []string, uid int, gid int, timeout int, logger log.Logger) ([]byte, error) { +func ExecuteAsWithTimeout(cmd string, args []string, uid int, gid int, timeout int, env []string, logger log.Logger) ([]byte, error) { level.Debug(logger). Log("msg", "Executing with timeout as user", "command", cmd, "args", strings.Join(args, " "), "uid", uid, "gid", gid, "timout") @@ -96,9 +124,17 @@ func ExecuteAsWithTimeout(cmd string, args []string, uid int, gid int, timeout i } execCmd := exec.CommandContext(ctx, cmd, args...) + + // If env is not nil pointer, add env vars into subprocess cmd + if env != nil { + execCmd.Env = append(os.Environ(), env...) + } + + // Set uid and gid for the process execCmd.SysProcAttr = &syscall.SysProcAttr{} execCmd.SysProcAttr.Credential = &syscall.Credential{Uid: uint32(uid), Gid: uint32(gid)} + // Execute command out, err := execCmd.CombinedOutput() if err != nil { level.Error(logger). diff --git a/internal/helpers/helpers_test.go b/internal/helpers/helpers_test.go index 4dfb3fde..03bdaa82 100644 --- a/internal/helpers/helpers_test.go +++ b/internal/helpers/helpers_test.go @@ -22,16 +22,16 @@ func TestGetUuid(t *testing.T) { func TestExecute(t *testing.T) { // Test successful command execution - out, err := Execute("echo", []string{"test"}, log.NewNopLogger()) + out, err := Execute("bash", []string{"-c", "echo ${VAR1} ${VAR2}"}, []string{"VAR1=1", "VAR2=2"}, log.NewNopLogger()) if err != nil { t.Errorf("Failed to execute command %s", err) } - if strings.TrimSpace(string(out)) != "test" { - t.Errorf("Expected output \"test\". Got \"%s\"", string(out)) + if strings.TrimSpace(string(out)) != "1 2" { + t.Errorf("Expected output \"1 2\". Got \"%s\"", string(out)) } // Test failed command execution - out, err = Execute("exit", []string{"1"}, log.NewNopLogger()) + out, err = Execute("exit", []string{"1"}, nil, log.NewNopLogger()) if err == nil { t.Errorf("Expected to fail command execution. Got output %s", out) } @@ -39,7 +39,7 @@ func TestExecute(t *testing.T) { func TestExecuteWithTimeout(t *testing.T) { // Test successful command execution - _, err := ExecuteWithTimeout("sleep", []string{"5"}, 2, log.NewNopLogger()) + _, err := ExecuteWithTimeout("sleep", []string{"5"}, 2, nil, log.NewNopLogger()) if err == nil { t.Errorf("Expected command timeout") } diff --git a/pkg/collector/helper.go b/pkg/collector/helper.go index e4504446..efde9450 100644 --- a/pkg/collector/helper.go +++ b/pkg/collector/helper.go @@ -108,7 +108,7 @@ func GetNvidiaGPUDevices(nvidiaSmiPath string, logger log.Logger) (map[int]Devic // Execute nvidia-smi command to get available GPUs args := []string{"--query-gpu=index,name,uuid", "--format=csv"} - nvidiaSmiOutput, err := helpers.Execute(nvidiaSmiPath, args, logger) + nvidiaSmiOutput, err := helpers.Execute(nvidiaSmiPath, args, nil, logger) if err != nil { level.Error(logger). Log("msg", "nvidia-smi command to get list of devices failed", "err", err) diff --git a/pkg/collector/ipmi.go b/pkg/collector/ipmi.go index 91b700bd..88c3437e 100644 --- a/pkg/collector/ipmi.go +++ b/pkg/collector/ipmi.go @@ -96,7 +96,7 @@ func NewIPMICollector(logger log.Logger) (Collector, error) { cmdSlice := strings.Split(*ipmiDcmiCmd, " ") // Verify if running ipmiDcmiCmd works - if _, err := helpers.Execute(cmdSlice[0], cmdSlice[1:], logger); err == nil { + if _, err := helpers.Execute(cmdSlice[0], cmdSlice[1:], nil, logger); err == nil { execMode = "native" goto outside } @@ -104,7 +104,7 @@ func NewIPMICollector(logger log.Logger) (Collector, error) { // If ipmiDcmiCmd failed to run and if sudo is not already present in command, // add sudo to command and execute. If current user has sudo rights it will be a success if cmdSlice[0] != "sudo" { - if _, err := helpers.ExecuteWithTimeout("sudo", cmdSlice, 2, logger); err == nil { + if _, err := helpers.ExecuteWithTimeout("sudo", cmdSlice, 2, nil, logger); err == nil { execMode = "sudo" goto outside } @@ -112,7 +112,7 @@ func NewIPMICollector(logger log.Logger) (Collector, error) { // As last attempt, run the command as root user by forking subprocess // as root. If there is setuid cap on the process, it will be a success - if _, err := helpers.ExecuteAs(cmdSlice[0], cmdSlice[1:], 0, 0, logger); err == nil { + if _, err := helpers.ExecuteAs(cmdSlice[0], cmdSlice[1:], 0, 0, nil, logger); err == nil { execMode = "cap" goto outside } @@ -152,11 +152,11 @@ func (c *impiCollector) Update(ch chan<- prometheus.Metric) error { // Execute ipmi-dcmi command cmdSlice := strings.Split(*ipmiDcmiCmd, " ") if c.execMode == "cap" { - stdOut, err = helpers.ExecuteAs(cmdSlice[0], cmdSlice[1:], 0, 0, c.logger) + stdOut, err = helpers.ExecuteAs(cmdSlice[0], cmdSlice[1:], 0, 0, nil, c.logger) } else if c.execMode == "sudo" { - stdOut, err = helpers.ExecuteWithTimeout("sudo", cmdSlice, 1, c.logger) + stdOut, err = helpers.ExecuteWithTimeout("sudo", cmdSlice, 1, nil, c.logger) } else if c.execMode == "native" { - stdOut, err = helpers.Execute(cmdSlice[0], cmdSlice[1:], c.logger) + stdOut, err = helpers.Execute(cmdSlice[0], cmdSlice[1:], nil, c.logger) } else { err = fmt.Errorf("Current process do not have permissions to execute %s", *ipmiDcmiCmd) } diff --git a/pkg/jobstats/base/base.go b/pkg/jobstats/base/base.go index eb259820..1c0a03a7 100644 --- a/pkg/jobstats/base/base.go +++ b/pkg/jobstats/base/base.go @@ -29,6 +29,9 @@ type BatchJob struct { Submit string `json:"submit"` Start string `json:"start"` End string `json:"end"` + SubmitTS string `json:"submitts"` + StartTS string `json:"startts"` + EndTS string `json:"endts"` Elapsed string `json:"elapsed"` Exitcode string `json:"exitcode"` State string `json:"state"` diff --git a/pkg/jobstats/helper/helper.go b/pkg/jobstats/helper/helper.go index d10d91eb..313c49bf 100644 --- a/pkg/jobstats/helper/helper.go +++ b/pkg/jobstats/helper/helper.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "strings" + "time" ) var ( @@ -122,3 +123,11 @@ func expandNodelist(nodelistExp string) []string { func NodelistParser(nodelistExp string) []string { return expandNodelist(replaceNodelistDelimiter(nodelistExp)) } + +// Converts a date in a given layout to unix timestamp of the date +func TimeToTimestamp(layout string, date string) int64 { + if t, err := time.Parse(layout, date); err == nil { + return int64(t.Local().Unix()) + } + return 0 +} diff --git a/pkg/jobstats/schedulers/slurm.go b/pkg/jobstats/schedulers/slurm.go index a5ce3282..0e8df302 100644 --- a/pkg/jobstats/schedulers/slurm.go +++ b/pkg/jobstats/schedulers/slurm.go @@ -1,6 +1,7 @@ package schedulers import ( + "fmt" "os" "os/user" "strconv" @@ -12,6 +13,7 @@ import ( "github.com/go-kit/log/level" "github.com/mahendrapaipuri/batchjob_monitoring/internal/helpers" "github.com/mahendrapaipuri/batchjob_monitoring/pkg/jobstats/base" + "github.com/mahendrapaipuri/batchjob_monitoring/pkg/jobstats/helper" jobstats_helper "github.com/mahendrapaipuri/batchjob_monitoring/pkg/jobstats/helper" "github.com/prometheus/common/model" ) @@ -19,17 +21,18 @@ import ( type slurmScheduler struct { logger log.Logger execMode string - slurmDateFormat string slurmWalltimeCutoff time.Duration } const slurmBatchScheduler = "slurm" var ( - slurmUserUid int - slurmUserGid int - jobLock = sync.RWMutex{} - sacctPath = base.BatchJobStatsServerApp.Flag( + slurmUserUid int + slurmUserGid int + defaultLayout = fmt.Sprintf("%sT%s", time.DateOnly, time.TimeOnly) + slurmTimeFormat = fmt.Sprintf("%s-0700", defaultLayout) + jobLock = sync.RWMutex{} + sacctPath = base.BatchJobStatsServerApp.Flag( "slurm.sacct.path", "Absolute path to sacct executable.", ).Default("/usr/local/bin/sacct").String() @@ -88,7 +91,7 @@ func preflightChecks(logger log.Logger) (string, error) { goto sudomode } - if _, err := helpers.ExecuteAs(*sacctPath, []string{"--help"}, slurmUserUid, slurmUserGid, logger); err == nil { + if _, err := helpers.ExecuteAs(*sacctPath, []string{"--help"}, slurmUserUid, slurmUserGid, nil, logger); err == nil { execMode = "cap" level.Debug(logger).Log("msg", "Linux capabilities will be used to execute sacct as slurm user") return execMode, nil @@ -96,7 +99,7 @@ func preflightChecks(logger log.Logger) (string, error) { sudomode: // Last attempt to run sacct with sudo - if _, err := helpers.ExecuteWithTimeout("sudo", []string{*sacctPath, "--help"}, 5, logger); err == nil { + if _, err := helpers.ExecuteWithTimeout("sudo", []string{*sacctPath, "--help"}, 5, nil, logger); err == nil { execMode = "sudo" level.Debug(logger).Log("msg", "sudo will be used to execute sacct command") return execMode, nil @@ -120,15 +123,14 @@ func NewSlurmScheduler(logger log.Logger) (BatchJobFetcher, error) { return &slurmScheduler{ logger: logger, execMode: execMode, - slurmDateFormat: "2006-01-02T15:04:05", slurmWalltimeCutoff: time.Duration(slurmWalltimeCutoffString), }, nil } // Get jobs from slurm func (s *slurmScheduler) Fetch(start time.Time, end time.Time) ([]base.BatchJob, error) { - startTime := start.Format(s.slurmDateFormat) - endTime := end.Format(s.slurmDateFormat) + startTime := start.Format(defaultLayout) + endTime := end.Format(defaultLayout) // Execute sacct command between start and end times sacctOutput, err := runSacctCmd(s.execMode, startTime, endTime, s.logger) @@ -147,21 +149,24 @@ func (s *slurmScheduler) Fetch(start time.Time, end time.Time) ([]base.BatchJob, func runSacctCmd(execMode string, startTime string, endTime string, logger log.Logger) ([]byte, error) { // Use jobIDRaw that outputs the array jobs as regular job IDs instead of id_array format args := []string{ - "-D", "--allusers", "--parsable2", + "-D", "-X", "--allusers", "--parsable2", "--format", "jobidraw,partition,qos,account,group,gid,user,uid,submit,start,end,elapsed,elapsedraw,exitcode,state,allocnodes,alloccpus,nodelist,jobname,workdir", "--state", "CANCELLED,COMPLETED,FAILED,NODE_FAIL,PREEMPTED,TIMEOUT", "--starttime", startTime, "--endtime", endTime, } + // Use SLURM_TIME_FORMAT env var to get timezone offset + env := []string{"SLURM_TIME_FORMAT=\"%Y-%m-%dT%H:%M:%S%z\""} + // Run command as slurm user if execMode == "cap" { - return helpers.ExecuteAs(*sacctPath, args, slurmUserUid, slurmUserGid, logger) + return helpers.ExecuteAs(*sacctPath, args, slurmUserUid, slurmUserGid, env, logger) } else if execMode == "sudo" { args = append([]string{*sacctPath}, args...) - return helpers.Execute("sudo", args, logger) + return helpers.Execute("sudo", args, env, logger) } - return helpers.Execute(*sacctPath, args, logger) + return helpers.Execute(*sacctPath, args, env, logger) } // Parse sacct command output and return batchjob slice @@ -239,6 +244,9 @@ func parseSacctCmdOutput(sacctOutput string, elapsedCutoff time.Duration, logger Submit: components[8], Start: components[9], End: components[10], + SubmitTS: strconv.FormatInt(helper.TimeToTimestamp(slurmTimeFormat, components[8]), 10), + StartTS: strconv.FormatInt(helper.TimeToTimestamp(slurmTimeFormat, components[9]), 10), + EndTS: strconv.FormatInt(helper.TimeToTimestamp(slurmTimeFormat, components[10]), 10), Elapsed: components[11], Exitcode: components[13], State: components[14], diff --git a/pkg/jobstats/schedulers/slurm_test.go b/pkg/jobstats/schedulers/slurm_test.go index 7b4a879c..8ffd6d28 100644 --- a/pkg/jobstats/schedulers/slurm_test.go +++ b/pkg/jobstats/schedulers/slurm_test.go @@ -11,9 +11,9 @@ import ( var ( sacctCmdOutput = `JobID|Partition|QoS|Account|Group|GID|User|UID|Submit|Start|End|Elapsed|ElapsedRaw|ExitCode|State|NNodes|Ncpus|NodeList|JobName|WorkDir -1479763|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T14:37:02|2023-02-21T14:37:07|2023-02-21T15:26:29|00:49:22|3000|0:0|CANCELLED by 302137|1|8|compute-0|test_script1|/home/usr -1481508|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T15:48:20|2023-02-21T15:49:06|2023-02-21T15:57:23|00:08:17|4920|0:0|CANCELLED by 302137|2|16|compute-[0-2]|test_script2|/home/usr -1481510|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T15:48:20|2023-02-21T15:49:06|2023-02-21T15:57:23|00:00:17|17|0:0|CANCELLED by 302137|2|16|compute-[0-2]|test_script2|/home/usr` +1479763|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T14:37:02+0100|2023-02-21T14:37:07+0100|2023-02-21T15:26:29+0100|00:49:22|3000|0:0|CANCELLED by 302137|1|8|compute-0|test_script1|/home/usr +1481508|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T15:48:20+0100|2023-02-21T15:49:06+0100|2023-02-21T15:57:23+0100|00:08:17|4920|0:0|CANCELLED by 302137|2|16|compute-[0-2]|test_script2|/home/usr +1481510|part1|qos1|acc1|grp|1000|usr|1000|2023-02-21T15:48:20+0100|2023-02-21T15:49:06+0100|2023-02-21T15:57:23+0100|00:00:17|17|0:0|CANCELLED by 302137|2|16|compute-[0-2]|test_script2|/home/usr` logger = log.NewNopLogger() expectedBatchJobs = []base.BatchJob{ { @@ -26,9 +26,12 @@ var ( Gid: "1000", Usr: "usr", Uid: "1000", - Submit: "2023-02-21T14:37:02", - Start: "2023-02-21T14:37:07", - End: "2023-02-21T15:26:29", + Submit: "2023-02-21T14:37:02+0100", + Start: "2023-02-21T14:37:07+0100", + End: "2023-02-21T15:26:29+0100", + SubmitTS: "1676986622", + StartTS: "1676986627", + EndTS: "1676989589", Elapsed: "00:49:22", Exitcode: "0:0", State: "CANCELLED by 302137", @@ -49,9 +52,12 @@ var ( Gid: "1000", Usr: "usr", Uid: "1000", - Submit: "2023-02-21T15:48:20", - Start: "2023-02-21T15:49:06", - End: "2023-02-21T15:57:23", + Submit: "2023-02-21T15:48:20+0100", + Start: "2023-02-21T15:49:06+0100", + End: "2023-02-21T15:57:23+0100", + SubmitTS: "1676990900", + StartTS: "1676990946", + EndTS: "1676991443", Elapsed: "00:08:17", Exitcode: "0:0", State: "CANCELLED by 302137", From d8d5503205971be067781bcf2583cfa1474fd7d5 Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Thu, 4 Jan 2024 13:00:59 +0100 Subject: [PATCH 05/11] refactor: Use timestamp as query parameter * Grafana passes ISO time always in UTC zone * If we want to use local browser time, we need to use timestamp as query parameter * We simplify /api/jobs end point to accept timestamp as query parameter * Update tests and fixtures accordingly Signed-off-by: Mahendra Paipuri --- pkg/jobstats/db/db.go | 7 +- .../e2e-test-stats-server-admin-query-all.txt | 2 +- .../e2e-test-stats-server-admin-query.txt | 2 +- .../e2e-test-stats-server-jobid-query.txt | 2 +- .../e2e-test-stats-server-jobuuid-query.txt | 2 +- pkg/jobstats/fixtures/sacct | 18 +-- pkg/jobstats/server/server.go | 112 ++++++++++-------- pkg/jobstats/server/server_test.go | 6 +- scripts/e2e-test.sh | 20 ++-- 9 files changed, 98 insertions(+), 73 deletions(-) diff --git a/pkg/jobstats/db/db.go b/pkg/jobstats/db/db.go index 9af8b87b..4ba1c26d 100644 --- a/pkg/jobstats/db/db.go +++ b/pkg/jobstats/db/db.go @@ -436,13 +436,15 @@ func (j *jobStatsDB) prepareInsertStatement(tx *sql.Tx) (*sql.Stmt, error) { // Insert job stat into DB func (j *jobStatsDB) insertJobs(statement *sql.Stmt, jobStats []base.BatchJob) { + var err error for _, jobStat := range jobStats { // Empty job if jobStat == (base.BatchJob{}) { continue } + // level.Debug(j.logger).Log("msg", "Inserting job", "jobid", jobStat.Jobid) - _, err := statement.Exec( + _, err = statement.Exec( jobStat.Jobid, jobStat.Jobuuid, jobStat.Partition, @@ -455,6 +457,9 @@ func (j *jobStatsDB) insertJobs(statement *sql.Stmt, jobStats []base.BatchJob) { jobStat.Submit, jobStat.Start, jobStat.End, + jobStat.SubmitTS, + jobStat.StartTS, + jobStat.EndTS, jobStat.Elapsed, jobStat.Exitcode, jobStat.State, diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt index c54dfa1c..76a5b837 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"},{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"14508","id":"88a46e84-ffce-52ea-37e9-47b39d9ccfb3","partition":"part1","qos":"qos1","account":"acc4","group":"grp4","gid":"1004","user":"usr4","uid":"1004","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1004","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr4"},{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"},{"jobid":"11508","id":"d4956307-af17-870a-2fa0-38375105d257","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr15"},{"jobid":"81510","id":"938832b4-33b4-3303-b002-8150f737de7e","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr23"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"},{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"14508","id":"88a46e84-ffce-52ea-37e9-47b39d9ccfb3","partition":"part1","qos":"qos1","account":"acc4","group":"grp4","gid":"1004","user":"usr4","uid":"1004","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1004","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr4"},{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"},{"jobid":"11508","id":"d4956307-af17-870a-2fa0-38375105d257","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr15"},{"jobid":"81510","id":"938832b4-33b4-3303-b002-8150f737de7e","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr23"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt index fdb3df92..dd44c114 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt index 1c4637a6..7a62e12e 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02","start":"2023-02-21T14:37:07","end":"2023-02-21T15:26:29","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt index ff19904e..709912a6 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20","start":"2023-02-21T15:49:06","end":"2023-02-21T15:57:23","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"}]} diff --git a/pkg/jobstats/fixtures/sacct b/pkg/jobstats/fixtures/sacct index cd522765..3a022a20 100755 --- a/pkg/jobstats/fixtures/sacct +++ b/pkg/jobstats/fixtures/sacct @@ -1,13 +1,13 @@ #!/bin/bash echo """JobID|Partition|QoS|Account|Group|GID|User|UID|Submit|Start|End|Elapsed|ElapsedRaw|ExitCode|State|NNodes|Ncpus|NodeList|JobName|WorkDir -1479763|part1|qos1|acc1|grp1|1001|usr1|1001|2022-02-21T14:37:02|2022-02-21T14:37:07|2022-02-21T15:26:29|00:49:22|3000|0:0|CANCELLED by 1001|1|8|compute-0|test_script1|/home/usr1 -1481508|part1|qos1|acc2|grp2|1002|usr2|1002|2023-02-21T15:48:20|2023-02-21T15:49:06|2023-02-21T15:57:23|00:08:17|4500|0:0|CANCELLED by 1002|2|16|compute-[0-2]|test_script2|/home/usr2 -1481510|part1|qos1|acc3|grp3|1003|usr3|1003|2023-02-21T15:48:20|2023-02-21T15:49:06|2023-02-21T15:57:23|00:00:17|789|0:0|CANCELLED by 1003|2|16|compute-[0-2]|test_script2|/home/usr3 -147973|part1|qos1|acc3|grp3|1003|usr3|1003|2023-02-21T14:37:02|2023-02-21T14:37:07|2023-02-21T15:26:29|00:49:22|3000|0:0|CANCELLED by 1003|1|8|compute-0|test_script1|/home/usr3 -14508|part1|qos1|acc4|grp4|1004|usr4|1004|2023-02-21T15:48:20|2023-02-21T15:49:06|2023-02-21T15:57:23|00:08:17|4500|0:0|CANCELLED by 1004|2|16|compute-[0-2]|test_script2|/home/usr4 -147973|part1|qos1|acc1|gr1|1001|usr1|1001|2023-12-21T15:48:20|2023-12-21T15:49:06|2023-12-21T15:57:23|00:00:17|567|0:0|CANCELLED by 1001|2|16|compute-[0-2]|test_script2|/home/usr1 -1479763|part1|qos1|acc1|grp8|1008|usr8|1008|2023-02-21T14:37:02|2023-02-21T14:37:07|2023-02-21T15:26:29|00:49:22|3000|0:0|CANCELLED by 1008|1|8|compute-0|test_script1|/home/usr8 -11508|part1|qos1|acc1|grp15|1015|usr15|1015|2023-02-21T15:48:20|2023-02-21T15:49:06|2023-02-21T15:57:23|00:08:17|4500|0:0|CANCELLED by 1015|2|16|compute-[0-2]|test_script2|/home/usr15 -81510|part1|qos1|acc1|grp15|1015|usr15|1015|2023-02-21T15:48:20|2023-02-21T15:49:06|2023-02-21T15:57:23|00:00:17|3533|0:0|CANCELLED by 1015|2|16|compute-[0-2]|test_script2|/home/usr23 +1479763|part1|qos1|acc1|grp1|1001|usr1|1001|2022-02-21T14:37:02+0100|2022-02-21T14:37:07+0100|2022-02-21T15:26:29+0100|00:49:22|3000|0:0|CANCELLED by 1001|1|8|compute-0|test_script1|/home/usr1 +1481508|part1|qos1|acc2|grp2|1002|usr2|1002|2023-02-21T15:48:20+0100|2023-02-21T15:49:06+0100|2023-02-21T15:57:23+0100|00:08:17|4500|0:0|CANCELLED by 1002|2|16|compute-[0-2]|test_script2|/home/usr2 +1481510|part1|qos1|acc3|grp3|1003|usr3|1003|2023-02-21T15:48:20+0100|2023-02-21T15:49:06+0100|2023-02-21T15:57:23+0100|00:00:17|789|0:0|CANCELLED by 1003|2|16|compute-[0-2]|test_script2|/home/usr3 +147973|part1|qos1|acc3|grp3|1003|usr3|1003|2023-02-21T14:37:02+0100|2023-02-21T14:37:07+0100|2023-02-21T15:26:29+0100|00:49:22|3000|0:0|CANCELLED by 1003|1|8|compute-0|test_script1|/home/usr3 +14508|part1|qos1|acc4|grp4|1004|usr4|1004|2023-02-21T15:48:20+0100|2023-02-21T15:49:06+0100|2023-02-21T15:57:23+0100|00:08:17|4500|0:0|CANCELLED by 1004|2|16|compute-[0-2]|test_script2|/home/usr4 +147973|part1|qos1|acc1|gr1|1001|usr1|1001|2023-12-21T15:48:20+0100|2023-12-21T15:49:06+0100|2023-12-21T15:57:23+0100|00:00:17|567|0:0|CANCELLED by 1001|2|16|compute-[0-2]|test_script2|/home/usr1 +1479763|part1|qos1|acc1|grp8|1008|usr8|1008|2023-02-21T14:37:02+0100|2023-02-21T14:37:07+0100|2023-02-21T15:26:29+0100|00:49:22|3000|0:0|CANCELLED by 1008|1|8|compute-0|test_script1|/home/usr8 +11508|part1|qos1|acc1|grp15|1015|usr15|1015|2023-02-21T15:48:20+0100|2023-02-21T15:49:06+0100|2023-02-21T15:57:23+0100|00:08:17|4500|0:0|CANCELLED by 1015|2|16|compute-[0-2]|test_script2|/home/usr15 +81510|part1|qos1|acc1|grp15|1015|usr15|1015|2023-02-21T15:48:20+0100|2023-02-21T15:49:06+0100|2023-02-21T15:57:23+0100|00:00:17|3533|0:0|CANCELLED by 1015|2|16|compute-[0-2]|test_script2|/home/usr23 """ diff --git a/pkg/jobstats/server/server.go b/pkg/jobstats/server/server.go index 9d609462..1e3ceb3a 100644 --- a/pkg/jobstats/server/server.go +++ b/pkg/jobstats/server/server.go @@ -8,6 +8,7 @@ import ( "net/http" _ "net/http/pprof" "slices" + "strconv" "strings" "sync" "time" @@ -55,17 +56,17 @@ func (q *Query) query(s string) { } // Add parameter and its placeholder -func (q *Query) param(val string) { - q.builder.WriteString("?") - q.params = append(q.params, val) -} - -// Add multiple parameters and its placeholder -func (q *Query) multiParam(val []string) { +func (q *Query) param(val []string) { q.builder.WriteString(fmt.Sprintf("(%s)", strings.Join(strings.Split(strings.Repeat("?", len(val)), ""), ","))) q.params = append(q.params, val...) } +// Add multiple parameters and its placeholder +// func (q *Query) multiParam(val []string) { +// q.builder.WriteString(fmt.Sprintf("(%s)", strings.Join(strings.Split(strings.Repeat("?", len(val)), ""), ","))) +// q.params = append(q.params, val...) +// } + // Get current query string and its parameters func (q *Query) get() (string, []string) { return q.builder.String(), q.params @@ -260,6 +261,7 @@ func (s *JobstatsServer) jobsErrorResponse(errorString string, errorType string, // GET /api/jobs // Get jobs of a user based on query params func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { + var fromTime, toTime time.Time var response base.JobsResponse s.setHeaders(w) w.WriteHeader(http.StatusOK) @@ -281,84 +283,91 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { q.query(fmt.Sprintf("SELECT * FROM %s", s.dbConfig.JobstatsDBTable)) // Add dummy condition at the beginning - q.query(" WHERE id > ") - q.param("0") + q.query(" WHERE id > 0") // Check if logged user is in admin users and if we are quering for "all" jobs // If that is not the case, add user condition in query if !slices.Contains(s.adminUsers, loggedUser) || dashboardUser != "all" { - q.query(" AND Usr = ") - q.param(dashboardUser) + q.query(" AND Usr IN ") + q.param([]string{dashboardUser}) } // Get account query parameters if any if accounts := r.URL.Query()["account"]; len(accounts) > 0 { q.query(" AND Account IN ") - q.multiParam(accounts) + q.param(accounts) } // Check if jobuuid present in query params and add them // If any of jobuuid or jobid query params are present // do not check query window as we are fetching a specific job(s) - // Consequently set defaultQueryWindow to maximum which is retention period if jobuuids := r.URL.Query()["jobuuid"]; len(jobuuids) > 0 { q.query(" AND Jobuuid IN ") - q.multiParam(jobuuids) + q.param(jobuuids) checkQueryWindow = false - defaultQueryWindow = s.dbConfig.RetentionPeriod } // Similarly check for jobid if jobids := r.URL.Query()["jobid"]; len(jobids) > 0 { q.query(" AND Jobid IN ") - q.multiParam(jobids) + q.param(jobids) checkQueryWindow = false - defaultQueryWindow = s.dbConfig.RetentionPeriod } - var from, to string - // If no from provided, use 1 week from now as from - if from = r.URL.Query().Get("from"); from == "" { - from = time.Now().Add(-defaultQueryWindow).Format(time.RFC3339) - } - if to = r.URL.Query().Get("to"); to == "" { - to = time.Now().Format(time.RFC3339) + // If we dont have to specific query window skip next section of code as it becomes + // irrelevant + if !checkQueryWindow { + goto queryJobs } - // Convert "from" and "to" to time.Time and return error response if they - // cannot be parsed - fromTime, err := time.Parse(time.RFC3339, from) - if err != nil { - level.Error(s.logger).Log("msg", "Failed to parse from datestring", "from", from, "err", err) - s.jobsErrorResponse("Internal server error", "Malformed 'from' time string", w) - return + // Get to and from query parameters and do checks on them + if f := r.URL.Query().Get("from"); f == "" { + // If from is not present in query params, use a default query window of 1 week + fromTime = time.Now().Add(-defaultQueryWindow) + } else { + // Return error response if from is not a timestamp + if ts, err := strconv.Atoi(f); err != nil { + level.Error(s.logger).Log("msg", "Failed to parse from timestamp", "from", f, "err", err) + s.jobsErrorResponse("Internal server error", "Malformed 'from' timestamp", w) + return + } else { + fromTime = time.Unix(int64(ts), 0) + } } - toTime, err := time.Parse(time.RFC3339, to) - if err != nil { - level.Error(s.logger).Log("msg", "Failed to parse from datestring", "to", to, "err", err) - s.jobsErrorResponse("Internal server error", "Malformed 'to' time string", w) - return + if t := r.URL.Query().Get("to"); t == "" { + // Use current time as default to + toTime = time.Now() + } else { + // Return error response if to is not a timestamp + if ts, err := strconv.Atoi(t); err != nil { + level.Error(s.logger).Log("msg", "Failed to parse to timestamp", "to", t, "err", err) + s.jobsErrorResponse("Internal server error", "Malformed 'to' timestamp", w) + return + } else { + toTime = time.Unix(int64(ts), 0) + } } - // If difference between from and to is more than 3 months, return with empty + // If difference between from and to is more than 1 months, return with empty // response. This is to prevent users from making "big" requests that can "potentially" // choke server and end up in OOM errors - if toTime.Sub(fromTime) > time.Duration(3*30*24)*time.Hour && checkQueryWindow { + if toTime.Sub(fromTime) > time.Duration(30*24)*time.Hour { level.Error(s.logger).Log( - "msg", "Exceeded maximum query time window of 3 months", "from", from, "to", to, + "msg", "Exceeded maximum query time window of 3 months", + "from", fromTime.Format(time.DateTime), "to", toTime.Format(time.DateTime), "queryWindow", toTime.Sub(fromTime).String(), ) s.jobsErrorResponse("Internal server error", "Maximum query window exceeded", w) return } - // Add from and to to query - if checkQueryWindow { - q.query(" AND Start BETWEEN ") - q.param(from) - q.query(" AND ") - q.param(to) - } + // Add from and to to query only when checkQueryWindow is true + q.query(" AND Start BETWEEN ") + q.param([]string{fromTime.Format(time.DateTime)}) + q.query(" AND ") + q.param([]string{toTime.Format(time.DateTime)}) + +queryJobs: // Get all user jobs in the given time window jobs, err := s.Jobs(q, s.logger) @@ -486,8 +495,9 @@ func fetchJobs( var jobid, jobuuid, partition, qos, account, group, gid, user, uid, - submit, start, end, elapsed, - exitcode, state, + submit, start, end, + submitTs, startTs, endTs, + elapsed, exitcode, state, nnodes, ncpus, nodelist, nodelistExp, jobName, workDir string if err := rows.Scan( @@ -503,6 +513,9 @@ func fetchJobs( &submit, &start, &end, + &submitTs, + &startTs, + &endTs, &elapsed, &exitcode, &state, @@ -528,6 +541,9 @@ func fetchJobs( Submit: submit, Start: start, End: end, + SubmitTS: submitTs, + StartTS: startTs, + EndTS: endTs, Elapsed: elapsed, Exitcode: exitcode, State: state, diff --git a/pkg/jobstats/server/server_test.go b/pkg/jobstats/server/server_test.go index 624f2221..4d35ca49 100644 --- a/pkg/jobstats/server/server_test.go +++ b/pkg/jobstats/server/server_test.go @@ -255,8 +255,8 @@ func TestJobsHandlerWithQueryWindowExceeded(t *testing.T) { req.Header.Set("X-Grafana-User", "foo") // Add from query parameter q := req.URL.Query() - q.Add("from", "2023-01-01T00:00:00Z") - q.Add("to", "2023-06-01T00:00:00Z") + q.Add("from", "1672527600") + q.Add("to", "1685570400") req.URL.RawQuery = q.Encode() // Start recorder @@ -296,8 +296,6 @@ func TestJobsHandlerWithJobuuidsQueryParams(t *testing.T) { req.Header.Set("X-Grafana-User", "foo") // Add from query parameter q := req.URL.Query() - q.Add("from", "2023-01-01T00:00:00Z") - q.Add("to", "2023-06-01T00:00:00Z") q.Add("jobuuid", "foo-bar") req.URL.RawQuery = q.Encode() diff --git a/scripts/e2e-test.sh b/scripts/e2e-test.sh index 940c1392..512c75ab 100755 --- a/scripts/e2e-test.sh +++ b/scripts/e2e-test.sh @@ -155,6 +155,10 @@ get() { fi } +waitport() { + timeout 5 bash -c "while ! curl -s -f "http://localhost:${port}" > /dev/null; do sleep 0.1; done"; +} + if [[ "${scenario}" =~ "exporter" ]] then if [ ! -x ./bin/batchjob_exporter ] @@ -240,7 +244,8 @@ then echo $! > "${pidfile}" - sleep 1 + # sleep 1 + waitport get "127.0.0.1:${port}/metrics" | grep -E -v "${skip_re}" > "${fixture_output}" elif [[ "${scenario}" =~ "stats" ]] @@ -261,26 +266,27 @@ then echo $! > "${pidfile}" - sleep 2 + # sleep 2 + waitport if [ "${scenario}" = "stats-account-query" ] then get -H "X-Grafana-User: usr1" "127.0.0.1:${port}/api/accounts" > "${fixture_output}" elif [ "${scenario}" = "stats-jobuuid-query" ] then - get -H "X-Grafana-User: usr2" "127.0.0.1:${port}/api/jobs?jobuuid=baee651d-df44-af2c-fa09-50f5523b5e19&account=acc2&from=2023-02-21T00:00:00Z&to=2023-02-21T23:59:59Z" > "${fixture_output}" + get -H "X-Grafana-User: usr2" "127.0.0.1:${port}/api/jobs?jobuuid=baee651d-df44-af2c-fa09-50f5523b5e19&account=acc2" > "${fixture_output}" elif [ "${scenario}" = "stats-jobid-query" ] then - get -H "X-Grafana-User: usr8" "127.0.0.1:${port}/api/jobs?jobid=1479763&account=acc1&from=2023-02-21T00:00:00Z&to=2023-02-22T00:00:00Z" > "${fixture_output}" + get -H "X-Grafana-User: usr8" "127.0.0.1:${port}/api/jobs?jobid=1479763&account=acc1&from=1676934000&to=1677020400" > "${fixture_output}" elif [ "${scenario}" = "stats-jobuuid-jobid-query" ] then - get -H "X-Grafana-User: usr15" "127.0.0.1:${port}/api/jobs?jobuuid=e653f045-73b7-c928-e8df-00c4083cb9bc&jobid=11508&jobid=81510&account=acc1&from=2023-02-21T00:00:00Z&to=2023-02-22T00:00:00Z" > "${fixture_output}" + get -H "X-Grafana-User: usr15" "127.0.0.1:${port}/api/jobs?jobuuid=e653f045-73b7-c928-e8df-00c4083cb9bc&jobid=11508&jobid=81510&account=acc1" > "${fixture_output}" elif [ "${scenario}" = "stats-admin-query" ] then - get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: usr3" "127.0.0.1:${port}/api/jobs?account=acc3&from=2023-01-21T15:49:06Z&to=2023-02-27T15:49:06Z" > "${fixture_output}" + get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: usr3" "127.0.0.1:${port}/api/jobs?account=acc3&from=1676934000&to=1677538800" > "${fixture_output}" elif [ "${scenario}" = "stats-admin-query-all" ] then - get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: all" "127.0.0.1:${port}/api/jobs?from=2023-01-21T15:49:06Z&to=2023-02-27T15:49:06Z" > "${fixture_output}" + get -H "X-Grafana-User: grafana" -H "X-Dashboard-User: all" "127.0.0.1:${port}/api/jobs?from=1676934000&to=1677538800" > "${fixture_output}" fi fi From 9cd5314e7f84b54a3340520e3b2164a683fe3aad Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Thu, 4 Jan 2024 16:02:33 +0100 Subject: [PATCH 06/11] fix: Remove unnecessary quotes * Quoting SLURM_TIME_FORMAT is messing up DB entry format Signed-off-by: Mahendra Paipuri --- pkg/jobstats/schedulers/slurm.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/jobstats/schedulers/slurm.go b/pkg/jobstats/schedulers/slurm.go index 0e8df302..c1424a9c 100644 --- a/pkg/jobstats/schedulers/slurm.go +++ b/pkg/jobstats/schedulers/slurm.go @@ -157,7 +157,7 @@ func runSacctCmd(execMode string, startTime string, endTime string, logger log.L } // Use SLURM_TIME_FORMAT env var to get timezone offset - env := []string{"SLURM_TIME_FORMAT=\"%Y-%m-%dT%H:%M:%S%z\""} + env := []string{"SLURM_TIME_FORMAT=%Y-%m-%dT%H:%M:%S%z"} // Run command as slurm user if execMode == "cap" { From d49268558c0e7f887d0d258596fcfd6f74431825 Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Thu, 4 Jan 2024 16:23:35 +0100 Subject: [PATCH 07/11] feat: Query for changes after db row deletion * Emit log in debug mode on how many rows deleted Signed-off-by: Mahendra Paipuri --- pkg/jobstats/db/db.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/jobstats/db/db.go b/pkg/jobstats/db/db.go index 4ba1c26d..7f346ba1 100644 --- a/pkg/jobstats/db/db.go +++ b/pkg/jobstats/db/db.go @@ -401,16 +401,21 @@ func (j *jobStatsDB) vacuumDB() error { // Delete old entries in DB func (j *jobStatsDB) deleteOldJobs(tx *sql.Tx) error { - deleteSQLCmd := fmt.Sprintf( + deleteRowQuery := fmt.Sprintf( "DELETE FROM %s WHERE Start <= date('now', '-%d day')", j.jobstatDBTable, int(j.retentionPeriod.Hours()/24), ) - _, err := tx.Exec(deleteSQLCmd) + _, err := tx.Exec(deleteRowQuery) if err != nil { level.Error(j.logger).Log("msg", "Failed to delete old jobs", "err", err) return err } + + // Get changes + var rowsDeleted int + _ = tx.QueryRow("SELECT changes();").Scan(&rowsDeleted) + level.Debug(j.logger).Log("msg", "Queried for changes after deletion", "rowsDeleted", rowsDeleted) return nil } From 5dede8cfa93e3ee488887f565a7fae2be9d6e62c Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Thu, 4 Jan 2024 17:02:07 +0100 Subject: [PATCH 08/11] chore: Update get accounts endpoint Signed-off-by: Mahendra Paipuri --- pkg/jobstats/server/server.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/jobstats/server/server.go b/pkg/jobstats/server/server.go index 1e3ceb3a..1b417e3b 100644 --- a/pkg/jobstats/server/server.go +++ b/pkg/jobstats/server/server.go @@ -188,9 +188,9 @@ func (s *JobstatsServer) accounts(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) // Get current user from header - user, _ := s.getUser(r) + loggedUser, dashboardUser := s.getUser(r) // If no user found, return empty response - if user == "" { + if loggedUser == "" { response = base.AccountsResponse{ Response: base.Response{ Status: "error", @@ -208,9 +208,9 @@ func (s *JobstatsServer) accounts(w http.ResponseWriter, r *http.Request) { } // Get all user accounts - accounts, err := s.Accounts(s.dbConfig.JobstatsDBTable, user, s.logger) + accounts, err := s.Accounts(s.dbConfig.JobstatsDBTable, dashboardUser, s.logger) if err != nil { - level.Error(s.logger).Log("msg", "Failed to fetch accounts", "user", user, "err", err) + level.Error(s.logger).Log("msg", "Failed to fetch accounts", "user", dashboardUser, "err", err) response = base.AccountsResponse{ Response: base.Response{ Status: "error", From 90e0bb42a3f51600394f6a45a01b119d05d5123f Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Thu, 4 Jan 2024 18:59:27 +0100 Subject: [PATCH 09/11] refactor: Use UnixMilli for slurm job times * Grafana expects a millisecond epoch for to and from query params Signed-off-by: Mahendra Paipuri --- pkg/jobstats/helper/helper.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/jobstats/helper/helper.go b/pkg/jobstats/helper/helper.go index 313c49bf..bf247259 100644 --- a/pkg/jobstats/helper/helper.go +++ b/pkg/jobstats/helper/helper.go @@ -127,7 +127,7 @@ func NodelistParser(nodelistExp string) []string { // Converts a date in a given layout to unix timestamp of the date func TimeToTimestamp(layout string, date string) int64 { if t, err := time.Parse(layout, date); err == nil { - return int64(t.Local().Unix()) + return int64(t.Local().UnixMilli()) } return 0 } From dd91e1ddef21bd6230c2f2bc5ae02511f215a6df Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Thu, 4 Jan 2024 19:10:10 +0100 Subject: [PATCH 10/11] test: Update tests and fixtures Signed-off-by: Mahendra Paipuri --- .../output/e2e-test-stats-server-admin-query-all.txt | 2 +- .../output/e2e-test-stats-server-admin-query.txt | 2 +- .../output/e2e-test-stats-server-jobid-query.txt | 2 +- .../output/e2e-test-stats-server-jobuuid-query.txt | 2 +- pkg/jobstats/schedulers/slurm_test.go | 12 ++++++------ 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt index 76a5b837..8acf5920 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query-all.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"},{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"14508","id":"88a46e84-ffce-52ea-37e9-47b39d9ccfb3","partition":"part1","qos":"qos1","account":"acc4","group":"grp4","gid":"1004","user":"usr4","uid":"1004","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1004","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr4"},{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"},{"jobid":"11508","id":"d4956307-af17-870a-2fa0-38375105d257","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr15"},{"jobid":"81510","id":"938832b4-33b4-3303-b002-8150f737de7e","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr23"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900000","startts":"1676990946000","endts":"1676991443000","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900000","startts":"1676990946000","endts":"1676991443000","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"},{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622000","startts":"1676986627000","endts":"1676989589000","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"14508","id":"88a46e84-ffce-52ea-37e9-47b39d9ccfb3","partition":"part1","qos":"qos1","account":"acc4","group":"grp4","gid":"1004","user":"usr4","uid":"1004","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900000","startts":"1676990946000","endts":"1676991443000","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1004","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr4"},{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622000","startts":"1676986627000","endts":"1676989589000","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"},{"jobid":"11508","id":"d4956307-af17-870a-2fa0-38375105d257","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900000","startts":"1676990946000","endts":"1676991443000","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr15"},{"jobid":"81510","id":"938832b4-33b4-3303-b002-8150f737de7e","partition":"part1","qos":"qos1","account":"acc1","group":"grp15","gid":"1015","user":"usr15","uid":"1015","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900000","startts":"1676990946000","endts":"1676991443000","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1015","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr23"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt index dd44c114..0f7c4ca1 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-admin-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"147973","id":"d8b28c2c-2011-d572-de94-8ec4facb4a2a","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622000","startts":"1676986627000","endts":"1676989589000","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr3"},{"jobid":"1481510","id":"b76ecf69-4d2f-076b-047d-2bcc8503b4cb","partition":"part1","qos":"qos1","account":"acc3","group":"grp3","gid":"1003","user":"usr3","uid":"1003","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900000","startts":"1676990946000","endts":"1676991443000","elapsed":"00:00:17","exitcode":"0:0","state":"CANCELLED by 1003","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr3"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt index 7a62e12e..5c980037 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobid-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622","startts":"1676986627","endts":"1676989589","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1479763","id":"a04088e8-2699-2a9b-bc27-30282679ebb3","partition":"part1","qos":"qos1","account":"acc1","group":"grp8","gid":"1008","user":"usr8","uid":"1008","submit":"2023-02-21T14:37:02+0100","start":"2023-02-21T14:37:07+0100","end":"2023-02-21T15:26:29+0100","submitts":"1676986622000","startts":"1676986627000","endts":"1676989589000","elapsed":"00:49:22","exitcode":"0:0","state":"CANCELLED by 1008","nnodes":"1","ncpus":"8","nodelist":"compute-0","nodelistexp":"compute-0","jobname":"test_script1","workdir":"/home/usr8"}]} diff --git a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt index 709912a6..c16d6d04 100644 --- a/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt +++ b/pkg/jobstats/fixtures/output/e2e-test-stats-server-jobuuid-query.txt @@ -1 +1 @@ -{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900","startts":"1676990946","endts":"1676991443","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"}]} +{"status":"success","errorType":"","error":"","warnings":null,"data":[{"jobid":"1481508","id":"baee651d-df44-af2c-fa09-50f5523b5e19","partition":"part1","qos":"qos1","account":"acc2","group":"grp2","gid":"1002","user":"usr2","uid":"1002","submit":"2023-02-21T15:48:20+0100","start":"2023-02-21T15:49:06+0100","end":"2023-02-21T15:57:23+0100","submitts":"1676990900000","startts":"1676990946000","endts":"1676991443000","elapsed":"00:08:17","exitcode":"0:0","state":"CANCELLED by 1002","nnodes":"2","ncpus":"16","nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","jobname":"test_script2","workdir":"/home/usr2"}]} diff --git a/pkg/jobstats/schedulers/slurm_test.go b/pkg/jobstats/schedulers/slurm_test.go index 8ffd6d28..47f40020 100644 --- a/pkg/jobstats/schedulers/slurm_test.go +++ b/pkg/jobstats/schedulers/slurm_test.go @@ -29,9 +29,9 @@ var ( Submit: "2023-02-21T14:37:02+0100", Start: "2023-02-21T14:37:07+0100", End: "2023-02-21T15:26:29+0100", - SubmitTS: "1676986622", - StartTS: "1676986627", - EndTS: "1676989589", + SubmitTS: "1676986622000", + StartTS: "1676986627000", + EndTS: "1676989589000", Elapsed: "00:49:22", Exitcode: "0:0", State: "CANCELLED by 302137", @@ -55,9 +55,9 @@ var ( Submit: "2023-02-21T15:48:20+0100", Start: "2023-02-21T15:49:06+0100", End: "2023-02-21T15:57:23+0100", - SubmitTS: "1676990900", - StartTS: "1676990946", - EndTS: "1676991443", + SubmitTS: "1676990900000", + StartTS: "1676990946000", + EndTS: "1676991443000", Elapsed: "00:08:17", Exitcode: "0:0", State: "CANCELLED by 302137", From 866f946dad8f6ad191217356df8ffb738219e51d Mon Sep 17 00:00:00 2001 From: Mahendra Paipuri Date: Fri, 5 Jan 2024 08:47:57 +0100 Subject: [PATCH 11/11] fix: Add correct http status codes * We are always returning 200 for all responses * Set correct response codes based on actual response * Update tests Signed-off-by: Mahendra Paipuri --- cmd/batchjob_stats_server/main_test.go | 3 ++- pkg/jobstats/cli/cli_test.go | 6 +++--- pkg/jobstats/server/server.go | 11 +++++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cmd/batchjob_stats_server/main_test.go b/cmd/batchjob_stats_server/main_test.go index e16ab8d4..7e611ab8 100644 --- a/cmd/batchjob_stats_server/main_test.go +++ b/cmd/batchjob_stats_server/main_test.go @@ -35,7 +35,8 @@ func TestBatchjobStatsExecutable(t *testing.T) { } jobstats := exec.Command( - binary, "--path.data", tmpDir, "--slurm.sacct.path", tmpSacctPath, + binary, "--path.data", tmpDir, + "--slurm.sacct.path", tmpSacctPath, "--batch.scheduler.slurm", "--web.listen-address", address, ) diff --git a/pkg/jobstats/cli/cli_test.go b/pkg/jobstats/cli/cli_test.go index b60c7f67..e635a36b 100644 --- a/pkg/jobstats/cli/cli_test.go +++ b/pkg/jobstats/cli/cli_test.go @@ -9,8 +9,8 @@ import ( "time" ) -func queryExporter(address string) error { - resp, err := http.Get(fmt.Sprintf("http://%s/api/jobs", address)) +func queryServer(address string) error { + resp, err := http.Get(fmt.Sprintf("http://%s/api/health", address)) if err != nil { return err } @@ -42,7 +42,7 @@ func TestBatchStatsServerMain(t *testing.T) { // Query exporter for i := 0; i < 10; i++ { - if err := queryExporter("localhost:9020"); err == nil { + if err := queryServer("localhost:9020"); err == nil { break } time.Sleep(500 * time.Millisecond) diff --git a/pkg/jobstats/server/server.go b/pkg/jobstats/server/server.go index 1b417e3b..c5c31643 100644 --- a/pkg/jobstats/server/server.go +++ b/pkg/jobstats/server/server.go @@ -185,12 +185,12 @@ func (s *JobstatsServer) setHeaders(w http.ResponseWriter) { func (s *JobstatsServer) accounts(w http.ResponseWriter, r *http.Request) { var response base.AccountsResponse s.setHeaders(w) - w.WriteHeader(http.StatusOK) // Get current user from header loggedUser, dashboardUser := s.getUser(r) // If no user found, return empty response if loggedUser == "" { + w.WriteHeader(http.StatusUnauthorized) response = base.AccountsResponse{ Response: base.Response{ Status: "error", @@ -211,6 +211,7 @@ func (s *JobstatsServer) accounts(w http.ResponseWriter, r *http.Request) { accounts, err := s.Accounts(s.dbConfig.JobstatsDBTable, dashboardUser, s.logger) if err != nil { level.Error(s.logger).Log("msg", "Failed to fetch accounts", "user", dashboardUser, "err", err) + w.WriteHeader(http.StatusInternalServerError) response = base.AccountsResponse{ Response: base.Response{ Status: "error", @@ -228,6 +229,7 @@ func (s *JobstatsServer) accounts(w http.ResponseWriter, r *http.Request) { } // Write response + w.WriteHeader(http.StatusOK) response = base.AccountsResponse{ Response: base.Response{ Status: "success", @@ -264,7 +266,6 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { var fromTime, toTime time.Time var response base.JobsResponse s.setHeaders(w) - w.WriteHeader(http.StatusOK) // Initialise utility vars checkQueryWindow := true // Check query window size @@ -274,6 +275,7 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { loggedUser, dashboardUser := s.getUser(r) // If no user found, return empty response if loggedUser == "" { + w.WriteHeader(http.StatusUnauthorized) s.jobsErrorResponse("User Error", "No user identified", w) return } @@ -328,6 +330,7 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { // Return error response if from is not a timestamp if ts, err := strconv.Atoi(f); err != nil { level.Error(s.logger).Log("msg", "Failed to parse from timestamp", "from", f, "err", err) + w.WriteHeader(http.StatusBadRequest) s.jobsErrorResponse("Internal server error", "Malformed 'from' timestamp", w) return } else { @@ -341,6 +344,7 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { // Return error response if to is not a timestamp if ts, err := strconv.Atoi(t); err != nil { level.Error(s.logger).Log("msg", "Failed to parse to timestamp", "to", t, "err", err) + w.WriteHeader(http.StatusBadRequest) s.jobsErrorResponse("Internal server error", "Malformed 'to' timestamp", w) return } else { @@ -357,6 +361,7 @@ func (s *JobstatsServer) jobs(w http.ResponseWriter, r *http.Request) { "from", fromTime.Format(time.DateTime), "to", toTime.Format(time.DateTime), "queryWindow", toTime.Sub(fromTime).String(), ) + w.WriteHeader(http.StatusBadRequest) s.jobsErrorResponse("Internal server error", "Maximum query window exceeded", w) return } @@ -373,11 +378,13 @@ queryJobs: jobs, err := s.Jobs(q, s.logger) if err != nil { level.Error(s.logger).Log("msg", "Failed to fetch jobs", "loggedUser", loggedUser, "err", err) + w.WriteHeader(http.StatusInternalServerError) s.jobsErrorResponse("Internal server error", "Failed to fetch user jobs", w) return } // Write response + w.WriteHeader(http.StatusOK) response = base.JobsResponse{ Response: base.Response{ Status: "success",