From d6dedd6870ac16b9a6767525a95e1e4703184189 Mon Sep 17 00:00:00 2001 From: mahendrapaipuri Date: Tue, 28 Nov 2023 15:37:21 +0100 Subject: [PATCH 1/4] build: Update deps version Signed-off-by: mahendrapaipuri --- go.mod | 4 ++-- go.sum | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 32b5af40..01cd2dd5 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/mahendrapaipuri/batchjob_exporter +module github.com/mahendrapaipuri/batchjob_monitoring go 1.21 @@ -24,7 +24,7 @@ require ( github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/jpillora/backoff v1.0.0 // indirect - github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/kr/text v0.2.0 // indirect github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect diff --git a/go.sum b/go.sum index 7e784980..0cb5d45c 100644 --- a/go.sum +++ b/go.sum @@ -34,8 +34,8 @@ github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= -github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= +github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -81,6 +81,7 @@ golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= From c3f09f282558c1a55f75da358921d36d474f053d Mon Sep 17 00:00:00 2001 From: mahendrapaipuri Date: Tue, 28 Nov 2023 16:28:13 +0100 Subject: [PATCH 2/4] refactor: Use pkg repo structure * Change package name to batchjob_monitoring * Make utils into a separate package * Add nodelistparser function to parse slurm nodelist expression * Add unit tests for nodelist parser Signed-off-by: mahendrapaipuri --- .gitignore | 7 +- .promu-cgo.yml | 2 +- .promu.yml | 2 +- Makefile | 14 +-- cmd/batchjob_exporter/batchjob_exporter.go | 2 +- {collector => pkg/collector}/collector.go | 0 {collector => pkg/collector}/emissions.go | 4 +- .../collector}/emissions_test.go | 0 .../fixtures/e2e-test-cgroupsv1-output.txt | 4 +- .../fixtures/e2e-test-cgroupsv2-output.txt | 4 +- .../GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3 | 0 .../GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e | 0 .../collector}/fixtures/ipmi-dcmi-wrapper.sh | 0 .../collector}/fixtures/nvidia-smi | 0 .../collector}/fixtures/slurmjobstat/1009248 | 0 .../collector}/fixtures/sys.ttar | 0 {collector => pkg/collector}/helper.go | 16 +--- {collector => pkg/collector}/ipmi.go | 4 +- {collector => pkg/collector}/ipmi_test.go | 0 {collector => pkg/collector}/nvidia_gpus.go | 4 +- .../collector}/nvidia_gpus_test.go | 0 {collector => pkg/collector}/paths.go | 0 {collector => pkg/collector}/paths_test.go | 0 {collector => pkg/collector}/rapl.go | 0 {collector => pkg/collector}/rapl_test.go | 0 {collector => pkg/collector}/slurm.go | 6 +- {collector => pkg/collector}/slurm_test.go | 0 pkg/collector/types.go | 23 +++++ {collector => pkg/utils}/types.go | 24 +---- {collector => pkg/utils}/utils.go | 89 ++++++++++++++++++- pkg/utils/utils_test.go | 38 ++++++++ scripts/e2e-test.sh | 16 ++-- 32 files changed, 190 insertions(+), 69 deletions(-) rename {collector => pkg/collector}/collector.go (100%) rename {collector => pkg/collector}/emissions.go (98%) rename {collector => pkg/collector}/emissions_test.go (100%) rename {collector => pkg/collector}/fixtures/e2e-test-cgroupsv1-output.txt (97%) rename {collector => pkg/collector}/fixtures/e2e-test-cgroupsv2-output.txt (97%) rename {collector => pkg/collector}/fixtures/gpustat/GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3 (100%) rename {collector => pkg/collector}/fixtures/gpustat/GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e (100%) rename {collector => pkg/collector}/fixtures/ipmi-dcmi-wrapper.sh (100%) rename {collector => pkg/collector}/fixtures/nvidia-smi (100%) rename {collector => pkg/collector}/fixtures/slurmjobstat/1009248 (100%) rename {collector => pkg/collector}/fixtures/sys.ttar (100%) rename {collector => pkg/collector}/helper.go (70%) rename {collector => pkg/collector}/ipmi.go (95%) rename {collector => pkg/collector}/ipmi_test.go (100%) rename {collector => pkg/collector}/nvidia_gpus.go (96%) rename {collector => pkg/collector}/nvidia_gpus_test.go (100%) rename {collector => pkg/collector}/paths.go (100%) rename {collector => pkg/collector}/paths_test.go (100%) rename {collector => pkg/collector}/rapl.go (100%) rename {collector => pkg/collector}/rapl_test.go (100%) rename {collector => pkg/collector}/slurm.go (98%) rename {collector => pkg/collector}/slurm_test.go (100%) create mode 100644 pkg/collector/types.go rename {collector => pkg/utils}/types.go (81%) rename {collector => pkg/utils}/utils.go (50%) create mode 100644 pkg/utils/utils_test.go diff --git a/.gitignore b/.gitignore index 07d67793..ed672d91 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,7 @@ vendor/ go.work # Ignore fixtures -collector/fixtures/sys +pkg/collector/fixtures/sys # Ignore scripts run.sh @@ -34,3 +34,8 @@ run.sh # Ignore binaries /bin + +# Ignore test files +*.db +*.prof +lasttimestamp \ No newline at end of file diff --git a/.promu-cgo.yml b/.promu-cgo.yml index a30b5a12..7ee8fcc7 100644 --- a/.promu-cgo.yml +++ b/.promu-cgo.yml @@ -4,7 +4,7 @@ go: version: 1.21 cgo: true repository: - path: github.com/mahendrapaipuri/batchjob_exporter + path: github.com/mahendrapaipuri/batchjob_monitoring build: binaries: - name: batchjob_exporter diff --git a/.promu.yml b/.promu.yml index a0e9736a..d00a163b 100644 --- a/.promu.yml +++ b/.promu.yml @@ -3,7 +3,7 @@ go: # .promu-cgo.yml should also be updated. version: 1.21 repository: - path: github.com/mahendrapaipuri/batchjob_exporter + path: github.com/mahendrapaipuri/batchjob_monitoring build: binaries: - name: batchjob_exporter diff --git a/Makefile b/Makefile index 9005a937..b6e5c807 100644 --- a/Makefile +++ b/Makefile @@ -59,8 +59,8 @@ endif PROMU := $(FIRST_GOPATH)/bin/promu --config $(PROMU_CONF) -e2e-cgroupsv2-out = collector/fixtures/e2e-test-cgroupsv2-output.txt -e2e-cgroupsv1-out = collector/fixtures/e2e-test-cgroupsv1-output.txt +e2e-cgroupsv2-out = pkg/collector/fixtures/e2e-test-cgroupsv2-output.txt +e2e-cgroupsv1-out = pkg/collector/fixtures/e2e-test-cgroupsv1-output.txt ifeq ($(CGROUPS_MODE), unified) e2e-out = $(e2e-cgroupsv2-out) @@ -87,12 +87,12 @@ $(eval $(call goarch_pair,mips64el,mipsel)) all:: vet checkmetrics checkrules common-all $(cross-test) $(test-docker) $(test-e2e) .PHONY: test -test: collector/fixtures/sys/.unpacked +test: pkg/collector/fixtures/sys/.unpacked @echo ">> running tests" $(GO) test -short $(test-flags) $(pkgs) .PHONY: test-32bit -test-32bit: collector/fixtures/sys/.unpacked +test-32bit: pkg/collector/fixtures/sys/.unpacked @echo ">> running tests in 32-bit mode" @env GOARCH=$(GOARCH_CROSS) $(GO) test $(pkgs) @@ -107,11 +107,11 @@ skip-test-32bit: touch $@ update_fixtures: - rm -vf collector/fixtures/sys/.unpacked - ./scripts/ttar -C collector/fixtures -c -f collector/fixtures/sys.ttar sys + rm -vf pkg/collector/fixtures/sys/.unpacked + ./scripts/ttar -C pkg/collector/fixtures -c -f pkg/collector/fixtures/sys.ttar sys .PHONY: test-e2e -test-e2e: build collector/fixtures/sys/.unpacked +test-e2e: build pkg/collector/fixtures/sys/.unpacked @echo ">> running end-to-end tests" ./scripts/e2e-test.sh diff --git a/cmd/batchjob_exporter/batchjob_exporter.go b/cmd/batchjob_exporter/batchjob_exporter.go index bffa7934..64e78d9b 100644 --- a/cmd/batchjob_exporter/batchjob_exporter.go +++ b/cmd/batchjob_exporter/batchjob_exporter.go @@ -18,7 +18,7 @@ import ( "github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/mahendrapaipuri/batchjob_exporter/collector" + "github.com/mahendrapaipuri/batchjob_monitoring/pkg/collector" "github.com/prometheus/client_golang/prometheus" promcollectors "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" diff --git a/collector/collector.go b/pkg/collector/collector.go similarity index 100% rename from collector/collector.go rename to pkg/collector/collector.go diff --git a/collector/emissions.go b/pkg/collector/emissions.go similarity index 98% rename from collector/emissions.go rename to pkg/collector/emissions.go index 870f896b..857852a4 100644 --- a/collector/emissions.go +++ b/pkg/collector/emissions.go @@ -13,6 +13,8 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + + utils "github.com/mahendrapaipuri/batchjob_monitoring/pkg/utils" ) const emissionsCollectorSubsystem = "emissions" @@ -30,7 +32,7 @@ var ( countryCode = kingpin.Flag("collector.emissions.country.code", "ISO 3166-1 alpha-3 Country code. OWID energy data [https://github.com/owid/energy-data] estimated constant emission factor is used for all countries except for France. A real time emission factor will be used for France from RTE eCO2 mix [https://www.rte-france.com/en/eco2mix/co2-emissions] data.").Default("FRA").String() globalEnergyMixDataUrl = "https://raw.githubusercontent.com/mlco2/codecarbon/master/codecarbon/data/private_infra/global_energy_mix.json" globalEmissionFactor = 475 - getRteEnergyMixData = GetRteEnergyMixData + getRteEnergyMixData = utils.GetRteEnergyMixData ) func init() { diff --git a/collector/emissions_test.go b/pkg/collector/emissions_test.go similarity index 100% rename from collector/emissions_test.go rename to pkg/collector/emissions_test.go diff --git a/collector/fixtures/e2e-test-cgroupsv1-output.txt b/pkg/collector/fixtures/e2e-test-cgroupsv1-output.txt similarity index 97% rename from collector/fixtures/e2e-test-cgroupsv1-output.txt rename to pkg/collector/fixtures/e2e-test-cgroupsv1-output.txt index 6f523ccd..aff897d9 100644 --- a/collector/fixtures/e2e-test-cgroupsv1-output.txt +++ b/pkg/collector/fixtures/e2e-test-cgroupsv1-output.txt @@ -45,8 +45,8 @@ batchjob_nvidia_gpu_jobid{uuid="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3"} 11000 batchjob_nvidia_gpu_jobid{uuid="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e"} 10000 # HELP batchjob_rapl_package_joules_total Current RAPL package value in joules # TYPE batchjob_rapl_package_joules_total counter -batchjob_rapl_package_joules_total{index="0",path="collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244 -batchjob_rapl_package_joules_total{index="1",path="collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826 +batchjob_rapl_package_joules_total{index="0",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244 +batchjob_rapl_package_joules_total{index="1",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826 # HELP batchjob_scrape_collector_duration_seconds batchjob_exporter: Duration of a collector scrape. # TYPE batchjob_scrape_collector_duration_seconds gauge # HELP batchjob_scrape_collector_success batchjob_exporter: Whether a collector succeeded. diff --git a/collector/fixtures/e2e-test-cgroupsv2-output.txt b/pkg/collector/fixtures/e2e-test-cgroupsv2-output.txt similarity index 97% rename from collector/fixtures/e2e-test-cgroupsv2-output.txt rename to pkg/collector/fixtures/e2e-test-cgroupsv2-output.txt index 07399ca0..602c252c 100644 --- a/collector/fixtures/e2e-test-cgroupsv2-output.txt +++ b/pkg/collector/fixtures/e2e-test-cgroupsv2-output.txt @@ -45,8 +45,8 @@ batchjob_nvidia_gpu_jobid{uuid="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3"} 11000 batchjob_nvidia_gpu_jobid{uuid="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e"} 10000 # HELP batchjob_rapl_package_joules_total Current RAPL package value in joules # TYPE batchjob_rapl_package_joules_total counter -batchjob_rapl_package_joules_total{index="0",path="collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244 -batchjob_rapl_package_joules_total{index="1",path="collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826 +batchjob_rapl_package_joules_total{index="0",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244 +batchjob_rapl_package_joules_total{index="1",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826 # HELP batchjob_scrape_collector_duration_seconds batchjob_exporter: Duration of a collector scrape. # TYPE batchjob_scrape_collector_duration_seconds gauge # HELP batchjob_scrape_collector_success batchjob_exporter: Whether a collector succeeded. diff --git a/collector/fixtures/gpustat/GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3 b/pkg/collector/fixtures/gpustat/GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3 similarity index 100% rename from collector/fixtures/gpustat/GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3 rename to pkg/collector/fixtures/gpustat/GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3 diff --git a/collector/fixtures/gpustat/GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e b/pkg/collector/fixtures/gpustat/GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e similarity index 100% rename from collector/fixtures/gpustat/GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e rename to pkg/collector/fixtures/gpustat/GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e diff --git a/collector/fixtures/ipmi-dcmi-wrapper.sh b/pkg/collector/fixtures/ipmi-dcmi-wrapper.sh similarity index 100% rename from collector/fixtures/ipmi-dcmi-wrapper.sh rename to pkg/collector/fixtures/ipmi-dcmi-wrapper.sh diff --git a/collector/fixtures/nvidia-smi b/pkg/collector/fixtures/nvidia-smi similarity index 100% rename from collector/fixtures/nvidia-smi rename to pkg/collector/fixtures/nvidia-smi diff --git a/collector/fixtures/slurmjobstat/1009248 b/pkg/collector/fixtures/slurmjobstat/1009248 similarity index 100% rename from collector/fixtures/slurmjobstat/1009248 rename to pkg/collector/fixtures/slurmjobstat/1009248 diff --git a/collector/fixtures/sys.ttar b/pkg/collector/fixtures/sys.ttar similarity index 100% rename from collector/fixtures/sys.ttar rename to pkg/collector/fixtures/sys.ttar diff --git a/collector/helper.go b/pkg/collector/helper.go similarity index 70% rename from collector/helper.go rename to pkg/collector/helper.go index 1b601591..6625bb59 100644 --- a/collector/helper.go +++ b/pkg/collector/helper.go @@ -1,13 +1,9 @@ package collector import ( - "fmt" "os" - "os/exec" - "regexp" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "regexp" ) var ( @@ -37,13 +33,3 @@ func fileExists(filename string) bool { func SanitizeMetricName(metricName string) string { return metricNameRegex.ReplaceAllString(metricName, "_") } - -// Execute command and return stdout/stderr -func Execute(cmd string, args []string, logger log.Logger) ([]byte, error) { - level.Debug(logger).Log("msg", "Executing", "command", cmd, "args", fmt.Sprintf("%+v", args)) - out, err := exec.Command(cmd, args...).CombinedOutput() - if err != nil { - err = fmt.Errorf("error running %s: %s", cmd, err) - } - return out, err -} diff --git a/collector/ipmi.go b/pkg/collector/ipmi.go similarity index 95% rename from collector/ipmi.go rename to pkg/collector/ipmi.go index c33aaf59..3ef4ba29 100644 --- a/collector/ipmi.go +++ b/pkg/collector/ipmi.go @@ -16,6 +16,8 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + + utils "github.com/mahendrapaipuri/batchjob_monitoring/pkg/utils" ) const ipmiCollectorSubsystem = "ipmi_dcmi" @@ -71,7 +73,7 @@ func getValue(ipmiOutput []byte, regex *regexp.Regexp) (string, error) { // Update implements Collector and exposes IPMI DCMI power related metrics. func (c *impiCollector) Update(ch chan<- prometheus.Metric) error { args := []string{""} - stdOut, err := Execute(*ipmiDcmiWrapperExec, args, c.logger) + stdOut, err := utils.Execute(*ipmiDcmiWrapperExec, args, c.logger) if err != nil { return err } diff --git a/collector/ipmi_test.go b/pkg/collector/ipmi_test.go similarity index 100% rename from collector/ipmi_test.go rename to pkg/collector/ipmi_test.go diff --git a/collector/nvidia_gpus.go b/pkg/collector/nvidia_gpus.go similarity index 96% rename from collector/nvidia_gpus.go rename to pkg/collector/nvidia_gpus.go index 7892b6e5..65eb513e 100644 --- a/collector/nvidia_gpus.go +++ b/pkg/collector/nvidia_gpus.go @@ -12,6 +12,8 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + + utils "github.com/mahendrapaipuri/batchjob_monitoring/pkg/utils" ) const nvidiaGpuJobMapCollectorSubsystem = "nvidia_gpu" @@ -50,7 +52,7 @@ func init() { // NOTE: Hoping this command returns MIG devices too func getAllDevices(logger log.Logger) ([]Device, error) { args := []string{"--query-gpu=name,uuid", "--format=csv"} - nvidiaSmiOutput, err := Execute("nvidia-smi", args, logger) + nvidiaSmiOutput, err := utils.Execute("nvidia-smi", args, logger) if err != nil { level.Error(logger).Log("msg", "nvidia-smi command to get list of devices failed", "err", err) return nil, err diff --git a/collector/nvidia_gpus_test.go b/pkg/collector/nvidia_gpus_test.go similarity index 100% rename from collector/nvidia_gpus_test.go rename to pkg/collector/nvidia_gpus_test.go diff --git a/collector/paths.go b/pkg/collector/paths.go similarity index 100% rename from collector/paths.go rename to pkg/collector/paths.go diff --git a/collector/paths_test.go b/pkg/collector/paths_test.go similarity index 100% rename from collector/paths_test.go rename to pkg/collector/paths_test.go diff --git a/collector/rapl.go b/pkg/collector/rapl.go similarity index 100% rename from collector/rapl.go rename to pkg/collector/rapl.go diff --git a/collector/rapl_test.go b/pkg/collector/rapl_test.go similarity index 100% rename from collector/rapl_test.go rename to pkg/collector/rapl_test.go diff --git a/collector/slurm.go b/pkg/collector/slurm.go similarity index 98% rename from collector/slurm.go rename to pkg/collector/slurm.go index 0a8a4a54..cac55953 100644 --- a/collector/slurm.go +++ b/pkg/collector/slurm.go @@ -18,6 +18,8 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + + utils "github.com/mahendrapaipuri/batchjob_monitoring/pkg/utils" ) const slurmCollectorSubsystem = "slurm_job" @@ -311,7 +313,7 @@ func (c *slurmCollector) getJobLabels(jobid string) (string, string, string) { } else { fmt.Sscanf(string(content), "%s %s %s %s", &jobUid, &jobGid, &jobNodes, &jobWorkDir) } - jobUuid, err = GetUuidFromString([]string{jobid, jobUid, jobGid, jobNodes, jobWorkDir}) + jobUuid, err = utils.GetUuidFromString([]string{jobid, jobUid, jobGid, jobNodes, jobWorkDir}) if err != nil { level.Error(c.logger).Log("msg", "Failed to generate UUID for job", "jobid", jobid, "err", err) jobUuid = jobid @@ -433,7 +435,7 @@ func (c *slurmCollector) getCgroupsV2Metrics(name string) (CgroupMetric, error) level.Debug(c.logger).Log("msg", "Loading cgroup v2", "path", name) // Files to parse out of the cgroup controllers := []string{"cpu.stat", "memory.current", "memory.events", "memory.max", "memory.stat"} - data, err := LoadCgroupsV2Metrics(name, controllers) + data, err := utils.LoadCgroupsV2Metrics(name, *cgroupfsPath, controllers) if err != nil { level.Error(c.logger).Log("msg", "Failed to load cgroups", "path", name, "err", err) metric.err = true diff --git a/collector/slurm_test.go b/pkg/collector/slurm_test.go similarity index 100% rename from collector/slurm_test.go rename to pkg/collector/slurm_test.go diff --git a/pkg/collector/types.go b/pkg/collector/types.go new file mode 100644 index 00000000..596264fb --- /dev/null +++ b/pkg/collector/types.go @@ -0,0 +1,23 @@ +package collector + +type energyMixDataFields struct { + BioFuel float64 `json:"biofuel_TWh"` + CarbonIntensity float64 `json:"carbon_intensity"` + Coal float64 `json:"coal_TWh"` + CountryName string `json:"country_name"` + Fossil float64 `json:"fossil_TWh"` + Gas float64 `json:"gas_TWh"` + Hydro float64 `json:"hydroelectricity_TWh"` + IsoCode string `json:"iso_code"` + LowCarbon float64 `json:"low_carbon_TWh"` + Nuclear float64 `json:"nuclear_TWh"` + Oil float64 `json:"oil_TWh"` + OtherRenewable float64 `json:"other_renewable_TWh"` + OtherRenewableExcluBioFuel float64 `json:"other_renewable_exc_biofuel_TWh"` + PerCapita float64 `json:"per_capita_Wh"` + Renewables float64 `json:"renewables_TWh"` + Solar float64 `json:"solar_TWh"` + Total float64 `json:"total_TWh"` + Wind float64 `json:"wind_TWh"` + Year int64 `json:"year"` +} diff --git a/collector/types.go b/pkg/utils/types.go similarity index 81% rename from collector/types.go rename to pkg/utils/types.go index 8b54d5e7..3d51a830 100644 --- a/collector/types.go +++ b/pkg/utils/types.go @@ -1,4 +1,4 @@ -package collector +package utils // Nicked from https://github.com/nmasse-itix/ego2mix type nationalRealTimeFields struct { @@ -78,25 +78,3 @@ type nationalRealTimeResponse struct { } `json:"parameters"` Records []nationalRealTimeRecord `json:"records"` } - -type energyMixDataFields struct { - BioFuel float64 `json:"biofuel_TWh"` - CarbonIntensity float64 `json:"carbon_intensity"` - Coal float64 `json:"coal_TWh"` - CountryName string `json:"country_name"` - Fossil float64 `json:"fossil_TWh"` - Gas float64 `json:"gas_TWh"` - Hydro float64 `json:"hydroelectricity_TWh"` - IsoCode string `json:"iso_code"` - LowCarbon float64 `json:"low_carbon_TWh"` - Nuclear float64 `json:"nuclear_TWh"` - Oil float64 `json:"oil_TWh"` - OtherRenewable float64 `json:"other_renewable_TWh"` - OtherRenewableExcluBioFuel float64 `json:"other_renewable_exc_biofuel_TWh"` - PerCapita float64 `json:"per_capita_Wh"` - Renewables float64 `json:"renewables_TWh"` - Solar float64 `json:"solar_TWh"` - Total float64 `json:"total_TWh"` - Wind float64 `json:"wind_TWh"` - Year int64 `json:"year"` -} diff --git a/collector/utils.go b/pkg/utils/utils.go similarity index 50% rename from collector/utils.go rename to pkg/utils/utils.go index 37849817..3288c01d 100644 --- a/collector/utils.go +++ b/pkg/utils/utils.go @@ -1,4 +1,7 @@ -package collector +//go:build !utils +// +build !utils + +package utils import ( "encoding/json" @@ -7,11 +10,16 @@ import ( "net/http" "net/url" "os" + "os/exec" "path/filepath" + "reflect" + "regexp" "strconv" "strings" "time" + "github.com/go-kit/log" + "github.com/go-kit/log/level" "github.com/google/uuid" "github.com/zeebo/xxh3" ) @@ -21,12 +29,87 @@ const ( OPENDATASOFT_API_BASEURL = `https://odre.opendatasoft.com` ) +var ( + NODENAME_REGEXP = regexp.MustCompile(`(\[\d+\-\d+\])`) +) + +// Execute command and return stdout/stderr +func Execute(cmd string, args []string, logger log.Logger) ([]byte, error) { + level.Debug(logger).Log("msg", "Executing", "command", cmd, "args", fmt.Sprintf("%+v", args)) + out, err := exec.Command(cmd, args...).CombinedOutput() + if err != nil { + err = fmt.Errorf("error running %s: %s", cmd, err) + } + return out, err +} + +// Get all fields in a given struct +func GetStructFieldName(Struct interface{}) []string { + var fields []string + + v := reflect.ValueOf(Struct) + typeOfS := v.Type() + + for i := 0; i < v.NumField(); i++ { + fields = append(fields, typeOfS.Field(i).Name) + } + return fields +} + +// Get all values in a given struct +func GetStructFieldValue(Struct interface{}) []interface{} { + v := reflect.ValueOf(Struct) + values := make([]interface{}, v.NumField()) + + for i := 0; i < v.NumField(); i++ { + f := v.Field(i) + values = append(values, f.Interface()) + } + return values +} + +// Expand SLURM NODELIST into slice of nodenames +func NodelistParser(nodelistExp string) []string { + var nodeNames []string + // First split by , to get individual nodes + for _, nodeexp := range strings.Split(nodelistExp, ",") { + // If it contains "[", it means they are range of nodes + if strings.Contains(nodeexp, "[") { + matches := NODENAME_REGEXP.FindAllString(nodeexp, -1) + if len(matches) == 0 { + continue + } + // Get only first match as we use recursion + for _, match := range matches[0:1] { + matchSansBrackets := match[1 : len(match)-1] + startIdx, err := strconv.Atoi(strings.Split(matchSansBrackets, "-")[0]) + if err != nil { + continue + } + endIdx, err := strconv.Atoi(strings.Split(matchSansBrackets, "-")[1]) + if err != nil { + continue + } + for i := startIdx; i <= endIdx; i++ { + nodename := strings.Replace(nodeexp, match, strconv.Itoa(i), -1) + // Add them to slice and call function again + nodeNames = append(nodeNames, NodelistParser(nodename)...) + } + } + + } else { + nodeNames = append(nodeNames, regexp.QuoteMeta(nodeexp)) + } + } + return nodeNames +} + // Load cgroups v2 metrics from a given path -func LoadCgroupsV2Metrics(name string, controllers []string) (map[string]float64, error) { +func LoadCgroupsV2Metrics(name string, cgroupfsPath string, controllers []string) (map[string]float64, error) { data := make(map[string]float64) for _, fName := range controllers { - contents, err := os.ReadFile(filepath.Join(*cgroupfsPath, name, fName)) + contents, err := os.ReadFile(filepath.Join(cgroupfsPath, name, fName)) if err != nil { return data, err } diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go new file mode 100644 index 00000000..5633fcd7 --- /dev/null +++ b/pkg/utils/utils_test.go @@ -0,0 +1,38 @@ +//go:build !utils +// +build !utils + +package utils + +import ( + "reflect" + "testing" +) + +type nodelistParserTest struct { + nodelist string; + expected []string; +} + +var nodelistParserTests = []nodelistParserTest{ + { + "compute-a-[0-1]", []string{"compute-a-0", "compute-a-1"}, + }, + { + "compute-a-[0-1]-b-[3-4]", + []string{"compute-a-0-b-3", "compute-a-0-b-4", "compute-a-1-b-3", "compute-a-1-b-4"}, + }, + { + "compute-a-[0-1]-b-[3-4],compute-c,compute-d", + []string{"compute-a-0-b-3", "compute-a-0-b-4", + "compute-a-1-b-3", "compute-a-1-b-4", "compute-c", "compute-d"}, + }, +} + + +func TestNodelistParser(t *testing.T) { + for _, test := range nodelistParserTests{ + if output := NodelistParser(test.nodelist); !reflect.DeepEqual(output, test.expected) { + t.Errorf("Expected %q not equal to output %q", test.expected, output) + } + } +} diff --git a/scripts/e2e-test.sh b/scripts/e2e-test.sh index fdc46bb6..f97eba04 100755 --- a/scripts/e2e-test.sh +++ b/scripts/e2e-test.sh @@ -16,8 +16,8 @@ cgroups_mode=$([ $(stat -fc %T /sys/fs/cgroup/) = "cgroup2fs" ] && echo "unified echo "cgroups mode detected is ${cgroups_mode}" case "${cgroups_mode}" in - legacy|hybrid) fixture='collector/fixtures/e2e-test-cgroupsv1-output.txt' ;; - *) fixture='collector/fixtures/e2e-test-cgroupsv2-output.txt' ;; + legacy|hybrid) fixture='pkg/collector/fixtures/e2e-test-cgroupsv1-output.txt' ;; + *) fixture='pkg/collector/fixtures/e2e-test-cgroupsv2-output.txt' ;; esac keep=0; update=0; verbose=0 @@ -50,14 +50,14 @@ then exit 1 fi -PATH=$PWD/collector/fixtures:$PATH ./bin/batchjob_exporter \ - --path.sysfs="collector/fixtures/sys" \ - --path.cgroupfs="collector/fixtures/sys/fs/cgroup" \ +PATH=$PWD/pkg/collector/fixtures:$PATH ./bin/batchjob_exporter \ + --path.sysfs="pkg/collector/fixtures/sys" \ + --path.cgroupfs="pkg/collector/fixtures/sys/fs/cgroup" \ --collector.slurm.unique.jobid \ - --collector.slurm.job.stat.path="collector/fixtures/slurmjobstat" \ - --collector.ipmi.dcmi.wrapper.path="collector/fixtures/ipmi-dcmi-wrapper.sh" \ + --collector.slurm.job.stat.path="pkg/collector/fixtures/slurmjobstat" \ + --collector.ipmi.dcmi.wrapper.path="pkg/collector/fixtures/ipmi-dcmi-wrapper.sh" \ --collector.nvidia_gpu \ - --collector.nvidia.gpu.stat.path="collector/fixtures/gpustat" \ + --collector.nvidia.gpu.stat.path="pkg/collector/fixtures/gpustat" \ --web.listen-address "127.0.0.1:${port}" \ --log.level="debug" > "${tmpdir}/batchjob_exporter.log" 2>&1 & From 6938894ef75d47522e3e054328d12108d95abaaf Mon Sep 17 00:00:00 2001 From: mahendrapaipuri Date: Tue, 28 Nov 2023 16:30:52 +0100 Subject: [PATCH 3/4] style: Formatted with gofmt Signed-off-by: mahendrapaipuri --- pkg/utils/utils_test.go | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go index 5633fcd7..7f279634 100644 --- a/pkg/utils/utils_test.go +++ b/pkg/utils/utils_test.go @@ -9,30 +9,29 @@ import ( ) type nodelistParserTest struct { - nodelist string; - expected []string; + nodelist string + expected []string } var nodelistParserTests = []nodelistParserTest{ - { + { "compute-a-[0-1]", []string{"compute-a-0", "compute-a-1"}, }, - { - "compute-a-[0-1]-b-[3-4]", + { + "compute-a-[0-1]-b-[3-4]", []string{"compute-a-0-b-3", "compute-a-0-b-4", "compute-a-1-b-3", "compute-a-1-b-4"}, }, - { - "compute-a-[0-1]-b-[3-4],compute-c,compute-d", - []string{"compute-a-0-b-3", "compute-a-0-b-4", - "compute-a-1-b-3", "compute-a-1-b-4", "compute-c", "compute-d"}, + { + "compute-a-[0-1]-b-[3-4],compute-c,compute-d", + []string{"compute-a-0-b-3", "compute-a-0-b-4", + "compute-a-1-b-3", "compute-a-1-b-4", "compute-c", "compute-d"}, }, } - func TestNodelistParser(t *testing.T) { - for _, test := range nodelistParserTests{ - if output := NodelistParser(test.nodelist); !reflect.DeepEqual(output, test.expected) { - t.Errorf("Expected %q not equal to output %q", test.expected, output) - } - } + for _, test := range nodelistParserTests { + if output := NodelistParser(test.nodelist); !reflect.DeepEqual(output, test.expected) { + t.Errorf("Expected %q not equal to output %q", test.expected, output) + } + } } From 63d017c38b8a5df7cd51c5906b898327e8453d4e Mon Sep 17 00:00:00 2001 From: mahendrapaipuri Date: Tue, 28 Nov 2023 16:32:28 +0100 Subject: [PATCH 4/4] refactor: Use camelCase for vars Signed-off-by: mahendrapaipuri --- pkg/utils/utils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 3288c01d..350ff065 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -30,7 +30,7 @@ const ( ) var ( - NODENAME_REGEXP = regexp.MustCompile(`(\[\d+\-\d+\])`) + nodelistRegExp = regexp.MustCompile(`(\[\d+\-\d+\])`) ) // Execute command and return stdout/stderr @@ -75,7 +75,7 @@ func NodelistParser(nodelistExp string) []string { for _, nodeexp := range strings.Split(nodelistExp, ",") { // If it contains "[", it means they are range of nodes if strings.Contains(nodeexp, "[") { - matches := NODENAME_REGEXP.FindAllString(nodeexp, -1) + matches := nodelistRegExp.FindAllString(nodeexp, -1) if len(matches) == 0 { continue }