diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 95ef9ed..426c9f4 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -14,13 +14,6 @@ builds: nfpms: - vendor: rivosinc maintainer: abhinavDhulipala - contents: - - src: exporter/templates - dst: /usr/share/prometheus-slurm-exporter/templates - file_info: - mode: 0644 - group: root - owner: root formats: - apk - deb @@ -29,6 +22,3 @@ nfpms: - archlinux archives: - name_template: '{{ .ProjectName }}_{{ .Os }}_{{ .Arch }}' - files: - - src: ./exporter/templates/* - dst: templates diff --git a/exporter/templates/proc_traces.html b/exporter/templates/proc_traces.html deleted file mode 100644 index 0c98efc..0000000 --- a/exporter/templates/proc_traces.html +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - - - - - - - - - - {{ range . }} - - - - - - - - - - - {{ end }} -
Job Id Process Id Cpu % I/O Wait Memory Usage Username Hostname
{{ .JobId }} {{ .Pid}} {{ .Cpus }} {{ .WriteBytes }} {{ .ReadBytes }} {{ .Mem }} {{ .Username }} {{ .Hostname }}
- - diff --git a/exporter/trace.go b/exporter/trace.go index 2f07e8c..8faf667 100644 --- a/exporter/trace.go +++ b/exporter/trace.go @@ -8,7 +8,6 @@ import ( "encoding/json" "errors" "fmt" - "log" "net/http" "os" "path/filepath" @@ -20,10 +19,39 @@ import ( "log/slog" ) -// cleanup on add if greater than this threshold const ( - cleanupThreshold uint64 = 1_000 - templateDirName string = "templates" + // cleanup on add if greater than this threshold + cleanupThreshold uint64 = 1_000 + templateDirName string = "templates" + proctraceTemplate string = ` + + + + + + + + + + + + + {{ range . }} + + + + + + + + + + + {{ end }} +
Job Id Process Id Cpu % I/O Wait Memory Usage Username Hostname
{{ .JobId }} {{ .Pid}} {{ .Cpus }} {{ .WriteBytes }} {{ .ReadBytes }} {{ .Mem }} {{ .Username }} {{ .Hostname }}
+ + +` ) // store a jobs published proc stats @@ -94,7 +122,6 @@ type TraceCollector struct { ProcessFetcher *AtomicProcFetcher squeueFetcher SlurmMetricFetcher[JobMetric] fallback bool - templatesDir string // actual proc monitoring jobAllocMem *prometheus.Desc jobAllocCpus *prometheus.Desc @@ -108,16 +135,10 @@ type TraceCollector struct { func NewTraceCollector(config *Config) *TraceCollector { traceConfig := config.TraceConf - traceDir := detectTraceTemplatePath() - if traceDir == "" { - log.Fatal("no template found") - } - slog.Debug("using trace template path: " + traceDir) return &TraceCollector{ ProcessFetcher: NewAtomicProFetcher(traceConfig.rate), squeueFetcher: traceConfig.sharedFetcher, fallback: config.cliOpts.fallback, - templatesDir: traceDir, // add for job id correlation jobAllocMem: prometheus.NewDesc("slurm_job_mem_alloc", "running job mem allocated", []string{"jobid"}, nil), jobAllocCpus: prometheus.NewDesc("slurm_job_cpu_alloc", "running job cpus allocated", []string{"jobid"}, nil), @@ -178,7 +199,8 @@ func (c *TraceCollector) uploadTrace(w http.ResponseWriter, r *http.Request) { } } if r.Method == http.MethodGet { - tmpl := template.Must(template.ParseFiles(filepath.Join(c.templatesDir, "proc_traces.html"))) + + tmpl := template.Must(template.New("proc_traces").Parse(proctraceTemplate)) procs := c.ProcessFetcher.Fetch() traces := make([]TraceInfo, 0, len(procs)) for _, info := range procs {