Skip to content

Commit

Permalink
[ci] apt & rpm distribution packages (#97)
Browse files Browse the repository at this point in the history
* Testing goreleaser

* Adding packaging pieces

* Testing goreleaser

Adding packaging pieces

* Adding license bits

* Fixing newline

* Fixing whitespace

* Oops forgot conflict

* Adding trace templates to distro package

* Fixing spacing

* Testing goreleaser and re-adding license

* Fixing whitespace

* [ci] archive static templates with binary (#95)

* archive static templates with binary

* update docs

* Adding in trace path detection logic

* Testing goreleaser

Adding packaging pieces

Testing goreleaser and re-adding license

Fixing whitespace

Adding in trace path detection logic

Testing goreleaser

Adding license bits

Oops forgot conflict

Adding trace templates to distro package

Fixing spacing

* trace logic

* Update .goreleaser.yaml

Fixing bad copy/paste, oops!

Co-authored-by: abhinavDhulipala <[email protected]>

* Removing systemd files for dist packages

* Update .goreleaser.yaml

Fixing bad copy/paste, oops!

Co-authored-by: abhinavDhulipala <[email protected]>

make systemctl edit copy/pastable

---------

Co-authored-by: abhinavDhulipala <[email protected]>
  • Loading branch information
drewstinnett and abhinavDhulipala authored Sep 10, 2024
1 parent 3afae44 commit e285f3e
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 19 deletions.
18 changes: 16 additions & 2 deletions .goreleaser.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# SPDX-FileCopyrightText: 2023 Rivos Inc.
#
# SPDX-License-Identifier: Apache-2.0

version: 2
builds:
- goos:
Expand All @@ -10,7 +9,22 @@ builds:
- amd64
- arm
- arm64

nfpms:
- vendor: rivosinc
maintainer: abhinavDhulipala
contents:
- src: exporter/templates
dst: /usr/share/prometheus-slurm-exporter/templates
file_info:
mode: 0644
group: root
owner: root
formats:
- apk
- deb
- rpm
- termux.deb
- archlinux
archives:
- name_template: '{{ .ProjectName }}_{{ .Os }}_{{ .Arch }}'
files:
Expand Down
90 changes: 90 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,96 @@ Env vars can be specified in a `.env` file, while using the `just`
| CLI_TIMEOUT | 10. | # seconds before the exporter terminates command. |
| TRACE_ROOT_PATH | "cwd" | path to ./templates directory where html files are located |

### RPM/DEB Packages

You can download RPM or DEB versions from the [Releases](releases/) tab. The
packages are configured to use systemd to start and stop the service.

Configuring the systemd service

`$ systemctl edit prometheus-slurm-exporter.service`

```text
### Editing /etc/systemd/system/prometheus-slurm-exporter.service.d/override.conf
### Anything between here and the comment below will become the new contents of the file
[Service]
Environment="PATH=/opt/slurm/bin"
Environment="POLL_INTERVAL=300"
Environment="CLI_TIMEOUT=60"
Environment="LOGLEVEL=debug"
### Lines below this comment will be discarded
### /usr/lib/systemd/system/prometheus-slurm-exporter.service
# [Unit]
# Description=Prometheus SLURM Exporter
#
# [Service]
# ExecStart=/usr/bin/prometheus-slurm-exporter
# Restart=always
# RestartSec=15
#
# [Install]
# WantedBy=multi-user.target
```

### RPM/DEB Packages

You can download RPM or DEB versions from the [Releases](releases/) tab. The
packages are configured to use systemd to start and stop the service.

### Running with Systemd

You can install a systemd service definition using the following command:

```bash
sudo bash -c 'cat << EOF > /etc/systemd/system/prometheus-slurm-exporter.service
[Unit]
Description=Prometheus SLURM Exporter
[Service]
ExecStart=/usr/bin/prometheus-slurm-exporter
Restart=always
RestartSec=15
[Install]
WantedBy=multi-user.target
EOF'
sudo systemctl daemon-reload
sudo systemctl enable --now prometheus-slurm-exporter.service
```

Customizing the systemd service with environment variables:

```bash
sudo systemctl edit prometheus-slurm-exporter.service
```

```text
### Editing /etc/systemd/system/prometheus-slurm-exporter.service.d/override.conf
### Anything between here and the comment below will become the new contents of the file
[Service]
Environment="PATH=/opt/slurm/bin"
Environment="POLL_INTERVAL=300"
Environment="CLI_TIMEOUT=60"
Environment="LOGLEVEL=debug"
### Lines below this comment will be discarded
### /usr/lib/systemd/system/prometheus-slurm-exporter.service
# [Unit]
# Description=Prometheus SLURM Exporter
#
# [Service]
# ExecStart=/usr/bin/prometheus-slurm-exporter
# Restart=always
# RestartSec=15
#
# [Install]
# WantedBy=multi-user.target
```

### Future work
slurmrestd support
45 changes: 28 additions & 17 deletions exporter/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"log"
"net/http"
"os"
Expand All @@ -22,8 +21,10 @@ import (
)

// cleanup on add if greater than this threshold
const cleanupThreshold uint64 = 1_000
const templateDirName string = "templates"
// Package-level constants used by the trace collector.
const (
// cleanupThreshold is the limit above which a cleanup is triggered on add
// (presumably a count of stored trace entries — confirm against the fetcher).
cleanupThreshold uint64 = 1_000
// templateDirName is the directory name that detectTraceTemplatePath joins
// onto each candidate root when looking for the trace templates.
templateDirName string = "templates"
)

// store a jobs published proc stats
type TraceInfo struct {
Expand Down Expand Up @@ -107,22 +108,11 @@ type TraceCollector struct {

func NewTraceCollector(config *Config) *TraceCollector {
traceConfig := config.TraceConf
templateRootDir := "."
// path to look for the /templates directory. Defaults to cwd
if path, ok := os.LookupEnv("TRACE_ROOT_PATH"); ok {
templateRootDir = path
}
traceDir := ""
err := filepath.WalkDir(templateRootDir, func(path string, d fs.DirEntry, err error) error {
if err == nil && d.IsDir() && d.Name() == templateDirName {
traceDir = path
return nil
}
return nil
})
if err != nil || traceDir == "" {
traceDir := detectTraceTemplatePath()
if traceDir == "" {
log.Fatal("no template found")
}
slog.Debug("using trace template path: " + traceDir)
return &TraceCollector{
ProcessFetcher: NewAtomicProFetcher(traceConfig.rate),
squeueFetcher: traceConfig.sharedFetcher,
Expand Down Expand Up @@ -200,3 +190,24 @@ func (c *TraceCollector) uploadTrace(w http.ResponseWriter, r *http.Request) {
}
}
}

// detectTraceTemplatePath returns the path of the trace "templates" directory.
//
// Resolution order:
//  1. If TRACE_ROOT_PATH is set, only <TRACE_ROOT_PATH>/templates is considered.
//     The function panics when that path is missing or is not a directory: an
//     explicitly configured root that cannot be used should stop the program
//     rather than silently fall back to another location.
//  2. If TRACE_ROOT_PATH is not set, the current working directory and
//     /usr/share/prometheus-slurm-exporter are probed, in that order, and the
//     first one that contains a templates directory wins.
//
// It returns an empty string when TRACE_ROOT_PATH is unset and none of the
// default locations contains a templates directory.
func detectTraceTemplatePath() string {
	// isDir reports whether p exists and is a directory. A bare os.Stat
	// success is not enough: a regular file named "templates" must not be
	// accepted as the template directory.
	isDir := func(p string) bool {
		info, err := os.Stat(p)
		return err == nil && info.IsDir()
	}

	if root, ok := os.LookupEnv("TRACE_ROOT_PATH"); ok {
		templateP := filepath.Join(root, templateDirName)
		if !isDir(templateP) {
			panic("TRACE_ROOT_PATH must include a directory called: templates")
		}
		return templateP
	}

	for _, root := range []string{".", "/usr/share/prometheus-slurm-exporter"} {
		if templateP := filepath.Join(root, templateDirName); isDir(templateP) {
			return templateP
		}
	}
	return ""
}
28 changes: 28 additions & 0 deletions exporter/trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ import (
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestAtomicFetcher_Cleanup(t *testing.T) {
Expand All @@ -26,6 +28,7 @@ func TestAtomicFetcher_Cleanup(t *testing.T) {
fetcher.cleanup()
assert.Contains(fetcher.Info, int64(10))
}

func TestAtomicFetcher_Add(t *testing.T) {
assert := assert.New(t)
fetcher := NewAtomicProFetcher(10)
Expand Down Expand Up @@ -202,3 +205,28 @@ func TestPython3Wrapper(t *testing.T) {
json.Unmarshal(wrapperOut, &info)
assert.Equal(int64(10), info.JobId)
}

// TestDetectTraceRootPath_Env checks detectTraceTemplatePath when
// TRACE_ROOT_PATH is set: it must panic while the root has no 'templates'
// subdirectory and return <root>/templates once that subdirectory exists.
func TestDetectTraceRootPath_Env(t *testing.T) {
	// t.Setenv overrides TRACE_ROOT_PATH for this test only and restores the
	// previous value when the test ends. The rest of the process environment
	// is deliberately left untouched so later tests still see it.
	testDir := t.TempDir()
	t.Setenv("TRACE_ROOT_PATH", testDir)

	// Ensure that the function panics if given a TRACE_ROOT_PATH with no 'templates' subdirectory
	assert.PanicsWithValue(t, "TRACE_ROOT_PATH must include a directory called: templates", func() { detectTraceTemplatePath() })
	require.NoError(t, os.Mkdir(filepath.Join(testDir, templateDirName), 0o700))

	// Now that we have a 'templates' subdir, it should no longer panic
	assert.Equal(t, filepath.Join(testDir, templateDirName), detectTraceTemplatePath())
}

func TestDetectTraceRootPath_Default(t *testing.T) {
os.Clearenv()
testDir := t.TempDir()
os.Chdir(testDir)

// Should come back empty if since we don't yet have a 'templates' subdir
assert.Equal(t, detectTraceTemplatePath(), "")
require.NoError(t, os.Mkdir(filepath.Join(testDir, templateDirName), 0o700))

// Now that we have 'templates' subdir, cwd is a valid path
assert.Equal(t, templateDirName, detectTraceTemplatePath())
}

0 comments on commit e285f3e

Please sign in to comment.