Skip to content

Commit 795983c

Browse files
committed
More pulse sanity checking
Not really satisfactory. Part of #60.
1 parent 7e1c8e5 commit 795983c

File tree

3 files changed

+59
-36
lines changed

3 files changed

+59
-36
lines changed

internal/mon/monitor.go

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,20 @@ import (
1414
const sampleWindowSize = 60
1515

1616
type Monitor struct {
17-
samples *sampleWindow
18-
lastSampleTime time.Time
19-
leapSecond ptime.LeapSecond
20-
servo Servo // never nil
21-
lg *slog.Logger
22-
gm *Grandmaster // maybe nil
23-
rc *ProxyRefClock // maybe nil
24-
inSync bool
25-
lastRefTime ptime.Time
26-
ppsStopped bool
27-
sseCh chan<- sse.Event
28-
stats stats
29-
lf logfile.LogFile
17+
samples *sampleWindow
18+
lastSampleTime time.Time
19+
leapSecond ptime.LeapSecond
20+
servo Servo // never nil
21+
lg *slog.Logger
22+
gm *Grandmaster // maybe nil
23+
rc *ProxyRefClock // maybe nil
24+
inSync bool
25+
lastRefTime ptime.Time
26+
lastSyncRefTime ptime.Time
27+
ppsStopped bool
28+
sseCh chan<- sse.Event
29+
stats stats
30+
lf logfile.LogFile
3031
}
3132

3233
type stats struct {
@@ -91,23 +92,27 @@ func (mon *Monitor) ReopenLog() {
9192

9293
func (mon *Monitor) Sample(ref ptime.Time, local ptime.ClockTime, delayed bool) {
9394
mon.addMissingOffsets(ref)
94-
off := local.T.Sub(ref)
9595
kind := sampleOK
96-
if !delayed && mon.isOutlier(off, local.Era) {
97-
kind = sampleOutlier
96+
if !delayed && mon.isInvalid(ref, local) {
97+
kind = sampleInvalid
9898
} else {
9999
mon.servo.Sample(ref, local, delayed)
100100
}
101101
freq := mon.servo.FreqOffset()
102+
off := local.T.Sub(ref)
102103
mon.recordSample(kind, off, local.Era, freq)
103104
mon.writeLogEntry(kind, ref, off, local.Era, freq)
104105
mon.sendEvent(kind, off, local.Era, freq)
105106
if delayed {
106107
return
107108
}
108109
inSync := mon.isInSync()
109-
now := time.Now()
110-
mon.lastSampleTime = now
110+
if kind == sampleOK {
111+
mon.lastSampleTime = time.Now()
112+
if inSync {
113+
mon.lastSyncRefTime = ref
114+
}
115+
}
111116
if mon.ppsStopped {
112117
mon.lg.Warn("1PPS signal restored")
113118
mon.ppsStopped = false
@@ -151,7 +156,7 @@ func (mon *Monitor) recordSample(kind sampleKind, off time.Duration, era ptime.E
151156
mon.lg.Info("missed 1PPS sample")
152157
return
153158
}
154-
if kind == sampleOutlier {
159+
if kind == sampleInvalid {
155160
mon.lg.Info("outlier sample", "off", off, "freq", freq)
156161
return
157162
}
@@ -172,7 +177,7 @@ func (mon *Monitor) writeLogEntry(kind sampleKind, ref ptime.Time, off time.Dura
172177
return
173178
}
174179
outlierFlag := 0
175-
if kind == sampleOutlier {
180+
if kind == sampleInvalid {
176181
outlierFlag = 1
177182
}
178183
// Stable32 treats 0 as meaning a gap, so we output 1e-99 for 0.
@@ -209,7 +214,7 @@ func (mon *Monitor) sendEvent(kind sampleKind, off time.Duration, era ptime.Era,
209214
StepCount: uint32(stepCount),
210215
StepCountChanging: changing,
211216
Freq: freq,
212-
Outlier: kind == sampleOutlier,
217+
Outlier: kind == sampleInvalid,
213218
})
214219
if err != nil {
215220
mon.lg.Error("error creating sample event", "err", err)
@@ -257,20 +262,38 @@ func (mon *Monitor) isInSync() bool {
257262
return mon.samples.isInSync(mon.inSync, &defaultSampleConfig)
258263
}
259264

260-
func (mon *Monitor) isOutlier(off time.Duration, era ptime.Era) bool {
261-
offSecs := off.Seconds()
265+
// maxValidDriftPPM is the maximum drift in PPM before a sample is considered invalid.
266+
// This should kick in if there is some sort of hardware problem giving us crazy offsets.
267+
const maxValidDriftPPM = 50
268+
269+
func (mon *Monitor) isInvalid(ref ptime.Time, local ptime.ClockTime) bool {
270+
off := local.T.Sub(ref).Seconds()
262271

263272
// if this offset isn't bad enough to take us out of sync,
264273
// then there's no need to consider it as an outlier
265274
// this should be a quick check that succeeds most of the time
266-
if math.Abs(offSecs) <= defaultSampleConfig.maxOffset {
275+
if math.Abs(off) <= defaultSampleConfig.maxOffset {
267276
return false
268277
}
269278
// don't do outlier detection unless we are using the PI controller
270-
if !mon.servo.Locked(era) {
279+
if !mon.servo.Locked(local.Era) {
271280
return false
272281
}
273-
return mon.samples.madIsOutlier(offSecs, &defaultSampleConfig)
282+
return mon.isInsane(off, ref) || mon.samples.madIsOutlier(off, &defaultSampleConfig)
283+
}
284+
285+
func (mon *Monitor) isInsane(off float64, ref ptime.Time) bool {
286+
if mon.lastSyncRefTime.IsZero() {
287+
return false
288+
}
289+
if math.Abs(mon.samples.last(0).off - off) < defaultSampleConfig.maxOffset {
290+
return false
291+
}
292+
syncDiff := ref.Sub(mon.lastSyncRefTime).Seconds()
293+
if math.Abs(off) > syncDiff*(maxValidDriftPPM/1e6) {
294+
return true
295+
}
296+
return false
274297
}
275298

276299
func (mon *Monitor) updateInSync(inSync bool) {
@@ -342,7 +365,7 @@ func (a *accumPhase) add(kind sampleKind, v float64) {
342365
a.maxAbs = math.Max(a.maxAbs, av)
343366
case sampleMissing:
344367
a.nMissing++
345-
case sampleOutlier:
368+
case sampleInvalid:
346369
a.nOutliers++
347370
}
348371
}

internal/mon/sample.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ type sampleKind int
1212
const (
1313
sampleMissing sampleKind = iota
1414
sampleOK
15-
sampleOutlier
15+
sampleInvalid
1616
)
1717

1818
type sampleData struct {

internal/mon/sample_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ var inSyncTests = [][]sampleData{
1515
sampleData{-19e-9, sampleOK},
1616
sampleData{12e-9, sampleOK},
1717
sampleData{-4e-9, sampleOK},
18-
sampleData{-1e6, sampleOutlier},
18+
sampleData{-1e6, sampleInvalid},
1919
sampleData{0, sampleMissing},
20-
sampleData{1e3, sampleOutlier},
20+
sampleData{1e3, sampleInvalid},
2121
sampleData{10e-9, sampleOK},
2222
sampleData{-4e-9, sampleOK},
2323
},
@@ -36,9 +36,9 @@ var inSyncTests = [][]sampleData{
3636
sampleData{49e-9, sampleOK},
3737
sampleData{3e-9, sampleOK},
3838
sampleData{-4e-9, sampleOK},
39-
sampleData{-1e6, sampleOutlier},
39+
sampleData{-1e6, sampleInvalid},
4040
sampleData{0, sampleMissing},
41-
sampleData{1e3, sampleOutlier},
41+
sampleData{1e3, sampleInvalid},
4242
sampleData{10e-9, sampleOK},
4343
sampleData{-4e-9, sampleOK},
4444
sampleData{0, sampleMissing},
@@ -48,8 +48,8 @@ var inSyncTests = [][]sampleData{
4848
sampleData{20e-9, sampleOK},
4949
sampleData{3e-9, sampleOK},
5050
sampleData{-4e-9, sampleOK},
51-
sampleData{-2e6, sampleOutlier},
52-
sampleData{-2e6, sampleOutlier},
51+
sampleData{-2e6, sampleInvalid},
52+
sampleData{-2e6, sampleInvalid},
5353
sampleData{19e-9, sampleOK},
5454
sampleData{-2e-9, sampleOK},
5555
},
@@ -60,9 +60,9 @@ func TestInSync(t *testing.T) {
6060
inSync := false
6161
w := newSampleWindow(sampleWindowSize)
6262
for j, s := range test {
63-
if s.kind != sampleMissing && w.madIsOutlier(s.off, &defaultSampleConfig) != (s.kind == sampleOutlier) {
63+
if s.kind != sampleMissing && w.madIsOutlier(s.off, &defaultSampleConfig) != (s.kind == sampleInvalid) {
6464
n, min, max := w.mad(defaultSampleConfig.madMultiple)
65-
t.Errorf("Test %d, sample %d, expected madIsOutlier == %v (n = %d, min = %v, max = %v)", i, j, s.kind == sampleOutlier, n, min, max)
65+
t.Errorf("Test %d, sample %d, expected madIsOutlier == %v (n = %d, min = %v, max = %v)", i, j, s.kind == sampleInvalid, n, min, max)
6666
}
6767
w.append(s.kind, s.off, 1)
6868
inSync = w.isInSync(inSync, &defaultSampleConfig)

0 commit comments

Comments
 (0)