@@ -14,19 +14,20 @@ import (
14
14
const sampleWindowSize = 60
15
15
16
16
// Monitor carries the state that the sample-handling methods in this file
// (Sample, isInvalid, isInsane, ...) read and update.
// In this diff view the "-" lines are the previous field list and the "+"
// lines are the new one; the only field added is lastSyncRefTime.
type Monitor struct {
17
- samples * sampleWindow
18
- lastSampleTime time.Time
19
- leapSecond ptime.LeapSecond
20
- servo Servo // never nil
21
- lg * slog.Logger
22
- gm * Grandmaster // maybe nil
23
- rc * ProxyRefClock // maybe nil
24
- inSync bool
25
- lastRefTime ptime.Time
26
- ppsStopped bool
27
- sseCh chan <- sse.Event
28
- stats stats
29
- lf logfile.LogFile
17
+ samples * sampleWindow
18
+ lastSampleTime time.Time // set to time.Now() by Sample for each non-delayed sample whose kind is sampleOK
19
+ leapSecond ptime.LeapSecond
20
+ servo Servo // never nil
21
+ lg * slog.Logger
22
+ gm * Grandmaster // maybe nil
23
+ rc * ProxyRefClock // maybe nil
24
+ inSync bool
25
+ lastRefTime ptime.Time
26
+ lastSyncRefTime ptime.Time // ref of the latest non-delayed sampleOK sample for which isInSync() was true; zero until then (isInsane checks IsZero)
27
+ ppsStopped bool
28
+ sseCh chan <- sse.Event
29
+ stats stats
30
+ lf logfile.LogFile
30
31
}
31
32
32
33
type stats struct {
@@ -91,23 +92,27 @@ func (mon *Monitor) ReopenLog() {
91
92
92
93
func (mon * Monitor ) Sample (ref ptime.Time , local ptime.ClockTime , delayed bool ) {
93
94
mon .addMissingOffsets (ref )
94
- off := local .T .Sub (ref )
95
95
kind := sampleOK
96
- if ! delayed && mon .isOutlier ( off , local . Era ) {
97
- kind = sampleOutlier
96
+ if ! delayed && mon .isInvalid ( ref , local ) {
97
+ kind = sampleInvalid
98
98
} else {
99
99
mon .servo .Sample (ref , local , delayed )
100
100
}
101
101
freq := mon .servo .FreqOffset ()
102
+ off := local .T .Sub (ref )
102
103
mon .recordSample (kind , off , local .Era , freq )
103
104
mon .writeLogEntry (kind , ref , off , local .Era , freq )
104
105
mon .sendEvent (kind , off , local .Era , freq )
105
106
if delayed {
106
107
return
107
108
}
108
109
inSync := mon .isInSync ()
109
- now := time .Now ()
110
- mon .lastSampleTime = now
110
+ if kind == sampleOK {
111
+ mon .lastSampleTime = time .Now ()
112
+ if inSync {
113
+ mon .lastSyncRefTime = ref
114
+ }
115
+ }
111
116
if mon .ppsStopped {
112
117
mon .lg .Warn ("1PPS signal restored" )
113
118
mon .ppsStopped = false
@@ -151,7 +156,7 @@ func (mon *Monitor) recordSample(kind sampleKind, off time.Duration, era ptime.E
151
156
mon .lg .Info ("missed 1PPS sample" )
152
157
return
153
158
}
154
- if kind == sampleOutlier {
159
+ if kind == sampleInvalid {
155
160
mon .lg .Info ("outlier sample" , "off" , off , "freq" , freq )
156
161
return
157
162
}
@@ -172,7 +177,7 @@ func (mon *Monitor) writeLogEntry(kind sampleKind, ref ptime.Time, off time.Dura
172
177
return
173
178
}
174
179
outlierFlag := 0
175
- if kind == sampleOutlier {
180
+ if kind == sampleInvalid {
176
181
outlierFlag = 1
177
182
}
178
183
// Stable32 treats 0 as meaning a gap, so we output 1e-99 for 0.
@@ -209,7 +214,7 @@ func (mon *Monitor) sendEvent(kind sampleKind, off time.Duration, era ptime.Era,
209
214
StepCount : uint32 (stepCount ),
210
215
StepCountChanging : changing ,
211
216
Freq : freq ,
212
- Outlier : kind == sampleOutlier ,
217
+ Outlier : kind == sampleInvalid ,
213
218
})
214
219
if err != nil {
215
220
mon .lg .Error ("error creating sample event" , "err" , err )
@@ -257,20 +262,38 @@ func (mon *Monitor) isInSync() bool {
257
262
return mon .samples .isInSync (mon .inSync , & defaultSampleConfig )
258
263
}
259
264
260
- func (mon * Monitor ) isOutlier (off time.Duration , era ptime.Era ) bool {
261
- offSecs := off .Seconds ()
265
+ // maxValidDriftPPM is the maximum drift, in parts per million, tolerated before a sample is considered invalid.
266
+ // It is meant to kick in when a hardware problem produces wildly wrong offsets; isInsane scales it by the seconds elapsed since lastSyncRefTime.
267
+ const maxValidDriftPPM = 50
268
+
269
// isInvalid reports whether the sample (ref, local) should be rejected.
// It returns false when the absolute offset is within
// defaultSampleConfig.maxOffset or when the servo is not locked for
// local.Era; otherwise it returns true if either isInsane or the
// sample window's madIsOutlier flags the offset.
// Sample uses the result to mark the sample as sampleInvalid and to skip
// feeding it to the servo.
+ func (mon * Monitor ) isInvalid (ref ptime.Time , local ptime.ClockTime ) bool {
270
+ off := local .T .Sub (ref ).Seconds ()
262
271
263
272
// if this offset isn't bad enough to take us out of sync,
264
273
// then there's no need to consider it as an outlier;
265
274
// this should be a quick check that succeeds most of the time
266
- if math .Abs (offSecs ) <= defaultSampleConfig .maxOffset {
275
+ if math .Abs (off ) <= defaultSampleConfig .maxOffset {
267
276
return false
268
277
}
269
278
// don't do outlier detection unless we are using the PI controller
// (that is, unless the servo reports Locked for this era)
270
- if ! mon .servo .Locked (era ) {
279
+ if ! mon .servo .Locked (local . Era ) {
271
280
return false
272
281
}
273
- return mon .samples .madIsOutlier (offSecs , & defaultSampleConfig )
282
// the drift-based sanity check runs first; madIsOutlier is only consulted when it passes
+ return mon .isInsane (off , ref ) || mon .samples .madIsOutlier (off , & defaultSampleConfig )
283
+ }
284
+
285
// isInsane reports whether off (the offset in seconds, as passed by
// isInvalid) is larger than a drift of maxValidDriftPPM could have produced
// in the time between mon.lastSyncRefTime and ref.
// It returns false when lastSyncRefTime is still zero, or when off differs
// from mon.samples.last(0).off by less than defaultSampleConfig.maxOffset.
+ func (mon * Monitor ) isInsane (off float64 , ref ptime.Time ) bool {
286
// no in-sync reference time recorded yet, so there is nothing to measure drift against
+ if mon .lastSyncRefTime .IsZero () {
287
+ return false
288
+ }
289
// presumably last(0) is the most recently recorded sample — TODO confirm,
// and confirm it is safe to call when the window is empty
+ if math .Abs (mon .samples .last (0 ).off - off ) < defaultSampleConfig .maxOffset {
290
+ return false
291
+ }
292
// seconds elapsed between the last in-sync reference time and this sample
+ syncDiff := ref .Sub (mon .lastSyncRefTime ).Seconds ()
293
// maxValidDriftPPM / 1e6 is an untyped constant expression evaluated in
// floating point (5e-05), not integer division.
// NOTE(review): if ref is not later than lastSyncRefTime (assuming Sub is
// signed), the bound is <= 0 and any nonzero offset is reported as insane —
// confirm this is intended.
+ if math .Abs (off ) > syncDiff * (maxValidDriftPPM / 1e6 ) {
294
+ return true
295
+ }
296
+ return false
274
297
}
275
298
276
299
func (mon * Monitor ) updateInSync (inSync bool ) {
@@ -342,7 +365,7 @@ func (a *accumPhase) add(kind sampleKind, v float64) {
342
365
a .maxAbs = math .Max (a .maxAbs , av )
343
366
case sampleMissing :
344
367
a .nMissing ++
345
- case sampleOutlier :
368
+ case sampleInvalid :
346
369
a .nOutliers ++
347
370
}
348
371
}
0 commit comments