@@ -78,9 +78,7 @@ func TestMetrics(t *testing.T) {
78
78
closeFn , restConfig , clientSet , ns := setup (t , "simple" )
79
79
defer closeFn ()
80
80
ctx , cancel := startJobControllerAndWaitForCaches (restConfig )
81
- defer func () {
82
- cancel ()
83
- }()
81
+ defer cancel ()
84
82
85
83
testCases := map [string ]struct {
86
84
job * batchv1.Job
@@ -144,13 +142,14 @@ func TestMetrics(t *testing.T) {
144
142
validateJobSucceeded (ctx , t , clientSet , jobObj )
145
143
146
144
// verify metric values after the job is finished
147
- validateMetricValue (t , metrics .JobFinishedNum , tc .wantJobFinishedNumMetric )
148
- validateMetricValue (t , metrics .JobPodsFinished , tc .wantJobPodsFinishedMetric )
145
+ validateCounterMetric (t , metrics .JobFinishedNum , tc .wantJobFinishedNumMetric )
146
+ validateCounterMetric (t , metrics .JobPodsFinished , tc .wantJobPodsFinishedMetric )
147
+ validateTerminatedPodsTrackingFinalizerMetric (t , int (* jobObj .Spec .Parallelism ))
149
148
})
150
149
}
151
150
}
152
151
153
- func validateMetricValue (t * testing.T , counterVec * basemetrics.CounterVec , wantMetric metricLabelsWithValue ) {
152
+ func validateCounterMetric (t * testing.T , counterVec * basemetrics.CounterVec , wantMetric metricLabelsWithValue ) {
154
153
t .Helper ()
155
154
var cmpErr error
156
155
err := wait .PollImmediate (10 * time .Millisecond , 10 * time .Second , func () (bool , error ) {
@@ -166,13 +165,24 @@ func validateMetricValue(t *testing.T, counterVec *basemetrics.CounterVec, wantM
166
165
return true , nil
167
166
})
168
167
if err != nil {
169
- t .Errorf ("Failed waiting for expected metric delta : %q" , err )
168
+ t .Errorf ("Failed waiting for expected metric: %q" , err )
170
169
}
171
170
if cmpErr != nil {
172
171
t .Error (cmpErr )
173
172
}
174
173
}
175
174
175
+ func validateTerminatedPodsTrackingFinalizerMetric (t * testing.T , want int ) {
176
+ validateCounterMetric (t , metrics .TerminatedPodsTrackingFinalizerTotal , metricLabelsWithValue {
177
+ Value : want ,
178
+ Labels : []string {metrics .Add },
179
+ })
180
+ validateCounterMetric (t , metrics .TerminatedPodsTrackingFinalizerTotal , metricLabelsWithValue {
181
+ Value : want ,
182
+ Labels : []string {metrics .Delete },
183
+ })
184
+ }
185
+
176
186
// TestJobPodFailurePolicyWithFailedPodDeletedDuringControllerRestart verifies that the job is properly marked as Failed
177
187
// in a scenario when the job controller crashes between removing pod finalizers and marking the job as Failed (based on
178
188
// the pod failure policy). After the finalizer for the failed pod is removed we remove the failed pod. This step is
@@ -238,6 +248,7 @@ func TestJobPodFailurePolicyWithFailedPodDeletedDuringControllerRestart(t *testi
238
248
defer func () {
239
249
cancel ()
240
250
}()
251
+ resetMetrics ()
241
252
restConfig .QPS = 200
242
253
restConfig .Burst = 200
243
254
@@ -556,6 +567,7 @@ func TestParallelJob(t *testing.T) {
556
567
defer closeFn ()
557
568
ctx , cancel := startJobControllerAndWaitForCaches (restConfig )
558
569
defer cancel ()
570
+ resetMetrics ()
559
571
560
572
jobObj , err := createJobWithDefaults (ctx , clientSet , ns .Name , & batchv1.Job {
561
573
Spec : batchv1.JobSpec {
@@ -631,6 +643,9 @@ func TestParallelJob(t *testing.T) {
631
643
}
632
644
validateJobPodsStatus (ctx , t , clientSet , jobObj , want , false )
633
645
validateFinishedPodsNoFinalizer (ctx , t , clientSet , jobObj )
646
+ if tc .trackWithFinalizers {
647
+ validateTerminatedPodsTrackingFinalizerMetric (t , 7 )
648
+ }
634
649
})
635
650
}
636
651
}
@@ -803,9 +818,8 @@ func TestIndexedJob(t *testing.T) {
803
818
closeFn , restConfig , clientSet , ns := setup (t , "indexed" )
804
819
defer closeFn ()
805
820
ctx , cancel := startJobControllerAndWaitForCaches (restConfig )
806
- defer func () {
807
- cancel ()
808
- }()
821
+ defer cancel ()
822
+ resetMetrics ()
809
823
810
824
mode := batchv1 .IndexedCompletion
811
825
jobObj , err := createJobWithDefaults (ctx , clientSet , ns .Name , & batchv1.Job {
@@ -863,6 +877,9 @@ func TestIndexedJob(t *testing.T) {
863
877
validateIndexedJobPods (ctx , t , clientSet , jobObj , nil , "0-3" )
864
878
validateJobSucceeded (ctx , t , clientSet , jobObj )
865
879
validateFinishedPodsNoFinalizer (ctx , t , clientSet , jobObj )
880
+ if wFinalizers {
881
+ validateTerminatedPodsTrackingFinalizerMetric (t , 5 )
882
+ }
866
883
})
867
884
}
868
885
}
@@ -957,6 +974,7 @@ func TestOrphanPodsFinalizersClearedWithGC(t *testing.T) {
957
974
restConfig .QPS = 1
958
975
restConfig .Burst = 1
959
976
jc , ctx , cancel := createJobControllerWithSharedInformers (restConfig , informerSet )
977
+ resetMetrics ()
960
978
defer cancel ()
961
979
restConfig .QPS = 200
962
980
restConfig .Burst = 200
@@ -989,6 +1007,8 @@ func TestOrphanPodsFinalizersClearedWithGC(t *testing.T) {
989
1007
t .Fatalf ("Failed to delete job: %v" , err )
990
1008
}
991
1009
validateNoOrphanPodsWithFinalizers (ctx , t , clientSet , jobObj )
1010
+ // Pods never finished, so they are not counted in the metric.
1011
+ validateTerminatedPodsTrackingFinalizerMetric (t , 0 )
992
1012
})
993
1013
}
994
1014
}
@@ -1676,6 +1696,7 @@ func startJobControllerAndWaitForCaches(restConfig *restclient.Config) (context.
1676
1696
}
1677
1697
1678
1698
func resetMetrics () {
1699
+ metrics .TerminatedPodsTrackingFinalizerTotal .Reset ()
1679
1700
metrics .JobFinishedNum .Reset ()
1680
1701
metrics .JobPodsFinished .Reset ()
1681
1702
}
0 commit comments