container_resources.go
package collector

import (
"github.com/google-cloud-tools/kube-eagle/options"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
v1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1"
"sync"
)

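// containerResourcesCollector exposes every container's resource requests, limits and current usage as Prometheus gauges.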
type containerResourcesCollector struct {
// Resource limits
limitCPUCoresDesc *prometheus.Desc
limitMemoryBytesDesc *prometheus.Desc
// Resource requests
requestCPUCoresDesc *prometheus.Desc
requestMemoryBytesDesc *prometheus.Desc
// Resource usage
usageCPUCoresDesc *prometheus.Desc
usageMemoryBytesDesc *prometheus.Desc
}

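// init registers this collector under the name "container_resources".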
func init() {
registerCollector("container_resources", newContainerResourcesCollector)
}

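// newContainerResourcesCollector builds the Prometheus metric descriptions for all container resource metrics.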
func newContainerResourcesCollector(opts *options.Options) (Collector, error) {
subsystem := "pod_container_resource"
labels := []string{"pod", "container", "qos", "phase", "namespace", "node"}
return &containerResourcesCollector{
// Prometheus metrics
// Resource limits
limitCPUCoresDesc: prometheus.NewDesc(
prometheus.BuildFQName(opts.Namespace, subsystem, "limits_cpu_cores"),
"The container's CPU limit in Kubernetes",
labels,
prometheus.Labels{},
),
limitMemoryBytesDesc: prometheus.NewDesc(
prometheus.BuildFQName(opts.Namespace, subsystem, "limits_memory_bytes"),
"The container's RAM limit in Kubernetes",
labels,
prometheus.Labels{},
),
// Resource requests
requestCPUCoresDesc: prometheus.NewDesc(
prometheus.BuildFQName(opts.Namespace, subsystem, "requests_cpu_cores"),
"The container's requested CPU resources in Kubernetes",
labels,
prometheus.Labels{},
),
requestMemoryBytesDesc: prometheus.NewDesc(
prometheus.BuildFQName(opts.Namespace, subsystem, "requests_memory_bytes"),
"The container's requested RAM resources in Kubernetes",
labels,
prometheus.Labels{},
),
// Resource usage
usageCPUCoresDesc: prometheus.NewDesc(
prometheus.BuildFQName(opts.Namespace, subsystem, "usage_cpu_cores"),
"CPU usage in number of cores",
labels,
prometheus.Labels{},
),
usageMemoryBytesDesc: prometheus.NewDesc(
prometheus.BuildFQName(opts.Namespace, subsystem, "usage_memory_bytes"),
"RAM usage in bytes",
labels,
prometheus.Labels{},
),
}, nil
}

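// updateMetrics fetches the pod list and the pod usage metrics concurrently, merges both responses and emits one gauge per container and resource.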
func (c *containerResourcesCollector) updateMetrics(ch chan<- prometheus.Metric) error {
log.Debug("Collecting container metrics")
var wg sync.WaitGroup
var podList *corev1.PodList
var podListError error
var podMetricses *v1beta1.PodMetricsList
var podMetricsesError error
// Get pod list
wg.Add(1)
go func() {
defer wg.Done()
podList, podListError = kubernetesClient.PodList()
}()
// Get pod resource usage metrics
wg.Add(1)
go func() {
defer wg.Done()
podMetricses, podMetricsesError = kubernetesClient.PodMetricses()
}()
wg.Wait()
if podListError != nil {
log.Warn("Failed to get podList from Kubernetes", podListError)
return podListError
}
if podMetricsesError != nil {
log.Warn("Failed to get podMetricses from Kubernetes", podMetricsesError)
return podMetricsesError
}
containerMetricses := buildEnrichedContainerMetricses(podList, podMetricses)
for _, containerMetrics := range containerMetricses {
cm := *containerMetrics
labelValues := []string{cm.Pod, cm.Container, cm.Qos, cm.Phase, cm.Namespace, cm.Node}
ch <- prometheus.MustNewConstMetric(c.requestCPUCoresDesc, prometheus.GaugeValue, cm.RequestCPUCores, labelValues...)
ch <- prometheus.MustNewConstMetric(c.requestMemoryBytesDesc, prometheus.GaugeValue, cm.RequestMemoryBytes, labelValues...)
ch <- prometheus.MustNewConstMetric(c.limitCPUCoresDesc, prometheus.GaugeValue, cm.LimitCPUCores, labelValues...)
ch <- prometheus.MustNewConstMetric(c.limitMemoryBytesDesc, prometheus.GaugeValue, cm.LimitMemoryBytes, labelValues...)
ch <- prometheus.MustNewConstMetric(c.usageCPUCoresDesc, prometheus.GaugeValue, cm.UsageCPUCores, labelValues...)
ch <- prometheus.MustNewConstMetric(c.usageMemoryBytesDesc, prometheus.GaugeValue, cm.UsageMemoryBytes, labelValues...)
}
return nil
}

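// enrichedContainerMetricses combines a container's identifying metadata from the pod spec with its requested, limited and currently used CPU and memory.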
type enrichedContainerMetricses struct {
Node string
Pod string
Container string
Qos string
Phase string
Namespace string
RequestCPUCores float64
RequestMemoryBytes float64
LimitCPUCores float64
LimitMemoryBytes float64
UsageCPUCores float64
UsageMemoryBytes float64
}

// buildEnrichedContainerMetricses merges the container metrics from the two requests (the podList request and the
// podMetrics request) into one structure, so that valuable metadata (such as the node name) that is present in only
// one of the two responses can be exposed as Prometheus labels.
func buildEnrichedContainerMetricses(podList *corev1.PodList, podMetricses *v1beta1.PodMetricsList) []*enrichedContainerMetricses {
// Group container metricses by pod name
containerMetricsesByPod := make(map[string]map[string]v1beta1.ContainerMetrics)
for _, pm := range podMetricses.Items {
containerMetricses := make(map[string]v1beta1.ContainerMetrics)
for _, c := range pm.Containers {
containerMetricses[c.Name] = c
}
containerMetricsesByPod[pm.Name] = containerMetricses
}

var containerMetricses []*enrichedContainerMetricses
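// Walk every container (including init containers) of every pod and merge its spec with the matching usage metrics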
for _, podInfo := range podList.Items {
// Collect regular and init containers into a fresh slice so we never append into the pod spec's backing array
containers := make([]corev1.Container, 0, len(podInfo.Spec.Containers)+len(podInfo.Spec.InitContainers))
containers = append(containers, podInfo.Spec.Containers...)
containers = append(containers, podInfo.Spec.InitContainers...)
for _, containerInfo := range containers {
qos := string(podInfo.Status.QOSClass)
// Resources requested, converted from milli-units (MilliValue) back to cores and bytes
requestCPUCores := float64(containerInfo.Resources.Requests.Cpu().MilliValue()) / 1000
requestMemoryBytes := float64(containerInfo.Resources.Requests.Memory().MilliValue()) / 1000
// Resources limit
limitCPUCores := float64(containerInfo.Resources.Limits.Cpu().MilliValue()) / 1000
limitMemoryBytes := float64(containerInfo.Resources.Limits.Memory().MilliValue()) / 1000
// Resources usage
containerUsageMetrics := containerMetricsesByPod[podInfo.Name][containerInfo.Name]
usageCPUCores := float64(containerUsageMetrics.Usage.Cpu().MilliValue()) / 1000
usageMemoryBytes := float64(containerUsageMetrics.Usage.Memory().MilliValue()) / 1000
nodeName := podInfo.Spec.NodeName
metric := &enrichedContainerMetricses{
Node: nodeName,
Container: containerInfo.Name,
Pod: podInfo.Name,
Qos: qos,
Phase: string(podInfo.Status.Phase),
Namespace: podInfo.Namespace,
RequestCPUCores: requestCPUCores,
RequestMemoryBytes: requestMemoryBytes,
LimitCPUCores: limitCPUCores,
LimitMemoryBytes: limitMemoryBytes,
UsageCPUCores: usageCPUCores,
UsageMemoryBytes: usageMemoryBytes,
}
containerMetricses = append(containerMetricses, metric)
}
}
return containerMetricses
}