diff --git a/.gitignore b/.gitignore index fcafc5f..51f73b0 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ debug .vscode +.idea diff --git a/collector/node_resources.go b/collector/node_resources.go index 130c633..46a5bf9 100644 --- a/collector/node_resources.go +++ b/collector/node_resources.go @@ -28,6 +28,7 @@ type nodeResourcesCollector struct { // Resource usage usageCPUCoresDesc *prometheus.Desc usageMemoryBytesDesc *prometheus.Desc + usagePodCount *prometheus.Desc } func init() { @@ -92,6 +93,12 @@ func newNodeResourcesCollector(opts *options.Options) (Collector, error) { labels, prometheus.Labels{}, ), + usagePodCount: prometheus.NewDesc( + prometheus.BuildFQName(opts.Namespace, subsystem, "usage_pod_count"), + "Total number of running pods for each kubernetes node", + labels, + prometheus.Labels{}, + ), }, nil } @@ -163,6 +170,7 @@ func (c *nodeResourcesCollector) updateMetrics(ch chan<- prometheus.Metric) erro ch <- prometheus.MustNewConstMetric(c.requestMemoryBytesDesc, prometheus.GaugeValue, float64(podMetrics.requestedMemoryBytes), n.Name) ch <- prometheus.MustNewConstMetric(c.limitCPUCoresDesc, prometheus.GaugeValue, podMetrics.limitCPUCores, n.Name) ch <- prometheus.MustNewConstMetric(c.limitMemoryBytesDesc, prometheus.GaugeValue, float64(podMetrics.limitMemoryBytes), n.Name) + ch <- prometheus.MustNewConstMetric(c.usagePodCount, prometheus.GaugeValue, float64(podMetrics.podCount), n.Name) } return nil @@ -194,7 +202,6 @@ func getAggregatedPodMetricsByNodeName(pods *corev1.PodList) map[string]aggregat // Iterate through all pod definitions to sum and group pods' resource requests and limits by node name for _, podInfo := range pods.Items { nodeName := podInfo.Spec.NodeName - podCount := podMetrics[nodeName].podCount + 1 // skip not running pods (e. g. failed/succeeded jobs, evicted pods etc.) 
podPhase := podInfo.Status.Phase @@ -202,6 +209,9 @@ func getAggregatedPodMetricsByNodeName(pods *corev1.PodList) map[string]aggregat continue } + // Only count running pods; failed / non-running pods were skipped above + podCount := podMetrics[nodeName].podCount + 1 + for _, c := range podInfo.Spec.Containers { requestedCPUCores := float64(c.Resources.Requests.Cpu().MilliValue()) / 1000 requestedMemoryBytes := c.Resources.Requests.Memory().MilliValue() / 1000