Skip to content

Commit

Permalink
koordlet: collect metric for host application memory usage with page …
Browse files Browse the repository at this point in the history
…cache (#2273)

Signed-off-by: j4ckstraw <[email protected]>
  • Loading branch information
j4ckstraw authored Dec 18, 2024
1 parent 44b57d8 commit 0615332
Show file tree
Hide file tree
Showing 2 changed files with 310 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"

slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
"github.com/koordinator-sh/koordinator/pkg/koordlet/metriccache"
"github.com/koordinator-sh/koordinator/pkg/koordlet/metrics"
"github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/framework"
Expand Down Expand Up @@ -86,13 +87,26 @@ func (h *hostAppCollector) Started() bool {
return h.started.Load()
}

// defaultMemoryCollectPolicy is the node memory collect policy that
// collectHostAppResUsed falls back to when the NodeMetric spec is nil or does
// not set CollectPolicy.NodeMemoryCollectPolicy.
var defaultMemoryCollectPolicy slov1alpha1.NodeMemoryCollectPolicy = slov1alpha1.UsageWithoutPageCache

func (h *hostAppCollector) collectHostAppResUsed() {
klog.V(6).Info("start collectHostAppResUsed")
nodeSLO := h.statesInformer.GetNodeSLO()
if nodeSLO == nil {
klog.Warningf("get nil node slo during collect host application resource usage")
return
}

nodeMetricSpec := h.statesInformer.GetNodeMetricSpec()
nodeMemoryCollectPolicy := defaultMemoryCollectPolicy
if nodeMetricSpec == nil {
klog.Warningf("get nil nodemetric, use default node memory collect policy: %v", defaultMemoryCollectPolicy)
} else if nodeMetricSpec.CollectPolicy != nil && nodeMetricSpec.CollectPolicy.NodeMemoryCollectPolicy != nil {
nodeMemoryCollectPolicy = *nodeMetricSpec.CollectPolicy.NodeMemoryCollectPolicy
}

count := 0
resourceMetrics := make([]metriccache.MetricSample, 0)
allCPUUsageCores := metriccache.Point{Timestamp: timeNow(), Value: 0}
Expand All @@ -103,9 +117,10 @@ func (h *hostAppCollector) collectHostAppResUsed() {
cgroupDir := util.GetHostAppCgroupRelativePath(&hostApp)
currentCPUUsage, errCPU := h.cgroupReader.ReadCPUAcctUsage(cgroupDir)
memStat, errMem := h.cgroupReader.ReadMemoryStat(cgroupDir)
if errCPU != nil || errMem != nil {
klog.V(4).Infof("cannot collect host application resource usage, cpu reason %v, memory reason %v",
errCPU, errMem)
memUsageWithPageCache, errMem2 := h.cgroupReader.ReadMemoryUsage(cgroupDir)
if errCPU != nil || errMem != nil || errMem2 != nil {
klog.V(4).Infof("cannot collect host application resource usage, cpu reason %v, memoryStat reason %v, memoryUsage reason %v",
errCPU, errMem, errMem2)
continue
}
if memStat == nil {
Expand Down Expand Up @@ -138,17 +153,34 @@ func (h *hostAppCollector) collectHostAppResUsed() {
metriccache.MetricPropertiesFunc.HostApplication(hostApp.Name),
collectTime, float64(memoryUsageValue))
if err != nil {
klog.V(4).Infof("failed to generate memory metrics for host application %s , err %v", hostApp.Name, err)
klog.V(4).Infof("failed to generate memoryUsage metrics for host application %s , err %v", hostApp.Name, err)
return
}

memUsageWithPageCacheMetric, err := metriccache.HostAppMemoryUsageWithPageCacheMetric.GenerateSample(
metriccache.MetricPropertiesFunc.HostApplication(hostApp.Name),
collectTime, float64(memUsageWithPageCache))
if err != nil {
klog.V(4).Infof("failed to generate memoryUsageWithPageCache metrics for host application %s , err %v", hostApp.Name, err)
return
}

metrics.RecordHostApplicationResourceUsage(string(corev1.ResourceCPU), &hostApp, cpuUsageValue)
metrics.RecordHostApplicationResourceUsage(string(corev1.ResourceMemory), &hostApp, float64(memoryUsageValue))
resourceMetrics = append(resourceMetrics, cpuUsageMetric, memUsageMetric)
resourceMetrics = append(resourceMetrics, cpuUsageMetric, memUsageMetric, memUsageWithPageCacheMetric)
klog.V(6).Infof("collect host application %v finished, metric cpu=%v, memory=%v", hostApp.Name, cpuUsageValue, memoryUsageValue)
count++
allCPUUsageCores.Value += cpuUsageValue
allMemoryUsage.Value += float64(memoryUsageValue)
// sum memory usage according to NodeMemoryCollectPolicy
switch nodeMemoryCollectPolicy {
case slov1alpha1.UsageWithoutPageCache:
allMemoryUsage.Value += float64(memoryUsageValue)
case slov1alpha1.UsageWithPageCache:
allMemoryUsage.Value += float64(memUsageWithPageCache)
default:
klog.Warning("unrecognized node memory collect policy, use UsageWithoutPageCache as default")
allMemoryUsage.Value += float64(memoryUsageValue)
}
}

appender := h.appendableDB.Appender()
Expand Down
Loading

0 comments on commit 0615332

Please sign in to comment.