From 6287632d2ce705df3d09796903d7064abed49c3a Mon Sep 17 00:00:00 2001 From: baoyinghai_yewu Date: Thu, 26 Sep 2024 16:30:46 +0800 Subject: [PATCH] feat: add log for clustertree node controller Signed-off-by: baoyinghai_yewu --- .gitignore | 6 +++++- .../controllers/node_lease_controller.go | 6 +++++- .../controllers/node_resources_controller.go | 12 ++++++++++-- .../controllers/pod/root_pod_controller.go | 2 +- .../daemonset/host_daemon_controller.go | 4 ++-- .../extensions/daemonset/update.go | 16 ++++++++-------- pkg/kubenest/tasks/endpoint.go | 1 + 7 files changed, 32 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 5654b91d9..c74279251 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,8 @@ ignore_dir cmd/kubenest/node-agent/app.log cmd/kubenest/node-agent/cert.pem cmd/kubenest/node-agent/key.pem -cmd/kubenest/node-agent/agent.env \ No newline at end of file +cmd/kubenest/node-agent/agent.env +hack/k8s-in-k8s/nodes.txt +develop + +cmd/kubenest/node-agent/app/client/app.log diff --git a/pkg/clustertree/cluster-manager/controllers/node_lease_controller.go b/pkg/clustertree/cluster-manager/controllers/node_lease_controller.go index 31eedd15c..afb2970c3 100644 --- a/pkg/clustertree/cluster-manager/controllers/node_lease_controller.go +++ b/pkg/clustertree/cluster-manager/controllers/node_lease_controller.go @@ -66,6 +66,8 @@ func (c *NodeLeaseController) Start(ctx context.Context) error { } func (c *NodeLeaseController) syncNodeStatus(ctx context.Context) { + klog.V(4).Infof("NODESYNC syncNodeStatus start") + defer klog.V(4).Infof("NODESYNC syncNodeStatus done") nodes := make([]*corev1.Node, 0) c.nodeLock.Lock() for _, nodeIndex := range c.nodes { @@ -90,6 +92,8 @@ func (c *NodeLeaseController) updateNodeStatus(ctx context.Context, n []*corev1. 
} func (c *NodeLeaseController) syncLease(ctx context.Context) { + klog.V(4).Infof("NODESYNC syncLease start") + defer klog.V(4).Infof("NODESYNC syncLease done") nodes := make([]*corev1.Node, 0) c.nodeLock.Lock() for _, nodeIndex := range c.nodes { @@ -115,7 +119,7 @@ func (c *NodeLeaseController) syncLease(ctx context.Context) { return } - klog.V(5).Infof("Successfully updated lease") + klog.V(4).Infof("Successfully updated lease") } func (c *NodeLeaseController) createLeaseIfNotExists(ctx context.Context, nodes []*corev1.Node) error { diff --git a/pkg/clustertree/cluster-manager/controllers/node_resources_controller.go b/pkg/clustertree/cluster-manager/controllers/node_resources_controller.go index a396ddd0a..9da203a13 100644 --- a/pkg/clustertree/cluster-manager/controllers/node_resources_controller.go +++ b/pkg/clustertree/cluster-manager/controllers/node_resources_controller.go @@ -161,6 +161,12 @@ func (c *NodeResourcesController) Reconcile(ctx context.Context, request reconci clone.Status.Allocatable = clusterResources clone.Status.Capacity = clusterResources + if !utils.NodeReady(clone) { + klog.V(4).Infof("NODESYNC syncResource, node not ready, node name: %s", clone.Name) + } else { + klog.V(4).Infof("NODESYNC syncResource, node name: %s", clone.Name) + } + patch, err := utils.CreateMergePatch(nodeInRoot, clone) if err != nil { klog.Errorf("Could not CreateMergePatch,Error: %v", err) @@ -168,15 +174,17 @@ func (c *NodeResourcesController) Reconcile(ctx context.Context, request reconci } if _, err = c.RootClientset.CoreV1().Nodes().Patch(ctx, rootNode.Name, types.MergePatchType, patch, metav1.PatchOptions{}); err != nil { + klog.Errorf("(patch) failed to patch node resources: %v, will requeue, node name: %s", err, rootNode.Name) return reconcile.Result{ RequeueAfter: RequeueTime, - }, fmt.Errorf("(patch) failed to patch node resources: %v, will requeue", err) + }, err } if _, err = c.RootClientset.CoreV1().Nodes().PatchStatus(ctx, rootNode.Name, patch); err 
!= nil { + klog.Errorf("(patch-status) failed to patch node resources: %v, will requeue, node name: %s", err, rootNode.Name) return reconcile.Result{ RequeueAfter: RequeueTime, - }, fmt.Errorf("(patch-status) failed to patch node resources: %v, will requeue", err) + }, err } } return reconcile.Result{}, nil diff --git a/pkg/clustertree/cluster-manager/controllers/pod/root_pod_controller.go b/pkg/clustertree/cluster-manager/controllers/pod/root_pod_controller.go index efc9ce699..69329818c 100644 --- a/pkg/clustertree/cluster-manager/controllers/pod/root_pod_controller.go +++ b/pkg/clustertree/cluster-manager/controllers/pod/root_pod_controller.go @@ -1010,7 +1010,7 @@ func (r *RootPodReconciler) UpdatePodInLeafCluster(ctx context.Context, lr *leaf r.changeToMasterCoreDNS(ctx, podCopy, r.Options) } - klog.V(5).Infof("Updating pod %+v", podCopy) + klog.V(4).Infof("Updating pod %+v", podCopy) lcr, err := r.leafClientResource(lr) if err != nil { diff --git a/pkg/clustertree/cluster-manager/extensions/daemonset/host_daemon_controller.go b/pkg/clustertree/cluster-manager/extensions/daemonset/host_daemon_controller.go index 7e602b33a..b14e3c2c3 100644 --- a/pkg/clustertree/cluster-manager/extensions/daemonset/host_daemon_controller.go +++ b/pkg/clustertree/cluster-manager/extensions/daemonset/host_daemon_controller.go @@ -897,10 +897,10 @@ func (dsc *HostDaemonSetsController) podsShouldBeOnNode( if oldestNewPod != nil && oldestOldPod != nil { switch { case !podutil.IsPodReady(oldestOldPod): - klog.V(5).Infof("Pod %s/%s from daemonset %s is no longer ready and will be replaced with newer pod %s", oldestOldPod.Namespace, oldestOldPod.Name, ds.Name, oldestNewPod.Name) + klog.V(4).Infof("Pod %s/%s from daemonset %s is no longer ready and will be replaced with newer pod %s", oldestOldPod.Namespace, oldestOldPod.Name, ds.Name, oldestNewPod.Name) podsToDelete = append(podsToDelete, oldestOldPod.Name) case podutil.IsPodAvailable(oldestNewPod, ds.DaemonSetSpec.MinReadySeconds, 
metav1.Time{Time: dsc.failedPodsBackoff.Clock.Now()}): - klog.V(5).Infof("Pod %s/%s from daemonset %s is now ready and will replace older pod %s", oldestNewPod.Namespace, oldestNewPod.Name, ds.Name, oldestOldPod.Name) + klog.V(4).Infof("Pod %s/%s from daemonset %s is now ready and will replace older pod %s", oldestNewPod.Namespace, oldestNewPod.Name, ds.Name, oldestOldPod.Name) podsToDelete = append(podsToDelete, oldestOldPod.Name) } } diff --git a/pkg/clustertree/cluster-manager/extensions/daemonset/update.go b/pkg/clustertree/cluster-manager/extensions/daemonset/update.go index 40e75c6e6..56b937160 100644 --- a/pkg/clustertree/cluster-manager/extensions/daemonset/update.go +++ b/pkg/clustertree/cluster-manager/extensions/daemonset/update.go @@ -78,7 +78,7 @@ func (dsc *HostDaemonSetsController) rollingUpdate(ctx context.Context, ds *kosm switch { case !podutil.IsPodAvailable(oldPod, ds.DaemonSetSpec.MinReadySeconds, metav1.Time{Time: now}): // the old pod isn't available, so it needs to be replaced - klog.V(5).Infof("DaemonSet %s/%s pod %s on node %s is out of date and not available, allowing replacement", ds.Namespace, ds.Name, oldPod.Name, nodeName) + klog.V(4).Infof("DaemonSet %s/%s pod %s on node %s is out of date and not available, allowing replacement", ds.Namespace, ds.Name, oldPod.Name, nodeName) // record the replacement if allowedReplacementPods == nil { allowedReplacementPods = make([]string, 0, len(nodeToDaemonPods)) @@ -88,7 +88,7 @@ func (dsc *HostDaemonSetsController) rollingUpdate(ctx context.Context, ds *kosm // no point considering any other candidates continue default: - klog.V(5).Infof("DaemonSet %s/%s pod %s on node %s is out of date, this is a candidate to replace", ds.Namespace, ds.Name, oldPod.Name, nodeName) + klog.V(4).Infof("DaemonSet %s/%s pod %s on node %s is out of date, this is a candidate to replace", ds.Namespace, ds.Name, oldPod.Name, nodeName) // record the candidate if candidatePodsToDelete == nil { candidatePodsToDelete = 
make([]string, 0, maxUnavailable) @@ -99,7 +99,7 @@ func (dsc *HostDaemonSetsController) rollingUpdate(ctx context.Context, ds *kosm } // use any of the candidates we can, including the allowedReplacemnntPods - klog.V(5).Infof("DaemonSet %s/%s allowing %d replacements, up to %d unavailable, %d new are unavailable, %d candidates", ds.Namespace, ds.Name, len(allowedReplacementPods), maxUnavailable, numUnavailable, len(candidatePodsToDelete)) + klog.V(4).Infof("DaemonSet %s/%s allowing %d replacements, up to %d unavailable, %d new are unavailable, %d candidates", ds.Namespace, ds.Name, len(allowedReplacementPods), maxUnavailable, numUnavailable, len(candidatePodsToDelete)) remainingUnavailable := maxUnavailable - numUnavailable if remainingUnavailable < 0 { remainingUnavailable = 0 @@ -146,7 +146,7 @@ func (dsc *HostDaemonSetsController) rollingUpdate(ctx context.Context, ds *kosm switch { case !podutil.IsPodAvailable(oldPod, ds.DaemonSetSpec.MinReadySeconds, metav1.Time{Time: now}): // the old pod isn't available, allow it to become a replacement - klog.V(5).Infof("Pod %s on node %s is out of date and not available, allowing replacement", ds.Namespace, ds.Name, oldPod.Name, nodeName) + klog.V(4).Infof("DaemonSet %s/%s pod %s on node %s is out of date and not available, allowing replacement", ds.Namespace, ds.Name, oldPod.Name, nodeName) // record the replacement if allowedNewNodes == nil { allowedNewNodes = make([]string, 0, len(nodeToDaemonPods)) @@ -156,7 +156,7 @@ func (dsc *HostDaemonSetsController) rollingUpdate(ctx context.Context, ds *kosm // no point considering any other candidates continue default: - klog.V(5).Infof("DaemonSet %s/%s pod %s on node %s is out of date, this is a surge candidate", ds.Namespace, ds.Name, oldPod.Name, nodeName) + klog.V(4).Infof("DaemonSet %s/%s pod %s on node %s is out of date, this is a surge candidate", ds.Namespace, ds.Name, oldPod.Name, nodeName) // record the candidate if candidateNewNodes == nil { candidateNewNodes = make([]string, 0, 
maxSurge) @@ -171,13 +171,13 @@ func (dsc *HostDaemonSetsController) rollingUpdate(ctx context.Context, ds *kosm continue } // we're available, delete the old pod - klog.V(5).Infof("DaemonSet %s/%s pod %s on node %s is available, remove %s", ds.Namespace, ds.Name, newPod.Name, nodeName, oldPod.Name) + klog.V(4).Infof("DaemonSet %s/%s pod %s on node %s is available, remove %s", ds.Namespace, ds.Name, newPod.Name, nodeName, oldPod.Name) oldPodsToDelete = append(oldPodsToDelete, oldPod.Name) } } // use any of the candidates we can, including the allowedNewNodes - klog.V(5).Infof("DaemonSet %s/%s allowing %d replacements, surge up to %d, %d are in progress, %d candidates", ds.Namespace, ds.Name, len(allowedNewNodes), maxSurge, numSurge, len(candidateNewNodes)) + klog.V(4).Infof("DaemonSet %s/%s allowing %d replacements, surge up to %d, %d are in progress, %d candidates", ds.Namespace, ds.Name, len(allowedNewNodes), maxSurge, numSurge, len(candidateNewNodes)) remainingSurge := maxSurge - numSurge if remainingSurge < 0 { remainingSurge = 0 @@ -548,7 +548,7 @@ func (dsc *HostDaemonSetsController) updatedDesiredNodeCounts(ds *kosmosv1alpha1 klog.Warningf("DaemonSet %s/%s is not configured for surge or unavailability, defaulting to accepting unavailability", ds.Namespace, ds.Name) maxUnavailable = 1 } - klog.V(5).Infof("DaemonSet %s/%s, maxSurge: %d, maxUnavailable: %d", ds.Namespace, ds.Name, maxSurge, maxUnavailable) + klog.V(4).Infof("DaemonSet %s/%s, maxSurge: %d, maxUnavailable: %d", ds.Namespace, ds.Name, maxSurge, maxUnavailable) return maxSurge, maxUnavailable, nil } diff --git a/pkg/kubenest/tasks/endpoint.go b/pkg/kubenest/tasks/endpoint.go index 4de1231e9..4224b0c68 100644 --- a/pkg/kubenest/tasks/endpoint.go +++ b/pkg/kubenest/tasks/endpoint.go @@ -3,6 +3,7 @@ package tasks import ( "context" "fmt" + "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes"